diff options
Diffstat (limited to 'fs')
423 files changed, 10951 insertions, 7865 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index f47c6bbb01b3..88418c419ea7 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h | |||
@@ -52,7 +52,7 @@ void v9fs_destroy_inode(struct inode *inode); | |||
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); | 54 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); |
55 | void v9fs_clear_inode(struct inode *inode); | 55 | void v9fs_evict_inode(struct inode *inode); |
56 | ino_t v9fs_qid2ino(struct p9_qid *qid); | 56 | ino_t v9fs_qid2ino(struct p9_qid *qid); |
57 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); | 57 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); |
58 | void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); | 58 | void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 6e94f3247cec..d97c34a24f7a 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -430,8 +430,10 @@ error: | |||
430 | * @inode: inode to release | 430 | * @inode: inode to release |
431 | * | 431 | * |
432 | */ | 432 | */ |
433 | void v9fs_clear_inode(struct inode *inode) | 433 | void v9fs_evict_inode(struct inode *inode) |
434 | { | 434 | { |
435 | truncate_inode_pages(inode->i_mapping, 0); | ||
436 | end_writeback(inode); | ||
435 | filemap_fdatawrite(inode->i_mapping); | 437 | filemap_fdatawrite(inode->i_mapping); |
436 | 438 | ||
437 | #ifdef CONFIG_9P_FSCACHE | 439 | #ifdef CONFIG_9P_FSCACHE |
@@ -1209,10 +1211,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
1209 | } | 1211 | } |
1210 | 1212 | ||
1211 | retval = p9_client_wstat(fid, &wstat); | 1213 | retval = p9_client_wstat(fid, &wstat); |
1212 | if (retval >= 0) | 1214 | if (retval < 0) |
1213 | retval = inode_setattr(dentry->d_inode, iattr); | 1215 | return retval; |
1214 | 1216 | ||
1215 | return retval; | 1217 | if ((iattr->ia_valid & ATTR_SIZE) && |
1218 | iattr->ia_size != i_size_read(dentry->d_inode)) { | ||
1219 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); | ||
1220 | if (retval) | ||
1221 | return retval; | ||
1222 | } | ||
1223 | |||
1224 | setattr_copy(dentry->d_inode, iattr); | ||
1225 | mark_inode_dirty(dentry->d_inode); | ||
1226 | return 0; | ||
1216 | } | 1227 | } |
1217 | 1228 | ||
1218 | /** | 1229 | /** |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 4b9ede0b41b7..f9311077de68 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -266,7 +266,7 @@ static const struct super_operations v9fs_super_ops = { | |||
266 | .destroy_inode = v9fs_destroy_inode, | 266 | .destroy_inode = v9fs_destroy_inode, |
267 | #endif | 267 | #endif |
268 | .statfs = simple_statfs, | 268 | .statfs = simple_statfs, |
269 | .clear_inode = v9fs_clear_inode, | 269 | .evict_inode = v9fs_evict_inode, |
270 | .show_options = generic_show_options, | 270 | .show_options = generic_show_options, |
271 | .umount_begin = v9fs_umount_begin, | 271 | .umount_begin = v9fs_umount_begin, |
272 | }; | 272 | }; |
@@ -277,7 +277,7 @@ static const struct super_operations v9fs_super_ops_dotl = { | |||
277 | .destroy_inode = v9fs_destroy_inode, | 277 | .destroy_inode = v9fs_destroy_inode, |
278 | #endif | 278 | #endif |
279 | .statfs = v9fs_statfs, | 279 | .statfs = v9fs_statfs, |
280 | .clear_inode = v9fs_clear_inode, | 280 | .evict_inode = v9fs_evict_inode, |
281 | .show_options = generic_show_options, | 281 | .show_options = generic_show_options, |
282 | .umount_begin = v9fs_umount_begin, | 282 | .umount_begin = v9fs_umount_begin, |
283 | }; | 283 | }; |
diff --git a/fs/Kconfig b/fs/Kconfig index 5f85b5947613..3d185308ec88 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -64,7 +64,7 @@ source "fs/autofs4/Kconfig" | |||
64 | source "fs/fuse/Kconfig" | 64 | source "fs/fuse/Kconfig" |
65 | 65 | ||
66 | config CUSE | 66 | config CUSE |
67 | tristate "Character device in Userpace support" | 67 | tristate "Character device in Userspace support" |
68 | depends on FUSE_FS | 68 | depends on FUSE_FS |
69 | help | 69 | help |
70 | This FUSE extension allows character devices to be | 70 | This FUSE extension allows character devices to be |
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 6f850b06ab62..65794b8fe79e 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c | |||
@@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping, | |||
50 | loff_t pos, unsigned len, unsigned flags, | 50 | loff_t pos, unsigned len, unsigned flags, |
51 | struct page **pagep, void **fsdata) | 51 | struct page **pagep, void **fsdata) |
52 | { | 52 | { |
53 | int ret; | ||
54 | |||
53 | *pagep = NULL; | 55 | *pagep = NULL; |
54 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 56 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
55 | adfs_get_block, | 57 | adfs_get_block, |
56 | &ADFS_I(mapping->host)->mmu_private); | 58 | &ADFS_I(mapping->host)->mmu_private); |
59 | if (unlikely(ret)) { | ||
60 | loff_t isize = mapping->host->i_size; | ||
61 | if (pos + len > isize) | ||
62 | vmtruncate(mapping->host, isize); | ||
63 | } | ||
64 | |||
65 | return ret; | ||
57 | } | 66 | } |
58 | 67 | ||
59 | static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) | 68 | static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) |
@@ -324,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) | |||
324 | 333 | ||
325 | /* XXX: this is missing some actual on-disk truncation.. */ | 334 | /* XXX: this is missing some actual on-disk truncation.. */ |
326 | if (ia_valid & ATTR_SIZE) | 335 | if (ia_valid & ATTR_SIZE) |
327 | error = simple_setsize(inode, attr->ia_size); | 336 | truncate_setsize(inode, attr->ia_size); |
328 | |||
329 | if (error) | ||
330 | goto out; | ||
331 | 337 | ||
332 | if (ia_valid & ATTR_MTIME) { | 338 | if (ia_valid & ATTR_MTIME) { |
333 | inode->i_mtime = attr->ia_mtime; | 339 | inode->i_mtime = attr->ia_mtime; |
diff --git a/fs/affs/affs.h b/fs/affs/affs.h index f05b6155ccc8..a8cbdeb34025 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h | |||
@@ -171,8 +171,7 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
171 | extern unsigned long affs_parent_ino(struct inode *dir); | 171 | extern unsigned long affs_parent_ino(struct inode *dir); |
172 | extern struct inode *affs_new_inode(struct inode *dir); | 172 | extern struct inode *affs_new_inode(struct inode *dir); |
173 | extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); | 173 | extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); |
174 | extern void affs_delete_inode(struct inode *inode); | 174 | extern void affs_evict_inode(struct inode *inode); |
175 | extern void affs_clear_inode(struct inode *inode); | ||
176 | extern struct inode *affs_iget(struct super_block *sb, | 175 | extern struct inode *affs_iget(struct super_block *sb, |
177 | unsigned long ino); | 176 | unsigned long ino); |
178 | extern int affs_write_inode(struct inode *inode, | 177 | extern int affs_write_inode(struct inode *inode, |
diff --git a/fs/affs/file.c b/fs/affs/file.c index 322710c3eedf..c4a9875bd1a6 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping, | |||
406 | loff_t pos, unsigned len, unsigned flags, | 406 | loff_t pos, unsigned len, unsigned flags, |
407 | struct page **pagep, void **fsdata) | 407 | struct page **pagep, void **fsdata) |
408 | { | 408 | { |
409 | int ret; | ||
410 | |||
409 | *pagep = NULL; | 411 | *pagep = NULL; |
410 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 412 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
411 | affs_get_block, | 413 | affs_get_block, |
412 | &AFFS_I(mapping->host)->mmu_private); | 414 | &AFFS_I(mapping->host)->mmu_private); |
415 | if (unlikely(ret)) { | ||
416 | loff_t isize = mapping->host->i_size; | ||
417 | if (pos + len > isize) | ||
418 | vmtruncate(mapping->host, isize); | ||
419 | } | ||
420 | |||
421 | return ret; | ||
413 | } | 422 | } |
414 | 423 | ||
415 | static sector_t _affs_bmap(struct address_space *mapping, sector_t block) | 424 | static sector_t _affs_bmap(struct address_space *mapping, sector_t block) |
diff --git a/fs/affs/inode.c b/fs/affs/inode.c index f4b2a4ee4f91..3a0fdec175ba 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c | |||
@@ -235,31 +235,36 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr) | |||
235 | goto out; | 235 | goto out; |
236 | } | 236 | } |
237 | 237 | ||
238 | error = inode_setattr(inode, attr); | 238 | if ((attr->ia_valid & ATTR_SIZE) && |
239 | if (!error && (attr->ia_valid & ATTR_MODE)) | 239 | attr->ia_size != i_size_read(inode)) { |
240 | error = vmtruncate(inode, attr->ia_size); | ||
241 | if (error) | ||
242 | return error; | ||
243 | } | ||
244 | |||
245 | setattr_copy(inode, attr); | ||
246 | mark_inode_dirty(inode); | ||
247 | |||
248 | if (attr->ia_valid & ATTR_MODE) | ||
240 | mode_to_prot(inode); | 249 | mode_to_prot(inode); |
241 | out: | 250 | out: |
242 | return error; | 251 | return error; |
243 | } | 252 | } |
244 | 253 | ||
245 | void | 254 | void |
246 | affs_delete_inode(struct inode *inode) | 255 | affs_evict_inode(struct inode *inode) |
247 | { | ||
248 | pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); | ||
249 | truncate_inode_pages(&inode->i_data, 0); | ||
250 | inode->i_size = 0; | ||
251 | affs_truncate(inode); | ||
252 | clear_inode(inode); | ||
253 | affs_free_block(inode->i_sb, inode->i_ino); | ||
254 | } | ||
255 | |||
256 | void | ||
257 | affs_clear_inode(struct inode *inode) | ||
258 | { | 256 | { |
259 | unsigned long cache_page; | 257 | unsigned long cache_page; |
258 | pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); | ||
259 | truncate_inode_pages(&inode->i_data, 0); | ||
260 | 260 | ||
261 | pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); | 261 | if (!inode->i_nlink) { |
262 | inode->i_size = 0; | ||
263 | affs_truncate(inode); | ||
264 | } | ||
262 | 265 | ||
266 | invalidate_inode_buffers(inode); | ||
267 | end_writeback(inode); | ||
263 | affs_free_prealloc(inode); | 268 | affs_free_prealloc(inode); |
264 | cache_page = (unsigned long)AFFS_I(inode)->i_lc; | 269 | cache_page = (unsigned long)AFFS_I(inode)->i_lc; |
265 | if (cache_page) { | 270 | if (cache_page) { |
@@ -271,6 +276,9 @@ affs_clear_inode(struct inode *inode) | |||
271 | affs_brelse(AFFS_I(inode)->i_ext_bh); | 276 | affs_brelse(AFFS_I(inode)->i_ext_bh); |
272 | AFFS_I(inode)->i_ext_last = ~1; | 277 | AFFS_I(inode)->i_ext_last = ~1; |
273 | AFFS_I(inode)->i_ext_bh = NULL; | 278 | AFFS_I(inode)->i_ext_bh = NULL; |
279 | |||
280 | if (!inode->i_nlink) | ||
281 | affs_free_block(inode->i_sb, inode->i_ino); | ||
274 | } | 282 | } |
275 | 283 | ||
276 | struct inode * | 284 | struct inode * |
diff --git a/fs/affs/super.c b/fs/affs/super.c index 16a3e4765f68..33c4e7eef470 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); | |||
26 | static int affs_remount (struct super_block *sb, int *flags, char *data); | 26 | static int affs_remount (struct super_block *sb, int *flags, char *data); |
27 | 27 | ||
28 | static void | 28 | static void |
29 | affs_commit_super(struct super_block *sb, int clean) | 29 | affs_commit_super(struct super_block *sb, int wait, int clean) |
30 | { | 30 | { |
31 | struct affs_sb_info *sbi = AFFS_SB(sb); | 31 | struct affs_sb_info *sbi = AFFS_SB(sb); |
32 | struct buffer_head *bh = sbi->s_root_bh; | 32 | struct buffer_head *bh = sbi->s_root_bh; |
@@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean) | |||
36 | secs_to_datestamp(get_seconds(), &tail->disk_change); | 36 | secs_to_datestamp(get_seconds(), &tail->disk_change); |
37 | affs_fix_checksum(sb, bh); | 37 | affs_fix_checksum(sb, bh); |
38 | mark_buffer_dirty(bh); | 38 | mark_buffer_dirty(bh); |
39 | if (wait) | ||
40 | sync_dirty_buffer(bh); | ||
39 | } | 41 | } |
40 | 42 | ||
41 | static void | 43 | static void |
@@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb) | |||
46 | 48 | ||
47 | lock_kernel(); | 49 | lock_kernel(); |
48 | 50 | ||
49 | if (!(sb->s_flags & MS_RDONLY)) | 51 | if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) |
50 | affs_commit_super(sb, 1); | 52 | affs_commit_super(sb, 1, 1); |
51 | 53 | ||
52 | kfree(sbi->s_prefix); | 54 | kfree(sbi->s_prefix); |
53 | affs_free_bitmap(sb); | 55 | affs_free_bitmap(sb); |
@@ -61,27 +63,20 @@ affs_put_super(struct super_block *sb) | |||
61 | static void | 63 | static void |
62 | affs_write_super(struct super_block *sb) | 64 | affs_write_super(struct super_block *sb) |
63 | { | 65 | { |
64 | int clean = 2; | ||
65 | |||
66 | lock_super(sb); | 66 | lock_super(sb); |
67 | if (!(sb->s_flags & MS_RDONLY)) { | 67 | if (!(sb->s_flags & MS_RDONLY)) |
68 | // if (sbi->s_bitmap[i].bm_bh) { | 68 | affs_commit_super(sb, 1, 2); |
69 | // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) { | 69 | sb->s_dirt = 0; |
70 | // clean = 0; | ||
71 | affs_commit_super(sb, clean); | ||
72 | sb->s_dirt = !clean; /* redo until bitmap synced */ | ||
73 | } else | ||
74 | sb->s_dirt = 0; | ||
75 | unlock_super(sb); | 70 | unlock_super(sb); |
76 | 71 | ||
77 | pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); | 72 | pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); |
78 | } | 73 | } |
79 | 74 | ||
80 | static int | 75 | static int |
81 | affs_sync_fs(struct super_block *sb, int wait) | 76 | affs_sync_fs(struct super_block *sb, int wait) |
82 | { | 77 | { |
83 | lock_super(sb); | 78 | lock_super(sb); |
84 | affs_commit_super(sb, 2); | 79 | affs_commit_super(sb, wait, 2); |
85 | sb->s_dirt = 0; | 80 | sb->s_dirt = 0; |
86 | unlock_super(sb); | 81 | unlock_super(sb); |
87 | return 0; | 82 | return 0; |
@@ -140,8 +135,7 @@ static const struct super_operations affs_sops = { | |||
140 | .alloc_inode = affs_alloc_inode, | 135 | .alloc_inode = affs_alloc_inode, |
141 | .destroy_inode = affs_destroy_inode, | 136 | .destroy_inode = affs_destroy_inode, |
142 | .write_inode = affs_write_inode, | 137 | .write_inode = affs_write_inode, |
143 | .delete_inode = affs_delete_inode, | 138 | .evict_inode = affs_evict_inode, |
144 | .clear_inode = affs_clear_inode, | ||
145 | .put_super = affs_put_super, | 139 | .put_super = affs_put_super, |
146 | .write_super = affs_write_super, | 140 | .write_super = affs_write_super, |
147 | .sync_fs = affs_sync_fs, | 141 | .sync_fs = affs_sync_fs, |
@@ -554,9 +548,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) | |||
554 | return 0; | 548 | return 0; |
555 | } | 549 | } |
556 | if (*flags & MS_RDONLY) { | 550 | if (*flags & MS_RDONLY) { |
557 | sb->s_dirt = 1; | 551 | affs_write_super(sb); |
558 | while (sb->s_dirt) | ||
559 | affs_write_super(sb); | ||
560 | affs_free_bitmap(sb); | 552 | affs_free_bitmap(sb); |
561 | } else | 553 | } else |
562 | res = affs_init_bitmap(sb, flags); | 554 | res = affs_init_bitmap(sb, flags); |
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index 5c4e61d3c772..8f975f25b486 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig | |||
@@ -2,6 +2,7 @@ config AFS_FS | |||
2 | tristate "Andrew File System support (AFS) (EXPERIMENTAL)" | 2 | tristate "Andrew File System support (AFS) (EXPERIMENTAL)" |
3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
4 | select AF_RXRPC | 4 | select AF_RXRPC |
5 | select DNS_RESOLVER | ||
5 | help | 6 | help |
6 | If you say Y here, you will get an experimental Andrew File System | 7 | If you say Y here, you will get an experimental Andrew File System |
7 | driver. It currently only supports unsecured read-only AFS access. | 8 | driver. It currently only supports unsecured read-only AFS access. |
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index e19c13f059ed..ffea35c63879 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/key.h> | 14 | #include <linux/key.h> |
15 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
16 | #include <linux/dns_resolver.h> | ||
16 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
17 | #include <keys/rxrpc-type.h> | 18 | #include <keys/rxrpc-type.h> |
18 | #include "internal.h" | 19 | #include "internal.h" |
@@ -36,6 +37,8 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
36 | struct key *key; | 37 | struct key *key; |
37 | size_t namelen; | 38 | size_t namelen; |
38 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; | 39 | char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next; |
40 | char *dvllist = NULL, *_vllist = NULL; | ||
41 | char delimiter = ':'; | ||
39 | int ret; | 42 | int ret; |
40 | 43 | ||
41 | _enter("%s,%s", name, vllist); | 44 | _enter("%s,%s", name, vllist); |
@@ -43,8 +46,10 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
43 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ | 46 | BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */ |
44 | 47 | ||
45 | namelen = strlen(name); | 48 | namelen = strlen(name); |
46 | if (namelen > AFS_MAXCELLNAME) | 49 | if (namelen > AFS_MAXCELLNAME) { |
50 | _leave(" = -ENAMETOOLONG"); | ||
47 | return ERR_PTR(-ENAMETOOLONG); | 51 | return ERR_PTR(-ENAMETOOLONG); |
52 | } | ||
48 | 53 | ||
49 | /* allocate and initialise a cell record */ | 54 | /* allocate and initialise a cell record */ |
50 | cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); | 55 | cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL); |
@@ -64,15 +69,31 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
64 | INIT_LIST_HEAD(&cell->vl_list); | 69 | INIT_LIST_HEAD(&cell->vl_list); |
65 | spin_lock_init(&cell->vl_lock); | 70 | spin_lock_init(&cell->vl_lock); |
66 | 71 | ||
72 | /* if the ip address is invalid, try dns query */ | ||
73 | if (!vllist || strlen(vllist) < 7) { | ||
74 | ret = dns_query("afsdb", name, namelen, "ipv4", &dvllist, NULL); | ||
75 | if (ret < 0) { | ||
76 | _leave(" = %d", ret); | ||
77 | return ERR_PTR(ret); | ||
78 | } | ||
79 | _vllist = dvllist; | ||
80 | |||
81 | /* change the delimiter for user-space reply */ | ||
82 | delimiter = ','; | ||
83 | |||
84 | } else { | ||
85 | _vllist = vllist; | ||
86 | } | ||
87 | |||
67 | /* fill in the VL server list from the rest of the string */ | 88 | /* fill in the VL server list from the rest of the string */ |
68 | do { | 89 | do { |
69 | unsigned a, b, c, d; | 90 | unsigned a, b, c, d; |
70 | 91 | ||
71 | next = strchr(vllist, ':'); | 92 | next = strchr(_vllist, delimiter); |
72 | if (next) | 93 | if (next) |
73 | *next++ = 0; | 94 | *next++ = 0; |
74 | 95 | ||
75 | if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) | 96 | if (sscanf(_vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4) |
76 | goto bad_address; | 97 | goto bad_address; |
77 | 98 | ||
78 | if (a > 255 || b > 255 || c > 255 || d > 255) | 99 | if (a > 255 || b > 255 || c > 255 || d > 255) |
@@ -81,7 +102,7 @@ static struct afs_cell *afs_cell_alloc(const char *name, char *vllist) | |||
81 | cell->vl_addrs[cell->vl_naddrs++].s_addr = | 102 | cell->vl_addrs[cell->vl_naddrs++].s_addr = |
82 | htonl((a << 24) | (b << 16) | (c << 8) | d); | 103 | htonl((a << 24) | (b << 16) | (c << 8) | d); |
83 | 104 | ||
84 | } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next)); | 105 | } while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (_vllist = next)); |
85 | 106 | ||
86 | /* create a key to represent an anonymous user */ | 107 | /* create a key to represent an anonymous user */ |
87 | memcpy(keyname, "afs@", 4); | 108 | memcpy(keyname, "afs@", 4); |
@@ -110,6 +131,7 @@ bad_address: | |||
110 | ret = -EINVAL; | 131 | ret = -EINVAL; |
111 | error: | 132 | error: |
112 | key_put(cell->anonymous_key); | 133 | key_put(cell->anonymous_key); |
134 | kfree(dvllist); | ||
113 | kfree(cell); | 135 | kfree(cell); |
114 | _leave(" = %d", ret); | 136 | _leave(" = %d", ret); |
115 | return ERR_PTR(ret); | 137 | return ERR_PTR(ret); |
@@ -201,14 +223,12 @@ int afs_cell_init(char *rootcell) | |||
201 | } | 223 | } |
202 | 224 | ||
203 | cp = strchr(rootcell, ':'); | 225 | cp = strchr(rootcell, ':'); |
204 | if (!cp) { | 226 | if (!cp) |
205 | printk(KERN_ERR "kAFS: no VL server IP addresses specified\n"); | 227 | _debug("kAFS: no VL server IP addresses specified"); |
206 | _leave(" = -EINVAL"); | 228 | else |
207 | return -EINVAL; | 229 | *cp++ = 0; |
208 | } | ||
209 | 230 | ||
210 | /* allocate a cell record for the root cell */ | 231 | /* allocate a cell record for the root cell */ |
211 | *cp++ = 0; | ||
212 | new_root = afs_cell_create(rootcell, cp); | 232 | new_root = afs_cell_create(rootcell, cp); |
213 | if (IS_ERR(new_root)) { | 233 | if (IS_ERR(new_root)) { |
214 | _leave(" = %ld", PTR_ERR(new_root)); | 234 | _leave(" = %ld", PTR_ERR(new_root)); |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index d00b312e3110..320ffef11574 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -316,7 +316,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
316 | /* | 316 | /* |
317 | * clear an AFS inode | 317 | * clear an AFS inode |
318 | */ | 318 | */ |
319 | void afs_clear_inode(struct inode *inode) | 319 | void afs_evict_inode(struct inode *inode) |
320 | { | 320 | { |
321 | struct afs_permits *permits; | 321 | struct afs_permits *permits; |
322 | struct afs_vnode *vnode; | 322 | struct afs_vnode *vnode; |
@@ -335,6 +335,9 @@ void afs_clear_inode(struct inode *inode) | |||
335 | 335 | ||
336 | ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); | 336 | ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); |
337 | 337 | ||
338 | truncate_inode_pages(&inode->i_data, 0); | ||
339 | end_writeback(inode); | ||
340 | |||
338 | afs_give_up_callback(vnode); | 341 | afs_give_up_callback(vnode); |
339 | 342 | ||
340 | if (vnode->server) { | 343 | if (vnode->server) { |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5f679b77ce24..8679089ce9a1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -565,7 +565,7 @@ extern void afs_zap_data(struct afs_vnode *); | |||
565 | extern int afs_validate(struct afs_vnode *, struct key *); | 565 | extern int afs_validate(struct afs_vnode *, struct key *); |
566 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 566 | extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
567 | extern int afs_setattr(struct dentry *, struct iattr *); | 567 | extern int afs_setattr(struct dentry *, struct iattr *); |
568 | extern void afs_clear_inode(struct inode *); | 568 | extern void afs_evict_inode(struct inode *); |
569 | 569 | ||
570 | /* | 570 | /* |
571 | * main.c | 571 | * main.c |
diff --git a/fs/afs/main.c b/fs/afs/main.c index 66d54d348c55..cfd1cbe25b22 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -111,6 +111,8 @@ static int __init afs_init(void) | |||
111 | 111 | ||
112 | /* initialise the callback update process */ | 112 | /* initialise the callback update process */ |
113 | ret = afs_callback_update_init(); | 113 | ret = afs_callback_update_init(); |
114 | if (ret < 0) | ||
115 | goto error_callback_update_init; | ||
114 | 116 | ||
115 | /* create the RxRPC transport */ | 117 | /* create the RxRPC transport */ |
116 | ret = afs_open_socket(); | 118 | ret = afs_open_socket(); |
@@ -127,15 +129,16 @@ static int __init afs_init(void) | |||
127 | error_fs: | 129 | error_fs: |
128 | afs_close_socket(); | 130 | afs_close_socket(); |
129 | error_open_socket: | 131 | error_open_socket: |
132 | afs_callback_update_kill(); | ||
133 | error_callback_update_init: | ||
134 | afs_vlocation_purge(); | ||
130 | error_vl_update_init: | 135 | error_vl_update_init: |
136 | afs_cell_purge(); | ||
131 | error_cell_init: | 137 | error_cell_init: |
132 | #ifdef CONFIG_AFS_FSCACHE | 138 | #ifdef CONFIG_AFS_FSCACHE |
133 | fscache_unregister_netfs(&afs_cache_netfs); | 139 | fscache_unregister_netfs(&afs_cache_netfs); |
134 | error_cache: | 140 | error_cache: |
135 | #endif | 141 | #endif |
136 | afs_callback_update_kill(); | ||
137 | afs_vlocation_purge(); | ||
138 | afs_cell_purge(); | ||
139 | afs_proc_cleanup(); | 142 | afs_proc_cleanup(); |
140 | rcu_barrier(); | 143 | rcu_barrier(); |
141 | printk(KERN_ERR "kAFS: failed to register: %d\n", ret); | 144 | printk(KERN_ERR "kAFS: failed to register: %d\n", ret); |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 67cf810e0fd6..654d8fdbf01f 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -100,6 +100,7 @@ int afs_open_socket(void) | |||
100 | ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); | 100 | ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); |
101 | if (ret < 0) { | 101 | if (ret < 0) { |
102 | sock_release(socket); | 102 | sock_release(socket); |
103 | destroy_workqueue(afs_async_calls); | ||
103 | _leave(" = %d [bind]", ret); | 104 | _leave(" = %d [bind]", ret); |
104 | return ret; | 105 | return ret; |
105 | } | 106 | } |
diff --git a/fs/afs/super.c b/fs/afs/super.c index e932e5a3a0c1..9cf80f02da16 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -49,7 +49,7 @@ static const struct super_operations afs_super_ops = { | |||
49 | .statfs = afs_statfs, | 49 | .statfs = afs_statfs, |
50 | .alloc_inode = afs_alloc_inode, | 50 | .alloc_inode = afs_alloc_inode, |
51 | .destroy_inode = afs_destroy_inode, | 51 | .destroy_inode = afs_destroy_inode, |
52 | .clear_inode = afs_clear_inode, | 52 | .evict_inode = afs_evict_inode, |
53 | .put_super = afs_put_super, | 53 | .put_super = afs_put_super, |
54 | .show_options = generic_show_options, | 54 | .show_options = generic_show_options, |
55 | }; | 55 | }; |
@@ -1277,7 +1277,7 @@ out: | |||
1277 | /* sys_io_destroy: | 1277 | /* sys_io_destroy: |
1278 | * Destroy the aio_context specified. May cancel any outstanding | 1278 | * Destroy the aio_context specified. May cancel any outstanding |
1279 | * AIOs and block on completion. Will fail with -ENOSYS if not | 1279 | * AIOs and block on completion. Will fail with -ENOSYS if not |
1280 | * implemented. May fail with -EFAULT if the context pointed to | 1280 | * implemented. May fail with -EINVAL if the context pointed to |
1281 | * is invalid. | 1281 | * is invalid. |
1282 | */ | 1282 | */ |
1283 | SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) | 1283 | SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) |
@@ -1795,15 +1795,16 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, | |||
1795 | 1795 | ||
1796 | /* io_getevents: | 1796 | /* io_getevents: |
1797 | * Attempts to read at least min_nr events and up to nr events from | 1797 | * Attempts to read at least min_nr events and up to nr events from |
1798 | * the completion queue for the aio_context specified by ctx_id. May | 1798 | * the completion queue for the aio_context specified by ctx_id. If |
1799 | * fail with -EINVAL if ctx_id is invalid, if min_nr is out of range, | 1799 | * it succeeds, the number of read events is returned. May fail with |
1800 | * if nr is out of range, if when is out of range. May fail with | 1800 | * -EINVAL if ctx_id is invalid, if min_nr is out of range, if nr is |
1801 | * -EFAULT if any of the memory specified to is invalid. May return | 1801 | * out of range, if timeout is out of range. May fail with -EFAULT |
1802 | * 0 or < min_nr if no events are available and the timeout specified | 1802 | * if any of the memory specified is invalid. May return 0 or |
1803 | * by when has elapsed, where when == NULL specifies an infinite | 1803 | * < min_nr if the timeout specified by timeout has elapsed |
1804 | * timeout. Note that the timeout pointed to by when is relative and | 1804 | * before sufficient events are available, where timeout == NULL |
1805 | * will be updated if not NULL and the operation blocks. Will fail | 1805 | * specifies an infinite timeout. Note that the timeout pointed to by |
1806 | * with -ENOSYS if not implemented. | 1806 | * timeout is relative and will be updated if not NULL and the |
1807 | * operation blocks. Will fail with -ENOSYS if not implemented. | ||
1807 | */ | 1808 | */ |
1808 | SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, | 1809 | SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, |
1809 | long, min_nr, | 1810 | long, min_nr, |
@@ -14,35 +14,53 @@ | |||
14 | #include <linux/fcntl.h> | 14 | #include <linux/fcntl.h> |
15 | #include <linux/security.h> | 15 | #include <linux/security.h> |
16 | 16 | ||
17 | /* Taken over from the old code... */ | 17 | /** |
18 | 18 | * inode_change_ok - check if attribute changes to an inode are allowed | |
19 | /* POSIX UID/GID verification for setting inode attributes. */ | 19 | * @inode: inode to check |
20 | * @attr: attributes to change | ||
21 | * | ||
22 | * Check if we are allowed to change the attributes contained in @attr | ||
23 | * in the given inode. This includes the normal unix access permission | ||
24 | * checks, as well as checks for rlimits and others. | ||
25 | * | ||
26 | * Should be called as the first thing in ->setattr implementations, | ||
27 | * possibly after taking additional locks. | ||
28 | */ | ||
20 | int inode_change_ok(const struct inode *inode, struct iattr *attr) | 29 | int inode_change_ok(const struct inode *inode, struct iattr *attr) |
21 | { | 30 | { |
22 | int retval = -EPERM; | ||
23 | unsigned int ia_valid = attr->ia_valid; | 31 | unsigned int ia_valid = attr->ia_valid; |
24 | 32 | ||
33 | /* | ||
34 | * First check size constraints. These can't be overriden using | ||
35 | * ATTR_FORCE. | ||
36 | */ | ||
37 | if (ia_valid & ATTR_SIZE) { | ||
38 | int error = inode_newsize_ok(inode, attr->ia_size); | ||
39 | if (error) | ||
40 | return error; | ||
41 | } | ||
42 | |||
25 | /* If force is set do it anyway. */ | 43 | /* If force is set do it anyway. */ |
26 | if (ia_valid & ATTR_FORCE) | 44 | if (ia_valid & ATTR_FORCE) |
27 | goto fine; | 45 | return 0; |
28 | 46 | ||
29 | /* Make sure a caller can chown. */ | 47 | /* Make sure a caller can chown. */ |
30 | if ((ia_valid & ATTR_UID) && | 48 | if ((ia_valid & ATTR_UID) && |
31 | (current_fsuid() != inode->i_uid || | 49 | (current_fsuid() != inode->i_uid || |
32 | attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) | 50 | attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) |
33 | goto error; | 51 | return -EPERM; |
34 | 52 | ||
35 | /* Make sure caller can chgrp. */ | 53 | /* Make sure caller can chgrp. */ |
36 | if ((ia_valid & ATTR_GID) && | 54 | if ((ia_valid & ATTR_GID) && |
37 | (current_fsuid() != inode->i_uid || | 55 | (current_fsuid() != inode->i_uid || |
38 | (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && | 56 | (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && |
39 | !capable(CAP_CHOWN)) | 57 | !capable(CAP_CHOWN)) |
40 | goto error; | 58 | return -EPERM; |
41 | 59 | ||
42 | /* Make sure a caller can chmod. */ | 60 | /* Make sure a caller can chmod. */ |
43 | if (ia_valid & ATTR_MODE) { | 61 | if (ia_valid & ATTR_MODE) { |
44 | if (!is_owner_or_cap(inode)) | 62 | if (!is_owner_or_cap(inode)) |
45 | goto error; | 63 | return -EPERM; |
46 | /* Also check the setgid bit! */ | 64 | /* Also check the setgid bit! */ |
47 | if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : | 65 | if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : |
48 | inode->i_gid) && !capable(CAP_FSETID)) | 66 | inode->i_gid) && !capable(CAP_FSETID)) |
@@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) | |||
52 | /* Check for setting the inode time. */ | 70 | /* Check for setting the inode time. */ |
53 | if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { | 71 | if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { |
54 | if (!is_owner_or_cap(inode)) | 72 | if (!is_owner_or_cap(inode)) |
55 | goto error; | 73 | return -EPERM; |
56 | } | 74 | } |
57 | fine: | 75 | |
58 | retval = 0; | 76 | return 0; |
59 | error: | ||
60 | return retval; | ||
61 | } | 77 | } |
62 | EXPORT_SYMBOL(inode_change_ok); | 78 | EXPORT_SYMBOL(inode_change_ok); |
63 | 79 | ||
@@ -105,21 +121,21 @@ out_big: | |||
105 | EXPORT_SYMBOL(inode_newsize_ok); | 121 | EXPORT_SYMBOL(inode_newsize_ok); |
106 | 122 | ||
107 | /** | 123 | /** |
108 | * generic_setattr - copy simple metadata updates into the generic inode | 124 | * setattr_copy - copy simple metadata updates into the generic inode |
109 | * @inode: the inode to be updated | 125 | * @inode: the inode to be updated |
110 | * @attr: the new attributes | 126 | * @attr: the new attributes |
111 | * | 127 | * |
112 | * generic_setattr must be called with i_mutex held. | 128 | * setattr_copy must be called with i_mutex held. |
113 | * | 129 | * |
114 | * generic_setattr updates the inode's metadata with that specified | 130 | * setattr_copy updates the inode's metadata with that specified |
115 | * in attr. Noticably missing is inode size update, which is more complex | 131 | * in attr. Noticably missing is inode size update, which is more complex |
116 | * as it requires pagecache updates. See simple_setsize. | 132 | * as it requires pagecache updates. |
117 | * | 133 | * |
118 | * The inode is not marked as dirty after this operation. The rationale is | 134 | * The inode is not marked as dirty after this operation. The rationale is |
119 | * that for "simple" filesystems, the struct inode is the inode storage. | 135 | * that for "simple" filesystems, the struct inode is the inode storage. |
120 | * The caller is free to mark the inode dirty afterwards if needed. | 136 | * The caller is free to mark the inode dirty afterwards if needed. |
121 | */ | 137 | */ |
122 | void generic_setattr(struct inode *inode, const struct iattr *attr) | 138 | void setattr_copy(struct inode *inode, const struct iattr *attr) |
123 | { | 139 | { |
124 | unsigned int ia_valid = attr->ia_valid; | 140 | unsigned int ia_valid = attr->ia_valid; |
125 | 141 | ||
@@ -144,32 +160,7 @@ void generic_setattr(struct inode *inode, const struct iattr *attr) | |||
144 | inode->i_mode = mode; | 160 | inode->i_mode = mode; |
145 | } | 161 | } |
146 | } | 162 | } |
147 | EXPORT_SYMBOL(generic_setattr); | 163 | EXPORT_SYMBOL(setattr_copy); |
148 | |||
149 | /* | ||
150 | * note this function is deprecated, the new truncate sequence should be | ||
151 | * used instead -- see eg. simple_setsize, generic_setattr. | ||
152 | */ | ||
153 | int inode_setattr(struct inode *inode, const struct iattr *attr) | ||
154 | { | ||
155 | unsigned int ia_valid = attr->ia_valid; | ||
156 | |||
157 | if (ia_valid & ATTR_SIZE && | ||
158 | attr->ia_size != i_size_read(inode)) { | ||
159 | int error; | ||
160 | |||
161 | error = vmtruncate(inode, attr->ia_size); | ||
162 | if (error) | ||
163 | return error; | ||
164 | } | ||
165 | |||
166 | generic_setattr(inode, attr); | ||
167 | |||
168 | mark_inode_dirty(inode); | ||
169 | |||
170 | return 0; | ||
171 | } | ||
172 | EXPORT_SYMBOL(inode_setattr); | ||
173 | 164 | ||
174 | int notify_change(struct dentry * dentry, struct iattr * attr) | 165 | int notify_change(struct dentry * dentry, struct iattr * attr) |
175 | { | 166 | { |
@@ -237,13 +228,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr) | |||
237 | if (ia_valid & ATTR_SIZE) | 228 | if (ia_valid & ATTR_SIZE) |
238 | down_write(&dentry->d_inode->i_alloc_sem); | 229 | down_write(&dentry->d_inode->i_alloc_sem); |
239 | 230 | ||
240 | if (inode->i_op && inode->i_op->setattr) { | 231 | if (inode->i_op->setattr) |
241 | error = inode->i_op->setattr(dentry, attr); | 232 | error = inode->i_op->setattr(dentry, attr); |
242 | } else { | 233 | else |
243 | error = inode_change_ok(inode, attr); | 234 | error = simple_setattr(dentry, attr); |
244 | if (!error) | ||
245 | error = inode_setattr(inode, attr); | ||
246 | } | ||
247 | 235 | ||
248 | if (ia_valid & ATTR_SIZE) | 236 | if (ia_valid & ATTR_SIZE) |
249 | up_write(&dentry->d_inode->i_alloc_sem); | 237 | up_write(&dentry->d_inode->i_alloc_sem); |
diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 9a0520b50663..11b1ea786d00 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/param.h> | 17 | #include <linux/param.h> |
18 | #include <linux/time.h> | 18 | #include <linux/time.h> |
19 | #include <linux/compat.h> | ||
19 | #include <linux/smp_lock.h> | 20 | #include <linux/smp_lock.h> |
20 | #include "autofs_i.h" | 21 | #include "autofs_i.h" |
21 | 22 | ||
@@ -25,13 +26,17 @@ static int autofs_root_symlink(struct inode *,struct dentry *,const char *); | |||
25 | static int autofs_root_unlink(struct inode *,struct dentry *); | 26 | static int autofs_root_unlink(struct inode *,struct dentry *); |
26 | static int autofs_root_rmdir(struct inode *,struct dentry *); | 27 | static int autofs_root_rmdir(struct inode *,struct dentry *); |
27 | static int autofs_root_mkdir(struct inode *,struct dentry *,int); | 28 | static int autofs_root_mkdir(struct inode *,struct dentry *,int); |
28 | static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); | 29 | static long autofs_root_ioctl(struct file *,unsigned int,unsigned long); |
30 | static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long); | ||
29 | 31 | ||
30 | const struct file_operations autofs_root_operations = { | 32 | const struct file_operations autofs_root_operations = { |
31 | .llseek = generic_file_llseek, | 33 | .llseek = generic_file_llseek, |
32 | .read = generic_read_dir, | 34 | .read = generic_read_dir, |
33 | .readdir = autofs_root_readdir, | 35 | .readdir = autofs_root_readdir, |
34 | .ioctl = autofs_root_ioctl, | 36 | .unlocked_ioctl = autofs_root_ioctl, |
37 | #ifdef CONFIG_COMPAT | ||
38 | .compat_ioctl = autofs_root_compat_ioctl, | ||
39 | #endif | ||
35 | }; | 40 | }; |
36 | 41 | ||
37 | const struct inode_operations autofs_root_inode_operations = { | 42 | const struct inode_operations autofs_root_inode_operations = { |
@@ -492,6 +497,25 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
492 | } | 497 | } |
493 | 498 | ||
494 | /* Get/set timeout ioctl() operation */ | 499 | /* Get/set timeout ioctl() operation */ |
500 | #ifdef CONFIG_COMPAT | ||
501 | static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi, | ||
502 | unsigned int __user *p) | ||
503 | { | ||
504 | unsigned long ntimeout; | ||
505 | |||
506 | if (get_user(ntimeout, p) || | ||
507 | put_user(sbi->exp_timeout / HZ, p)) | ||
508 | return -EFAULT; | ||
509 | |||
510 | if (ntimeout > UINT_MAX/HZ) | ||
511 | sbi->exp_timeout = 0; | ||
512 | else | ||
513 | sbi->exp_timeout = ntimeout * HZ; | ||
514 | |||
515 | return 0; | ||
516 | } | ||
517 | #endif | ||
518 | |||
495 | static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi, | 519 | static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi, |
496 | unsigned long __user *p) | 520 | unsigned long __user *p) |
497 | { | 521 | { |
@@ -546,7 +570,7 @@ static inline int autofs_expire_run(struct super_block *sb, | |||
546 | * ioctl()'s on the root directory is the chief method for the daemon to | 570 | * ioctl()'s on the root directory is the chief method for the daemon to |
547 | * generate kernel reactions | 571 | * generate kernel reactions |
548 | */ | 572 | */ |
549 | static int autofs_root_ioctl(struct inode *inode, struct file *filp, | 573 | static int autofs_do_root_ioctl(struct inode *inode, struct file *filp, |
550 | unsigned int cmd, unsigned long arg) | 574 | unsigned int cmd, unsigned long arg) |
551 | { | 575 | { |
552 | struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); | 576 | struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); |
@@ -571,6 +595,10 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp, | |||
571 | return 0; | 595 | return 0; |
572 | case AUTOFS_IOC_PROTOVER: /* Get protocol version */ | 596 | case AUTOFS_IOC_PROTOVER: /* Get protocol version */ |
573 | return autofs_get_protover(argp); | 597 | return autofs_get_protover(argp); |
598 | #ifdef CONFIG_COMPAT | ||
599 | case AUTOFS_IOC_SETTIMEOUT32: | ||
600 | return autofs_compat_get_set_timeout(sbi, argp); | ||
601 | #endif | ||
574 | case AUTOFS_IOC_SETTIMEOUT: | 602 | case AUTOFS_IOC_SETTIMEOUT: |
575 | return autofs_get_set_timeout(sbi, argp); | 603 | return autofs_get_set_timeout(sbi, argp); |
576 | case AUTOFS_IOC_EXPIRE: | 604 | case AUTOFS_IOC_EXPIRE: |
@@ -579,4 +607,37 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp, | |||
579 | default: | 607 | default: |
580 | return -ENOSYS; | 608 | return -ENOSYS; |
581 | } | 609 | } |
610 | |||
611 | } | ||
612 | |||
613 | static long autofs_root_ioctl(struct file *filp, | ||
614 | unsigned int cmd, unsigned long arg) | ||
615 | { | ||
616 | int ret; | ||
617 | |||
618 | lock_kernel(); | ||
619 | ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode, | ||
620 | filp, cmd, arg); | ||
621 | unlock_kernel(); | ||
622 | |||
623 | return ret; | ||
624 | } | ||
625 | |||
626 | #ifdef CONFIG_COMPAT | ||
627 | static long autofs_root_compat_ioctl(struct file *filp, | ||
628 | unsigned int cmd, unsigned long arg) | ||
629 | { | ||
630 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
631 | int ret; | ||
632 | |||
633 | lock_kernel(); | ||
634 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) | ||
635 | ret = autofs_do_root_ioctl(inode, filp, cmd, arg); | ||
636 | else | ||
637 | ret = autofs_do_root_ioctl(inode, filp, cmd, | ||
638 | (unsigned long)compat_ptr(arg)); | ||
639 | unlock_kernel(); | ||
640 | |||
641 | return ret; | ||
582 | } | 642 | } |
643 | #endif | ||
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index db4117ed7803..cb1bd38dc08c 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -18,7 +18,9 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/param.h> | 19 | #include <linux/param.h> |
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
21 | #include <linux/compat.h> | ||
21 | #include <linux/smp_lock.h> | 22 | #include <linux/smp_lock.h> |
23 | |||
22 | #include "autofs_i.h" | 24 | #include "autofs_i.h" |
23 | 25 | ||
24 | static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); | 26 | static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); |
@@ -26,6 +28,7 @@ static int autofs4_dir_unlink(struct inode *,struct dentry *); | |||
26 | static int autofs4_dir_rmdir(struct inode *,struct dentry *); | 28 | static int autofs4_dir_rmdir(struct inode *,struct dentry *); |
27 | static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); | 29 | static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); |
28 | static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); | 30 | static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); |
31 | static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); | ||
29 | static int autofs4_dir_open(struct inode *inode, struct file *file); | 32 | static int autofs4_dir_open(struct inode *inode, struct file *file); |
30 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); | 33 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); |
31 | static void *autofs4_follow_link(struct dentry *, struct nameidata *); | 34 | static void *autofs4_follow_link(struct dentry *, struct nameidata *); |
@@ -40,6 +43,9 @@ const struct file_operations autofs4_root_operations = { | |||
40 | .readdir = dcache_readdir, | 43 | .readdir = dcache_readdir, |
41 | .llseek = dcache_dir_lseek, | 44 | .llseek = dcache_dir_lseek, |
42 | .unlocked_ioctl = autofs4_root_ioctl, | 45 | .unlocked_ioctl = autofs4_root_ioctl, |
46 | #ifdef CONFIG_COMPAT | ||
47 | .compat_ioctl = autofs4_root_compat_ioctl, | ||
48 | #endif | ||
43 | }; | 49 | }; |
44 | 50 | ||
45 | const struct file_operations autofs4_dir_operations = { | 51 | const struct file_operations autofs4_dir_operations = { |
@@ -198,8 +204,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) | |||
198 | } | 204 | } |
199 | 205 | ||
200 | /* Initialize expiry counter after successful mount */ | 206 | /* Initialize expiry counter after successful mount */ |
201 | if (ino) | 207 | ino->last_used = jiffies; |
202 | ino->last_used = jiffies; | ||
203 | 208 | ||
204 | spin_lock(&sbi->fs_lock); | 209 | spin_lock(&sbi->fs_lock); |
205 | ino->flags &= ~AUTOFS_INF_PENDING; | 210 | ino->flags &= ~AUTOFS_INF_PENDING; |
@@ -840,6 +845,26 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
840 | } | 845 | } |
841 | 846 | ||
842 | /* Get/set timeout ioctl() operation */ | 847 | /* Get/set timeout ioctl() operation */ |
848 | #ifdef CONFIG_COMPAT | ||
849 | static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi, | ||
850 | compat_ulong_t __user *p) | ||
851 | { | ||
852 | int rv; | ||
853 | unsigned long ntimeout; | ||
854 | |||
855 | if ((rv = get_user(ntimeout, p)) || | ||
856 | (rv = put_user(sbi->exp_timeout/HZ, p))) | ||
857 | return rv; | ||
858 | |||
859 | if (ntimeout > UINT_MAX/HZ) | ||
860 | sbi->exp_timeout = 0; | ||
861 | else | ||
862 | sbi->exp_timeout = ntimeout * HZ; | ||
863 | |||
864 | return 0; | ||
865 | } | ||
866 | #endif | ||
867 | |||
843 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, | 868 | static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, |
844 | unsigned long __user *p) | 869 | unsigned long __user *p) |
845 | { | 870 | { |
@@ -933,6 +958,10 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
933 | return autofs4_get_protosubver(sbi, p); | 958 | return autofs4_get_protosubver(sbi, p); |
934 | case AUTOFS_IOC_SETTIMEOUT: | 959 | case AUTOFS_IOC_SETTIMEOUT: |
935 | return autofs4_get_set_timeout(sbi, p); | 960 | return autofs4_get_set_timeout(sbi, p); |
961 | #ifdef CONFIG_COMPAT | ||
962 | case AUTOFS_IOC_SETTIMEOUT32: | ||
963 | return autofs4_compat_get_set_timeout(sbi, p); | ||
964 | #endif | ||
936 | 965 | ||
937 | case AUTOFS_IOC_ASKUMOUNT: | 966 | case AUTOFS_IOC_ASKUMOUNT: |
938 | return autofs4_ask_umount(filp->f_path.mnt, p); | 967 | return autofs4_ask_umount(filp->f_path.mnt, p); |
@@ -961,3 +990,22 @@ static long autofs4_root_ioctl(struct file *filp, | |||
961 | 990 | ||
962 | return ret; | 991 | return ret; |
963 | } | 992 | } |
993 | |||
994 | #ifdef CONFIG_COMPAT | ||
995 | static long autofs4_root_compat_ioctl(struct file *filp, | ||
996 | unsigned int cmd, unsigned long arg) | ||
997 | { | ||
998 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
999 | int ret; | ||
1000 | |||
1001 | lock_kernel(); | ||
1002 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) | ||
1003 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | ||
1004 | else | ||
1005 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, | ||
1006 | (unsigned long)compat_ptr(arg)); | ||
1007 | unlock_kernel(); | ||
1008 | |||
1009 | return ret; | ||
1010 | } | ||
1011 | #endif | ||
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 34ddda888e63..dc39d2824885 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -436,7 +436,7 @@ befs_init_inodecache(void) | |||
436 | init_once); | 436 | init_once); |
437 | if (befs_inode_cachep == NULL) { | 437 | if (befs_inode_cachep == NULL) { |
438 | printk(KERN_ERR "befs_init_inodecache: " | 438 | printk(KERN_ERR "befs_init_inodecache: " |
439 | "Couldn't initalize inode slabcache\n"); | 439 | "Couldn't initialize inode slabcache\n"); |
440 | return -ENOMEM; | 440 | return -ENOMEM; |
441 | } | 441 | } |
442 | 442 | ||
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 7109e451abf7..f7f87e233dd9 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h | |||
@@ -17,7 +17,6 @@ struct bfs_sb_info { | |||
17 | unsigned long si_lf_eblk; | 17 | unsigned long si_lf_eblk; |
18 | unsigned long si_lasti; | 18 | unsigned long si_lasti; |
19 | unsigned long *si_imap; | 19 | unsigned long *si_imap; |
20 | struct buffer_head *si_sbh; /* buffer header w/superblock */ | ||
21 | struct mutex bfs_lock; | 20 | struct mutex bfs_lock; |
22 | }; | 21 | }; |
23 | 22 | ||
diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 88b9a3ff44e4..eb67edd0f8ea 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c | |||
@@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block, | |||
70 | struct super_block *sb = inode->i_sb; | 70 | struct super_block *sb = inode->i_sb; |
71 | struct bfs_sb_info *info = BFS_SB(sb); | 71 | struct bfs_sb_info *info = BFS_SB(sb); |
72 | struct bfs_inode_info *bi = BFS_I(inode); | 72 | struct bfs_inode_info *bi = BFS_I(inode); |
73 | struct buffer_head *sbh = info->si_sbh; | ||
74 | 73 | ||
75 | phys = bi->i_sblock + block; | 74 | phys = bi->i_sblock + block; |
76 | if (!create) { | 75 | if (!create) { |
@@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block, | |||
112 | info->si_freeb -= phys - bi->i_eblock; | 111 | info->si_freeb -= phys - bi->i_eblock; |
113 | info->si_lf_eblk = bi->i_eblock = phys; | 112 | info->si_lf_eblk = bi->i_eblock = phys; |
114 | mark_inode_dirty(inode); | 113 | mark_inode_dirty(inode); |
115 | mark_buffer_dirty(sbh); | ||
116 | err = 0; | 114 | err = 0; |
117 | goto out; | 115 | goto out; |
118 | } | 116 | } |
@@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block, | |||
147 | */ | 145 | */ |
148 | info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; | 146 | info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; |
149 | mark_inode_dirty(inode); | 147 | mark_inode_dirty(inode); |
150 | mark_buffer_dirty(sbh); | ||
151 | map_bh(bh_result, sb, phys); | 148 | map_bh(bh_result, sb, phys); |
152 | out: | 149 | out: |
153 | mutex_unlock(&info->bfs_lock); | 150 | mutex_unlock(&info->bfs_lock); |
@@ -168,9 +165,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping, | |||
168 | loff_t pos, unsigned len, unsigned flags, | 165 | loff_t pos, unsigned len, unsigned flags, |
169 | struct page **pagep, void **fsdata) | 166 | struct page **pagep, void **fsdata) |
170 | { | 167 | { |
171 | *pagep = NULL; | 168 | int ret; |
172 | return block_write_begin(file, mapping, pos, len, flags, | 169 | |
173 | pagep, fsdata, bfs_get_block); | 170 | ret = block_write_begin(mapping, pos, len, flags, pagep, |
171 | bfs_get_block); | ||
172 | if (unlikely(ret)) { | ||
173 | loff_t isize = mapping->host->i_size; | ||
174 | if (pos + len > isize) | ||
175 | vmtruncate(mapping->host, isize); | ||
176 | } | ||
177 | |||
178 | return ret; | ||
174 | } | 179 | } |
175 | 180 | ||
176 | static sector_t bfs_bmap(struct address_space *mapping, sector_t block) | 181 | static sector_t bfs_bmap(struct address_space *mapping, sector_t block) |
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index f22a7d3dc362..c4daf0f5fc02 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -31,7 +31,6 @@ MODULE_LICENSE("GPL"); | |||
31 | #define dprintf(x...) | 31 | #define dprintf(x...) |
32 | #endif | 32 | #endif |
33 | 33 | ||
34 | static void bfs_write_super(struct super_block *s); | ||
35 | void dump_imap(const char *prefix, struct super_block *s); | 34 | void dump_imap(const char *prefix, struct super_block *s); |
36 | 35 | ||
37 | struct inode *bfs_iget(struct super_block *sb, unsigned long ino) | 36 | struct inode *bfs_iget(struct super_block *sb, unsigned long ino) |
@@ -99,6 +98,24 @@ error: | |||
99 | return ERR_PTR(-EIO); | 98 | return ERR_PTR(-EIO); |
100 | } | 99 | } |
101 | 100 | ||
101 | static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p) | ||
102 | { | ||
103 | if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) { | ||
104 | printf("Bad inode number %s:%08x\n", sb->s_id, ino); | ||
105 | return ERR_PTR(-EIO); | ||
106 | } | ||
107 | |||
108 | ino -= BFS_ROOT_INO; | ||
109 | |||
110 | *p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK); | ||
111 | if (!*p) { | ||
112 | printf("Unable to read inode %s:%08x\n", sb->s_id, ino); | ||
113 | return ERR_PTR(-EIO); | ||
114 | } | ||
115 | |||
116 | return (struct bfs_inode *)(*p)->b_data + ino % BFS_INODES_PER_BLOCK; | ||
117 | } | ||
118 | |||
102 | static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 119 | static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
103 | { | 120 | { |
104 | struct bfs_sb_info *info = BFS_SB(inode->i_sb); | 121 | struct bfs_sb_info *info = BFS_SB(inode->i_sb); |
@@ -106,28 +123,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
106 | unsigned long i_sblock; | 123 | unsigned long i_sblock; |
107 | struct bfs_inode *di; | 124 | struct bfs_inode *di; |
108 | struct buffer_head *bh; | 125 | struct buffer_head *bh; |
109 | int block, off; | ||
110 | int err = 0; | 126 | int err = 0; |
111 | 127 | ||
112 | dprintf("ino=%08x\n", ino); | 128 | dprintf("ino=%08x\n", ino); |
113 | 129 | ||
114 | if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { | 130 | di = find_inode(inode->i_sb, ino, &bh); |
115 | printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); | 131 | if (IS_ERR(di)) |
116 | return -EIO; | 132 | return PTR_ERR(di); |
117 | } | ||
118 | 133 | ||
119 | mutex_lock(&info->bfs_lock); | 134 | mutex_lock(&info->bfs_lock); |
120 | block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; | ||
121 | bh = sb_bread(inode->i_sb, block); | ||
122 | if (!bh) { | ||
123 | printf("Unable to read inode %s:%08x\n", | ||
124 | inode->i_sb->s_id, ino); | ||
125 | mutex_unlock(&info->bfs_lock); | ||
126 | return -EIO; | ||
127 | } | ||
128 | |||
129 | off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; | ||
130 | di = (struct bfs_inode *)bh->b_data + off; | ||
131 | 135 | ||
132 | if (ino == BFS_ROOT_INO) | 136 | if (ino == BFS_ROOT_INO) |
133 | di->i_vtype = cpu_to_le32(BFS_VDIR); | 137 | di->i_vtype = cpu_to_le32(BFS_VDIR); |
@@ -158,12 +162,11 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
158 | return err; | 162 | return err; |
159 | } | 163 | } |
160 | 164 | ||
161 | static void bfs_delete_inode(struct inode *inode) | 165 | static void bfs_evict_inode(struct inode *inode) |
162 | { | 166 | { |
163 | unsigned long ino = inode->i_ino; | 167 | unsigned long ino = inode->i_ino; |
164 | struct bfs_inode *di; | 168 | struct bfs_inode *di; |
165 | struct buffer_head *bh; | 169 | struct buffer_head *bh; |
166 | int block, off; | ||
167 | struct super_block *s = inode->i_sb; | 170 | struct super_block *s = inode->i_sb; |
168 | struct bfs_sb_info *info = BFS_SB(s); | 171 | struct bfs_sb_info *info = BFS_SB(s); |
169 | struct bfs_inode_info *bi = BFS_I(inode); | 172 | struct bfs_inode_info *bi = BFS_I(inode); |
@@ -171,28 +174,19 @@ static void bfs_delete_inode(struct inode *inode) | |||
171 | dprintf("ino=%08lx\n", ino); | 174 | dprintf("ino=%08lx\n", ino); |
172 | 175 | ||
173 | truncate_inode_pages(&inode->i_data, 0); | 176 | truncate_inode_pages(&inode->i_data, 0); |
177 | invalidate_inode_buffers(inode); | ||
178 | end_writeback(inode); | ||
174 | 179 | ||
175 | if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) { | 180 | if (inode->i_nlink) |
176 | printf("invalid ino=%08lx\n", ino); | ||
177 | return; | 181 | return; |
178 | } | ||
179 | |||
180 | inode->i_size = 0; | ||
181 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | ||
182 | mutex_lock(&info->bfs_lock); | ||
183 | mark_inode_dirty(inode); | ||
184 | 182 | ||
185 | block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; | 183 | di = find_inode(s, inode->i_ino, &bh); |
186 | bh = sb_bread(s, block); | 184 | if (IS_ERR(di)) |
187 | if (!bh) { | ||
188 | printf("Unable to read inode %s:%08lx\n", | ||
189 | inode->i_sb->s_id, ino); | ||
190 | mutex_unlock(&info->bfs_lock); | ||
191 | return; | 185 | return; |
192 | } | 186 | |
193 | off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; | 187 | mutex_lock(&info->bfs_lock); |
194 | di = (struct bfs_inode *)bh->b_data + off; | 188 | /* clear on-disk inode */ |
195 | memset((void *)di, 0, sizeof(struct bfs_inode)); | 189 | memset(di, 0, sizeof(struct bfs_inode)); |
196 | mark_buffer_dirty(bh); | 190 | mark_buffer_dirty(bh); |
197 | brelse(bh); | 191 | brelse(bh); |
198 | 192 | ||
@@ -209,32 +203,9 @@ static void bfs_delete_inode(struct inode *inode) | |||
209 | * "last block of the last file" even if there is no | 203 | * "last block of the last file" even if there is no |
210 | * real file there, saves us 1 gap. | 204 | * real file there, saves us 1 gap. |
211 | */ | 205 | */ |
212 | if (info->si_lf_eblk == bi->i_eblock) { | 206 | if (info->si_lf_eblk == bi->i_eblock) |
213 | info->si_lf_eblk = bi->i_sblock - 1; | 207 | info->si_lf_eblk = bi->i_sblock - 1; |
214 | mark_buffer_dirty(info->si_sbh); | ||
215 | } | ||
216 | mutex_unlock(&info->bfs_lock); | 208 | mutex_unlock(&info->bfs_lock); |
217 | clear_inode(inode); | ||
218 | } | ||
219 | |||
220 | static int bfs_sync_fs(struct super_block *sb, int wait) | ||
221 | { | ||
222 | struct bfs_sb_info *info = BFS_SB(sb); | ||
223 | |||
224 | mutex_lock(&info->bfs_lock); | ||
225 | mark_buffer_dirty(info->si_sbh); | ||
226 | sb->s_dirt = 0; | ||
227 | mutex_unlock(&info->bfs_lock); | ||
228 | |||
229 | return 0; | ||
230 | } | ||
231 | |||
232 | static void bfs_write_super(struct super_block *sb) | ||
233 | { | ||
234 | if (!(sb->s_flags & MS_RDONLY)) | ||
235 | bfs_sync_fs(sb, 1); | ||
236 | else | ||
237 | sb->s_dirt = 0; | ||
238 | } | 209 | } |
239 | 210 | ||
240 | static void bfs_put_super(struct super_block *s) | 211 | static void bfs_put_super(struct super_block *s) |
@@ -246,10 +217,6 @@ static void bfs_put_super(struct super_block *s) | |||
246 | 217 | ||
247 | lock_kernel(); | 218 | lock_kernel(); |
248 | 219 | ||
249 | if (s->s_dirt) | ||
250 | bfs_write_super(s); | ||
251 | |||
252 | brelse(info->si_sbh); | ||
253 | mutex_destroy(&info->bfs_lock); | 220 | mutex_destroy(&info->bfs_lock); |
254 | kfree(info->si_imap); | 221 | kfree(info->si_imap); |
255 | kfree(info); | 222 | kfree(info); |
@@ -319,10 +286,8 @@ static const struct super_operations bfs_sops = { | |||
319 | .alloc_inode = bfs_alloc_inode, | 286 | .alloc_inode = bfs_alloc_inode, |
320 | .destroy_inode = bfs_destroy_inode, | 287 | .destroy_inode = bfs_destroy_inode, |
321 | .write_inode = bfs_write_inode, | 288 | .write_inode = bfs_write_inode, |
322 | .delete_inode = bfs_delete_inode, | 289 | .evict_inode = bfs_evict_inode, |
323 | .put_super = bfs_put_super, | 290 | .put_super = bfs_put_super, |
324 | .write_super = bfs_write_super, | ||
325 | .sync_fs = bfs_sync_fs, | ||
326 | .statfs = bfs_statfs, | 291 | .statfs = bfs_statfs, |
327 | }; | 292 | }; |
328 | 293 | ||
@@ -349,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s) | |||
349 | 314 | ||
350 | static int bfs_fill_super(struct super_block *s, void *data, int silent) | 315 | static int bfs_fill_super(struct super_block *s, void *data, int silent) |
351 | { | 316 | { |
352 | struct buffer_head *bh; | 317 | struct buffer_head *bh, *sbh; |
353 | struct bfs_super_block *bfs_sb; | 318 | struct bfs_super_block *bfs_sb; |
354 | struct inode *inode; | 319 | struct inode *inode; |
355 | unsigned i, imap_len; | 320 | unsigned i, imap_len; |
@@ -365,10 +330,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
365 | 330 | ||
366 | sb_set_blocksize(s, BFS_BSIZE); | 331 | sb_set_blocksize(s, BFS_BSIZE); |
367 | 332 | ||
368 | info->si_sbh = sb_bread(s, 0); | 333 | sbh = sb_bread(s, 0); |
369 | if (!info->si_sbh) | 334 | if (!sbh) |
370 | goto out; | 335 | goto out; |
371 | bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data; | 336 | bfs_sb = (struct bfs_super_block *)sbh->b_data; |
372 | if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { | 337 | if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { |
373 | if (!silent) | 338 | if (!silent) |
374 | printf("No BFS filesystem on %s (magic=%08x)\n", | 339 | printf("No BFS filesystem on %s (magic=%08x)\n", |
@@ -472,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) | |||
472 | info->si_lf_eblk = eblock; | 437 | info->si_lf_eblk = eblock; |
473 | } | 438 | } |
474 | brelse(bh); | 439 | brelse(bh); |
475 | if (!(s->s_flags & MS_RDONLY)) { | 440 | brelse(sbh); |
476 | mark_buffer_dirty(info->si_sbh); | ||
477 | s->s_dirt = 1; | ||
478 | } | ||
479 | dump_imap("read_super", s); | 441 | dump_imap("read_super", s); |
480 | return 0; | 442 | return 0; |
481 | 443 | ||
@@ -485,7 +447,7 @@ out3: | |||
485 | out2: | 447 | out2: |
486 | kfree(info->si_imap); | 448 | kfree(info->si_imap); |
487 | out1: | 449 | out1: |
488 | brelse(info->si_sbh); | 450 | brelse(sbh); |
489 | out: | 451 | out: |
490 | mutex_destroy(&info->bfs_lock); | 452 | mutex_destroy(&info->bfs_lock); |
491 | kfree(info); | 453 | kfree(info); |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index c4e83537ead7..9e60fd201716 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) | |||
502 | return inode; | 502 | return inode; |
503 | } | 503 | } |
504 | 504 | ||
505 | static void bm_clear_inode(struct inode *inode) | 505 | static void bm_evict_inode(struct inode *inode) |
506 | { | 506 | { |
507 | end_writeback(inode); | ||
507 | kfree(inode->i_private); | 508 | kfree(inode->i_private); |
508 | } | 509 | } |
509 | 510 | ||
@@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = { | |||
685 | 686 | ||
686 | static const struct super_operations s_ops = { | 687 | static const struct super_operations s_ops = { |
687 | .statfs = simple_statfs, | 688 | .statfs = simple_statfs, |
688 | .clear_inode = bm_clear_inode, | 689 | .evict_inode = bm_evict_inode, |
689 | }; | 690 | }; |
690 | 691 | ||
691 | static int bm_fill_super(struct super_block * sb, void * data, int silent) | 692 | static int bm_fill_super(struct super_block * sb, void * data, int silent) |
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q, | |||
843 | if (!bio) | 843 | if (!bio) |
844 | goto out_bmd; | 844 | goto out_bmd; |
845 | 845 | ||
846 | bio->bi_rw |= (!write_to_vm << BIO_RW); | 846 | if (!write_to_vm) |
847 | bio->bi_rw |= REQ_WRITE; | ||
847 | 848 | ||
848 | ret = 0; | 849 | ret = 0; |
849 | 850 | ||
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
1024 | * set data direction, and check if mapped pages need bouncing | 1025 | * set data direction, and check if mapped pages need bouncing |
1025 | */ | 1026 | */ |
1026 | if (!write_to_vm) | 1027 | if (!write_to_vm) |
1027 | bio->bi_rw |= (1 << BIO_RW); | 1028 | bio->bi_rw |= REQ_WRITE; |
1028 | 1029 | ||
1029 | bio->bi_bdev = bdev; | 1030 | bio->bi_bdev = bdev; |
1030 | bio->bi_flags |= (1 << BIO_USER_MAPPED); | 1031 | bio->bi_flags |= (1 << BIO_USER_MAPPED); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 99d6af811747..50e8c8582faa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
172 | struct file *file = iocb->ki_filp; | 172 | struct file *file = iocb->ki_filp; |
173 | struct inode *inode = file->f_mapping->host; | 173 | struct inode *inode = file->f_mapping->host; |
174 | 174 | ||
175 | return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode, | 175 | return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, |
176 | I_BDEV(inode), iov, offset, nr_segs, | 176 | nr_segs, blkdev_get_blocks, NULL, NULL, 0); |
177 | blkdev_get_blocks, NULL); | ||
178 | } | 177 | } |
179 | 178 | ||
180 | int __sync_blockdev(struct block_device *bdev, int wait) | 179 | int __sync_blockdev(struct block_device *bdev, int wait) |
@@ -309,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping, | |||
309 | loff_t pos, unsigned len, unsigned flags, | 308 | loff_t pos, unsigned len, unsigned flags, |
310 | struct page **pagep, void **fsdata) | 309 | struct page **pagep, void **fsdata) |
311 | { | 310 | { |
312 | *pagep = NULL; | 311 | return block_write_begin(mapping, pos, len, flags, pagep, |
313 | return block_write_begin_newtrunc(file, mapping, pos, len, flags, | 312 | blkdev_get_block); |
314 | pagep, fsdata, blkdev_get_block); | ||
315 | } | 313 | } |
316 | 314 | ||
317 | static int blkdev_write_end(struct file *file, struct address_space *mapping, | 315 | static int blkdev_write_end(struct file *file, struct address_space *mapping, |
@@ -428,10 +426,13 @@ static inline void __bd_forget(struct inode *inode) | |||
428 | inode->i_mapping = &inode->i_data; | 426 | inode->i_mapping = &inode->i_data; |
429 | } | 427 | } |
430 | 428 | ||
431 | static void bdev_clear_inode(struct inode *inode) | 429 | static void bdev_evict_inode(struct inode *inode) |
432 | { | 430 | { |
433 | struct block_device *bdev = &BDEV_I(inode)->bdev; | 431 | struct block_device *bdev = &BDEV_I(inode)->bdev; |
434 | struct list_head *p; | 432 | struct list_head *p; |
433 | truncate_inode_pages(&inode->i_data, 0); | ||
434 | invalidate_inode_buffers(inode); /* is it needed here? */ | ||
435 | end_writeback(inode); | ||
435 | spin_lock(&bdev_lock); | 436 | spin_lock(&bdev_lock); |
436 | while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { | 437 | while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { |
437 | __bd_forget(list_entry(p, struct inode, i_devices)); | 438 | __bd_forget(list_entry(p, struct inode, i_devices)); |
@@ -445,7 +446,7 @@ static const struct super_operations bdev_sops = { | |||
445 | .alloc_inode = bdev_alloc_inode, | 446 | .alloc_inode = bdev_alloc_inode, |
446 | .destroy_inode = bdev_destroy_inode, | 447 | .destroy_inode = bdev_destroy_inode, |
447 | .drop_inode = generic_delete_inode, | 448 | .drop_inode = generic_delete_inode, |
448 | .clear_inode = bdev_clear_inode, | 449 | .evict_inode = bdev_evict_inode, |
449 | }; | 450 | }; |
450 | 451 | ||
451 | static int bd_get_sb(struct file_system_type *fs_type, | 452 | static int bd_get_sb(struct file_system_type *fs_type, |
@@ -681,8 +682,8 @@ retry: | |||
681 | if (!bd_may_claim(bdev, whole, holder)) | 682 | if (!bd_may_claim(bdev, whole, holder)) |
682 | return -EBUSY; | 683 | return -EBUSY; |
683 | 684 | ||
684 | /* if someone else is claiming, wait for it to finish */ | 685 | /* if claiming is already in progress, wait for it to finish */ |
685 | if (whole->bd_claiming && whole->bd_claiming != holder) { | 686 | if (whole->bd_claiming) { |
686 | wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); | 687 | wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0); |
687 | DEFINE_WAIT(wait); | 688 | DEFINE_WAIT(wait); |
688 | 689 | ||
@@ -1339,19 +1340,20 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1339 | /* | 1340 | /* |
1340 | * hooks: /n/, see "layering violations". | 1341 | * hooks: /n/, see "layering violations". |
1341 | */ | 1342 | */ |
1342 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); | 1343 | if (!for_part) { |
1343 | if (ret != 0) { | 1344 | ret = devcgroup_inode_permission(bdev->bd_inode, perm); |
1344 | bdput(bdev); | 1345 | if (ret != 0) { |
1345 | return ret; | 1346 | bdput(bdev); |
1347 | return ret; | ||
1348 | } | ||
1346 | } | 1349 | } |
1347 | 1350 | ||
1348 | lock_kernel(); | ||
1349 | restart: | 1351 | restart: |
1350 | 1352 | ||
1351 | ret = -ENXIO; | 1353 | ret = -ENXIO; |
1352 | disk = get_gendisk(bdev->bd_dev, &partno); | 1354 | disk = get_gendisk(bdev->bd_dev, &partno); |
1353 | if (!disk) | 1355 | if (!disk) |
1354 | goto out_unlock_kernel; | 1356 | goto out; |
1355 | 1357 | ||
1356 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1358 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
1357 | if (!bdev->bd_openers) { | 1359 | if (!bdev->bd_openers) { |
@@ -1431,7 +1433,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1431 | if (for_part) | 1433 | if (for_part) |
1432 | bdev->bd_part_count++; | 1434 | bdev->bd_part_count++; |
1433 | mutex_unlock(&bdev->bd_mutex); | 1435 | mutex_unlock(&bdev->bd_mutex); |
1434 | unlock_kernel(); | ||
1435 | return 0; | 1436 | return 0; |
1436 | 1437 | ||
1437 | out_clear: | 1438 | out_clear: |
@@ -1444,9 +1445,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1444 | bdev->bd_contains = NULL; | 1445 | bdev->bd_contains = NULL; |
1445 | out_unlock_bdev: | 1446 | out_unlock_bdev: |
1446 | mutex_unlock(&bdev->bd_mutex); | 1447 | mutex_unlock(&bdev->bd_mutex); |
1447 | out_unlock_kernel: | 1448 | out: |
1448 | unlock_kernel(); | ||
1449 | |||
1450 | if (disk) | 1449 | if (disk) |
1451 | module_put(disk->fops->owner); | 1450 | module_put(disk->fops->owner); |
1452 | put_disk(disk); | 1451 | put_disk(disk); |
@@ -1515,7 +1514,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1515 | struct block_device *victim = NULL; | 1514 | struct block_device *victim = NULL; |
1516 | 1515 | ||
1517 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1516 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
1518 | lock_kernel(); | ||
1519 | if (for_part) | 1517 | if (for_part) |
1520 | bdev->bd_part_count--; | 1518 | bdev->bd_part_count--; |
1521 | 1519 | ||
@@ -1540,7 +1538,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1540 | victim = bdev->bd_contains; | 1538 | victim = bdev->bd_contains; |
1541 | bdev->bd_contains = NULL; | 1539 | bdev->bd_contains = NULL; |
1542 | } | 1540 | } |
1543 | unlock_kernel(); | ||
1544 | mutex_unlock(&bdev->bd_mutex); | 1541 | mutex_unlock(&bdev->bd_mutex); |
1545 | bdput(bdev); | 1542 | bdput(bdev); |
1546 | if (victim) | 1543 | if (victim) |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 29c20092847e..eaf286abad17 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -2389,13 +2389,13 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
2389 | pgoff_t offset, pgoff_t last_index); | 2389 | pgoff_t offset, pgoff_t last_index); |
2390 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2390 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
2391 | int btrfs_readpage(struct file *file, struct page *page); | 2391 | int btrfs_readpage(struct file *file, struct page *page); |
2392 | void btrfs_delete_inode(struct inode *inode); | 2392 | void btrfs_evict_inode(struct inode *inode); |
2393 | void btrfs_put_inode(struct inode *inode); | 2393 | void btrfs_put_inode(struct inode *inode); |
2394 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); | 2394 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); |
2395 | void btrfs_dirty_inode(struct inode *inode); | 2395 | void btrfs_dirty_inode(struct inode *inode); |
2396 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2396 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2397 | void btrfs_destroy_inode(struct inode *inode); | 2397 | void btrfs_destroy_inode(struct inode *inode); |
2398 | void btrfs_drop_inode(struct inode *inode); | 2398 | int btrfs_drop_inode(struct inode *inode); |
2399 | int btrfs_init_cachep(void); | 2399 | int btrfs_init_cachep(void); |
2400 | void btrfs_destroy_cachep(void); | 2400 | void btrfs_destroy_cachep(void); |
2401 | long btrfs_ioctl_trans_end(struct file *file); | 2401 | long btrfs_ioctl_trans_end(struct file *file); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 34f7c375567e..64f10082f048 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
480 | end_io_wq->work.func = end_workqueue_fn; | 480 | end_io_wq->work.func = end_workqueue_fn; |
481 | end_io_wq->work.flags = 0; | 481 | end_io_wq->work.flags = 0; |
482 | 482 | ||
483 | if (bio->bi_rw & (1 << BIO_RW)) { | 483 | if (bio->bi_rw & REQ_WRITE) { |
484 | if (end_io_wq->metadata) | 484 | if (end_io_wq->metadata) |
485 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 485 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, |
486 | &end_io_wq->work); | 486 | &end_io_wq->work); |
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
604 | 604 | ||
605 | atomic_inc(&fs_info->nr_async_submits); | 605 | atomic_inc(&fs_info->nr_async_submits); |
606 | 606 | ||
607 | if (rw & (1 << BIO_RW_SYNCIO)) | 607 | if (rw & REQ_SYNC) |
608 | btrfs_set_work_high_prio(&async->work); | 608 | btrfs_set_work_high_prio(&async->work); |
609 | 609 | ||
610 | btrfs_queue_worker(&fs_info->workers, &async->work); | 610 | btrfs_queue_worker(&fs_info->workers, &async->work); |
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
668 | bio, 1); | 668 | bio, 1); |
669 | BUG_ON(ret); | 669 | BUG_ON(ret); |
670 | 670 | ||
671 | if (!(rw & (1 << BIO_RW))) { | 671 | if (!(rw & REQ_WRITE)) { |
672 | /* | 672 | /* |
673 | * called for a read, do the setup so that checksum validation | 673 | * called for a read, do the setup so that checksum validation |
674 | * can happen in the async kernel threads | 674 | * can happen in the async kernel threads |
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work) | |||
1427 | * ram and up to date before trying to verify things. For | 1427 | * ram and up to date before trying to verify things. For |
1428 | * blocksize <= pagesize, it is basically a noop | 1428 | * blocksize <= pagesize, it is basically a noop |
1429 | */ | 1429 | */ |
1430 | if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata && | 1430 | if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && |
1431 | !bio_ready_for_csum(bio)) { | 1431 | !bio_ready_for_csum(bio)) { |
1432 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 1432 | btrfs_queue_worker(&fs_info->endio_meta_workers, |
1433 | &end_io_wq->work); | 1433 | &end_io_wq->work); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1bff92ad4744..c03864406af3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1429 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1429 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
1430 | BUG_ON(ret); | 1430 | BUG_ON(ret); |
1431 | 1431 | ||
1432 | if (!(rw & (1 << BIO_RW))) { | 1432 | if (!(rw & REQ_WRITE)) { |
1433 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1433 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
1434 | return btrfs_submit_compressed_read(inode, bio, | 1434 | return btrfs_submit_compressed_read(inode, bio, |
1435 | mirror_num, bio_flags); | 1435 | mirror_num, bio_flags); |
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1841 | bio->bi_size = 0; | 1841 | bio->bi_size = 0; |
1842 | 1842 | ||
1843 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | 1843 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); |
1844 | if (failed_bio->bi_rw & (1 << BIO_RW)) | 1844 | if (failed_bio->bi_rw & REQ_WRITE) |
1845 | rw = WRITE; | 1845 | rw = WRITE; |
1846 | else | 1846 | else |
1847 | rw = READ; | 1847 | rw = READ; |
@@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | |||
2938 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 2938 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
2939 | ret = btrfs_update_inode(trans, root, dir); | 2939 | ret = btrfs_update_inode(trans, root, dir); |
2940 | BUG_ON(ret); | 2940 | BUG_ON(ret); |
2941 | dir->i_sb->s_dirt = 1; | ||
2942 | 2941 | ||
2943 | btrfs_free_path(path); | 2942 | btrfs_free_path(path); |
2944 | return 0; | 2943 | return 0; |
@@ -3656,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3656 | if (err) | 3655 | if (err) |
3657 | return err; | 3656 | return err; |
3658 | } | 3657 | } |
3659 | attr->ia_valid &= ~ATTR_SIZE; | ||
3660 | 3658 | ||
3661 | if (attr->ia_valid) | 3659 | if (attr->ia_valid) { |
3662 | err = inode_setattr(inode, attr); | 3660 | setattr_copy(inode, attr); |
3661 | mark_inode_dirty(inode); | ||
3662 | |||
3663 | if (attr->ia_valid & ATTR_MODE) | ||
3664 | err = btrfs_acl_chmod(inode); | ||
3665 | } | ||
3663 | 3666 | ||
3664 | if (!err && ((attr->ia_valid & ATTR_MODE))) | ||
3665 | err = btrfs_acl_chmod(inode); | ||
3666 | return err; | 3667 | return err; |
3667 | } | 3668 | } |
3668 | 3669 | ||
3669 | void btrfs_delete_inode(struct inode *inode) | 3670 | void btrfs_evict_inode(struct inode *inode) |
3670 | { | 3671 | { |
3671 | struct btrfs_trans_handle *trans; | 3672 | struct btrfs_trans_handle *trans; |
3672 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3673 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -3674,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode) | |||
3674 | int ret; | 3675 | int ret; |
3675 | 3676 | ||
3676 | truncate_inode_pages(&inode->i_data, 0); | 3677 | truncate_inode_pages(&inode->i_data, 0); |
3678 | if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) | ||
3679 | goto no_delete; | ||
3680 | |||
3677 | if (is_bad_inode(inode)) { | 3681 | if (is_bad_inode(inode)) { |
3678 | btrfs_orphan_del(NULL, inode); | 3682 | btrfs_orphan_del(NULL, inode); |
3679 | goto no_delete; | 3683 | goto no_delete; |
3680 | } | 3684 | } |
3685 | /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ | ||
3681 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3686 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3682 | 3687 | ||
3683 | if (root->fs_info->log_root_recovering) { | 3688 | if (root->fs_info->log_root_recovering) { |
@@ -3727,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
3727 | btrfs_end_transaction(trans, root); | 3732 | btrfs_end_transaction(trans, root); |
3728 | btrfs_btree_balance_dirty(root, nr); | 3733 | btrfs_btree_balance_dirty(root, nr); |
3729 | no_delete: | 3734 | no_delete: |
3730 | clear_inode(inode); | 3735 | end_writeback(inode); |
3731 | return; | 3736 | return; |
3732 | } | 3737 | } |
3733 | 3738 | ||
@@ -3858,7 +3863,7 @@ again: | |||
3858 | p = &parent->rb_right; | 3863 | p = &parent->rb_right; |
3859 | else { | 3864 | else { |
3860 | WARN_ON(!(entry->vfs_inode.i_state & | 3865 | WARN_ON(!(entry->vfs_inode.i_state & |
3861 | (I_WILL_FREE | I_FREEING | I_CLEAR))); | 3866 | (I_WILL_FREE | I_FREEING))); |
3862 | rb_erase(parent, &root->inode_tree); | 3867 | rb_erase(parent, &root->inode_tree); |
3863 | RB_CLEAR_NODE(parent); | 3868 | RB_CLEAR_NODE(parent); |
3864 | spin_unlock(&root->inode_lock); | 3869 | spin_unlock(&root->inode_lock); |
@@ -3937,7 +3942,7 @@ again: | |||
3937 | if (atomic_read(&inode->i_count) > 1) | 3942 | if (atomic_read(&inode->i_count) > 1) |
3938 | d_prune_aliases(inode); | 3943 | d_prune_aliases(inode); |
3939 | /* | 3944 | /* |
3940 | * btrfs_drop_inode will remove it from | 3945 | * btrfs_drop_inode will have it removed from |
3941 | * the inode cache when its usage count | 3946 | * the inode cache when its usage count |
3942 | * hits zero. | 3947 | * hits zero. |
3943 | */ | 3948 | */ |
@@ -5642,7 +5647,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5642 | struct bio_vec *bvec = bio->bi_io_vec; | 5647 | struct bio_vec *bvec = bio->bi_io_vec; |
5643 | u64 start; | 5648 | u64 start; |
5644 | int skip_sum; | 5649 | int skip_sum; |
5645 | int write = rw & (1 << BIO_RW); | 5650 | int write = rw & REQ_WRITE; |
5646 | int ret = 0; | 5651 | int ret = 0; |
5647 | 5652 | ||
5648 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 5653 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
@@ -6331,13 +6336,14 @@ free: | |||
6331 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 6336 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
6332 | } | 6337 | } |
6333 | 6338 | ||
6334 | void btrfs_drop_inode(struct inode *inode) | 6339 | int btrfs_drop_inode(struct inode *inode) |
6335 | { | 6340 | { |
6336 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6341 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6337 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | 6342 | |
6338 | generic_delete_inode(inode); | 6343 | if (btrfs_root_refs(&root->root_item) == 0) |
6344 | return 1; | ||
6339 | else | 6345 | else |
6340 | generic_drop_inode(inode); | 6346 | return generic_drop_inode(inode); |
6341 | } | 6347 | } |
6342 | 6348 | ||
6343 | static void init_once(void *foo) | 6349 | static void init_once(void *foo) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2393b390318..1776dbd8dc98 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -797,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
797 | 797 | ||
798 | static const struct super_operations btrfs_super_ops = { | 798 | static const struct super_operations btrfs_super_ops = { |
799 | .drop_inode = btrfs_drop_inode, | 799 | .drop_inode = btrfs_drop_inode, |
800 | .delete_inode = btrfs_delete_inode, | 800 | .evict_inode = btrfs_evict_inode, |
801 | .put_super = btrfs_put_super, | 801 | .put_super = btrfs_put_super, |
802 | .sync_fs = btrfs_sync_fs, | 802 | .sync_fs = btrfs_sync_fs, |
803 | .show_options = btrfs_show_options, | 803 | .show_options = btrfs_show_options, |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d6e3af8be95b..dd318ff280b2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -258,7 +258,7 @@ loop_lock: | |||
258 | 258 | ||
259 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 259 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
260 | 260 | ||
261 | if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) | 261 | if (cur->bi_rw & REQ_SYNC) |
262 | num_sync_run++; | 262 | num_sync_run++; |
263 | 263 | ||
264 | submit_bio(cur->bi_rw, cur); | 264 | submit_bio(cur->bi_rw, cur); |
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
2651 | int max_errors = 0; | 2651 | int max_errors = 0; |
2652 | struct btrfs_multi_bio *multi = NULL; | 2652 | struct btrfs_multi_bio *multi = NULL; |
2653 | 2653 | ||
2654 | if (multi_ret && !(rw & (1 << BIO_RW))) | 2654 | if (multi_ret && !(rw & REQ_WRITE)) |
2655 | stripes_allocated = 1; | 2655 | stripes_allocated = 1; |
2656 | again: | 2656 | again: |
2657 | if (multi_ret) { | 2657 | if (multi_ret) { |
@@ -2687,7 +2687,7 @@ again: | |||
2687 | mirror_num = 0; | 2687 | mirror_num = 0; |
2688 | 2688 | ||
2689 | /* if our multi bio struct is too small, back off and try again */ | 2689 | /* if our multi bio struct is too small, back off and try again */ |
2690 | if (rw & (1 << BIO_RW)) { | 2690 | if (rw & REQ_WRITE) { |
2691 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 2691 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
2692 | BTRFS_BLOCK_GROUP_DUP)) { | 2692 | BTRFS_BLOCK_GROUP_DUP)) { |
2693 | stripes_required = map->num_stripes; | 2693 | stripes_required = map->num_stripes; |
@@ -2697,7 +2697,7 @@ again: | |||
2697 | max_errors = 1; | 2697 | max_errors = 1; |
2698 | } | 2698 | } |
2699 | } | 2699 | } |
2700 | if (multi_ret && (rw & (1 << BIO_RW)) && | 2700 | if (multi_ret && (rw & REQ_WRITE) && |
2701 | stripes_allocated < stripes_required) { | 2701 | stripes_allocated < stripes_required) { |
2702 | stripes_allocated = map->num_stripes; | 2702 | stripes_allocated = map->num_stripes; |
2703 | free_extent_map(em); | 2703 | free_extent_map(em); |
@@ -2733,7 +2733,7 @@ again: | |||
2733 | num_stripes = 1; | 2733 | num_stripes = 1; |
2734 | stripe_index = 0; | 2734 | stripe_index = 0; |
2735 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 2735 | if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
2736 | if (unplug_page || (rw & (1 << BIO_RW))) | 2736 | if (unplug_page || (rw & REQ_WRITE)) |
2737 | num_stripes = map->num_stripes; | 2737 | num_stripes = map->num_stripes; |
2738 | else if (mirror_num) | 2738 | else if (mirror_num) |
2739 | stripe_index = mirror_num - 1; | 2739 | stripe_index = mirror_num - 1; |
@@ -2744,7 +2744,7 @@ again: | |||
2744 | } | 2744 | } |
2745 | 2745 | ||
2746 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 2746 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
2747 | if (rw & (1 << BIO_RW)) | 2747 | if (rw & REQ_WRITE) |
2748 | num_stripes = map->num_stripes; | 2748 | num_stripes = map->num_stripes; |
2749 | else if (mirror_num) | 2749 | else if (mirror_num) |
2750 | stripe_index = mirror_num - 1; | 2750 | stripe_index = mirror_num - 1; |
@@ -2755,7 +2755,7 @@ again: | |||
2755 | stripe_index = do_div(stripe_nr, factor); | 2755 | stripe_index = do_div(stripe_nr, factor); |
2756 | stripe_index *= map->sub_stripes; | 2756 | stripe_index *= map->sub_stripes; |
2757 | 2757 | ||
2758 | if (unplug_page || (rw & (1 << BIO_RW))) | 2758 | if (unplug_page || (rw & REQ_WRITE)) |
2759 | num_stripes = map->sub_stripes; | 2759 | num_stripes = map->sub_stripes; |
2760 | else if (mirror_num) | 2760 | else if (mirror_num) |
2761 | stripe_index += mirror_num - 1; | 2761 | stripe_index += mirror_num - 1; |
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2945 | struct btrfs_pending_bios *pending_bios; | 2945 | struct btrfs_pending_bios *pending_bios; |
2946 | 2946 | ||
2947 | /* don't bother with additional async steps for reads, right now */ | 2947 | /* don't bother with additional async steps for reads, right now */ |
2948 | if (!(rw & (1 << BIO_RW))) { | 2948 | if (!(rw & REQ_WRITE)) { |
2949 | bio_get(bio); | 2949 | bio_get(bio); |
2950 | submit_bio(rw, bio); | 2950 | submit_bio(rw, bio); |
2951 | bio_put(bio); | 2951 | bio_put(bio); |
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2964 | bio->bi_rw |= rw; | 2964 | bio->bi_rw |= rw; |
2965 | 2965 | ||
2966 | spin_lock(&device->io_lock); | 2966 | spin_lock(&device->io_lock); |
2967 | if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) | 2967 | if (bio->bi_rw & REQ_SYNC) |
2968 | pending_bios = &device->pending_sync_bios; | 2968 | pending_bios = &device->pending_sync_bios; |
2969 | else | 2969 | else |
2970 | pending_bios = &device->pending_bios; | 2970 | pending_bios = &device->pending_bios; |
diff --git a/fs/buffer.c b/fs/buffer.c index d54812b198e9..50efa339e051 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) | |||
1833 | } | 1833 | } |
1834 | EXPORT_SYMBOL(page_zero_new_buffers); | 1834 | EXPORT_SYMBOL(page_zero_new_buffers); |
1835 | 1835 | ||
1836 | static int __block_prepare_write(struct inode *inode, struct page *page, | 1836 | int block_prepare_write(struct page *page, unsigned from, unsigned to, |
1837 | unsigned from, unsigned to, get_block_t *get_block) | 1837 | get_block_t *get_block) |
1838 | { | 1838 | { |
1839 | struct inode *inode = page->mapping->host; | ||
1839 | unsigned block_start, block_end; | 1840 | unsigned block_start, block_end; |
1840 | sector_t block; | 1841 | sector_t block; |
1841 | int err = 0; | 1842 | int err = 0; |
@@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page, | |||
1908 | if (!buffer_uptodate(*wait_bh)) | 1909 | if (!buffer_uptodate(*wait_bh)) |
1909 | err = -EIO; | 1910 | err = -EIO; |
1910 | } | 1911 | } |
1911 | if (unlikely(err)) | 1912 | if (unlikely(err)) { |
1912 | page_zero_new_buffers(page, from, to); | 1913 | page_zero_new_buffers(page, from, to); |
1914 | ClearPageUptodate(page); | ||
1915 | } | ||
1913 | return err; | 1916 | return err; |
1914 | } | 1917 | } |
1918 | EXPORT_SYMBOL(block_prepare_write); | ||
1915 | 1919 | ||
1916 | static int __block_commit_write(struct inode *inode, struct page *page, | 1920 | static int __block_commit_write(struct inode *inode, struct page *page, |
1917 | unsigned from, unsigned to) | 1921 | unsigned from, unsigned to) |
@@ -1948,90 +1952,41 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1948 | return 0; | 1952 | return 0; |
1949 | } | 1953 | } |
1950 | 1954 | ||
1951 | /* | 1955 | int __block_write_begin(struct page *page, loff_t pos, unsigned len, |
1952 | * Filesystems implementing the new truncate sequence should use the | 1956 | get_block_t *get_block) |
1953 | * _newtrunc postfix variant which won't incorrectly call vmtruncate. | ||
1954 | * The filesystem needs to handle block truncation upon failure. | ||
1955 | */ | ||
1956 | int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, | ||
1957 | loff_t pos, unsigned len, unsigned flags, | ||
1958 | struct page **pagep, void **fsdata, | ||
1959 | get_block_t *get_block) | ||
1960 | { | 1957 | { |
1961 | struct inode *inode = mapping->host; | 1958 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); |
1962 | int status = 0; | ||
1963 | struct page *page; | ||
1964 | pgoff_t index; | ||
1965 | unsigned start, end; | ||
1966 | int ownpage = 0; | ||
1967 | |||
1968 | index = pos >> PAGE_CACHE_SHIFT; | ||
1969 | start = pos & (PAGE_CACHE_SIZE - 1); | ||
1970 | end = start + len; | ||
1971 | |||
1972 | page = *pagep; | ||
1973 | if (page == NULL) { | ||
1974 | ownpage = 1; | ||
1975 | page = grab_cache_page_write_begin(mapping, index, flags); | ||
1976 | if (!page) { | ||
1977 | status = -ENOMEM; | ||
1978 | goto out; | ||
1979 | } | ||
1980 | *pagep = page; | ||
1981 | } else | ||
1982 | BUG_ON(!PageLocked(page)); | ||
1983 | |||
1984 | status = __block_prepare_write(inode, page, start, end, get_block); | ||
1985 | if (unlikely(status)) { | ||
1986 | ClearPageUptodate(page); | ||
1987 | 1959 | ||
1988 | if (ownpage) { | 1960 | return block_prepare_write(page, start, start + len, get_block); |
1989 | unlock_page(page); | ||
1990 | page_cache_release(page); | ||
1991 | *pagep = NULL; | ||
1992 | } | ||
1993 | } | ||
1994 | |||
1995 | out: | ||
1996 | return status; | ||
1997 | } | 1961 | } |
1998 | EXPORT_SYMBOL(block_write_begin_newtrunc); | 1962 | EXPORT_SYMBOL(__block_write_begin); |
1999 | 1963 | ||
2000 | /* | 1964 | /* |
2001 | * block_write_begin takes care of the basic task of block allocation and | 1965 | * block_write_begin takes care of the basic task of block allocation and |
2002 | * bringing partial write blocks uptodate first. | 1966 | * bringing partial write blocks uptodate first. |
2003 | * | 1967 | * |
2004 | * If *pagep is not NULL, then block_write_begin uses the locked page | 1968 | * The filesystem needs to handle block truncation upon failure. |
2005 | * at *pagep rather than allocating its own. In this case, the page will | ||
2006 | * not be unlocked or deallocated on failure. | ||
2007 | */ | 1969 | */ |
2008 | int block_write_begin(struct file *file, struct address_space *mapping, | 1970 | int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, |
2009 | loff_t pos, unsigned len, unsigned flags, | 1971 | unsigned flags, struct page **pagep, get_block_t *get_block) |
2010 | struct page **pagep, void **fsdata, | ||
2011 | get_block_t *get_block) | ||
2012 | { | 1972 | { |
2013 | int ret; | 1973 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
1974 | struct page *page; | ||
1975 | int status; | ||
2014 | 1976 | ||
2015 | ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, | 1977 | page = grab_cache_page_write_begin(mapping, index, flags); |
2016 | pagep, fsdata, get_block); | 1978 | if (!page) |
1979 | return -ENOMEM; | ||
2017 | 1980 | ||
2018 | /* | 1981 | status = __block_write_begin(page, pos, len, get_block); |
2019 | * prepare_write() may have instantiated a few blocks | 1982 | if (unlikely(status)) { |
2020 | * outside i_size. Trim these off again. Don't need | 1983 | unlock_page(page); |
2021 | * i_size_read because we hold i_mutex. | 1984 | page_cache_release(page); |
2022 | * | 1985 | page = NULL; |
2023 | * Filesystems which pass down their own page also cannot | ||
2024 | * call into vmtruncate here because it would lead to lock | ||
2025 | * inversion problems (*pagep is locked). This is a further | ||
2026 | * example of where the old truncate sequence is inadequate. | ||
2027 | */ | ||
2028 | if (unlikely(ret) && *pagep == NULL) { | ||
2029 | loff_t isize = mapping->host->i_size; | ||
2030 | if (pos + len > isize) | ||
2031 | vmtruncate(mapping->host, isize); | ||
2032 | } | 1986 | } |
2033 | 1987 | ||
2034 | return ret; | 1988 | *pagep = page; |
1989 | return status; | ||
2035 | } | 1990 | } |
2036 | EXPORT_SYMBOL(block_write_begin); | 1991 | EXPORT_SYMBOL(block_write_begin); |
2037 | 1992 | ||
@@ -2351,7 +2306,7 @@ out: | |||
2351 | * For moronic filesystems that do not allow holes in file. | 2306 | * For moronic filesystems that do not allow holes in file. |
2352 | * We may have to extend the file. | 2307 | * We may have to extend the file. |
2353 | */ | 2308 | */ |
2354 | int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, | 2309 | int cont_write_begin(struct file *file, struct address_space *mapping, |
2355 | loff_t pos, unsigned len, unsigned flags, | 2310 | loff_t pos, unsigned len, unsigned flags, |
2356 | struct page **pagep, void **fsdata, | 2311 | struct page **pagep, void **fsdata, |
2357 | get_block_t *get_block, loff_t *bytes) | 2312 | get_block_t *get_block, loff_t *bytes) |
@@ -2363,7 +2318,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, | |||
2363 | 2318 | ||
2364 | err = cont_expand_zero(file, mapping, pos, bytes); | 2319 | err = cont_expand_zero(file, mapping, pos, bytes); |
2365 | if (err) | 2320 | if (err) |
2366 | goto out; | 2321 | return err; |
2367 | 2322 | ||
2368 | zerofrom = *bytes & ~PAGE_CACHE_MASK; | 2323 | zerofrom = *bytes & ~PAGE_CACHE_MASK; |
2369 | if (pos+len > *bytes && zerofrom & (blocksize-1)) { | 2324 | if (pos+len > *bytes && zerofrom & (blocksize-1)) { |
@@ -2371,44 +2326,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, | |||
2371 | (*bytes)++; | 2326 | (*bytes)++; |
2372 | } | 2327 | } |
2373 | 2328 | ||
2374 | *pagep = NULL; | 2329 | return block_write_begin(mapping, pos, len, flags, pagep, get_block); |
2375 | err = block_write_begin_newtrunc(file, mapping, pos, len, | ||
2376 | flags, pagep, fsdata, get_block); | ||
2377 | out: | ||
2378 | return err; | ||
2379 | } | ||
2380 | EXPORT_SYMBOL(cont_write_begin_newtrunc); | ||
2381 | |||
2382 | int cont_write_begin(struct file *file, struct address_space *mapping, | ||
2383 | loff_t pos, unsigned len, unsigned flags, | ||
2384 | struct page **pagep, void **fsdata, | ||
2385 | get_block_t *get_block, loff_t *bytes) | ||
2386 | { | ||
2387 | int ret; | ||
2388 | |||
2389 | ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags, | ||
2390 | pagep, fsdata, get_block, bytes); | ||
2391 | if (unlikely(ret)) { | ||
2392 | loff_t isize = mapping->host->i_size; | ||
2393 | if (pos + len > isize) | ||
2394 | vmtruncate(mapping->host, isize); | ||
2395 | } | ||
2396 | |||
2397 | return ret; | ||
2398 | } | 2330 | } |
2399 | EXPORT_SYMBOL(cont_write_begin); | 2331 | EXPORT_SYMBOL(cont_write_begin); |
2400 | 2332 | ||
2401 | int block_prepare_write(struct page *page, unsigned from, unsigned to, | ||
2402 | get_block_t *get_block) | ||
2403 | { | ||
2404 | struct inode *inode = page->mapping->host; | ||
2405 | int err = __block_prepare_write(inode, page, from, to, get_block); | ||
2406 | if (err) | ||
2407 | ClearPageUptodate(page); | ||
2408 | return err; | ||
2409 | } | ||
2410 | EXPORT_SYMBOL(block_prepare_write); | ||
2411 | |||
2412 | int block_commit_write(struct page *page, unsigned from, unsigned to) | 2333 | int block_commit_write(struct page *page, unsigned from, unsigned to) |
2413 | { | 2334 | { |
2414 | struct inode *inode = page->mapping->host; | 2335 | struct inode *inode = page->mapping->host; |
@@ -2510,11 +2431,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head) | |||
2510 | } | 2431 | } |
2511 | 2432 | ||
2512 | /* | 2433 | /* |
2513 | * Filesystems implementing the new truncate sequence should use the | 2434 | * On entry, the page is fully not uptodate. |
2514 | * _newtrunc postfix variant which won't incorrectly call vmtruncate. | 2435 | * On exit the page is fully uptodate in the areas outside (from,to) |
2515 | * The filesystem needs to handle block truncation upon failure. | 2436 | * The filesystem needs to handle block truncation upon failure. |
2516 | */ | 2437 | */ |
2517 | int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, | 2438 | int nobh_write_begin(struct address_space *mapping, |
2518 | loff_t pos, unsigned len, unsigned flags, | 2439 | loff_t pos, unsigned len, unsigned flags, |
2519 | struct page **pagep, void **fsdata, | 2440 | struct page **pagep, void **fsdata, |
2520 | get_block_t *get_block) | 2441 | get_block_t *get_block) |
@@ -2547,8 +2468,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, | |||
2547 | unlock_page(page); | 2468 | unlock_page(page); |
2548 | page_cache_release(page); | 2469 | page_cache_release(page); |
2549 | *pagep = NULL; | 2470 | *pagep = NULL; |
2550 | return block_write_begin_newtrunc(file, mapping, pos, len, | 2471 | return block_write_begin(mapping, pos, len, flags, pagep, |
2551 | flags, pagep, fsdata, get_block); | 2472 | get_block); |
2552 | } | 2473 | } |
2553 | 2474 | ||
2554 | if (PageMappedToDisk(page)) | 2475 | if (PageMappedToDisk(page)) |
@@ -2654,35 +2575,6 @@ out_release: | |||
2654 | 2575 | ||
2655 | return ret; | 2576 | return ret; |
2656 | } | 2577 | } |
2657 | EXPORT_SYMBOL(nobh_write_begin_newtrunc); | ||
2658 | |||
2659 | /* | ||
2660 | * On entry, the page is fully not uptodate. | ||
2661 | * On exit the page is fully uptodate in the areas outside (from,to) | ||
2662 | */ | ||
2663 | int nobh_write_begin(struct file *file, struct address_space *mapping, | ||
2664 | loff_t pos, unsigned len, unsigned flags, | ||
2665 | struct page **pagep, void **fsdata, | ||
2666 | get_block_t *get_block) | ||
2667 | { | ||
2668 | int ret; | ||
2669 | |||
2670 | ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, | ||
2671 | pagep, fsdata, get_block); | ||
2672 | |||
2673 | /* | ||
2674 | * prepare_write() may have instantiated a few blocks | ||
2675 | * outside i_size. Trim these off again. Don't need | ||
2676 | * i_size_read because we hold i_mutex. | ||
2677 | */ | ||
2678 | if (unlikely(ret)) { | ||
2679 | loff_t isize = mapping->host->i_size; | ||
2680 | if (pos + len > isize) | ||
2681 | vmtruncate(mapping->host, isize); | ||
2682 | } | ||
2683 | |||
2684 | return ret; | ||
2685 | } | ||
2686 | EXPORT_SYMBOL(nobh_write_begin); | 2578 | EXPORT_SYMBOL(nobh_write_begin); |
2687 | 2579 | ||
2688 | int nobh_write_end(struct file *file, struct address_space *mapping, | 2580 | int nobh_write_end(struct file *file, struct address_space *mapping, |
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 2906077ac798..a2603e7c0bb5 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c | |||
@@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) | |||
146 | goto error_unsupported; | 146 | goto error_unsupported; |
147 | 147 | ||
148 | /* get the cache size and blocksize */ | 148 | /* get the cache size and blocksize */ |
149 | ret = vfs_statfs(root, &stats); | 149 | ret = vfs_statfs(&path, &stats); |
150 | if (ret < 0) | 150 | if (ret < 0) |
151 | goto error_unsupported; | 151 | goto error_unsupported; |
152 | 152 | ||
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index c2413561ea75..24eb0d37241a 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c | |||
@@ -683,6 +683,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache, | |||
683 | unsigned fnr, unsigned bnr) | 683 | unsigned fnr, unsigned bnr) |
684 | { | 684 | { |
685 | struct kstatfs stats; | 685 | struct kstatfs stats; |
686 | struct path path = { | ||
687 | .mnt = cache->mnt, | ||
688 | .dentry = cache->mnt->mnt_root, | ||
689 | }; | ||
686 | int ret; | 690 | int ret; |
687 | 691 | ||
688 | //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", | 692 | //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", |
@@ -697,7 +701,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache, | |||
697 | /* find out how many pages of blockdev are available */ | 701 | /* find out how many pages of blockdev are available */ |
698 | memset(&stats, 0, sizeof(stats)); | 702 | memset(&stats, 0, sizeof(stats)); |
699 | 703 | ||
700 | ret = vfs_statfs(cache->mnt->mnt_root, &stats); | 704 | ret = vfs_statfs(&path, &stats); |
701 | if (ret < 0) { | 705 | if (ret < 0) { |
702 | if (ret == -EIO) | 706 | if (ret == -EIO) |
703 | cachefiles_io_error(cache, "statfs failed"); | 707 | cachefiles_io_error(cache, "statfs failed"); |
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index f4a7840bf42c..42c7fafc8bfe 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
@@ -37,9 +37,9 @@ void __cachefiles_printk_object(struct cachefiles_object *object, | |||
37 | 37 | ||
38 | printk(KERN_ERR "%sobject: OBJ%x\n", | 38 | printk(KERN_ERR "%sobject: OBJ%x\n", |
39 | prefix, object->fscache.debug_id); | 39 | prefix, object->fscache.debug_id); |
40 | printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n", | 40 | printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", |
41 | prefix, fscache_object_states[object->fscache.state], | 41 | prefix, fscache_object_states[object->fscache.state], |
42 | object->fscache.flags, object->fscache.work.flags, | 42 | object->fscache.flags, work_busy(&object->fscache.work), |
43 | object->fscache.events, | 43 | object->fscache.events, |
44 | object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); | 44 | object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); |
45 | printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", | 45 | printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", |
@@ -212,7 +212,7 @@ wait_for_old_object: | |||
212 | 212 | ||
213 | /* if the object we're waiting for is queued for processing, | 213 | /* if the object we're waiting for is queued for processing, |
214 | * then just put ourselves on the queue behind it */ | 214 | * then just put ourselves on the queue behind it */ |
215 | if (slow_work_is_queued(&xobject->fscache.work)) { | 215 | if (work_pending(&xobject->fscache.work)) { |
216 | _debug("queue OBJ%x behind OBJ%x immediately", | 216 | _debug("queue OBJ%x behind OBJ%x immediately", |
217 | object->fscache.debug_id, | 217 | object->fscache.debug_id, |
218 | xobject->fscache.debug_id); | 218 | xobject->fscache.debug_id); |
@@ -220,8 +220,7 @@ wait_for_old_object: | |||
220 | } | 220 | } |
221 | 221 | ||
222 | /* otherwise we sleep until either the object we're waiting for | 222 | /* otherwise we sleep until either the object we're waiting for |
223 | * is done, or the slow-work facility wants the thread back to | 223 | * is done, or the fscache_object is congested */ |
224 | * do other work */ | ||
225 | wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); | 224 | wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE); |
226 | init_wait(&wait); | 225 | init_wait(&wait); |
227 | requeue = false; | 226 | requeue = false; |
@@ -229,8 +228,8 @@ wait_for_old_object: | |||
229 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); | 228 | prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); |
230 | if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) | 229 | if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) |
231 | break; | 230 | break; |
232 | requeue = slow_work_sleep_till_thread_needed( | 231 | |
233 | &object->fscache.work, &timeout); | 232 | requeue = fscache_object_sleep_till_congested(&timeout); |
234 | } while (timeout > 0 && !requeue); | 233 | } while (timeout > 0 && !requeue); |
235 | finish_wait(wq, &wait); | 234 | finish_wait(wq, &wait); |
236 | 235 | ||
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 0f0d41fbb03f..0e3c0924cc3a 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -422,7 +422,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | |||
422 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; | 422 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; |
423 | 423 | ||
424 | op->op.flags &= FSCACHE_OP_KEEP_FLAGS; | 424 | op->op.flags &= FSCACHE_OP_KEEP_FLAGS; |
425 | op->op.flags |= FSCACHE_OP_FAST; | 425 | op->op.flags |= FSCACHE_OP_ASYNC; |
426 | op->op.processor = cachefiles_read_copier; | 426 | op->op.processor = cachefiles_read_copier; |
427 | 427 | ||
428 | pagevec_init(&pagevec, 0); | 428 | pagevec_init(&pagevec, 0); |
@@ -729,7 +729,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, | |||
729 | pagevec_init(&pagevec, 0); | 729 | pagevec_init(&pagevec, 0); |
730 | 730 | ||
731 | op->op.flags &= FSCACHE_OP_KEEP_FLAGS; | 731 | op->op.flags &= FSCACHE_OP_KEEP_FLAGS; |
732 | op->op.flags |= FSCACHE_OP_FAST; | 732 | op->op.flags |= FSCACHE_OP_ASYNC; |
733 | op->op.processor = cachefiles_read_copier; | 733 | op->op.processor = cachefiles_read_copier; |
734 | 734 | ||
735 | INIT_LIST_HEAD(&backpages); | 735 | INIT_LIST_HEAD(&backpages); |
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 6a660e610be8..278e1172600d 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -6,7 +6,7 @@ ifneq ($(KERNELRELEASE),) | |||
6 | 6 | ||
7 | obj-$(CONFIG_CEPH_FS) += ceph.o | 7 | obj-$(CONFIG_CEPH_FS) += ceph.o |
8 | 8 | ||
9 | ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | messenger.o msgpool.o buffer.o pagelist.o \ |
12 | mds_client.o mdsmap.o \ | 12 | mds_client.o mdsmap.o \ |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d9c60b84949a..5598a0d02295 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -309,7 +309,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); | 309 | zero_user_segment(page, s, PAGE_CACHE_SIZE); |
310 | } | 310 | } |
311 | 311 | ||
312 | if (add_to_page_cache_lru(page, mapping, page->index, GFP_NOFS)) { | 312 | if (add_to_page_cache_lru(page, mapping, page->index, |
313 | GFP_NOFS)) { | ||
313 | page_cache_release(page); | 314 | page_cache_release(page); |
314 | dout("readpages %p add_to_page_cache failed %p\n", | 315 | dout("readpages %p add_to_page_cache failed %p\n", |
315 | inode, page); | 316 | inode, page); |
@@ -552,7 +553,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
552 | * page truncation thread, possibly losing some data that | 553 | * page truncation thread, possibly losing some data that |
553 | * raced its way in | 554 | * raced its way in |
554 | */ | 555 | */ |
555 | if ((issued & CEPH_CAP_FILE_CACHE) == 0) | 556 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
556 | generic_error_remove_page(inode->i_mapping, page); | 557 | generic_error_remove_page(inode->i_mapping, page); |
557 | 558 | ||
558 | unlock_page(page); | 559 | unlock_page(page); |
@@ -797,9 +798,12 @@ get_more_pages: | |||
797 | dout("%p will write page %p idx %lu\n", | 798 | dout("%p will write page %p idx %lu\n", |
798 | inode, page, page->index); | 799 | inode, page, page->index); |
799 | 800 | ||
800 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 801 | writeback_stat = |
801 | if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) { | 802 | atomic_long_inc_return(&client->writeback_count); |
802 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 803 | if (writeback_stat > CONGESTION_ON_THRESH( |
804 | client->mount_args->congestion_kb)) { | ||
805 | set_bdi_congested(&client->backing_dev_info, | ||
806 | BLK_RW_ASYNC); | ||
803 | } | 807 | } |
804 | 808 | ||
805 | set_page_writeback(page); | 809 | set_page_writeback(page); |
@@ -1036,7 +1040,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
1036 | *pagep = page; | 1040 | *pagep = page; |
1037 | 1041 | ||
1038 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | 1042 | dout("write_begin file %p inode %p page %p %d~%d\n", file, |
1039 | inode, page, (int)pos, (int)len); | 1043 | inode, page, (int)pos, (int)len); |
1040 | 1044 | ||
1041 | r = ceph_update_writeable_page(file, pos, len, page); | 1045 | r = ceph_update_writeable_page(file, pos, len, page); |
1042 | } while (r == -EAGAIN); | 1046 | } while (r == -EAGAIN); |
diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c index 67b2c030924b..eb2a666b0be7 100644 --- a/fs/ceph/armor.c +++ b/fs/ceph/armor.c | |||
@@ -1,11 +1,15 @@ | |||
1 | 1 | ||
2 | #include <linux/errno.h> | 2 | #include <linux/errno.h> |
3 | 3 | ||
4 | int ceph_armor(char *dst, const char *src, const char *end); | ||
5 | int ceph_unarmor(char *dst, const char *src, const char *end); | ||
6 | |||
4 | /* | 7 | /* |
5 | * base64 encode/decode. | 8 | * base64 encode/decode. |
6 | */ | 9 | */ |
7 | 10 | ||
8 | const char *pem_key = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | 11 | static const char *pem_key = |
12 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | ||
9 | 13 | ||
10 | static int encode_bits(int c) | 14 | static int encode_bits(int c) |
11 | { | 15 | { |
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c index 89490beaf537..6d2e30600627 100644 --- a/fs/ceph/auth.c +++ b/fs/ceph/auth.c | |||
@@ -20,7 +20,7 @@ static u32 supported_protocols[] = { | |||
20 | CEPH_AUTH_CEPHX | 20 | CEPH_AUTH_CEPHX |
21 | }; | 21 | }; |
22 | 22 | ||
23 | int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) | 23 | static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) |
24 | { | 24 | { |
25 | switch (protocol) { | 25 | switch (protocol) { |
26 | case CEPH_AUTH_NONE: | 26 | case CEPH_AUTH_NONE: |
@@ -133,8 +133,8 @@ bad: | |||
133 | return -ERANGE; | 133 | return -ERANGE; |
134 | } | 134 | } |
135 | 135 | ||
136 | int ceph_build_auth_request(struct ceph_auth_client *ac, | 136 | static int ceph_build_auth_request(struct ceph_auth_client *ac, |
137 | void *msg_buf, size_t msg_len) | 137 | void *msg_buf, size_t msg_len) |
138 | { | 138 | { |
139 | struct ceph_mon_request_header *monhdr = msg_buf; | 139 | struct ceph_mon_request_header *monhdr = msg_buf; |
140 | void *p = monhdr + 1; | 140 | void *p = monhdr + 1; |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 6d44053ecff1..582e0b2caf8a 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
@@ -87,8 +87,8 @@ static int ceph_x_decrypt(struct ceph_crypto_key *secret, | |||
87 | /* | 87 | /* |
88 | * get existing (or insert new) ticket handler | 88 | * get existing (or insert new) ticket handler |
89 | */ | 89 | */ |
90 | struct ceph_x_ticket_handler *get_ticket_handler(struct ceph_auth_client *ac, | 90 | static struct ceph_x_ticket_handler * |
91 | int service) | 91 | get_ticket_handler(struct ceph_auth_client *ac, int service) |
92 | { | 92 | { |
93 | struct ceph_x_ticket_handler *th; | 93 | struct ceph_x_ticket_handler *th; |
94 | struct ceph_x_info *xi = ac->private; | 94 | struct ceph_x_info *xi = ac->private; |
@@ -429,7 +429,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
429 | auth->struct_v = 1; | 429 | auth->struct_v = 1; |
430 | auth->key = 0; | 430 | auth->key = 0; |
431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) | 431 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) |
432 | auth->key ^= *u; | 432 | auth->key ^= *(__le64 *)u; |
433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", | 433 | dout(" server_challenge %llx client_challenge %llx key %llx\n", |
434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), | 434 | xi->server_challenge, le64_to_cpu(auth->client_challenge), |
435 | le64_to_cpu(auth->key)); | 435 | le64_to_cpu(auth->key)); |
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c index c67535d70aa6..cd39f17021de 100644 --- a/fs/ceph/buffer.c +++ b/fs/ceph/buffer.c | |||
@@ -47,22 +47,6 @@ void ceph_buffer_release(struct kref *kref) | |||
47 | kfree(b); | 47 | kfree(b); |
48 | } | 48 | } |
49 | 49 | ||
50 | int ceph_buffer_alloc(struct ceph_buffer *b, int len, gfp_t gfp) | ||
51 | { | ||
52 | b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN); | ||
53 | if (b->vec.iov_base) { | ||
54 | b->is_vmalloc = false; | ||
55 | } else { | ||
56 | b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); | ||
57 | b->is_vmalloc = true; | ||
58 | } | ||
59 | if (!b->vec.iov_base) | ||
60 | return -ENOMEM; | ||
61 | b->alloc_len = len; | ||
62 | b->vec.iov_len = len; | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | 50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) |
67 | { | 51 | { |
68 | size_t len; | 52 | size_t len; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b81be9a56487..7bf182b03973 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -113,58 +113,41 @@ const char *ceph_cap_string(int caps) | |||
113 | return cap_str[i]; | 113 | return cap_str[i]; |
114 | } | 114 | } |
115 | 115 | ||
116 | /* | 116 | void ceph_caps_init(struct ceph_mds_client *mdsc) |
117 | * Cap reservations | ||
118 | * | ||
119 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
120 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
121 | * memory needed to successfully process an MDS response. (If an MDS | ||
122 | * sends us cap information and we fail to process it, we will have | ||
123 | * problems due to the client and MDS being out of sync.) | ||
124 | * | ||
125 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
126 | */ | ||
127 | static spinlock_t caps_list_lock; | ||
128 | static struct list_head caps_list; /* unused (reserved or unreserved) */ | ||
129 | static int caps_total_count; /* total caps allocated */ | ||
130 | static int caps_use_count; /* in use */ | ||
131 | static int caps_reserve_count; /* unused, reserved */ | ||
132 | static int caps_avail_count; /* unused, unreserved */ | ||
133 | static int caps_min_count; /* keep at least this many (unreserved) */ | ||
134 | |||
135 | void __init ceph_caps_init(void) | ||
136 | { | 117 | { |
137 | INIT_LIST_HEAD(&caps_list); | 118 | INIT_LIST_HEAD(&mdsc->caps_list); |
138 | spin_lock_init(&caps_list_lock); | 119 | spin_lock_init(&mdsc->caps_list_lock); |
139 | } | 120 | } |
140 | 121 | ||
141 | void ceph_caps_finalize(void) | 122 | void ceph_caps_finalize(struct ceph_mds_client *mdsc) |
142 | { | 123 | { |
143 | struct ceph_cap *cap; | 124 | struct ceph_cap *cap; |
144 | 125 | ||
145 | spin_lock(&caps_list_lock); | 126 | spin_lock(&mdsc->caps_list_lock); |
146 | while (!list_empty(&caps_list)) { | 127 | while (!list_empty(&mdsc->caps_list)) { |
147 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 128 | cap = list_first_entry(&mdsc->caps_list, |
129 | struct ceph_cap, caps_item); | ||
148 | list_del(&cap->caps_item); | 130 | list_del(&cap->caps_item); |
149 | kmem_cache_free(ceph_cap_cachep, cap); | 131 | kmem_cache_free(ceph_cap_cachep, cap); |
150 | } | 132 | } |
151 | caps_total_count = 0; | 133 | mdsc->caps_total_count = 0; |
152 | caps_avail_count = 0; | 134 | mdsc->caps_avail_count = 0; |
153 | caps_use_count = 0; | 135 | mdsc->caps_use_count = 0; |
154 | caps_reserve_count = 0; | 136 | mdsc->caps_reserve_count = 0; |
155 | caps_min_count = 0; | 137 | mdsc->caps_min_count = 0; |
156 | spin_unlock(&caps_list_lock); | 138 | spin_unlock(&mdsc->caps_list_lock); |
157 | } | 139 | } |
158 | 140 | ||
159 | void ceph_adjust_min_caps(int delta) | 141 | void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta) |
160 | { | 142 | { |
161 | spin_lock(&caps_list_lock); | 143 | spin_lock(&mdsc->caps_list_lock); |
162 | caps_min_count += delta; | 144 | mdsc->caps_min_count += delta; |
163 | BUG_ON(caps_min_count < 0); | 145 | BUG_ON(mdsc->caps_min_count < 0); |
164 | spin_unlock(&caps_list_lock); | 146 | spin_unlock(&mdsc->caps_list_lock); |
165 | } | 147 | } |
166 | 148 | ||
167 | int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | 149 | int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
150 | struct ceph_cap_reservation *ctx, int need) | ||
168 | { | 151 | { |
169 | int i; | 152 | int i; |
170 | struct ceph_cap *cap; | 153 | struct ceph_cap *cap; |
@@ -176,16 +159,17 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
176 | dout("reserve caps ctx=%p need=%d\n", ctx, need); | 159 | dout("reserve caps ctx=%p need=%d\n", ctx, need); |
177 | 160 | ||
178 | /* first reserve any caps that are already allocated */ | 161 | /* first reserve any caps that are already allocated */ |
179 | spin_lock(&caps_list_lock); | 162 | spin_lock(&mdsc->caps_list_lock); |
180 | if (caps_avail_count >= need) | 163 | if (mdsc->caps_avail_count >= need) |
181 | have = need; | 164 | have = need; |
182 | else | 165 | else |
183 | have = caps_avail_count; | 166 | have = mdsc->caps_avail_count; |
184 | caps_avail_count -= have; | 167 | mdsc->caps_avail_count -= have; |
185 | caps_reserve_count += have; | 168 | mdsc->caps_reserve_count += have; |
186 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 169 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
187 | caps_avail_count); | 170 | mdsc->caps_reserve_count + |
188 | spin_unlock(&caps_list_lock); | 171 | mdsc->caps_avail_count); |
172 | spin_unlock(&mdsc->caps_list_lock); | ||
189 | 173 | ||
190 | for (i = have; i < need; i++) { | 174 | for (i = have; i < need; i++) { |
191 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 175 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
@@ -198,19 +182,20 @@ int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need) | |||
198 | } | 182 | } |
199 | BUG_ON(have + alloc != need); | 183 | BUG_ON(have + alloc != need); |
200 | 184 | ||
201 | spin_lock(&caps_list_lock); | 185 | spin_lock(&mdsc->caps_list_lock); |
202 | caps_total_count += alloc; | 186 | mdsc->caps_total_count += alloc; |
203 | caps_reserve_count += alloc; | 187 | mdsc->caps_reserve_count += alloc; |
204 | list_splice(&newcaps, &caps_list); | 188 | list_splice(&newcaps, &mdsc->caps_list); |
205 | 189 | ||
206 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 190 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
207 | caps_avail_count); | 191 | mdsc->caps_reserve_count + |
208 | spin_unlock(&caps_list_lock); | 192 | mdsc->caps_avail_count); |
193 | spin_unlock(&mdsc->caps_list_lock); | ||
209 | 194 | ||
210 | ctx->count = need; | 195 | ctx->count = need; |
211 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", | 196 | dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n", |
212 | ctx, caps_total_count, caps_use_count, caps_reserve_count, | 197 | ctx, mdsc->caps_total_count, mdsc->caps_use_count, |
213 | caps_avail_count); | 198 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
214 | return 0; | 199 | return 0; |
215 | 200 | ||
216 | out_alloc_count: | 201 | out_alloc_count: |
@@ -220,26 +205,29 @@ out_alloc_count: | |||
220 | return ret; | 205 | return ret; |
221 | } | 206 | } |
222 | 207 | ||
223 | int ceph_unreserve_caps(struct ceph_cap_reservation *ctx) | 208 | int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
209 | struct ceph_cap_reservation *ctx) | ||
224 | { | 210 | { |
225 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); | 211 | dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count); |
226 | if (ctx->count) { | 212 | if (ctx->count) { |
227 | spin_lock(&caps_list_lock); | 213 | spin_lock(&mdsc->caps_list_lock); |
228 | BUG_ON(caps_reserve_count < ctx->count); | 214 | BUG_ON(mdsc->caps_reserve_count < ctx->count); |
229 | caps_reserve_count -= ctx->count; | 215 | mdsc->caps_reserve_count -= ctx->count; |
230 | caps_avail_count += ctx->count; | 216 | mdsc->caps_avail_count += ctx->count; |
231 | ctx->count = 0; | 217 | ctx->count = 0; |
232 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", | 218 | dout("unreserve caps %d = %d used + %d resv + %d avail\n", |
233 | caps_total_count, caps_use_count, caps_reserve_count, | 219 | mdsc->caps_total_count, mdsc->caps_use_count, |
234 | caps_avail_count); | 220 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
235 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 221 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
236 | caps_avail_count); | 222 | mdsc->caps_reserve_count + |
237 | spin_unlock(&caps_list_lock); | 223 | mdsc->caps_avail_count); |
224 | spin_unlock(&mdsc->caps_list_lock); | ||
238 | } | 225 | } |
239 | return 0; | 226 | return 0; |
240 | } | 227 | } |
241 | 228 | ||
242 | static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | 229 | static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, |
230 | struct ceph_cap_reservation *ctx) | ||
243 | { | 231 | { |
244 | struct ceph_cap *cap = NULL; | 232 | struct ceph_cap *cap = NULL; |
245 | 233 | ||
@@ -247,71 +235,74 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx) | |||
247 | if (!ctx) { | 235 | if (!ctx) { |
248 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); | 236 | cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS); |
249 | if (cap) { | 237 | if (cap) { |
250 | caps_use_count++; | 238 | mdsc->caps_use_count++; |
251 | caps_total_count++; | 239 | mdsc->caps_total_count++; |
252 | } | 240 | } |
253 | return cap; | 241 | return cap; |
254 | } | 242 | } |
255 | 243 | ||
256 | spin_lock(&caps_list_lock); | 244 | spin_lock(&mdsc->caps_list_lock); |
257 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", | 245 | dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n", |
258 | ctx, ctx->count, caps_total_count, caps_use_count, | 246 | ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count, |
259 | caps_reserve_count, caps_avail_count); | 247 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
260 | BUG_ON(!ctx->count); | 248 | BUG_ON(!ctx->count); |
261 | BUG_ON(ctx->count > caps_reserve_count); | 249 | BUG_ON(ctx->count > mdsc->caps_reserve_count); |
262 | BUG_ON(list_empty(&caps_list)); | 250 | BUG_ON(list_empty(&mdsc->caps_list)); |
263 | 251 | ||
264 | ctx->count--; | 252 | ctx->count--; |
265 | caps_reserve_count--; | 253 | mdsc->caps_reserve_count--; |
266 | caps_use_count++; | 254 | mdsc->caps_use_count++; |
267 | 255 | ||
268 | cap = list_first_entry(&caps_list, struct ceph_cap, caps_item); | 256 | cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item); |
269 | list_del(&cap->caps_item); | 257 | list_del(&cap->caps_item); |
270 | 258 | ||
271 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 259 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
272 | caps_avail_count); | 260 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
273 | spin_unlock(&caps_list_lock); | 261 | spin_unlock(&mdsc->caps_list_lock); |
274 | return cap; | 262 | return cap; |
275 | } | 263 | } |
276 | 264 | ||
277 | void ceph_put_cap(struct ceph_cap *cap) | 265 | void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) |
278 | { | 266 | { |
279 | spin_lock(&caps_list_lock); | 267 | spin_lock(&mdsc->caps_list_lock); |
280 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", | 268 | dout("put_cap %p %d = %d used + %d resv + %d avail\n", |
281 | cap, caps_total_count, caps_use_count, | 269 | cap, mdsc->caps_total_count, mdsc->caps_use_count, |
282 | caps_reserve_count, caps_avail_count); | 270 | mdsc->caps_reserve_count, mdsc->caps_avail_count); |
283 | caps_use_count--; | 271 | mdsc->caps_use_count--; |
284 | /* | 272 | /* |
285 | * Keep some preallocated caps around (ceph_min_count), to | 273 | * Keep some preallocated caps around (ceph_min_count), to |
286 | * avoid lots of free/alloc churn. | 274 | * avoid lots of free/alloc churn. |
287 | */ | 275 | */ |
288 | if (caps_avail_count >= caps_reserve_count + caps_min_count) { | 276 | if (mdsc->caps_avail_count >= mdsc->caps_reserve_count + |
289 | caps_total_count--; | 277 | mdsc->caps_min_count) { |
278 | mdsc->caps_total_count--; | ||
290 | kmem_cache_free(ceph_cap_cachep, cap); | 279 | kmem_cache_free(ceph_cap_cachep, cap); |
291 | } else { | 280 | } else { |
292 | caps_avail_count++; | 281 | mdsc->caps_avail_count++; |
293 | list_add(&cap->caps_item, &caps_list); | 282 | list_add(&cap->caps_item, &mdsc->caps_list); |
294 | } | 283 | } |
295 | 284 | ||
296 | BUG_ON(caps_total_count != caps_use_count + caps_reserve_count + | 285 | BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count + |
297 | caps_avail_count); | 286 | mdsc->caps_reserve_count + mdsc->caps_avail_count); |
298 | spin_unlock(&caps_list_lock); | 287 | spin_unlock(&mdsc->caps_list_lock); |
299 | } | 288 | } |
300 | 289 | ||
301 | void ceph_reservation_status(struct ceph_client *client, | 290 | void ceph_reservation_status(struct ceph_client *client, |
302 | int *total, int *avail, int *used, int *reserved, | 291 | int *total, int *avail, int *used, int *reserved, |
303 | int *min) | 292 | int *min) |
304 | { | 293 | { |
294 | struct ceph_mds_client *mdsc = &client->mdsc; | ||
295 | |||
305 | if (total) | 296 | if (total) |
306 | *total = caps_total_count; | 297 | *total = mdsc->caps_total_count; |
307 | if (avail) | 298 | if (avail) |
308 | *avail = caps_avail_count; | 299 | *avail = mdsc->caps_avail_count; |
309 | if (used) | 300 | if (used) |
310 | *used = caps_use_count; | 301 | *used = mdsc->caps_use_count; |
311 | if (reserved) | 302 | if (reserved) |
312 | *reserved = caps_reserve_count; | 303 | *reserved = mdsc->caps_reserve_count; |
313 | if (min) | 304 | if (min) |
314 | *min = caps_min_count; | 305 | *min = mdsc->caps_min_count; |
315 | } | 306 | } |
316 | 307 | ||
317 | /* | 308 | /* |
@@ -336,22 +327,29 @@ static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) | |||
336 | return NULL; | 327 | return NULL; |
337 | } | 328 | } |
338 | 329 | ||
330 | struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds) | ||
331 | { | ||
332 | struct ceph_cap *cap; | ||
333 | |||
334 | spin_lock(&ci->vfs_inode.i_lock); | ||
335 | cap = __get_cap_for_mds(ci, mds); | ||
336 | spin_unlock(&ci->vfs_inode.i_lock); | ||
337 | return cap; | ||
338 | } | ||
339 | |||
339 | /* | 340 | /* |
340 | * Return id of any MDS with a cap, preferably FILE_WR|WRBUFFER|EXCL, else | 341 | * Return id of any MDS with a cap, preferably FILE_WR|BUFFER|EXCL, else -1. |
341 | * -1. | ||
342 | */ | 342 | */ |
343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci, u32 *mseq) | 343 | static int __ceph_get_cap_mds(struct ceph_inode_info *ci) |
344 | { | 344 | { |
345 | struct ceph_cap *cap; | 345 | struct ceph_cap *cap; |
346 | int mds = -1; | 346 | int mds = -1; |
347 | struct rb_node *p; | 347 | struct rb_node *p; |
348 | 348 | ||
349 | /* prefer mds with WR|WRBUFFER|EXCL caps */ | 349 | /* prefer mds with WR|BUFFER|EXCL caps */ |
350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { | 350 | for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { |
351 | cap = rb_entry(p, struct ceph_cap, ci_node); | 351 | cap = rb_entry(p, struct ceph_cap, ci_node); |
352 | mds = cap->mds; | 352 | mds = cap->mds; |
353 | if (mseq) | ||
354 | *mseq = cap->mseq; | ||
355 | if (cap->issued & (CEPH_CAP_FILE_WR | | 353 | if (cap->issued & (CEPH_CAP_FILE_WR | |
356 | CEPH_CAP_FILE_BUFFER | | 354 | CEPH_CAP_FILE_BUFFER | |
357 | CEPH_CAP_FILE_EXCL)) | 355 | CEPH_CAP_FILE_EXCL)) |
@@ -364,7 +362,7 @@ int ceph_get_cap_mds(struct inode *inode) | |||
364 | { | 362 | { |
365 | int mds; | 363 | int mds; |
366 | spin_lock(&inode->i_lock); | 364 | spin_lock(&inode->i_lock); |
367 | mds = __ceph_get_cap_mds(ceph_inode(inode), NULL); | 365 | mds = __ceph_get_cap_mds(ceph_inode(inode)); |
368 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
369 | return mds; | 367 | return mds; |
370 | } | 368 | } |
@@ -483,8 +481,8 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, | |||
483 | * Each time we receive FILE_CACHE anew, we increment | 481 | * Each time we receive FILE_CACHE anew, we increment |
484 | * i_rdcache_gen. | 482 | * i_rdcache_gen. |
485 | */ | 483 | */ |
486 | if ((issued & CEPH_CAP_FILE_CACHE) && | 484 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) && |
487 | (had & CEPH_CAP_FILE_CACHE) == 0) | 485 | (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) |
488 | ci->i_rdcache_gen++; | 486 | ci->i_rdcache_gen++; |
489 | 487 | ||
490 | /* | 488 | /* |
@@ -543,7 +541,7 @@ retry: | |||
543 | new_cap = NULL; | 541 | new_cap = NULL; |
544 | } else { | 542 | } else { |
545 | spin_unlock(&inode->i_lock); | 543 | spin_unlock(&inode->i_lock); |
546 | new_cap = get_cap(caps_reservation); | 544 | new_cap = get_cap(mdsc, caps_reservation); |
547 | if (new_cap == NULL) | 545 | if (new_cap == NULL) |
548 | return -ENOMEM; | 546 | return -ENOMEM; |
549 | goto retry; | 547 | goto retry; |
@@ -588,6 +586,7 @@ retry: | |||
588 | } else { | 586 | } else { |
589 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", | 587 | pr_err("ceph_add_cap: couldn't find snap realm %llx\n", |
590 | realmino); | 588 | realmino); |
589 | WARN_ON(!realm); | ||
591 | } | 590 | } |
592 | } | 591 | } |
593 | 592 | ||
@@ -831,7 +830,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci) | |||
831 | { | 830 | { |
832 | int want = 0; | 831 | int want = 0; |
833 | int mode; | 832 | int mode; |
834 | for (mode = 0; mode < 4; mode++) | 833 | for (mode = 0; mode < CEPH_FILE_MODE_NUM; mode++) |
835 | if (ci->i_nr_by_mode[mode]) | 834 | if (ci->i_nr_by_mode[mode]) |
836 | want |= ceph_caps_for_mode(mode); | 835 | want |= ceph_caps_for_mode(mode); |
837 | return want; | 836 | return want; |
@@ -901,7 +900,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
901 | ci->i_auth_cap = NULL; | 900 | ci->i_auth_cap = NULL; |
902 | 901 | ||
903 | if (removed) | 902 | if (removed) |
904 | ceph_put_cap(cap); | 903 | ceph_put_cap(mdsc, cap); |
905 | 904 | ||
906 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { | 905 | if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { |
907 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 906 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
@@ -1197,6 +1196,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1197 | */ | 1196 | */ |
1198 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1197 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1199 | struct ceph_mds_session **psession) | 1198 | struct ceph_mds_session **psession) |
1199 | __releases(ci->vfs_inode->i_lock) | ||
1200 | __acquires(ci->vfs_inode->i_lock) | ||
1200 | { | 1201 | { |
1201 | struct inode *inode = &ci->vfs_inode; | 1202 | struct inode *inode = &ci->vfs_inode; |
1202 | int mds; | 1203 | int mds; |
@@ -1232,7 +1233,13 @@ retry: | |||
1232 | BUG_ON(capsnap->dirty == 0); | 1233 | BUG_ON(capsnap->dirty == 0); |
1233 | 1234 | ||
1234 | /* pick mds, take s_mutex */ | 1235 | /* pick mds, take s_mutex */ |
1235 | mds = __ceph_get_cap_mds(ci, &mseq); | 1236 | if (ci->i_auth_cap == NULL) { |
1237 | dout("no auth cap (migrating?), doing nothing\n"); | ||
1238 | goto out; | ||
1239 | } | ||
1240 | mds = ci->i_auth_cap->session->s_mds; | ||
1241 | mseq = ci->i_auth_cap->mseq; | ||
1242 | |||
1236 | if (session && session->s_mds != mds) { | 1243 | if (session && session->s_mds != mds) { |
1237 | dout("oops, wrong session %p mutex\n", session); | 1244 | dout("oops, wrong session %p mutex\n", session); |
1238 | mutex_unlock(&session->s_mutex); | 1245 | mutex_unlock(&session->s_mutex); |
@@ -1251,8 +1258,8 @@ retry: | |||
1251 | } | 1258 | } |
1252 | /* | 1259 | /* |
1253 | * if session == NULL, we raced against a cap | 1260 | * if session == NULL, we raced against a cap |
1254 | * deletion. retry, and we'll get a better | 1261 | * deletion or migration. retry, and we'll |
1255 | * @mds value next time. | 1262 | * get a better @mds value next time. |
1256 | */ | 1263 | */ |
1257 | spin_lock(&inode->i_lock); | 1264 | spin_lock(&inode->i_lock); |
1258 | goto retry; | 1265 | goto retry; |
@@ -1290,6 +1297,7 @@ retry: | |||
1290 | list_del_init(&ci->i_snap_flush_item); | 1297 | list_del_init(&ci->i_snap_flush_item); |
1291 | spin_unlock(&mdsc->snap_flush_lock); | 1298 | spin_unlock(&mdsc->snap_flush_lock); |
1292 | 1299 | ||
1300 | out: | ||
1293 | if (psession) | 1301 | if (psession) |
1294 | *psession = session; | 1302 | *psession = session; |
1295 | else if (session) { | 1303 | else if (session) { |
@@ -1435,7 +1443,6 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1435 | */ | 1443 | */ |
1436 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1444 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1437 | struct ceph_mds_session *session) | 1445 | struct ceph_mds_session *session) |
1438 | __releases(session->s_mutex) | ||
1439 | { | 1446 | { |
1440 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1447 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); |
1441 | struct ceph_mds_client *mdsc = &client->mdsc; | 1448 | struct ceph_mds_client *mdsc = &client->mdsc; |
@@ -1510,11 +1517,13 @@ retry_locked: | |||
1510 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1517 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
1511 | ci->i_rdcache_gen && /* may have cached pages */ | 1518 | ci->i_rdcache_gen && /* may have cached pages */ |
1512 | (file_wanted == 0 || /* no open files */ | 1519 | (file_wanted == 0 || /* no open files */ |
1513 | (revoking & CEPH_CAP_FILE_CACHE)) && /* or revoking cache */ | 1520 | (revoking & (CEPH_CAP_FILE_CACHE| |
1521 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | ||
1514 | !tried_invalidate) { | 1522 | !tried_invalidate) { |
1515 | dout("check_caps trying to invalidate on %p\n", inode); | 1523 | dout("check_caps trying to invalidate on %p\n", inode); |
1516 | if (try_nonblocking_invalidate(inode) < 0) { | 1524 | if (try_nonblocking_invalidate(inode) < 0) { |
1517 | if (revoking & CEPH_CAP_FILE_CACHE) { | 1525 | if (revoking & (CEPH_CAP_FILE_CACHE| |
1526 | CEPH_CAP_FILE_LAZYIO)) { | ||
1518 | dout("check_caps queuing invalidate\n"); | 1527 | dout("check_caps queuing invalidate\n"); |
1519 | queue_invalidate = 1; | 1528 | queue_invalidate = 1; |
1520 | ci->i_rdcache_revoking = ci->i_rdcache_gen; | 1529 | ci->i_rdcache_revoking = ci->i_rdcache_gen; |
@@ -2250,8 +2259,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2250 | struct ceph_mds_session *session, | 2259 | struct ceph_mds_session *session, |
2251 | struct ceph_cap *cap, | 2260 | struct ceph_cap *cap, |
2252 | struct ceph_buffer *xattr_buf) | 2261 | struct ceph_buffer *xattr_buf) |
2253 | __releases(inode->i_lock) | 2262 | __releases(inode->i_lock) |
2254 | __releases(session->s_mutex) | ||
2255 | { | 2263 | { |
2256 | struct ceph_inode_info *ci = ceph_inode(inode); | 2264 | struct ceph_inode_info *ci = ceph_inode(inode); |
2257 | int mds = session->s_mds; | 2265 | int mds = session->s_mds; |
@@ -2278,6 +2286,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2278 | * will invalidate _after_ writeback.) | 2286 | * will invalidate _after_ writeback.) |
2279 | */ | 2287 | */ |
2280 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && | 2288 | if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) && |
2289 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
2281 | !ci->i_wrbuffer_ref) { | 2290 | !ci->i_wrbuffer_ref) { |
2282 | if (try_nonblocking_invalidate(inode) == 0) { | 2291 | if (try_nonblocking_invalidate(inode) == 0) { |
2283 | revoked_rdcache = 1; | 2292 | revoked_rdcache = 1; |
@@ -2369,15 +2378,22 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2369 | 2378 | ||
2370 | /* revocation, grant, or no-op? */ | 2379 | /* revocation, grant, or no-op? */ |
2371 | if (cap->issued & ~newcaps) { | 2380 | if (cap->issued & ~newcaps) { |
2372 | dout("revocation: %s -> %s\n", ceph_cap_string(cap->issued), | 2381 | int revoking = cap->issued & ~newcaps; |
2373 | ceph_cap_string(newcaps)); | 2382 | |
2374 | if ((used & ~newcaps) & CEPH_CAP_FILE_BUFFER) | 2383 | dout("revocation: %s -> %s (revoking %s)\n", |
2375 | writeback = 1; /* will delay ack */ | 2384 | ceph_cap_string(cap->issued), |
2376 | else if (dirty & ~newcaps) | 2385 | ceph_cap_string(newcaps), |
2377 | check_caps = 1; /* initiate writeback in check_caps */ | 2386 | ceph_cap_string(revoking)); |
2378 | else if (((used & ~newcaps) & CEPH_CAP_FILE_CACHE) == 0 || | 2387 | if (revoking & used & CEPH_CAP_FILE_BUFFER) |
2379 | revoked_rdcache) | 2388 | writeback = 1; /* initiate writeback; will delay ack */ |
2380 | check_caps = 2; /* send revoke ack in check_caps */ | 2389 | else if (revoking == CEPH_CAP_FILE_CACHE && |
2390 | (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && | ||
2391 | queue_invalidate) | ||
2392 | ; /* do nothing yet, invalidation will be queued */ | ||
2393 | else if (cap == ci->i_auth_cap) | ||
2394 | check_caps = 1; /* check auth cap only */ | ||
2395 | else | ||
2396 | check_caps = 2; /* check all caps */ | ||
2381 | cap->issued = newcaps; | 2397 | cap->issued = newcaps; |
2382 | cap->implemented |= newcaps; | 2398 | cap->implemented |= newcaps; |
2383 | } else if (cap->issued == newcaps) { | 2399 | } else if (cap->issued == newcaps) { |
@@ -2568,7 +2584,8 @@ static void handle_cap_trunc(struct inode *inode, | |||
2568 | * caller holds s_mutex | 2584 | * caller holds s_mutex |
2569 | */ | 2585 | */ |
2570 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | 2586 | static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, |
2571 | struct ceph_mds_session *session) | 2587 | struct ceph_mds_session *session, |
2588 | int *open_target_sessions) | ||
2572 | { | 2589 | { |
2573 | struct ceph_inode_info *ci = ceph_inode(inode); | 2590 | struct ceph_inode_info *ci = ceph_inode(inode); |
2574 | int mds = session->s_mds; | 2591 | int mds = session->s_mds; |
@@ -2600,6 +2617,12 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2600 | ci->i_cap_exporting_mds = mds; | 2617 | ci->i_cap_exporting_mds = mds; |
2601 | ci->i_cap_exporting_mseq = mseq; | 2618 | ci->i_cap_exporting_mseq = mseq; |
2602 | ci->i_cap_exporting_issued = cap->issued; | 2619 | ci->i_cap_exporting_issued = cap->issued; |
2620 | |||
2621 | /* | ||
2622 | * make sure we have open sessions with all possible | ||
2623 | * export targets, so that we get the matching IMPORT | ||
2624 | */ | ||
2625 | *open_target_sessions = 1; | ||
2603 | } | 2626 | } |
2604 | __ceph_remove_cap(cap); | 2627 | __ceph_remove_cap(cap); |
2605 | } | 2628 | } |
@@ -2675,6 +2698,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2675 | u64 size, max_size; | 2698 | u64 size, max_size; |
2676 | u64 tid; | 2699 | u64 tid; |
2677 | void *snaptrace; | 2700 | void *snaptrace; |
2701 | size_t snaptrace_len; | ||
2702 | void *flock; | ||
2703 | u32 flock_len; | ||
2704 | int open_target_sessions = 0; | ||
2678 | 2705 | ||
2679 | dout("handle_caps from mds%d\n", mds); | 2706 | dout("handle_caps from mds%d\n", mds); |
2680 | 2707 | ||
@@ -2683,7 +2710,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2683 | if (msg->front.iov_len < sizeof(*h)) | 2710 | if (msg->front.iov_len < sizeof(*h)) |
2684 | goto bad; | 2711 | goto bad; |
2685 | h = msg->front.iov_base; | 2712 | h = msg->front.iov_base; |
2686 | snaptrace = h + 1; | ||
2687 | op = le32_to_cpu(h->op); | 2713 | op = le32_to_cpu(h->op); |
2688 | vino.ino = le64_to_cpu(h->ino); | 2714 | vino.ino = le64_to_cpu(h->ino); |
2689 | vino.snap = CEPH_NOSNAP; | 2715 | vino.snap = CEPH_NOSNAP; |
@@ -2693,6 +2719,21 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2693 | size = le64_to_cpu(h->size); | 2719 | size = le64_to_cpu(h->size); |
2694 | max_size = le64_to_cpu(h->max_size); | 2720 | max_size = le64_to_cpu(h->max_size); |
2695 | 2721 | ||
2722 | snaptrace = h + 1; | ||
2723 | snaptrace_len = le32_to_cpu(h->snap_trace_len); | ||
2724 | |||
2725 | if (le16_to_cpu(msg->hdr.version) >= 2) { | ||
2726 | void *p, *end; | ||
2727 | |||
2728 | p = snaptrace + snaptrace_len; | ||
2729 | end = msg->front.iov_base + msg->front.iov_len; | ||
2730 | ceph_decode_32_safe(&p, end, flock_len, bad); | ||
2731 | flock = p; | ||
2732 | } else { | ||
2733 | flock = NULL; | ||
2734 | flock_len = 0; | ||
2735 | } | ||
2736 | |||
2696 | mutex_lock(&session->s_mutex); | 2737 | mutex_lock(&session->s_mutex); |
2697 | session->s_seq++; | 2738 | session->s_seq++; |
2698 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, | 2739 | dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, |
@@ -2714,7 +2755,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2714 | * along for the mds (who clearly thinks we still have this | 2755 | * along for the mds (who clearly thinks we still have this |
2715 | * cap). | 2756 | * cap). |
2716 | */ | 2757 | */ |
2717 | ceph_add_cap_releases(mdsc, session, -1); | 2758 | ceph_add_cap_releases(mdsc, session); |
2718 | ceph_send_cap_releases(mdsc, session); | 2759 | ceph_send_cap_releases(mdsc, session); |
2719 | goto done; | 2760 | goto done; |
2720 | } | 2761 | } |
@@ -2726,12 +2767,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2726 | goto done; | 2767 | goto done; |
2727 | 2768 | ||
2728 | case CEPH_CAP_OP_EXPORT: | 2769 | case CEPH_CAP_OP_EXPORT: |
2729 | handle_cap_export(inode, h, session); | 2770 | handle_cap_export(inode, h, session, &open_target_sessions); |
2730 | goto done; | 2771 | goto done; |
2731 | 2772 | ||
2732 | case CEPH_CAP_OP_IMPORT: | 2773 | case CEPH_CAP_OP_IMPORT: |
2733 | handle_cap_import(mdsc, inode, h, session, | 2774 | handle_cap_import(mdsc, inode, h, session, |
2734 | snaptrace, le32_to_cpu(h->snap_trace_len)); | 2775 | snaptrace, snaptrace_len); |
2735 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, | 2776 | ceph_check_caps(ceph_inode(inode), CHECK_CAPS_NODELAY, |
2736 | session); | 2777 | session); |
2737 | goto done_unlocked; | 2778 | goto done_unlocked; |
@@ -2773,6 +2814,8 @@ done: | |||
2773 | done_unlocked: | 2814 | done_unlocked: |
2774 | if (inode) | 2815 | if (inode) |
2775 | iput(inode); | 2816 | iput(inode); |
2817 | if (open_target_sessions) | ||
2818 | ceph_mdsc_open_export_target_sessions(mdsc, session); | ||
2776 | return; | 2819 | return; |
2777 | 2820 | ||
2778 | bad: | 2821 | bad: |
diff --git a/fs/ceph/ceph_frag.h b/fs/ceph/ceph_frag.h index 793f50cb7c22..5babb8e95352 100644 --- a/fs/ceph/ceph_frag.h +++ b/fs/ceph/ceph_frag.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _FS_CEPH_FRAG_H | 1 | #ifndef FS_CEPH_FRAG_H |
2 | #define _FS_CEPH_FRAG_H | 2 | #define FS_CEPH_FRAG_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * "Frags" are a way to describe a subset of a 32-bit number space, | 5 | * "Frags" are a way to describe a subset of a 32-bit number space, |
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c index 79d76bc4303f..3ac6cc7c1156 100644 --- a/fs/ceph/ceph_fs.c +++ b/fs/ceph/ceph_fs.c | |||
@@ -29,46 +29,44 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout) | |||
29 | 29 | ||
30 | int ceph_flags_to_mode(int flags) | 30 | int ceph_flags_to_mode(int flags) |
31 | { | 31 | { |
32 | int mode; | ||
33 | |||
32 | #ifdef O_DIRECTORY /* fixme */ | 34 | #ifdef O_DIRECTORY /* fixme */ |
33 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | 35 | if ((flags & O_DIRECTORY) == O_DIRECTORY) |
34 | return CEPH_FILE_MODE_PIN; | 36 | return CEPH_FILE_MODE_PIN; |
35 | #endif | 37 | #endif |
38 | if ((flags & O_APPEND) == O_APPEND) | ||
39 | flags |= O_WRONLY; | ||
40 | |||
41 | if ((flags & O_ACCMODE) == O_RDWR) | ||
42 | mode = CEPH_FILE_MODE_RDWR; | ||
43 | else if ((flags & O_ACCMODE) == O_WRONLY) | ||
44 | mode = CEPH_FILE_MODE_WR; | ||
45 | else | ||
46 | mode = CEPH_FILE_MODE_RD; | ||
47 | |||
36 | #ifdef O_LAZY | 48 | #ifdef O_LAZY |
37 | if (flags & O_LAZY) | 49 | if (flags & O_LAZY) |
38 | return CEPH_FILE_MODE_LAZY; | 50 | mode |= CEPH_FILE_MODE_LAZY; |
39 | #endif | 51 | #endif |
40 | if ((flags & O_APPEND) == O_APPEND) | ||
41 | flags |= O_WRONLY; | ||
42 | 52 | ||
43 | flags &= O_ACCMODE; | 53 | return mode; |
44 | if ((flags & O_RDWR) == O_RDWR) | ||
45 | return CEPH_FILE_MODE_RDWR; | ||
46 | if ((flags & O_WRONLY) == O_WRONLY) | ||
47 | return CEPH_FILE_MODE_WR; | ||
48 | return CEPH_FILE_MODE_RD; | ||
49 | } | 54 | } |
50 | 55 | ||
51 | int ceph_caps_for_mode(int mode) | 56 | int ceph_caps_for_mode(int mode) |
52 | { | 57 | { |
53 | switch (mode) { | 58 | int caps = CEPH_CAP_PIN; |
54 | case CEPH_FILE_MODE_PIN: | 59 | |
55 | return CEPH_CAP_PIN; | 60 | if (mode & CEPH_FILE_MODE_RD) |
56 | case CEPH_FILE_MODE_RD: | 61 | caps |= CEPH_CAP_FILE_SHARED | |
57 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
58 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | 62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; |
59 | case CEPH_FILE_MODE_RDWR: | 63 | if (mode & CEPH_FILE_MODE_WR) |
60 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | 64 | caps |= CEPH_CAP_FILE_EXCL | |
61 | CEPH_CAP_FILE_EXCL | | ||
62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | | ||
63 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
64 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
65 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
66 | case CEPH_FILE_MODE_WR: | ||
67 | return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | | ||
68 | CEPH_CAP_FILE_EXCL | | ||
69 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | 65 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | |
70 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | 66 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | |
71 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | 67 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; |
72 | } | 68 | if (mode & CEPH_FILE_MODE_LAZY) |
73 | return 0; | 69 | caps |= CEPH_CAP_FILE_LAZYIO; |
70 | |||
71 | return caps; | ||
74 | } | 72 | } |
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h index 2fa992eaf7da..d5619ac86711 100644 --- a/fs/ceph/ceph_fs.h +++ b/fs/ceph/ceph_fs.h | |||
@@ -9,27 +9,13 @@ | |||
9 | * LGPL2 | 9 | * LGPL2 |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifndef _FS_CEPH_CEPH_FS_H | 12 | #ifndef CEPH_FS_H |
13 | #define _FS_CEPH_CEPH_FS_H | 13 | #define CEPH_FS_H |
14 | 14 | ||
15 | #include "msgr.h" | 15 | #include "msgr.h" |
16 | #include "rados.h" | 16 | #include "rados.h" |
17 | 17 | ||
18 | /* | 18 | /* |
19 | * Ceph release version | ||
20 | */ | ||
21 | #define CEPH_VERSION_MAJOR 0 | ||
22 | #define CEPH_VERSION_MINOR 20 | ||
23 | #define CEPH_VERSION_PATCH 0 | ||
24 | |||
25 | #define _CEPH_STRINGIFY(x) #x | ||
26 | #define CEPH_STRINGIFY(x) _CEPH_STRINGIFY(x) | ||
27 | #define CEPH_MAKE_VERSION(x, y, z) CEPH_STRINGIFY(x) "." CEPH_STRINGIFY(y) \ | ||
28 | "." CEPH_STRINGIFY(z) | ||
29 | #define CEPH_VERSION CEPH_MAKE_VERSION(CEPH_VERSION_MAJOR, \ | ||
30 | CEPH_VERSION_MINOR, CEPH_VERSION_PATCH) | ||
31 | |||
32 | /* | ||
33 | * subprotocol versions. when specific messages types or high-level | 19 | * subprotocol versions. when specific messages types or high-level |
34 | * protocols change, bump the affected components. we keep rev | 20 | * protocols change, bump the affected components. we keep rev |
35 | * internal cluster protocols separately from the public, | 21 | * internal cluster protocols separately from the public, |
@@ -53,18 +39,10 @@ | |||
53 | /* | 39 | /* |
54 | * feature bits | 40 | * feature bits |
55 | */ | 41 | */ |
56 | #define CEPH_FEATURE_UID 1 | 42 | #define CEPH_FEATURE_UID (1<<0) |
57 | #define CEPH_FEATURE_NOSRCADDR 2 | 43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) |
58 | #define CEPH_FEATURE_FLOCK 4 | 44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) |
59 | 45 | #define CEPH_FEATURE_FLOCK (1<<3) | |
60 | #define CEPH_FEATURE_SUPPORTED_MON CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
61 | #define CEPH_FEATURE_REQUIRED_MON CEPH_FEATURE_UID | ||
62 | #define CEPH_FEATURE_SUPPORTED_MDS CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR|CEPH_FEATURE_FLOCK | ||
63 | #define CEPH_FEATURE_REQUIRED_MDS CEPH_FEATURE_UID | ||
64 | #define CEPH_FEATURE_SUPPORTED_OSD CEPH_FEATURE_UID|CEPH_FEATURE_NOSRCADDR | ||
65 | #define CEPH_FEATURE_REQUIRED_OSD CEPH_FEATURE_UID | ||
66 | #define CEPH_FEATURE_SUPPORTED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
67 | #define CEPH_FEATURE_REQUIRED_CLIENT CEPH_FEATURE_NOSRCADDR | ||
68 | 46 | ||
69 | 47 | ||
70 | /* | 48 | /* |
@@ -96,6 +74,8 @@ int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | |||
96 | #define CEPH_CRYPTO_NONE 0x0 | 74 | #define CEPH_CRYPTO_NONE 0x0 |
97 | #define CEPH_CRYPTO_AES 0x1 | 75 | #define CEPH_CRYPTO_AES 0x1 |
98 | 76 | ||
77 | #define CEPH_AES_IV "cephsageyudagreg" | ||
78 | |||
99 | /* security/authentication protocols */ | 79 | /* security/authentication protocols */ |
100 | #define CEPH_AUTH_UNKNOWN 0x0 | 80 | #define CEPH_AUTH_UNKNOWN 0x0 |
101 | #define CEPH_AUTH_NONE 0x1 | 81 | #define CEPH_AUTH_NONE 0x1 |
@@ -275,6 +255,7 @@ extern const char *ceph_mds_state_name(int s); | |||
275 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ | 255 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ |
276 | #define CEPH_LOCK_INEST 1024 /* mds internal */ | 256 | #define CEPH_LOCK_INEST 1024 /* mds internal */ |
277 | #define CEPH_LOCK_IXATTR 2048 | 257 | #define CEPH_LOCK_IXATTR 2048 |
258 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ | ||
278 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | 259 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ |
279 | 260 | ||
280 | /* client_session ops */ | 261 | /* client_session ops */ |
@@ -316,6 +297,8 @@ enum { | |||
316 | CEPH_MDS_OP_RMXATTR = 0x01106, | 297 | CEPH_MDS_OP_RMXATTR = 0x01106, |
317 | CEPH_MDS_OP_SETLAYOUT = 0x01107, | 298 | CEPH_MDS_OP_SETLAYOUT = 0x01107, |
318 | CEPH_MDS_OP_SETATTR = 0x01108, | 299 | CEPH_MDS_OP_SETATTR = 0x01108, |
300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | ||
301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | ||
319 | 302 | ||
320 | CEPH_MDS_OP_MKNOD = 0x01201, | 303 | CEPH_MDS_OP_MKNOD = 0x01201, |
321 | CEPH_MDS_OP_LINK = 0x01202, | 304 | CEPH_MDS_OP_LINK = 0x01202, |
@@ -386,6 +369,15 @@ union ceph_mds_request_args { | |||
386 | struct { | 369 | struct { |
387 | struct ceph_file_layout layout; | 370 | struct ceph_file_layout layout; |
388 | } __attribute__ ((packed)) setlayout; | 371 | } __attribute__ ((packed)) setlayout; |
372 | struct { | ||
373 | __u8 rule; /* currently fcntl or flock */ | ||
374 | __u8 type; /* shared, exclusive, remove*/ | ||
375 | __le64 pid; /* process id requesting the lock */ | ||
376 | __le64 pid_namespace; | ||
377 | __le64 start; /* initial location to lock */ | ||
378 | __le64 length; /* num bytes to lock from start */ | ||
379 | __u8 wait; /* will caller wait for lock to become available? */ | ||
380 | } __attribute__ ((packed)) filelock_change; | ||
389 | } __attribute__ ((packed)); | 381 | } __attribute__ ((packed)); |
390 | 382 | ||
391 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | 383 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ |
@@ -480,6 +472,23 @@ struct ceph_mds_reply_dirfrag { | |||
480 | __le32 dist[]; | 472 | __le32 dist[]; |
481 | } __attribute__ ((packed)); | 473 | } __attribute__ ((packed)); |
482 | 474 | ||
475 | #define CEPH_LOCK_FCNTL 1 | ||
476 | #define CEPH_LOCK_FLOCK 2 | ||
477 | |||
478 | #define CEPH_LOCK_SHARED 1 | ||
479 | #define CEPH_LOCK_EXCL 2 | ||
480 | #define CEPH_LOCK_UNLOCK 4 | ||
481 | |||
482 | struct ceph_filelock { | ||
483 | __le64 start;/* file offset to start lock at */ | ||
484 | __le64 length; /* num bytes to lock; 0 for all following start */ | ||
485 | __le64 client; /* which client holds the lock */ | ||
486 | __le64 pid; /* process id holding the lock on the client */ | ||
487 | __le64 pid_namespace; | ||
488 | __u8 type; /* shared lock, exclusive lock, or unlock */ | ||
489 | } __attribute__ ((packed)); | ||
490 | |||
491 | |||
483 | /* file access modes */ | 492 | /* file access modes */ |
484 | #define CEPH_FILE_MODE_PIN 0 | 493 | #define CEPH_FILE_MODE_PIN 0 |
485 | #define CEPH_FILE_MODE_RD 1 | 494 | #define CEPH_FILE_MODE_RD 1 |
@@ -508,9 +517,10 @@ int ceph_flags_to_mode(int flags); | |||
508 | #define CEPH_CAP_SAUTH 2 | 517 | #define CEPH_CAP_SAUTH 2 |
509 | #define CEPH_CAP_SLINK 4 | 518 | #define CEPH_CAP_SLINK 4 |
510 | #define CEPH_CAP_SXATTR 6 | 519 | #define CEPH_CAP_SXATTR 6 |
511 | #define CEPH_CAP_SFILE 8 /* goes at the end (uses >2 cap bits) */ | 520 | #define CEPH_CAP_SFILE 8 |
521 | #define CEPH_CAP_SFLOCK 20 | ||
512 | 522 | ||
513 | #define CEPH_CAP_BITS 16 | 523 | #define CEPH_CAP_BITS 22 |
514 | 524 | ||
515 | /* composed values */ | 525 | /* composed values */ |
516 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) | 526 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) |
@@ -528,6 +538,9 @@ int ceph_flags_to_mode(int flags); | |||
528 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) | 538 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) |
529 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) | 539 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) |
530 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) | 540 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) |
541 | #define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) | ||
542 | #define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) | ||
543 | |||
531 | 544 | ||
532 | /* cap masks (for getattr) */ | 545 | /* cap masks (for getattr) */ |
533 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN | 546 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN |
@@ -563,7 +576,8 @@ int ceph_flags_to_mode(int flags); | |||
563 | CEPH_CAP_FILE_EXCL) | 576 | CEPH_CAP_FILE_EXCL) |
564 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) | 577 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) |
565 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ | 578 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ |
566 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_PIN) | 579 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ |
580 | CEPH_CAP_PIN) | ||
567 | 581 | ||
568 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ | 582 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ |
569 | CEPH_LOCK_IXATTR) | 583 | CEPH_LOCK_IXATTR) |
@@ -653,12 +667,21 @@ struct ceph_mds_cap_reconnect { | |||
653 | __le64 cap_id; | 667 | __le64 cap_id; |
654 | __le32 wanted; | 668 | __le32 wanted; |
655 | __le32 issued; | 669 | __le32 issued; |
670 | __le64 snaprealm; | ||
671 | __le64 pathbase; /* base ino for our path to this ino */ | ||
672 | __le32 flock_len; /* size of flock state blob, if any */ | ||
673 | } __attribute__ ((packed)); | ||
674 | /* followed by flock blob */ | ||
675 | |||
676 | struct ceph_mds_cap_reconnect_v1 { | ||
677 | __le64 cap_id; | ||
678 | __le32 wanted; | ||
679 | __le32 issued; | ||
656 | __le64 size; | 680 | __le64 size; |
657 | struct ceph_timespec mtime, atime; | 681 | struct ceph_timespec mtime, atime; |
658 | __le64 snaprealm; | 682 | __le64 snaprealm; |
659 | __le64 pathbase; /* base ino for our path to this ino */ | 683 | __le64 pathbase; /* base ino for our path to this ino */ |
660 | } __attribute__ ((packed)); | 684 | } __attribute__ ((packed)); |
661 | /* followed by encoded string */ | ||
662 | 685 | ||
663 | struct ceph_mds_snaprealm_reconnect { | 686 | struct ceph_mds_snaprealm_reconnect { |
664 | __le64 ino; /* snap realm base */ | 687 | __le64 ino; /* snap realm base */ |
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h index 5ac470c433c9..d099c3f90236 100644 --- a/fs/ceph/ceph_hash.h +++ b/fs/ceph/ceph_hash.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _FS_CEPH_HASH_H | 1 | #ifndef FS_CEPH_HASH_H |
2 | #define _FS_CEPH_HASH_H | 2 | #define FS_CEPH_HASH_H |
3 | 3 | ||
4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ | 4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ |
5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ | 5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ |
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/ceph_strings.c index 7503aee828ce..c6179d3a26a2 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/ceph_strings.c | |||
@@ -28,6 +28,7 @@ const char *ceph_osd_op_name(int op) | |||
28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | 28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; |
29 | case CEPH_OSD_OP_ZERO: return "zero"; | 29 | case CEPH_OSD_OP_ZERO: return "zero"; |
30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | 30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; |
31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
31 | 32 | ||
32 | case CEPH_OSD_OP_APPEND: return "append"; | 33 | case CEPH_OSD_OP_APPEND: return "append"; |
33 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | 34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; |
@@ -129,6 +130,8 @@ const char *ceph_mds_op_name(int op) | |||
129 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; | 130 | case CEPH_MDS_OP_LSSNAP: return "lssnap"; |
130 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; | 131 | case CEPH_MDS_OP_MKSNAP: return "mksnap"; |
131 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; | 132 | case CEPH_MDS_OP_RMSNAP: return "rmsnap"; |
133 | case CEPH_MDS_OP_SETFILELOCK: return "setfilelock"; | ||
134 | case CEPH_MDS_OP_GETFILELOCK: return "getfilelock"; | ||
132 | } | 135 | } |
133 | return "???"; | 136 | return "???"; |
134 | } | 137 | } |
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h index dcd7e7523700..97e435b191f4 100644 --- a/fs/ceph/crush/crush.h +++ b/fs/ceph/crush/crush.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_CRUSH_H | 1 | #ifndef CEPH_CRUSH_CRUSH_H |
2 | #define _CRUSH_CRUSH_H | 2 | #define CEPH_CRUSH_CRUSH_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h index ff48e110e4bb..91e884230d5d 100644 --- a/fs/ceph/crush/hash.h +++ b/fs/ceph/crush/hash.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_HASH_H | 1 | #ifndef CEPH_CRUSH_HASH_H |
2 | #define _CRUSH_HASH_H | 2 | #define CEPH_CRUSH_HASH_H |
3 | 3 | ||
4 | #define CRUSH_HASH_RJENKINS1 0 | 4 | #define CRUSH_HASH_RJENKINS1 0 |
5 | 5 | ||
diff --git a/fs/ceph/crush/mapper.h b/fs/ceph/crush/mapper.h index 98e90046fd9f..c46b99c18bb0 100644 --- a/fs/ceph/crush/mapper.h +++ b/fs/ceph/crush/mapper.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _CRUSH_MAPPER_H | 1 | #ifndef CEPH_CRUSH_MAPPER_H |
2 | #define _CRUSH_MAPPER_H | 2 | #define CEPH_CRUSH_MAPPER_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * CRUSH functions for find rules and then mapping an input to an | 5 | * CRUSH functions for find rules and then mapping an input to an |
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c index f704b3b62424..a3e627f63293 100644 --- a/fs/ceph/crypto.c +++ b/fs/ceph/crypto.c | |||
@@ -75,10 +75,11 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | |||
75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | 75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); |
76 | } | 76 | } |
77 | 77 | ||
78 | const u8 *aes_iv = "cephsageyudagreg"; | 78 | static const u8 *aes_iv = (u8 *)CEPH_AES_IV; |
79 | 79 | ||
80 | int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 80 | static int ceph_aes_encrypt(const void *key, int key_len, |
81 | const void *src, size_t src_len) | 81 | void *dst, size_t *dst_len, |
82 | const void *src, size_t src_len) | ||
82 | { | 83 | { |
83 | struct scatterlist sg_in[2], sg_out[1]; | 84 | struct scatterlist sg_in[2], sg_out[1]; |
84 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 85 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -126,9 +127,10 @@ int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
126 | return 0; | 127 | return 0; |
127 | } | 128 | } |
128 | 129 | ||
129 | int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | 130 | static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, |
130 | const void *src1, size_t src1_len, | 131 | size_t *dst_len, |
131 | const void *src2, size_t src2_len) | 132 | const void *src1, size_t src1_len, |
133 | const void *src2, size_t src2_len) | ||
132 | { | 134 | { |
133 | struct scatterlist sg_in[3], sg_out[1]; | 135 | struct scatterlist sg_in[3], sg_out[1]; |
134 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 136 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -179,8 +181,9 @@ int ceph_aes_encrypt2(const void *key, int key_len, void *dst, size_t *dst_len, | |||
179 | return 0; | 181 | return 0; |
180 | } | 182 | } |
181 | 183 | ||
182 | int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | 184 | static int ceph_aes_decrypt(const void *key, int key_len, |
183 | const void *src, size_t src_len) | 185 | void *dst, size_t *dst_len, |
186 | const void *src, size_t src_len) | ||
184 | { | 187 | { |
185 | struct scatterlist sg_in[1], sg_out[2]; | 188 | struct scatterlist sg_in[1], sg_out[2]; |
186 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 189 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
@@ -238,10 +241,10 @@ int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, | |||
238 | return 0; | 241 | return 0; |
239 | } | 242 | } |
240 | 243 | ||
241 | int ceph_aes_decrypt2(const void *key, int key_len, | 244 | static int ceph_aes_decrypt2(const void *key, int key_len, |
242 | void *dst1, size_t *dst1_len, | 245 | void *dst1, size_t *dst1_len, |
243 | void *dst2, size_t *dst2_len, | 246 | void *dst2, size_t *dst2_len, |
244 | const void *src, size_t src_len) | 247 | const void *src, size_t src_len) |
245 | { | 248 | { |
246 | struct scatterlist sg_in[1], sg_out[3]; | 249 | struct scatterlist sg_in[1], sg_out[3]; |
247 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | 250 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); |
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h index 40b502e6bd89..bdf38607323c 100644 --- a/fs/ceph/crypto.h +++ b/fs/ceph/crypto.h | |||
@@ -42,7 +42,7 @@ extern int ceph_encrypt2(struct ceph_crypto_key *secret, | |||
42 | const void *src2, size_t src2_len); | 42 | const void *src2, size_t src2_len); |
43 | 43 | ||
44 | /* armor.c */ | 44 | /* armor.c */ |
45 | extern int ceph_armor(char *dst, const void *src, const void *end); | 45 | extern int ceph_armor(char *dst, const char *src, const char *end); |
46 | extern int ceph_unarmor(void *dst, const char *src, const char *end); | 46 | extern int ceph_unarmor(char *dst, const char *src, const char *end); |
47 | 47 | ||
48 | #endif | 48 | #endif |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index f2f5332ddbba..360c4f22718d 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -291,7 +291,7 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
291 | return 0; | 291 | return 0; |
292 | } | 292 | } |
293 | 293 | ||
294 | #define DEFINE_SHOW_FUNC(name) \ | 294 | #define DEFINE_SHOW_FUNC(name) \ |
295 | static int name##_open(struct inode *inode, struct file *file) \ | 295 | static int name##_open(struct inode *inode, struct file *file) \ |
296 | { \ | 296 | { \ |
297 | struct seq_file *sf; \ | 297 | struct seq_file *sf; \ |
@@ -361,8 +361,8 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
361 | int ret = 0; | 361 | int ret = 0; |
362 | char name[80]; | 362 | char name[80]; |
363 | 363 | ||
364 | snprintf(name, sizeof(name), FSID_FORMAT ".client%lld", | 364 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, |
365 | PR_FSID(&client->fsid), client->monc.auth->global_id); | 365 | client->monc.auth->global_id); |
366 | 366 | ||
367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 367 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); |
368 | if (!client->debugfs_dir) | 368 | if (!client->debugfs_dir) |
@@ -432,11 +432,12 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
432 | if (!client->debugfs_caps) | 432 | if (!client->debugfs_caps) |
433 | goto out; | 433 | goto out; |
434 | 434 | ||
435 | client->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", | 435 | client->debugfs_congestion_kb = |
436 | 0600, | 436 | debugfs_create_file("writeback_congestion_kb", |
437 | client->debugfs_dir, | 437 | 0600, |
438 | client, | 438 | client->debugfs_dir, |
439 | &congestion_kb_fops); | 439 | client, |
440 | &congestion_kb_fops); | ||
440 | if (!client->debugfs_congestion_kb) | 441 | if (!client->debugfs_congestion_kb) |
441 | goto out; | 442 | goto out; |
442 | 443 | ||
@@ -466,7 +467,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
466 | debugfs_remove(client->debugfs_dir); | 467 | debugfs_remove(client->debugfs_dir); |
467 | } | 468 | } |
468 | 469 | ||
469 | #else // CONFIG_DEBUG_FS | 470 | #else /* CONFIG_DEBUG_FS */ |
470 | 471 | ||
471 | int __init ceph_debugfs_init(void) | 472 | int __init ceph_debugfs_init(void) |
472 | { | 473 | { |
@@ -486,4 +487,4 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client) | |||
486 | { | 487 | { |
487 | } | 488 | } |
488 | 489 | ||
489 | #endif // CONFIG_DEBUG_FS | 490 | #endif /* CONFIG_DEBUG_FS */ |
diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h index 65b3e022eaf5..3d25415afe63 100644 --- a/fs/ceph/decode.h +++ b/fs/ceph/decode.h | |||
@@ -99,11 +99,13 @@ static inline void ceph_encode_timespec(struct ceph_timespec *tv, | |||
99 | */ | 99 | */ |
100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) | 100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) |
101 | { | 101 | { |
102 | a->in_addr.ss_family = htons(a->in_addr.ss_family); | 102 | __be16 ss_family = htons(a->in_addr.ss_family); |
103 | a->in_addr.ss_family = *(__u16 *)&ss_family; | ||
103 | } | 104 | } |
104 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) | 105 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) |
105 | { | 106 | { |
106 | a->in_addr.ss_family = ntohs(a->in_addr.ss_family); | 107 | __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; |
108 | a->in_addr.ss_family = ntohs(ss_family); | ||
107 | WARN_ON(a->in_addr.ss_family == 512); | 109 | WARN_ON(a->in_addr.ss_family == 512); |
108 | } | 110 | } |
109 | 111 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f94ed3c7f6a5..67bbb41d5526 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -27,7 +27,7 @@ | |||
27 | 27 | ||
28 | const struct inode_operations ceph_dir_iops; | 28 | const struct inode_operations ceph_dir_iops; |
29 | const struct file_operations ceph_dir_fops; | 29 | const struct file_operations ceph_dir_fops; |
30 | struct dentry_operations ceph_dentry_ops; | 30 | const struct dentry_operations ceph_dentry_ops; |
31 | 31 | ||
32 | /* | 32 | /* |
33 | * Initialize ceph dentry state. | 33 | * Initialize ceph dentry state. |
@@ -94,6 +94,8 @@ static unsigned fpos_off(loff_t p) | |||
94 | */ | 94 | */ |
95 | static int __dcache_readdir(struct file *filp, | 95 | static int __dcache_readdir(struct file *filp, |
96 | void *dirent, filldir_t filldir) | 96 | void *dirent, filldir_t filldir) |
97 | __releases(inode->i_lock) | ||
98 | __acquires(inode->i_lock) | ||
97 | { | 99 | { |
98 | struct inode *inode = filp->f_dentry->d_inode; | 100 | struct inode *inode = filp->f_dentry->d_inode; |
99 | struct ceph_file_info *fi = filp->private_data; | 101 | struct ceph_file_info *fi = filp->private_data; |
@@ -1239,16 +1241,16 @@ const struct inode_operations ceph_dir_iops = { | |||
1239 | .create = ceph_create, | 1241 | .create = ceph_create, |
1240 | }; | 1242 | }; |
1241 | 1243 | ||
1242 | struct dentry_operations ceph_dentry_ops = { | 1244 | const struct dentry_operations ceph_dentry_ops = { |
1243 | .d_revalidate = ceph_d_revalidate, | 1245 | .d_revalidate = ceph_d_revalidate, |
1244 | .d_release = ceph_dentry_release, | 1246 | .d_release = ceph_dentry_release, |
1245 | }; | 1247 | }; |
1246 | 1248 | ||
1247 | struct dentry_operations ceph_snapdir_dentry_ops = { | 1249 | const struct dentry_operations ceph_snapdir_dentry_ops = { |
1248 | .d_revalidate = ceph_snapdir_d_revalidate, | 1250 | .d_revalidate = ceph_snapdir_d_revalidate, |
1249 | .d_release = ceph_dentry_release, | 1251 | .d_release = ceph_dentry_release, |
1250 | }; | 1252 | }; |
1251 | 1253 | ||
1252 | struct dentry_operations ceph_snap_dentry_ops = { | 1254 | const struct dentry_operations ceph_snap_dentry_ops = { |
1253 | .d_release = ceph_dentry_release, | 1255 | .d_release = ceph_dentry_release, |
1254 | }; | 1256 | }; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7c08698fad3e..8c044a4f0457 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -317,7 +317,7 @@ void ceph_release_page_vector(struct page **pages, int num_pages) | |||
317 | /* | 317 | /* |
318 | * allocate a vector new pages | 318 | * allocate a vector new pages |
319 | */ | 319 | */ |
320 | struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | 320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) |
321 | { | 321 | { |
322 | struct page **pages; | 322 | struct page **pages; |
323 | int i; | 323 | int i; |
@@ -665,7 +665,7 @@ more: | |||
665 | * throw out any page cache pages in this range. this | 665 | * throw out any page cache pages in this range. this |
666 | * may block. | 666 | * may block. |
667 | */ | 667 | */ |
668 | truncate_inode_pages_range(inode->i_mapping, pos, | 668 | truncate_inode_pages_range(inode->i_mapping, pos, |
669 | (pos+len) | (PAGE_CACHE_SIZE-1)); | 669 | (pos+len) | (PAGE_CACHE_SIZE-1)); |
670 | } else { | 670 | } else { |
671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 671 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
@@ -740,28 +740,32 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
740 | unsigned long nr_segs, loff_t pos) | 740 | unsigned long nr_segs, loff_t pos) |
741 | { | 741 | { |
742 | struct file *filp = iocb->ki_filp; | 742 | struct file *filp = iocb->ki_filp; |
743 | struct ceph_file_info *fi = filp->private_data; | ||
743 | loff_t *ppos = &iocb->ki_pos; | 744 | loff_t *ppos = &iocb->ki_pos; |
744 | size_t len = iov->iov_len; | 745 | size_t len = iov->iov_len; |
745 | struct inode *inode = filp->f_dentry->d_inode; | 746 | struct inode *inode = filp->f_dentry->d_inode; |
746 | struct ceph_inode_info *ci = ceph_inode(inode); | 747 | struct ceph_inode_info *ci = ceph_inode(inode); |
747 | void *base = iov->iov_base; | 748 | void __user *base = iov->iov_base; |
748 | ssize_t ret; | 749 | ssize_t ret; |
749 | int got = 0; | 750 | int want, got = 0; |
750 | int checkeof = 0, read = 0; | 751 | int checkeof = 0, read = 0; |
751 | 752 | ||
752 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", | 753 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", |
753 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); | 754 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); |
754 | again: | 755 | again: |
755 | __ceph_do_pending_vmtruncate(inode); | 756 | __ceph_do_pending_vmtruncate(inode); |
756 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, | 757 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
757 | &got, -1); | 758 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; |
759 | else | ||
760 | want = CEPH_CAP_FILE_CACHE; | ||
761 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); | ||
758 | if (ret < 0) | 762 | if (ret < 0) |
759 | goto out; | 763 | goto out; |
760 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", | 764 | dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", |
761 | inode, ceph_vinop(inode), pos, (unsigned)len, | 765 | inode, ceph_vinop(inode), pos, (unsigned)len, |
762 | ceph_cap_string(got)); | 766 | ceph_cap_string(got)); |
763 | 767 | ||
764 | if ((got & CEPH_CAP_FILE_CACHE) == 0 || | 768 | if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || |
765 | (iocb->ki_filp->f_flags & O_DIRECT) || | 769 | (iocb->ki_filp->f_flags & O_DIRECT) || |
766 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) | 770 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) |
767 | /* hmm, this isn't really async... */ | 771 | /* hmm, this isn't really async... */ |
@@ -807,11 +811,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
807 | unsigned long nr_segs, loff_t pos) | 811 | unsigned long nr_segs, loff_t pos) |
808 | { | 812 | { |
809 | struct file *file = iocb->ki_filp; | 813 | struct file *file = iocb->ki_filp; |
814 | struct ceph_file_info *fi = file->private_data; | ||
810 | struct inode *inode = file->f_dentry->d_inode; | 815 | struct inode *inode = file->f_dentry->d_inode; |
811 | struct ceph_inode_info *ci = ceph_inode(inode); | 816 | struct ceph_inode_info *ci = ceph_inode(inode); |
812 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; |
813 | loff_t endoff = pos + iov->iov_len; | 818 | loff_t endoff = pos + iov->iov_len; |
814 | int got = 0; | 819 | int want, got = 0; |
815 | int ret, err; | 820 | int ret, err; |
816 | 821 | ||
817 | if (ceph_snap(inode) != CEPH_NOSNAP) | 822 | if (ceph_snap(inode) != CEPH_NOSNAP) |
@@ -824,8 +829,11 @@ retry_snap: | |||
824 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 829 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
825 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 830 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
826 | inode->i_size); | 831 | inode->i_size); |
827 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER, | 832 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
828 | &got, endoff); | 833 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; |
834 | else | ||
835 | want = CEPH_CAP_FILE_BUFFER; | ||
836 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||
829 | if (ret < 0) | 837 | if (ret < 0) |
830 | goto out; | 838 | goto out; |
831 | 839 | ||
@@ -833,7 +841,7 @@ retry_snap: | |||
833 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 841 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
834 | ceph_cap_string(got)); | 842 | ceph_cap_string(got)); |
835 | 843 | ||
836 | if ((got & CEPH_CAP_FILE_BUFFER) == 0 || | 844 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
837 | (iocb->ki_filp->f_flags & O_DIRECT) || | 845 | (iocb->ki_filp->f_flags & O_DIRECT) || |
838 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { | 846 | (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { |
839 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 847 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
@@ -930,6 +938,8 @@ const struct file_operations ceph_file_fops = { | |||
930 | .aio_write = ceph_aio_write, | 938 | .aio_write = ceph_aio_write, |
931 | .mmap = ceph_mmap, | 939 | .mmap = ceph_mmap, |
932 | .fsync = ceph_fsync, | 940 | .fsync = ceph_fsync, |
941 | .lock = ceph_lock, | ||
942 | .flock = ceph_flock, | ||
933 | .splice_read = generic_file_splice_read, | 943 | .splice_read = generic_file_splice_read, |
934 | .splice_write = generic_file_splice_write, | 944 | .splice_write = generic_file_splice_write, |
935 | .unlocked_ioctl = ceph_ioctl, | 945 | .unlocked_ioctl = ceph_ioctl, |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 389f9dbd9949..5d893d31e399 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -442,8 +442,9 @@ int ceph_fill_file_size(struct inode *inode, int issued, | |||
442 | * the file is either opened or mmaped | 442 | * the file is either opened or mmaped |
443 | */ | 443 | */ |
444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| | 444 | if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD| |
445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| | 445 | CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER| |
446 | CEPH_CAP_FILE_EXCL)) || | 446 | CEPH_CAP_FILE_EXCL| |
447 | CEPH_CAP_FILE_LAZYIO)) || | ||
447 | mapping_mapped(inode->i_mapping) || | 448 | mapping_mapped(inode->i_mapping) || |
448 | __ceph_caps_file_wanted(ci)) { | 449 | __ceph_caps_file_wanted(ci)) { |
449 | ci->i_truncate_pending++; | 450 | ci->i_truncate_pending++; |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index d085f07756b4..76e307d2aba1 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -143,6 +143,27 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
143 | return 0; | 143 | return 0; |
144 | } | 144 | } |
145 | 145 | ||
146 | static long ceph_ioctl_lazyio(struct file *file) | ||
147 | { | ||
148 | struct ceph_file_info *fi = file->private_data; | ||
149 | struct inode *inode = file->f_dentry->d_inode; | ||
150 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
151 | |||
152 | if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { | ||
153 | spin_lock(&inode->i_lock); | ||
154 | ci->i_nr_by_mode[fi->fmode]--; | ||
155 | fi->fmode |= CEPH_FILE_MODE_LAZY; | ||
156 | ci->i_nr_by_mode[fi->fmode]++; | ||
157 | spin_unlock(&inode->i_lock); | ||
158 | dout("ioctl_layzio: file %p marked lazy\n", file); | ||
159 | |||
160 | ceph_check_caps(ci, 0, NULL); | ||
161 | } else { | ||
162 | dout("ioctl_layzio: file %p already lazy\n", file); | ||
163 | } | ||
164 | return 0; | ||
165 | } | ||
166 | |||
146 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 167 | long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
147 | { | 168 | { |
148 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); | 169 | dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); |
@@ -155,6 +176,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
155 | 176 | ||
156 | case CEPH_IOC_GET_DATALOC: | 177 | case CEPH_IOC_GET_DATALOC: |
157 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
179 | |||
180 | case CEPH_IOC_LAZYIO: | ||
181 | return ceph_ioctl_lazyio(file); | ||
158 | } | 182 | } |
159 | return -ENOTTY; | 183 | return -ENOTTY; |
160 | } | 184 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 25e4f1a9d059..88451a3b6857 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -37,4 +37,6 @@ struct ceph_ioctl_dataloc { | |||
37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ | 37 | #define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \ |
38 | struct ceph_ioctl_dataloc) | 38 | struct ceph_ioctl_dataloc) |
39 | 39 | ||
40 | #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) | ||
41 | |||
40 | #endif | 42 | #endif |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c new file mode 100644 index 000000000000..ae85af06454f --- /dev/null +++ b/fs/ceph/locks.c | |||
@@ -0,0 +1,256 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/file.h> | ||
4 | #include <linux/namei.h> | ||
5 | |||
6 | #include "super.h" | ||
7 | #include "mds_client.h" | ||
8 | #include "pagelist.h" | ||
9 | |||
10 | /** | ||
11 | * Implement fcntl and flock locking functions. | ||
12 | */ | ||
13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | ||
14 | u64 pid, u64 pid_ns, | ||
15 | int cmd, u64 start, u64 length, u8 wait) | ||
16 | { | ||
17 | struct inode *inode = file->f_dentry->d_inode; | ||
18 | struct ceph_mds_client *mdsc = | ||
19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | ||
20 | struct ceph_mds_request *req; | ||
21 | int err; | ||
22 | |||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | ||
24 | if (IS_ERR(req)) | ||
25 | return PTR_ERR(req); | ||
26 | req->r_inode = igrab(inode); | ||
27 | |||
28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | ||
30 | (int)operation, pid, start, length, wait, cmd); | ||
31 | |||
32 | req->r_args.filelock_change.rule = lock_type; | ||
33 | req->r_args.filelock_change.type = cmd; | ||
34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | ||
35 | /* This should be adjusted, but I'm not sure if | ||
36 | namespaces actually get id numbers*/ | ||
37 | req->r_args.filelock_change.pid_namespace = | ||
38 | cpu_to_le64((u64)pid_ns); | ||
39 | req->r_args.filelock_change.start = cpu_to_le64(start); | ||
40 | req->r_args.filelock_change.length = cpu_to_le64(length); | ||
41 | req->r_args.filelock_change.wait = wait; | ||
42 | |||
43 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
44 | ceph_mdsc_put_request(req); | ||
45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | ||
46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | ||
47 | (int)operation, pid, start, length, wait, cmd, err); | ||
48 | return err; | ||
49 | } | ||
50 | |||
51 | /** | ||
52 | * Attempt to set an fcntl lock. | ||
53 | * For now, this just goes away to the server. Later it may be more awesome. | ||
54 | */ | ||
55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | ||
56 | { | ||
57 | u64 length; | ||
58 | u8 lock_cmd; | ||
59 | int err; | ||
60 | u8 wait = 0; | ||
61 | u16 op = CEPH_MDS_OP_SETFILELOCK; | ||
62 | |||
63 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
64 | dout("ceph_lock, fl_pid:%d", fl->fl_pid); | ||
65 | |||
66 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | ||
67 | if (F_SETLKW == cmd) | ||
68 | wait = 1; | ||
69 | if (F_GETLK == cmd) | ||
70 | op = CEPH_MDS_OP_GETFILELOCK; | ||
71 | |||
72 | if (F_RDLCK == fl->fl_type) | ||
73 | lock_cmd = CEPH_LOCK_SHARED; | ||
74 | else if (F_WRLCK == fl->fl_type) | ||
75 | lock_cmd = CEPH_LOCK_EXCL; | ||
76 | else | ||
77 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
78 | |||
79 | if (LLONG_MAX == fl->fl_end) | ||
80 | length = 0; | ||
81 | else | ||
82 | length = fl->fl_end - fl->fl_start + 1; | ||
83 | |||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
86 | lock_cmd, fl->fl_start, | ||
87 | length, wait); | ||
88 | if (!err) { | ||
89 | dout("mds locked, locking locally"); | ||
90 | err = posix_lock_file(file, fl, NULL); | ||
91 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | ||
92 | /* undo! This should only happen if the kernel detects | ||
93 | * local deadlock. */ | ||
94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
96 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
97 | length, 0); | ||
98 | dout("got %d on posix_lock_file, undid lock", err); | ||
99 | } | ||
100 | } else { | ||
101 | dout("mds returned error code %d", err); | ||
102 | } | ||
103 | return err; | ||
104 | } | ||
105 | |||
106 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | ||
107 | { | ||
108 | u64 length; | ||
109 | u8 lock_cmd; | ||
110 | int err; | ||
111 | u8 wait = 1; | ||
112 | |||
113 | fl->fl_nspid = get_pid(task_tgid(current)); | ||
114 | dout("ceph_flock, fl_pid:%d", fl->fl_pid); | ||
115 | |||
116 | /* set wait bit, then clear it out of cmd*/ | ||
117 | if (cmd & LOCK_NB) | ||
118 | wait = 0; | ||
119 | cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); | ||
120 | /* set command sequence that Ceph wants to see: | ||
121 | shared lock, exclusive lock, or unlock */ | ||
122 | if (LOCK_SH == cmd) | ||
123 | lock_cmd = CEPH_LOCK_SHARED; | ||
124 | else if (LOCK_EX == cmd) | ||
125 | lock_cmd = CEPH_LOCK_EXCL; | ||
126 | else | ||
127 | lock_cmd = CEPH_LOCK_UNLOCK; | ||
128 | /* mds requires start and length rather than start and end */ | ||
129 | if (LLONG_MAX == fl->fl_end) | ||
130 | length = 0; | ||
131 | else | ||
132 | length = fl->fl_end - fl->fl_start + 1; | ||
133 | |||
134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | ||
135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | ||
136 | lock_cmd, fl->fl_start, | ||
137 | length, wait); | ||
138 | if (!err) { | ||
139 | err = flock_lock_file_wait(file, fl); | ||
140 | if (err) { | ||
141 | ceph_lock_message(CEPH_LOCK_FLOCK, | ||
142 | CEPH_MDS_OP_SETFILELOCK, | ||
143 | file, (u64)fl->fl_pid, | ||
144 | (u64)fl->fl_nspid, | ||
145 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
146 | length, 0); | ||
147 | dout("got %d on flock_lock_file_wait, undid lock", err); | ||
148 | } | ||
149 | } else { | ||
150 | dout("mds error code %d", err); | ||
151 | } | ||
152 | return err; | ||
153 | } | ||
154 | |||
155 | /** | ||
156 | * Must be called with BKL already held. Fills in the passed | ||
157 | * counter variables, so you can prepare pagelist metadata before calling | ||
158 | * ceph_encode_locks. | ||
159 | */ | ||
160 | void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | ||
161 | { | ||
162 | struct file_lock *lock; | ||
163 | |||
164 | *fcntl_count = 0; | ||
165 | *flock_count = 0; | ||
166 | |||
167 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
168 | if (lock->fl_flags & FL_POSIX) | ||
169 | ++(*fcntl_count); | ||
170 | else if (lock->fl_flags & FL_FLOCK) | ||
171 | ++(*flock_count); | ||
172 | } | ||
173 | dout("counted %d flock locks and %d fcntl locks", | ||
174 | *flock_count, *fcntl_count); | ||
175 | } | ||
176 | |||
177 | /** | ||
178 | * Encode the flock and fcntl locks for the given inode into the pagelist. | ||
179 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | ||
180 | * sequential flock locks. | ||
181 | * Must be called with BLK already held, and the lock numbers should have | ||
182 | * been gathered under the same lock holding window. | ||
183 | */ | ||
184 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | ||
185 | int num_fcntl_locks, int num_flock_locks) | ||
186 | { | ||
187 | struct file_lock *lock; | ||
188 | struct ceph_filelock cephlock; | ||
189 | int err = 0; | ||
190 | |||
191 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | ||
192 | num_fcntl_locks); | ||
193 | err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); | ||
194 | if (err) | ||
195 | goto fail; | ||
196 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
197 | if (lock->fl_flags & FL_POSIX) { | ||
198 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
199 | if (err) | ||
200 | goto fail; | ||
201 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
202 | sizeof(struct ceph_filelock)); | ||
203 | } | ||
204 | if (err) | ||
205 | goto fail; | ||
206 | } | ||
207 | |||
208 | err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); | ||
209 | if (err) | ||
210 | goto fail; | ||
211 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | ||
212 | if (lock->fl_flags & FL_FLOCK) { | ||
213 | err = lock_to_ceph_filelock(lock, &cephlock); | ||
214 | if (err) | ||
215 | goto fail; | ||
216 | err = ceph_pagelist_append(pagelist, &cephlock, | ||
217 | sizeof(struct ceph_filelock)); | ||
218 | } | ||
219 | if (err) | ||
220 | goto fail; | ||
221 | } | ||
222 | fail: | ||
223 | return err; | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | * Given a pointer to a lock, convert it to a ceph filelock | ||
228 | */ | ||
229 | int lock_to_ceph_filelock(struct file_lock *lock, | ||
230 | struct ceph_filelock *cephlock) | ||
231 | { | ||
232 | int err = 0; | ||
233 | |||
234 | cephlock->start = cpu_to_le64(lock->fl_start); | ||
235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | ||
236 | cephlock->client = cpu_to_le64(0); | ||
237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | ||
238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | ||
239 | |||
240 | switch (lock->fl_type) { | ||
241 | case F_RDLCK: | ||
242 | cephlock->type = CEPH_LOCK_SHARED; | ||
243 | break; | ||
244 | case F_WRLCK: | ||
245 | cephlock->type = CEPH_LOCK_EXCL; | ||
246 | break; | ||
247 | case F_UNLCK: | ||
248 | cephlock->type = CEPH_LOCK_UNLOCK; | ||
249 | break; | ||
250 | default: | ||
251 | dout("Have unknown lock type %d", lock->fl_type); | ||
252 | err = -EINVAL; | ||
253 | } | ||
254 | |||
255 | return err; | ||
256 | } | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index dd440bd438a9..a75ddbf9fe37 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/wait.h> | 3 | #include <linux/wait.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <linux/sched.h> | 5 | #include <linux/sched.h> |
6 | #include <linux/smp_lock.h> | ||
6 | 7 | ||
7 | #include "mds_client.h" | 8 | #include "mds_client.h" |
8 | #include "mon_client.h" | 9 | #include "mon_client.h" |
@@ -37,6 +38,11 @@ | |||
37 | * are no longer valid. | 38 | * are no longer valid. |
38 | */ | 39 | */ |
39 | 40 | ||
41 | struct ceph_reconnect_state { | ||
42 | struct ceph_pagelist *pagelist; | ||
43 | bool flock; | ||
44 | }; | ||
45 | |||
40 | static void __wake_requests(struct ceph_mds_client *mdsc, | 46 | static void __wake_requests(struct ceph_mds_client *mdsc, |
41 | struct list_head *head); | 47 | struct list_head *head); |
42 | 48 | ||
@@ -449,7 +455,7 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
449 | kfree(req->r_path1); | 455 | kfree(req->r_path1); |
450 | kfree(req->r_path2); | 456 | kfree(req->r_path2); |
451 | put_request_session(req); | 457 | put_request_session(req); |
452 | ceph_unreserve_caps(&req->r_caps_reservation); | 458 | ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); |
453 | kfree(req); | 459 | kfree(req); |
454 | } | 460 | } |
455 | 461 | ||
@@ -512,7 +518,8 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
512 | { | 518 | { |
513 | req->r_tid = ++mdsc->last_tid; | 519 | req->r_tid = ++mdsc->last_tid; |
514 | if (req->r_num_caps) | 520 | if (req->r_num_caps) |
515 | ceph_reserve_caps(&req->r_caps_reservation, req->r_num_caps); | 521 | ceph_reserve_caps(mdsc, &req->r_caps_reservation, |
522 | req->r_num_caps); | ||
516 | dout("__register_request %p tid %lld\n", req, req->r_tid); | 523 | dout("__register_request %p tid %lld\n", req, req->r_tid); |
517 | ceph_mdsc_get_request(req); | 524 | ceph_mdsc_get_request(req); |
518 | __insert_request(mdsc, req); | 525 | __insert_request(mdsc, req); |
@@ -704,6 +711,51 @@ static int __open_session(struct ceph_mds_client *mdsc, | |||
704 | } | 711 | } |
705 | 712 | ||
706 | /* | 713 | /* |
714 | * open sessions for any export targets for the given mds | ||
715 | * | ||
716 | * called under mdsc->mutex | ||
717 | */ | ||
718 | static void __open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
719 | struct ceph_mds_session *session) | ||
720 | { | ||
721 | struct ceph_mds_info *mi; | ||
722 | struct ceph_mds_session *ts; | ||
723 | int i, mds = session->s_mds; | ||
724 | int target; | ||
725 | |||
726 | if (mds >= mdsc->mdsmap->m_max_mds) | ||
727 | return; | ||
728 | mi = &mdsc->mdsmap->m_info[mds]; | ||
729 | dout("open_export_target_sessions for mds%d (%d targets)\n", | ||
730 | session->s_mds, mi->num_export_targets); | ||
731 | |||
732 | for (i = 0; i < mi->num_export_targets; i++) { | ||
733 | target = mi->export_targets[i]; | ||
734 | ts = __ceph_lookup_mds_session(mdsc, target); | ||
735 | if (!ts) { | ||
736 | ts = register_session(mdsc, target); | ||
737 | if (IS_ERR(ts)) | ||
738 | return; | ||
739 | } | ||
740 | if (session->s_state == CEPH_MDS_SESSION_NEW || | ||
741 | session->s_state == CEPH_MDS_SESSION_CLOSING) | ||
742 | __open_session(mdsc, session); | ||
743 | else | ||
744 | dout(" mds%d target mds%d %p is %s\n", session->s_mds, | ||
745 | i, ts, session_state_name(ts->s_state)); | ||
746 | ceph_put_mds_session(ts); | ||
747 | } | ||
748 | } | ||
749 | |||
750 | void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
751 | struct ceph_mds_session *session) | ||
752 | { | ||
753 | mutex_lock(&mdsc->mutex); | ||
754 | __open_export_target_sessions(mdsc, session); | ||
755 | mutex_unlock(&mdsc->mutex); | ||
756 | } | ||
757 | |||
758 | /* | ||
707 | * session caps | 759 | * session caps |
708 | */ | 760 | */ |
709 | 761 | ||
@@ -764,7 +816,7 @@ static int iterate_session_caps(struct ceph_mds_session *session, | |||
764 | last_inode = NULL; | 816 | last_inode = NULL; |
765 | } | 817 | } |
766 | if (old_cap) { | 818 | if (old_cap) { |
767 | ceph_put_cap(old_cap); | 819 | ceph_put_cap(session->s_mdsc, old_cap); |
768 | old_cap = NULL; | 820 | old_cap = NULL; |
769 | } | 821 | } |
770 | 822 | ||
@@ -793,7 +845,7 @@ out: | |||
793 | if (last_inode) | 845 | if (last_inode) |
794 | iput(last_inode); | 846 | iput(last_inode); |
795 | if (old_cap) | 847 | if (old_cap) |
796 | ceph_put_cap(old_cap); | 848 | ceph_put_cap(session->s_mdsc, old_cap); |
797 | 849 | ||
798 | return ret; | 850 | return ret; |
799 | } | 851 | } |
@@ -1067,15 +1119,16 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
1067 | * Called under s_mutex. | 1119 | * Called under s_mutex. |
1068 | */ | 1120 | */ |
1069 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 1121 | int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
1070 | struct ceph_mds_session *session, | 1122 | struct ceph_mds_session *session) |
1071 | int extra) | ||
1072 | { | 1123 | { |
1073 | struct ceph_msg *msg; | 1124 | struct ceph_msg *msg, *partial = NULL; |
1074 | struct ceph_mds_cap_release *head; | 1125 | struct ceph_mds_cap_release *head; |
1075 | int err = -ENOMEM; | 1126 | int err = -ENOMEM; |
1127 | int extra = mdsc->client->mount_args->cap_release_safety; | ||
1128 | int num; | ||
1076 | 1129 | ||
1077 | if (extra < 0) | 1130 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
1078 | extra = mdsc->client->mount_args->cap_release_safety; | 1131 | extra); |
1079 | 1132 | ||
1080 | spin_lock(&session->s_cap_lock); | 1133 | spin_lock(&session->s_cap_lock); |
1081 | 1134 | ||
@@ -1084,9 +1137,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1084 | struct ceph_msg, | 1137 | struct ceph_msg, |
1085 | list_head); | 1138 | list_head); |
1086 | head = msg->front.iov_base; | 1139 | head = msg->front.iov_base; |
1087 | extra += CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | 1140 | num = le32_to_cpu(head->num); |
1141 | if (num) { | ||
1142 | dout(" partial %p with (%d/%d)\n", msg, num, | ||
1143 | (int)CEPH_CAPS_PER_RELEASE); | ||
1144 | extra += CEPH_CAPS_PER_RELEASE - num; | ||
1145 | partial = msg; | ||
1146 | } | ||
1088 | } | 1147 | } |
1089 | |||
1090 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { | 1148 | while (session->s_num_cap_releases < session->s_nr_caps + extra) { |
1091 | spin_unlock(&session->s_cap_lock); | 1149 | spin_unlock(&session->s_cap_lock); |
1092 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, | 1150 | msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE, |
@@ -1103,19 +1161,14 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1103 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; | 1161 | session->s_num_cap_releases += CEPH_CAPS_PER_RELEASE; |
1104 | } | 1162 | } |
1105 | 1163 | ||
1106 | if (!list_empty(&session->s_cap_releases)) { | 1164 | if (partial) { |
1107 | msg = list_first_entry(&session->s_cap_releases, | 1165 | head = partial->front.iov_base; |
1108 | struct ceph_msg, | 1166 | num = le32_to_cpu(head->num); |
1109 | list_head); | 1167 | dout(" queueing partial %p with %d/%d\n", partial, num, |
1110 | head = msg->front.iov_base; | 1168 | (int)CEPH_CAPS_PER_RELEASE); |
1111 | if (head->num) { | 1169 | list_move_tail(&partial->list_head, |
1112 | dout(" queueing non-full %p (%d)\n", msg, | 1170 | &session->s_cap_releases_done); |
1113 | le32_to_cpu(head->num)); | 1171 | session->s_num_cap_releases -= CEPH_CAPS_PER_RELEASE - num; |
1114 | list_move_tail(&msg->list_head, | ||
1115 | &session->s_cap_releases_done); | ||
1116 | session->s_num_cap_releases -= | ||
1117 | CEPH_CAPS_PER_RELEASE - le32_to_cpu(head->num); | ||
1118 | } | ||
1119 | } | 1172 | } |
1120 | err = 0; | 1173 | err = 0; |
1121 | spin_unlock(&session->s_cap_lock); | 1174 | spin_unlock(&session->s_cap_lock); |
@@ -1250,6 +1303,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) | |||
1250 | return ERR_PTR(-ENOMEM); | 1303 | return ERR_PTR(-ENOMEM); |
1251 | 1304 | ||
1252 | mutex_init(&req->r_fill_mutex); | 1305 | mutex_init(&req->r_fill_mutex); |
1306 | req->r_mdsc = mdsc; | ||
1253 | req->r_started = jiffies; | 1307 | req->r_started = jiffies; |
1254 | req->r_resend_mds = -1; | 1308 | req->r_resend_mds = -1; |
1255 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); | 1309 | INIT_LIST_HEAD(&req->r_unsafe_dir_item); |
@@ -1580,6 +1634,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc, | |||
1580 | 1634 | ||
1581 | req->r_mds = mds; | 1635 | req->r_mds = mds; |
1582 | req->r_attempts++; | 1636 | req->r_attempts++; |
1637 | if (req->r_inode) { | ||
1638 | struct ceph_cap *cap = | ||
1639 | ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); | ||
1640 | |||
1641 | if (cap) | ||
1642 | req->r_sent_on_mseq = cap->mseq; | ||
1643 | else | ||
1644 | req->r_sent_on_mseq = -1; | ||
1645 | } | ||
1583 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, | 1646 | dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, |
1584 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); | 1647 | req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); |
1585 | 1648 | ||
@@ -1914,21 +1977,40 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1914 | result = le32_to_cpu(head->result); | 1977 | result = le32_to_cpu(head->result); |
1915 | 1978 | ||
1916 | /* | 1979 | /* |
1917 | * Tolerate 2 consecutive ESTALEs from the same mds. | 1980 | * Handle an ESTALE |
1918 | * FIXME: we should be looking at the cap migrate_seq. | 1981 | * if we're not talking to the authority, send to them |
1982 | * if the authority has changed while we weren't looking, | ||
1983 | * send to new authority | ||
1984 | * Otherwise we just have to return an ESTALE | ||
1919 | */ | 1985 | */ |
1920 | if (result == -ESTALE) { | 1986 | if (result == -ESTALE) { |
1921 | req->r_direct_mode = USE_AUTH_MDS; | 1987 | dout("got ESTALE on request %llu", req->r_tid); |
1922 | req->r_num_stale++; | 1988 | if (!req->r_inode) { |
1923 | if (req->r_num_stale <= 2) { | 1989 | /* do nothing; not an authority problem */ |
1990 | } else if (req->r_direct_mode != USE_AUTH_MDS) { | ||
1991 | dout("not using auth, setting for that now"); | ||
1992 | req->r_direct_mode = USE_AUTH_MDS; | ||
1924 | __do_request(mdsc, req); | 1993 | __do_request(mdsc, req); |
1925 | mutex_unlock(&mdsc->mutex); | 1994 | mutex_unlock(&mdsc->mutex); |
1926 | goto out; | 1995 | goto out; |
1996 | } else { | ||
1997 | struct ceph_inode_info *ci = ceph_inode(req->r_inode); | ||
1998 | struct ceph_cap *cap = | ||
1999 | ceph_get_cap_for_mds(ci, req->r_mds);; | ||
2000 | |||
2001 | dout("already using auth"); | ||
2002 | if ((!cap || cap != ci->i_auth_cap) || | ||
2003 | (cap->mseq != req->r_sent_on_mseq)) { | ||
2004 | dout("but cap changed, so resending"); | ||
2005 | __do_request(mdsc, req); | ||
2006 | mutex_unlock(&mdsc->mutex); | ||
2007 | goto out; | ||
2008 | } | ||
1927 | } | 2009 | } |
1928 | } else { | 2010 | dout("have to return ESTALE on request %llu", req->r_tid); |
1929 | req->r_num_stale = 0; | ||
1930 | } | 2011 | } |
1931 | 2012 | ||
2013 | |||
1932 | if (head->safe) { | 2014 | if (head->safe) { |
1933 | req->r_got_safe = true; | 2015 | req->r_got_safe = true; |
1934 | __unregister_request(mdsc, req); | 2016 | __unregister_request(mdsc, req); |
@@ -1985,7 +2067,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
1985 | if (err == 0) { | 2067 | if (err == 0) { |
1986 | if (result == 0 && rinfo->dir_nr) | 2068 | if (result == 0 && rinfo->dir_nr) |
1987 | ceph_readdir_prepopulate(req, req->r_session); | 2069 | ceph_readdir_prepopulate(req, req->r_session); |
1988 | ceph_unreserve_caps(&req->r_caps_reservation); | 2070 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
1989 | } | 2071 | } |
1990 | mutex_unlock(&req->r_fill_mutex); | 2072 | mutex_unlock(&req->r_fill_mutex); |
1991 | 2073 | ||
@@ -2005,7 +2087,7 @@ out_err: | |||
2005 | } | 2087 | } |
2006 | mutex_unlock(&mdsc->mutex); | 2088 | mutex_unlock(&mdsc->mutex); |
2007 | 2089 | ||
2008 | ceph_add_cap_releases(mdsc, req->r_session, -1); | 2090 | ceph_add_cap_releases(mdsc, req->r_session); |
2009 | mutex_unlock(&session->s_mutex); | 2091 | mutex_unlock(&session->s_mutex); |
2010 | 2092 | ||
2011 | /* kick calling process */ | 2093 | /* kick calling process */ |
@@ -2193,9 +2275,14 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc, | |||
2193 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | 2275 | static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, |
2194 | void *arg) | 2276 | void *arg) |
2195 | { | 2277 | { |
2196 | struct ceph_mds_cap_reconnect rec; | 2278 | union { |
2279 | struct ceph_mds_cap_reconnect v2; | ||
2280 | struct ceph_mds_cap_reconnect_v1 v1; | ||
2281 | } rec; | ||
2282 | size_t reclen; | ||
2197 | struct ceph_inode_info *ci; | 2283 | struct ceph_inode_info *ci; |
2198 | struct ceph_pagelist *pagelist = arg; | 2284 | struct ceph_reconnect_state *recon_state = arg; |
2285 | struct ceph_pagelist *pagelist = recon_state->pagelist; | ||
2199 | char *path; | 2286 | char *path; |
2200 | int pathlen, err; | 2287 | int pathlen, err; |
2201 | u64 pathbase; | 2288 | u64 pathbase; |
@@ -2228,17 +2315,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2228 | spin_lock(&inode->i_lock); | 2315 | spin_lock(&inode->i_lock); |
2229 | cap->seq = 0; /* reset cap seq */ | 2316 | cap->seq = 0; /* reset cap seq */ |
2230 | cap->issue_seq = 0; /* and issue_seq */ | 2317 | cap->issue_seq = 0; /* and issue_seq */ |
2231 | rec.cap_id = cpu_to_le64(cap->cap_id); | 2318 | |
2232 | rec.pathbase = cpu_to_le64(pathbase); | 2319 | if (recon_state->flock) { |
2233 | rec.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | 2320 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
2234 | rec.issued = cpu_to_le32(cap->issued); | 2321 | rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); |
2235 | rec.size = cpu_to_le64(inode->i_size); | 2322 | rec.v2.issued = cpu_to_le32(cap->issued); |
2236 | ceph_encode_timespec(&rec.mtime, &inode->i_mtime); | 2323 | rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); |
2237 | ceph_encode_timespec(&rec.atime, &inode->i_atime); | 2324 | rec.v2.pathbase = cpu_to_le64(pathbase); |
2238 | rec.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | 2325 | rec.v2.flock_len = 0; |
2326 | reclen = sizeof(rec.v2); | ||
2327 | } else { | ||
2328 | rec.v1.cap_id = cpu_to_le64(cap->cap_id); | ||
2329 | rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); | ||
2330 | rec.v1.issued = cpu_to_le32(cap->issued); | ||
2331 | rec.v1.size = cpu_to_le64(inode->i_size); | ||
2332 | ceph_encode_timespec(&rec.v1.mtime, &inode->i_mtime); | ||
2333 | ceph_encode_timespec(&rec.v1.atime, &inode->i_atime); | ||
2334 | rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); | ||
2335 | rec.v1.pathbase = cpu_to_le64(pathbase); | ||
2336 | reclen = sizeof(rec.v1); | ||
2337 | } | ||
2239 | spin_unlock(&inode->i_lock); | 2338 | spin_unlock(&inode->i_lock); |
2240 | 2339 | ||
2241 | err = ceph_pagelist_append(pagelist, &rec, sizeof(rec)); | 2340 | if (recon_state->flock) { |
2341 | int num_fcntl_locks, num_flock_locks; | ||
2342 | |||
2343 | lock_kernel(); | ||
2344 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | ||
2345 | rec.v2.flock_len = (2*sizeof(u32) + | ||
2346 | (num_fcntl_locks+num_flock_locks) * | ||
2347 | sizeof(struct ceph_filelock)); | ||
2348 | |||
2349 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2350 | if (!err) | ||
2351 | err = ceph_encode_locks(inode, pagelist, | ||
2352 | num_fcntl_locks, | ||
2353 | num_flock_locks); | ||
2354 | unlock_kernel(); | ||
2355 | } | ||
2242 | 2356 | ||
2243 | out: | 2357 | out: |
2244 | kfree(path); | 2358 | kfree(path); |
@@ -2267,6 +2381,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2267 | int mds = session->s_mds; | 2381 | int mds = session->s_mds; |
2268 | int err = -ENOMEM; | 2382 | int err = -ENOMEM; |
2269 | struct ceph_pagelist *pagelist; | 2383 | struct ceph_pagelist *pagelist; |
2384 | struct ceph_reconnect_state recon_state; | ||
2270 | 2385 | ||
2271 | pr_info("mds%d reconnect start\n", mds); | 2386 | pr_info("mds%d reconnect start\n", mds); |
2272 | 2387 | ||
@@ -2301,7 +2416,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2301 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2416 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); |
2302 | if (err) | 2417 | if (err) |
2303 | goto fail; | 2418 | goto fail; |
2304 | err = iterate_session_caps(session, encode_caps_cb, pagelist); | 2419 | |
2420 | recon_state.pagelist = pagelist; | ||
2421 | recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; | ||
2422 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | ||
2305 | if (err < 0) | 2423 | if (err < 0) |
2306 | goto fail; | 2424 | goto fail; |
2307 | 2425 | ||
@@ -2326,6 +2444,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
2326 | } | 2444 | } |
2327 | 2445 | ||
2328 | reply->pagelist = pagelist; | 2446 | reply->pagelist = pagelist; |
2447 | if (recon_state.flock) | ||
2448 | reply->hdr.version = cpu_to_le16(2); | ||
2329 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2449 | reply->hdr.data_len = cpu_to_le32(pagelist->length); |
2330 | reply->nr_pages = calc_pages_for(0, pagelist->length); | 2450 | reply->nr_pages = calc_pages_for(0, pagelist->length); |
2331 | ceph_con_send(&session->s_con, reply); | 2451 | ceph_con_send(&session->s_con, reply); |
@@ -2376,9 +2496,11 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2376 | oldstate = ceph_mdsmap_get_state(oldmap, i); | 2496 | oldstate = ceph_mdsmap_get_state(oldmap, i); |
2377 | newstate = ceph_mdsmap_get_state(newmap, i); | 2497 | newstate = ceph_mdsmap_get_state(newmap, i); |
2378 | 2498 | ||
2379 | dout("check_new_map mds%d state %s -> %s (session %s)\n", | 2499 | dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", |
2380 | i, ceph_mds_state_name(oldstate), | 2500 | i, ceph_mds_state_name(oldstate), |
2501 | ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", | ||
2381 | ceph_mds_state_name(newstate), | 2502 | ceph_mds_state_name(newstate), |
2503 | ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", | ||
2382 | session_state_name(s->s_state)); | 2504 | session_state_name(s->s_state)); |
2383 | 2505 | ||
2384 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), | 2506 | if (memcmp(ceph_mdsmap_get_addr(oldmap, i), |
@@ -2428,6 +2550,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, | |||
2428 | wake_up_session_caps(s, 1); | 2550 | wake_up_session_caps(s, 1); |
2429 | } | 2551 | } |
2430 | } | 2552 | } |
2553 | |||
2554 | for (i = 0; i < newmap->m_max_mds && i < mdsc->max_sessions; i++) { | ||
2555 | s = mdsc->sessions[i]; | ||
2556 | if (!s) | ||
2557 | continue; | ||
2558 | if (!ceph_mdsmap_is_laggy(newmap, i)) | ||
2559 | continue; | ||
2560 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | ||
2561 | s->s_state == CEPH_MDS_SESSION_HUNG || | ||
2562 | s->s_state == CEPH_MDS_SESSION_CLOSING) { | ||
2563 | dout(" connecting to export targets of laggy mds%d\n", | ||
2564 | i); | ||
2565 | __open_export_target_sessions(mdsc, s); | ||
2566 | } | ||
2567 | } | ||
2431 | } | 2568 | } |
2432 | 2569 | ||
2433 | 2570 | ||
@@ -2715,7 +2852,7 @@ static void delayed_work(struct work_struct *work) | |||
2715 | send_renew_caps(mdsc, s); | 2852 | send_renew_caps(mdsc, s); |
2716 | else | 2853 | else |
2717 | ceph_con_keepalive(&s->s_con); | 2854 | ceph_con_keepalive(&s->s_con); |
2718 | ceph_add_cap_releases(mdsc, s, -1); | 2855 | ceph_add_cap_releases(mdsc, s); |
2719 | if (s->s_state == CEPH_MDS_SESSION_OPEN || | 2856 | if (s->s_state == CEPH_MDS_SESSION_OPEN || |
2720 | s->s_state == CEPH_MDS_SESSION_HUNG) | 2857 | s->s_state == CEPH_MDS_SESSION_HUNG) |
2721 | ceph_send_cap_releases(mdsc, s); | 2858 | ceph_send_cap_releases(mdsc, s); |
@@ -2764,6 +2901,9 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2764 | spin_lock_init(&mdsc->dentry_lru_lock); | 2901 | spin_lock_init(&mdsc->dentry_lru_lock); |
2765 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2902 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2766 | 2903 | ||
2904 | ceph_caps_init(mdsc); | ||
2905 | ceph_adjust_min_caps(mdsc, client->min_caps); | ||
2906 | |||
2767 | return 0; | 2907 | return 0; |
2768 | } | 2908 | } |
2769 | 2909 | ||
@@ -2959,6 +3099,7 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
2959 | if (mdsc->mdsmap) | 3099 | if (mdsc->mdsmap) |
2960 | ceph_mdsmap_destroy(mdsc->mdsmap); | 3100 | ceph_mdsmap_destroy(mdsc->mdsmap); |
2961 | kfree(mdsc->sessions); | 3101 | kfree(mdsc->sessions); |
3102 | ceph_caps_finalize(mdsc); | ||
2962 | } | 3103 | } |
2963 | 3104 | ||
2964 | 3105 | ||
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 952410c60d09..ab7e89f5e344 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -151,6 +151,7 @@ typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc, | |||
151 | struct ceph_mds_request { | 151 | struct ceph_mds_request { |
152 | u64 r_tid; /* transaction id */ | 152 | u64 r_tid; /* transaction id */ |
153 | struct rb_node r_node; | 153 | struct rb_node r_node; |
154 | struct ceph_mds_client *r_mdsc; | ||
154 | 155 | ||
155 | int r_op; /* mds op code */ | 156 | int r_op; /* mds op code */ |
156 | int r_mds; | 157 | int r_mds; |
@@ -207,8 +208,8 @@ struct ceph_mds_request { | |||
207 | 208 | ||
208 | int r_attempts; /* resend attempts */ | 209 | int r_attempts; /* resend attempts */ |
209 | int r_num_fwd; /* number of forward attempts */ | 210 | int r_num_fwd; /* number of forward attempts */ |
210 | int r_num_stale; | ||
211 | int r_resend_mds; /* mds to resend to next, if any*/ | 211 | int r_resend_mds; /* mds to resend to next, if any*/ |
212 | u32 r_sent_on_mseq; /* cap mseq request was sent at*/ | ||
212 | 213 | ||
213 | struct kref r_kref; | 214 | struct kref r_kref; |
214 | struct list_head r_wait; | 215 | struct list_head r_wait; |
@@ -267,6 +268,27 @@ struct ceph_mds_client { | |||
267 | spinlock_t cap_dirty_lock; /* protects above items */ | 268 | spinlock_t cap_dirty_lock; /* protects above items */ |
268 | wait_queue_head_t cap_flushing_wq; | 269 | wait_queue_head_t cap_flushing_wq; |
269 | 270 | ||
271 | /* | ||
272 | * Cap reservations | ||
273 | * | ||
274 | * Maintain a global pool of preallocated struct ceph_caps, referenced | ||
275 | * by struct ceph_caps_reservations. This ensures that we preallocate | ||
276 | * memory needed to successfully process an MDS response. (If an MDS | ||
277 | * sends us cap information and we fail to process it, we will have | ||
278 | * problems due to the client and MDS being out of sync.) | ||
279 | * | ||
280 | * Reservations are 'owned' by a ceph_cap_reservation context. | ||
281 | */ | ||
282 | spinlock_t caps_list_lock; | ||
283 | struct list_head caps_list; /* unused (reserved or | ||
284 | unreserved) */ | ||
285 | int caps_total_count; /* total caps allocated */ | ||
286 | int caps_use_count; /* in use */ | ||
287 | int caps_reserve_count; /* unused, reserved */ | ||
288 | int caps_avail_count; /* unused, unreserved */ | ||
289 | int caps_min_count; /* keep at least this many | ||
290 | (unreserved) */ | ||
291 | |||
270 | #ifdef CONFIG_DEBUG_FS | 292 | #ifdef CONFIG_DEBUG_FS |
271 | struct dentry *debugfs_file; | 293 | struct dentry *debugfs_file; |
272 | #endif | 294 | #endif |
@@ -324,8 +346,7 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) | |||
324 | } | 346 | } |
325 | 347 | ||
326 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | 348 | extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, |
327 | struct ceph_mds_session *session, | 349 | struct ceph_mds_session *session); |
328 | int extra); | ||
329 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, | 350 | extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, |
330 | struct ceph_mds_session *session); | 351 | struct ceph_mds_session *session); |
331 | 352 | ||
@@ -343,4 +364,7 @@ extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, | |||
343 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, | 364 | extern void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, |
344 | struct ceph_msg *msg); | 365 | struct ceph_msg *msg); |
345 | 366 | ||
367 | extern void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, | ||
368 | struct ceph_mds_session *session); | ||
369 | |||
346 | #endif | 370 | #endif |
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index c4c498e6dfef..040be6d1150b 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -85,6 +85,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
85 | struct ceph_entity_addr addr; | 85 | struct ceph_entity_addr addr; |
86 | u32 num_export_targets; | 86 | u32 num_export_targets; |
87 | void *pexport_targets = NULL; | 87 | void *pexport_targets = NULL; |
88 | struct ceph_timespec laggy_since; | ||
88 | 89 | ||
89 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); | 90 | ceph_decode_need(p, end, sizeof(u64)*2 + 1 + sizeof(u32), bad); |
90 | global_id = ceph_decode_64(p); | 91 | global_id = ceph_decode_64(p); |
@@ -103,7 +104,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
103 | state_seq = ceph_decode_64(p); | 104 | state_seq = ceph_decode_64(p); |
104 | ceph_decode_copy(p, &addr, sizeof(addr)); | 105 | ceph_decode_copy(p, &addr, sizeof(addr)); |
105 | ceph_decode_addr(&addr); | 106 | ceph_decode_addr(&addr); |
106 | *p += sizeof(struct ceph_timespec); | 107 | ceph_decode_copy(p, &laggy_since, sizeof(laggy_since)); |
107 | *p += sizeof(u32); | 108 | *p += sizeof(u32); |
108 | ceph_decode_32_safe(p, end, namelen, bad); | 109 | ceph_decode_32_safe(p, end, namelen, bad); |
109 | *p += namelen; | 110 | *p += namelen; |
@@ -122,6 +123,9 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
122 | m->m_info[mds].global_id = global_id; | 123 | m->m_info[mds].global_id = global_id; |
123 | m->m_info[mds].state = state; | 124 | m->m_info[mds].state = state; |
124 | m->m_info[mds].addr = addr; | 125 | m->m_info[mds].addr = addr; |
126 | m->m_info[mds].laggy = | ||
127 | (laggy_since.tv_sec != 0 || | ||
128 | laggy_since.tv_nsec != 0); | ||
125 | m->m_info[mds].num_export_targets = num_export_targets; | 129 | m->m_info[mds].num_export_targets = num_export_targets; |
126 | if (num_export_targets) { | 130 | if (num_export_targets) { |
127 | m->m_info[mds].export_targets = | 131 | m->m_info[mds].export_targets = |
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h index eacc131aa5cb..4c5cb0880bba 100644 --- a/fs/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h | |||
@@ -13,6 +13,7 @@ struct ceph_mds_info { | |||
13 | struct ceph_entity_addr addr; | 13 | struct ceph_entity_addr addr; |
14 | s32 state; | 14 | s32 state; |
15 | int num_export_targets; | 15 | int num_export_targets; |
16 | bool laggy; | ||
16 | u32 *export_targets; | 17 | u32 *export_targets; |
17 | }; | 18 | }; |
18 | 19 | ||
@@ -47,6 +48,13 @@ static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) | |||
47 | return m->m_info[w].state; | 48 | return m->m_info[w].state; |
48 | } | 49 | } |
49 | 50 | ||
51 | static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | ||
52 | { | ||
53 | if (w >= 0 && w < m->m_max_mds) | ||
54 | return m->m_info[w].laggy; | ||
55 | return false; | ||
56 | } | ||
57 | |||
50 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | 58 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); |
51 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | 59 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); |
52 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | 60 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); |
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c index 15167b2daa55..2502d76fcec1 100644 --- a/fs/ceph/messenger.c +++ b/fs/ceph/messenger.c | |||
@@ -108,7 +108,7 @@ void ceph_msgr_exit(void) | |||
108 | destroy_workqueue(ceph_msgr_wq); | 108 | destroy_workqueue(ceph_msgr_wq); |
109 | } | 109 | } |
110 | 110 | ||
111 | void ceph_msgr_flush() | 111 | void ceph_msgr_flush(void) |
112 | { | 112 | { |
113 | flush_workqueue(ceph_msgr_wq); | 113 | flush_workqueue(ceph_msgr_wq); |
114 | } | 114 | } |
@@ -647,7 +647,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr, | |||
647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | 647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, |
648 | con->connect_seq, global_seq, proto); | 648 | con->connect_seq, global_seq, proto); |
649 | 649 | ||
650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT); | 650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); |
651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | 651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); |
652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | 652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); |
653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | 653 | con->out_connect.global_seq = cpu_to_le32(global_seq); |
@@ -1081,11 +1081,11 @@ static int process_banner(struct ceph_connection *con) | |||
1081 | sizeof(con->peer_addr)) != 0 && | 1081 | sizeof(con->peer_addr)) != 0 && |
1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && | 1082 | !(addr_is_blank(&con->actual_peer_addr.in_addr) && |
1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { | 1083 | con->actual_peer_addr.nonce == con->peer_addr.nonce)) { |
1084 | pr_warning("wrong peer, want %s/%lld, got %s/%lld\n", | 1084 | pr_warning("wrong peer, want %s/%d, got %s/%d\n", |
1085 | pr_addr(&con->peer_addr.in_addr), | 1085 | pr_addr(&con->peer_addr.in_addr), |
1086 | le64_to_cpu(con->peer_addr.nonce), | 1086 | (int)le32_to_cpu(con->peer_addr.nonce), |
1087 | pr_addr(&con->actual_peer_addr.in_addr), | 1087 | pr_addr(&con->actual_peer_addr.in_addr), |
1088 | le64_to_cpu(con->actual_peer_addr.nonce)); | 1088 | (int)le32_to_cpu(con->actual_peer_addr.nonce)); |
1089 | con->error_msg = "wrong peer at address"; | 1089 | con->error_msg = "wrong peer at address"; |
1090 | return -1; | 1090 | return -1; |
1091 | } | 1091 | } |
@@ -1123,8 +1123,8 @@ static void fail_protocol(struct ceph_connection *con) | |||
1123 | 1123 | ||
1124 | static int process_connect(struct ceph_connection *con) | 1124 | static int process_connect(struct ceph_connection *con) |
1125 | { | 1125 | { |
1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED_CLIENT; | 1126 | u64 sup_feat = CEPH_FEATURE_SUPPORTED; |
1127 | u64 req_feat = CEPH_FEATURE_REQUIRED_CLIENT; | 1127 | u64 req_feat = CEPH_FEATURE_REQUIRED; |
1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); | 1128 | u64 server_feat = le64_to_cpu(con->in_reply.features); |
1129 | 1129 | ||
1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); | 1130 | dout("process_connect on %p tag %d\n", con, (int)con->in_tag); |
@@ -1302,8 +1302,8 @@ static void process_ack(struct ceph_connection *con) | |||
1302 | 1302 | ||
1303 | 1303 | ||
1304 | static int read_partial_message_section(struct ceph_connection *con, | 1304 | static int read_partial_message_section(struct ceph_connection *con, |
1305 | struct kvec *section, unsigned int sec_len, | 1305 | struct kvec *section, |
1306 | u32 *crc) | 1306 | unsigned int sec_len, u32 *crc) |
1307 | { | 1307 | { |
1308 | int left; | 1308 | int left; |
1309 | int ret; | 1309 | int ret; |
@@ -1434,7 +1434,8 @@ static int read_partial_message(struct ceph_connection *con) | |||
1434 | 1434 | ||
1435 | /* middle */ | 1435 | /* middle */ |
1436 | if (m->middle) { | 1436 | if (m->middle) { |
1437 | ret = read_partial_message_section(con, &m->middle->vec, middle_len, | 1437 | ret = read_partial_message_section(con, &m->middle->vec, |
1438 | middle_len, | ||
1438 | &con->in_middle_crc); | 1439 | &con->in_middle_crc); |
1439 | if (ret <= 0) | 1440 | if (ret <= 0) |
1440 | return ret; | 1441 | return ret; |
@@ -1920,7 +1921,7 @@ out: | |||
1920 | /* | 1921 | /* |
1921 | * in case we faulted due to authentication, invalidate our | 1922 | * in case we faulted due to authentication, invalidate our |
1922 | * current tickets so that we can get new ones. | 1923 | * current tickets so that we can get new ones. |
1923 | */ | 1924 | */ |
1924 | if (con->auth_retry && con->ops->invalidate_authorizer) { | 1925 | if (con->auth_retry && con->ops->invalidate_authorizer) { |
1925 | dout("calling invalidate_authorizer()\n"); | 1926 | dout("calling invalidate_authorizer()\n"); |
1926 | con->ops->invalidate_authorizer(con); | 1927 | con->ops->invalidate_authorizer(con); |
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 54fe01c50706..b2a5a3e4a671 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c | |||
@@ -349,7 +349,7 @@ out: | |||
349 | } | 349 | } |
350 | 350 | ||
351 | /* | 351 | /* |
352 | * statfs | 352 | * generic requests (e.g., statfs, poolop) |
353 | */ | 353 | */ |
354 | static struct ceph_mon_generic_request *__lookup_generic_req( | 354 | static struct ceph_mon_generic_request *__lookup_generic_req( |
355 | struct ceph_mon_client *monc, u64 tid) | 355 | struct ceph_mon_client *monc, u64 tid) |
@@ -442,6 +442,35 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | |||
442 | return m; | 442 | return m; |
443 | } | 443 | } |
444 | 444 | ||
445 | static int do_generic_request(struct ceph_mon_client *monc, | ||
446 | struct ceph_mon_generic_request *req) | ||
447 | { | ||
448 | int err; | ||
449 | |||
450 | /* register request */ | ||
451 | mutex_lock(&monc->mutex); | ||
452 | req->tid = ++monc->last_tid; | ||
453 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
454 | __insert_generic_request(monc, req); | ||
455 | monc->num_generic_requests++; | ||
456 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
457 | mutex_unlock(&monc->mutex); | ||
458 | |||
459 | err = wait_for_completion_interruptible(&req->completion); | ||
460 | |||
461 | mutex_lock(&monc->mutex); | ||
462 | rb_erase(&req->node, &monc->generic_request_tree); | ||
463 | monc->num_generic_requests--; | ||
464 | mutex_unlock(&monc->mutex); | ||
465 | |||
466 | if (!err) | ||
467 | err = req->result; | ||
468 | return err; | ||
469 | } | ||
470 | |||
471 | /* | ||
472 | * statfs | ||
473 | */ | ||
445 | static void handle_statfs_reply(struct ceph_mon_client *monc, | 474 | static void handle_statfs_reply(struct ceph_mon_client *monc, |
446 | struct ceph_msg *msg) | 475 | struct ceph_msg *msg) |
447 | { | 476 | { |
@@ -468,7 +497,7 @@ static void handle_statfs_reply(struct ceph_mon_client *monc, | |||
468 | return; | 497 | return; |
469 | 498 | ||
470 | bad: | 499 | bad: |
471 | pr_err("corrupt generic reply, no tid\n"); | 500 | pr_err("corrupt generic reply, tid %llu\n", tid); |
472 | ceph_msg_dump(msg); | 501 | ceph_msg_dump(msg); |
473 | } | 502 | } |
474 | 503 | ||
@@ -487,6 +516,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
487 | 516 | ||
488 | kref_init(&req->kref); | 517 | kref_init(&req->kref); |
489 | req->buf = buf; | 518 | req->buf = buf; |
519 | req->buf_len = sizeof(*buf); | ||
490 | init_completion(&req->completion); | 520 | init_completion(&req->completion); |
491 | 521 | ||
492 | err = -ENOMEM; | 522 | err = -ENOMEM; |
@@ -504,33 +534,134 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) | |||
504 | h->monhdr.session_mon_tid = 0; | 534 | h->monhdr.session_mon_tid = 0; |
505 | h->fsid = monc->monmap->fsid; | 535 | h->fsid = monc->monmap->fsid; |
506 | 536 | ||
507 | /* register request */ | 537 | err = do_generic_request(monc, req); |
508 | mutex_lock(&monc->mutex); | ||
509 | req->tid = ++monc->last_tid; | ||
510 | req->request->hdr.tid = cpu_to_le64(req->tid); | ||
511 | __insert_generic_request(monc, req); | ||
512 | monc->num_generic_requests++; | ||
513 | mutex_unlock(&monc->mutex); | ||
514 | 538 | ||
515 | /* send request and wait */ | 539 | out: |
516 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | 540 | kref_put(&req->kref, release_generic_request); |
517 | err = wait_for_completion_interruptible(&req->completion); | 541 | return err; |
542 | } | ||
543 | |||
544 | /* | ||
545 | * pool ops | ||
546 | */ | ||
547 | static int get_poolop_reply_buf(const char *src, size_t src_len, | ||
548 | char *dst, size_t dst_len) | ||
549 | { | ||
550 | u32 buf_len; | ||
551 | |||
552 | if (src_len != sizeof(u32) + dst_len) | ||
553 | return -EINVAL; | ||
554 | |||
555 | buf_len = le32_to_cpu(*(u32 *)src); | ||
556 | if (buf_len != dst_len) | ||
557 | return -EINVAL; | ||
558 | |||
559 | memcpy(dst, src + sizeof(u32), dst_len); | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | static void handle_poolop_reply(struct ceph_mon_client *monc, | ||
564 | struct ceph_msg *msg) | ||
565 | { | ||
566 | struct ceph_mon_generic_request *req; | ||
567 | struct ceph_mon_poolop_reply *reply = msg->front.iov_base; | ||
568 | u64 tid = le64_to_cpu(msg->hdr.tid); | ||
569 | |||
570 | if (msg->front.iov_len < sizeof(*reply)) | ||
571 | goto bad; | ||
572 | dout("handle_poolop_reply %p tid %llu\n", msg, tid); | ||
518 | 573 | ||
519 | mutex_lock(&monc->mutex); | 574 | mutex_lock(&monc->mutex); |
520 | rb_erase(&req->node, &monc->generic_request_tree); | 575 | req = __lookup_generic_req(monc, tid); |
521 | monc->num_generic_requests--; | 576 | if (req) { |
577 | if (req->buf_len && | ||
578 | get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), | ||
579 | msg->front.iov_len - sizeof(*reply), | ||
580 | req->buf, req->buf_len) < 0) { | ||
581 | mutex_unlock(&monc->mutex); | ||
582 | goto bad; | ||
583 | } | ||
584 | req->result = le32_to_cpu(reply->reply_code); | ||
585 | get_generic_request(req); | ||
586 | } | ||
522 | mutex_unlock(&monc->mutex); | 587 | mutex_unlock(&monc->mutex); |
588 | if (req) { | ||
589 | complete(&req->completion); | ||
590 | put_generic_request(req); | ||
591 | } | ||
592 | return; | ||
523 | 593 | ||
524 | if (!err) | 594 | bad: |
525 | err = req->result; | 595 | pr_err("corrupt generic reply, tid %llu\n", tid); |
596 | ceph_msg_dump(msg); | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Do a synchronous pool op. | ||
601 | */ | ||
602 | int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, | ||
603 | u32 pool, u64 snapid, | ||
604 | char *buf, int len) | ||
605 | { | ||
606 | struct ceph_mon_generic_request *req; | ||
607 | struct ceph_mon_poolop *h; | ||
608 | int err; | ||
609 | |||
610 | req = kzalloc(sizeof(*req), GFP_NOFS); | ||
611 | if (!req) | ||
612 | return -ENOMEM; | ||
613 | |||
614 | kref_init(&req->kref); | ||
615 | req->buf = buf; | ||
616 | req->buf_len = len; | ||
617 | init_completion(&req->completion); | ||
618 | |||
619 | err = -ENOMEM; | ||
620 | req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS); | ||
621 | if (!req->request) | ||
622 | goto out; | ||
623 | req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS); | ||
624 | if (!req->reply) | ||
625 | goto out; | ||
626 | |||
627 | /* fill out request */ | ||
628 | req->request->hdr.version = cpu_to_le16(2); | ||
629 | h = req->request->front.iov_base; | ||
630 | h->monhdr.have_version = 0; | ||
631 | h->monhdr.session_mon = cpu_to_le16(-1); | ||
632 | h->monhdr.session_mon_tid = 0; | ||
633 | h->fsid = monc->monmap->fsid; | ||
634 | h->pool = cpu_to_le32(pool); | ||
635 | h->op = cpu_to_le32(op); | ||
636 | h->auid = 0; | ||
637 | h->snapid = cpu_to_le64(snapid); | ||
638 | h->name_len = 0; | ||
639 | |||
640 | err = do_generic_request(monc, req); | ||
526 | 641 | ||
527 | out: | 642 | out: |
528 | kref_put(&req->kref, release_generic_request); | 643 | kref_put(&req->kref, release_generic_request); |
529 | return err; | 644 | return err; |
530 | } | 645 | } |
531 | 646 | ||
647 | int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
648 | u32 pool, u64 *snapid) | ||
649 | { | ||
650 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
651 | pool, 0, (char *)snapid, sizeof(*snapid)); | ||
652 | |||
653 | } | ||
654 | |||
655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
656 | u32 pool, u64 snapid) | ||
657 | { | ||
658 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
659 | pool, snapid, 0, 0); | ||
660 | |||
661 | } | ||
662 | |||
532 | /* | 663 | /* |
533 | * Resend pending statfs requests. | 664 | * Resend pending generic requests. |
534 | */ | 665 | */ |
535 | static void __resend_generic_request(struct ceph_mon_client *monc) | 666 | static void __resend_generic_request(struct ceph_mon_client *monc) |
536 | { | 667 | { |
@@ -783,6 +914,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | |||
783 | handle_statfs_reply(monc, msg); | 914 | handle_statfs_reply(monc, msg); |
784 | break; | 915 | break; |
785 | 916 | ||
917 | case CEPH_MSG_POOLOP_REPLY: | ||
918 | handle_poolop_reply(monc, msg); | ||
919 | break; | ||
920 | |||
786 | case CEPH_MSG_MON_MAP: | 921 | case CEPH_MSG_MON_MAP: |
787 | ceph_monc_handle_map(monc, msg); | 922 | ceph_monc_handle_map(monc, msg); |
788 | break; | 923 | break; |
@@ -820,6 +955,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
820 | case CEPH_MSG_MON_SUBSCRIBE_ACK: | 955 | case CEPH_MSG_MON_SUBSCRIBE_ACK: |
821 | m = ceph_msg_get(monc->m_subscribe_ack); | 956 | m = ceph_msg_get(monc->m_subscribe_ack); |
822 | break; | 957 | break; |
958 | case CEPH_MSG_POOLOP_REPLY: | ||
823 | case CEPH_MSG_STATFS_REPLY: | 959 | case CEPH_MSG_STATFS_REPLY: |
824 | return get_generic_reply(con, hdr, skip); | 960 | return get_generic_reply(con, hdr, skip); |
825 | case CEPH_MSG_AUTH_REPLY: | 961 | case CEPH_MSG_AUTH_REPLY: |
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h index 174d794321d0..8e396f2c0963 100644 --- a/fs/ceph/mon_client.h +++ b/fs/ceph/mon_client.h | |||
@@ -50,6 +50,7 @@ struct ceph_mon_generic_request { | |||
50 | struct rb_node node; | 50 | struct rb_node node; |
51 | int result; | 51 | int result; |
52 | void *buf; | 52 | void *buf; |
53 | int buf_len; | ||
53 | struct completion completion; | 54 | struct completion completion; |
54 | struct ceph_msg *request; /* original request */ | 55 | struct ceph_msg *request; /* original request */ |
55 | struct ceph_msg *reply; /* and reply */ | 56 | struct ceph_msg *reply; /* and reply */ |
@@ -111,6 +112,10 @@ extern int ceph_monc_open_session(struct ceph_mon_client *monc); | |||
111 | 112 | ||
112 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); | 113 | extern int ceph_monc_validate_auth(struct ceph_mon_client *monc); |
113 | 114 | ||
115 | extern int ceph_monc_create_snapid(struct ceph_mon_client *monc, | ||
116 | u32 pool, u64 *snapid); | ||
114 | 117 | ||
118 | extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
119 | u32 pool, u64 snapid); | ||
115 | 120 | ||
116 | #endif | 121 | #endif |
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h index 892a0298dfdf..680d3d648cac 100644 --- a/fs/ceph/msgr.h +++ b/fs/ceph/msgr.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef __MSGR_H | 1 | #ifndef CEPH_MSGR_H |
2 | #define __MSGR_H | 2 | #define CEPH_MSGR_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Data types for message passing layer used by Ceph. | 5 | * Data types for message passing layer used by Ceph. |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index e38522347898..bed6391e52c7 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
@@ -1276,8 +1276,6 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1276 | 1276 | ||
1277 | /* it may be a short read due to an object boundary */ | 1277 | /* it may be a short read due to an object boundary */ |
1278 | req->r_pages = pages; | 1278 | req->r_pages = pages; |
1279 | num_pages = calc_pages_for(off, *plen); | ||
1280 | req->r_num_pages = num_pages; | ||
1281 | 1279 | ||
1282 | dout("readpages final extent is %llu~%llu (%d pages)\n", | 1280 | dout("readpages final extent is %llu~%llu (%d pages)\n", |
1283 | off, *plen, req->r_num_pages); | 1281 | off, *plen, req->r_num_pages); |
@@ -1319,7 +1317,6 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1319 | 1317 | ||
1320 | /* it may be a short write due to an object boundary */ | 1318 | /* it may be a short write due to an object boundary */ |
1321 | req->r_pages = pages; | 1319 | req->r_pages = pages; |
1322 | req->r_num_pages = calc_pages_for(off, len); | ||
1323 | dout("writepages %llu~%llu (%d pages)\n", off, len, | 1320 | dout("writepages %llu~%llu (%d pages)\n", off, len, |
1324 | req->r_num_pages); | 1321 | req->r_num_pages); |
1325 | 1322 | ||
@@ -1476,8 +1473,8 @@ static void put_osd_con(struct ceph_connection *con) | |||
1476 | * authentication | 1473 | * authentication |
1477 | */ | 1474 | */ |
1478 | static int get_authorizer(struct ceph_connection *con, | 1475 | static int get_authorizer(struct ceph_connection *con, |
1479 | void **buf, int *len, int *proto, | 1476 | void **buf, int *len, int *proto, |
1480 | void **reply_buf, int *reply_len, int force_new) | 1477 | void **reply_buf, int *reply_len, int force_new) |
1481 | { | 1478 | { |
1482 | struct ceph_osd *o = con->private; | 1479 | struct ceph_osd *o = con->private; |
1483 | struct ceph_osd_client *osdc = o->o_osdc; | 1480 | struct ceph_osd_client *osdc = o->o_osdc; |
@@ -1497,7 +1494,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
1497 | &o->o_authorizer_reply_buf, | 1494 | &o->o_authorizer_reply_buf, |
1498 | &o->o_authorizer_reply_buf_len); | 1495 | &o->o_authorizer_reply_buf_len); |
1499 | if (ret) | 1496 | if (ret) |
1500 | return ret; | 1497 | return ret; |
1501 | } | 1498 | } |
1502 | 1499 | ||
1503 | *proto = ac->protocol; | 1500 | *proto = ac->protocol; |
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c index 416d46adbf87..e31f118f1392 100644 --- a/fs/ceph/osdmap.c +++ b/fs/ceph/osdmap.c | |||
@@ -424,12 +424,30 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | |||
424 | kfree(pi); | 424 | kfree(pi); |
425 | } | 425 | } |
426 | 426 | ||
427 | void __decode_pool(void **p, struct ceph_pg_pool_info *pi) | 427 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) |
428 | { | 428 | { |
429 | unsigned n, m; | ||
430 | |||
429 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | 431 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); |
430 | calc_pg_masks(pi); | 432 | calc_pg_masks(pi); |
431 | *p += le32_to_cpu(pi->v.num_snaps) * sizeof(u64); | 433 | |
434 | /* num_snaps * snap_info_t */ | ||
435 | n = le32_to_cpu(pi->v.num_snaps); | ||
436 | while (n--) { | ||
437 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | ||
438 | sizeof(struct ceph_timespec), bad); | ||
439 | *p += sizeof(u64) + /* key */ | ||
440 | 1 + sizeof(u64) + /* u8, snapid */ | ||
441 | sizeof(struct ceph_timespec); | ||
442 | m = ceph_decode_32(p); /* snap name */ | ||
443 | *p += m; | ||
444 | } | ||
445 | |||
432 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | 446 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; |
447 | return 0; | ||
448 | |||
449 | bad: | ||
450 | return -EINVAL; | ||
433 | } | 451 | } |
434 | 452 | ||
435 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | 453 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) |
@@ -571,7 +589,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
571 | kfree(pi); | 589 | kfree(pi); |
572 | goto bad; | 590 | goto bad; |
573 | } | 591 | } |
574 | __decode_pool(p, pi); | 592 | err = __decode_pool(p, end, pi); |
593 | if (err < 0) | ||
594 | goto bad; | ||
575 | __insert_pg_pool(&map->pg_pools, pi); | 595 | __insert_pg_pool(&map->pg_pools, pi); |
576 | } | 596 | } |
577 | 597 | ||
@@ -760,7 +780,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
760 | pi->id = pool; | 780 | pi->id = pool; |
761 | __insert_pg_pool(&map->pg_pools, pi); | 781 | __insert_pg_pool(&map->pg_pools, pi); |
762 | } | 782 | } |
763 | __decode_pool(p, pi); | 783 | err = __decode_pool(p, end, pi); |
784 | if (err < 0) | ||
785 | goto bad; | ||
764 | } | 786 | } |
765 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | 787 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) |
766 | goto bad; | 788 | goto bad; |
@@ -833,7 +855,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
833 | node)->pgid, pgid) <= 0) { | 855 | node)->pgid, pgid) <= 0) { |
834 | struct ceph_pg_mapping *cur = | 856 | struct ceph_pg_mapping *cur = |
835 | rb_entry(rbp, struct ceph_pg_mapping, node); | 857 | rb_entry(rbp, struct ceph_pg_mapping, node); |
836 | 858 | ||
837 | rbp = rb_next(rbp); | 859 | rbp = rb_next(rbp); |
838 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); | 860 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); |
839 | rb_erase(&cur->node, &map->pg_temp); | 861 | rb_erase(&cur->node, &map->pg_temp); |
@@ -1026,8 +1048,9 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
1026 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, | 1048 | ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, |
1027 | pool->v.type, pool->v.size); | 1049 | pool->v.type, pool->v.size); |
1028 | if (ruleno < 0) { | 1050 | if (ruleno < 0) { |
1029 | pr_err("no crush rule pool %d type %d size %d\n", | 1051 | pr_err("no crush rule pool %d ruleset %d type %d size %d\n", |
1030 | poolid, pool->v.type, pool->v.size); | 1052 | poolid, pool->v.crush_ruleset, pool->v.type, |
1053 | pool->v.size); | ||
1031 | return NULL; | 1054 | return NULL; |
1032 | } | 1055 | } |
1033 | 1056 | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h index 8fcc023056c7..6d5247f2e81b 100644 --- a/fs/ceph/rados.h +++ b/fs/ceph/rados.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef __RADOS_H | 1 | #ifndef CEPH_RADOS_H |
2 | #define __RADOS_H | 2 | #define CEPH_RADOS_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Data types for the Ceph distributed object storage layer RADOS | 5 | * Data types for the Ceph distributed object storage layer RADOS |
@@ -203,6 +203,7 @@ enum { | |||
203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, | 203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, |
204 | 204 | ||
205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, | 205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, |
206 | CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, | ||
206 | 207 | ||
207 | /** attrs **/ | 208 | /** attrs **/ |
208 | /* read */ | 209 | /* read */ |
@@ -272,6 +273,10 @@ static inline int ceph_osd_op_mode_modify(int op) | |||
272 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; | 273 | return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; |
273 | } | 274 | } |
274 | 275 | ||
276 | /* | ||
277 | * note that the following tmap stuff is also defined in the ceph librados.h | ||
278 | * any modification here needs to be updated there | ||
279 | */ | ||
275 | #define CEPH_OSD_TMAP_HDR 'h' | 280 | #define CEPH_OSD_TMAP_HDR 'h' |
276 | #define CEPH_OSD_TMAP_SET 's' | 281 | #define CEPH_OSD_TMAP_SET 's' |
277 | #define CEPH_OSD_TMAP_RM 'r' | 282 | #define CEPH_OSD_TMAP_RM 'r' |
@@ -297,6 +302,7 @@ enum { | |||
297 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | 302 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ |
298 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | 303 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ |
299 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | 304 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ |
305 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | ||
300 | }; | 306 | }; |
301 | 307 | ||
302 | enum { | 308 | enum { |
@@ -350,6 +356,9 @@ struct ceph_osd_op { | |||
350 | struct { | 356 | struct { |
351 | __le64 cookie, count; | 357 | __le64 cookie, count; |
352 | } __attribute__ ((packed)) pgls; | 358 | } __attribute__ ((packed)) pgls; |
359 | struct { | ||
360 | __le64 snapid; | ||
361 | } __attribute__ ((packed)) snap; | ||
353 | }; | 362 | }; |
354 | __le32 payload_len; | 363 | __le32 payload_len; |
355 | } __attribute__ ((packed)); | 364 | } __attribute__ ((packed)); |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index fa87f51e38e1..9922628532b2 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include "ceph_debug.h" | 2 | #include "ceph_debug.h" |
3 | 3 | ||
4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
5 | #include <linux/ctype.h> | ||
5 | #include <linux/fs.h> | 6 | #include <linux/fs.h> |
6 | #include <linux/inet.h> | 7 | #include <linux/inet.h> |
7 | #include <linux/in6.h> | 8 | #include <linux/in6.h> |
@@ -101,12 +102,21 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
101 | } | 102 | } |
102 | 103 | ||
103 | 104 | ||
104 | static int ceph_syncfs(struct super_block *sb, int wait) | 105 | static int ceph_sync_fs(struct super_block *sb, int wait) |
105 | { | 106 | { |
106 | dout("sync_fs %d\n", wait); | 107 | struct ceph_client *client = ceph_sb_to_client(sb); |
108 | |||
109 | if (!wait) { | ||
110 | dout("sync_fs (non-blocking)\n"); | ||
111 | ceph_flush_dirty_caps(&client->mdsc); | ||
112 | dout("sync_fs (non-blocking) done\n"); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | dout("sync_fs (blocking)\n"); | ||
107 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); |
108 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); |
109 | dout("sync_fs %d done\n", wait); | 119 | dout("sync_fs (blocking) done\n"); |
110 | return 0; | 120 | return 0; |
111 | } | 121 | } |
112 | 122 | ||
@@ -150,9 +160,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
150 | struct ceph_mount_args *args = client->mount_args; | 160 | struct ceph_mount_args *args = client->mount_args; |
151 | 161 | ||
152 | if (args->flags & CEPH_OPT_FSID) | 162 | if (args->flags & CEPH_OPT_FSID) |
153 | seq_printf(m, ",fsidmajor=%llu,fsidminor%llu", | 163 | seq_printf(m, ",fsid=%pU", &args->fsid); |
154 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]), | ||
155 | le64_to_cpu(*(__le64 *)&args->fsid.fsid[8])); | ||
156 | if (args->flags & CEPH_OPT_NOSHARE) | 164 | if (args->flags & CEPH_OPT_NOSHARE) |
157 | seq_puts(m, ",noshare"); | 165 | seq_puts(m, ",noshare"); |
158 | if (args->flags & CEPH_OPT_DIRSTAT) | 166 | if (args->flags & CEPH_OPT_DIRSTAT) |
@@ -279,7 +287,7 @@ static const struct super_operations ceph_super_ops = { | |||
279 | .alloc_inode = ceph_alloc_inode, | 287 | .alloc_inode = ceph_alloc_inode, |
280 | .destroy_inode = ceph_destroy_inode, | 288 | .destroy_inode = ceph_destroy_inode, |
281 | .write_inode = ceph_write_inode, | 289 | .write_inode = ceph_write_inode, |
282 | .sync_fs = ceph_syncfs, | 290 | .sync_fs = ceph_sync_fs, |
283 | .put_super = ceph_put_super, | 291 | .put_super = ceph_put_super, |
284 | .show_options = ceph_show_options, | 292 | .show_options = ceph_show_options, |
285 | .statfs = ceph_statfs, | 293 | .statfs = ceph_statfs, |
@@ -322,9 +330,6 @@ const char *ceph_msg_type_name(int type) | |||
322 | * mount options | 330 | * mount options |
323 | */ | 331 | */ |
324 | enum { | 332 | enum { |
325 | Opt_fsidmajor, | ||
326 | Opt_fsidminor, | ||
327 | Opt_monport, | ||
328 | Opt_wsize, | 333 | Opt_wsize, |
329 | Opt_rsize, | 334 | Opt_rsize, |
330 | Opt_osdtimeout, | 335 | Opt_osdtimeout, |
@@ -339,6 +344,7 @@ enum { | |||
339 | Opt_congestion_kb, | 344 | Opt_congestion_kb, |
340 | Opt_last_int, | 345 | Opt_last_int, |
341 | /* int args above */ | 346 | /* int args above */ |
347 | Opt_fsid, | ||
342 | Opt_snapdirname, | 348 | Opt_snapdirname, |
343 | Opt_name, | 349 | Opt_name, |
344 | Opt_secret, | 350 | Opt_secret, |
@@ -355,9 +361,6 @@ enum { | |||
355 | }; | 361 | }; |
356 | 362 | ||
357 | static match_table_t arg_tokens = { | 363 | static match_table_t arg_tokens = { |
358 | {Opt_fsidmajor, "fsidmajor=%ld"}, | ||
359 | {Opt_fsidminor, "fsidminor=%ld"}, | ||
360 | {Opt_monport, "monport=%d"}, | ||
361 | {Opt_wsize, "wsize=%d"}, | 364 | {Opt_wsize, "wsize=%d"}, |
362 | {Opt_rsize, "rsize=%d"}, | 365 | {Opt_rsize, "rsize=%d"}, |
363 | {Opt_osdtimeout, "osdtimeout=%d"}, | 366 | {Opt_osdtimeout, "osdtimeout=%d"}, |
@@ -371,6 +374,7 @@ static match_table_t arg_tokens = { | |||
371 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
372 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
373 | /* int args above */ | 376 | /* int args above */ |
377 | {Opt_fsid, "fsid=%s"}, | ||
374 | {Opt_snapdirname, "snapdirname=%s"}, | 378 | {Opt_snapdirname, "snapdirname=%s"}, |
375 | {Opt_name, "name=%s"}, | 379 | {Opt_name, "name=%s"}, |
376 | {Opt_secret, "secret=%s"}, | 380 | {Opt_secret, "secret=%s"}, |
@@ -386,6 +390,36 @@ static match_table_t arg_tokens = { | |||
386 | {-1, NULL} | 390 | {-1, NULL} |
387 | }; | 391 | }; |
388 | 392 | ||
393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | ||
394 | { | ||
395 | int i = 0; | ||
396 | char tmp[3]; | ||
397 | int err = -EINVAL; | ||
398 | int d; | ||
399 | |||
400 | dout("parse_fsid '%s'\n", str); | ||
401 | tmp[2] = 0; | ||
402 | while (*str && i < 16) { | ||
403 | if (ispunct(*str)) { | ||
404 | str++; | ||
405 | continue; | ||
406 | } | ||
407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | ||
408 | break; | ||
409 | tmp[0] = str[0]; | ||
410 | tmp[1] = str[1]; | ||
411 | if (sscanf(tmp, "%x", &d) < 1) | ||
412 | break; | ||
413 | fsid->fsid[i] = d & 0xff; | ||
414 | i++; | ||
415 | str += 2; | ||
416 | } | ||
417 | |||
418 | if (i == 16) | ||
419 | err = 0; | ||
420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | ||
421 | return err; | ||
422 | } | ||
389 | 423 | ||
390 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, |
391 | const char *dev_name, | 425 | const char *dev_name, |
@@ -469,12 +503,6 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
469 | dout("got token %d\n", token); | 503 | dout("got token %d\n", token); |
470 | } | 504 | } |
471 | switch (token) { | 505 | switch (token) { |
472 | case Opt_fsidmajor: | ||
473 | *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval); | ||
474 | break; | ||
475 | case Opt_fsidminor: | ||
476 | *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval); | ||
477 | break; | ||
478 | case Opt_ip: | 506 | case Opt_ip: |
479 | err = ceph_parse_ips(argstr[0].from, | 507 | err = ceph_parse_ips(argstr[0].from, |
480 | argstr[0].to, | 508 | argstr[0].to, |
@@ -485,6 +513,11 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
485 | args->flags |= CEPH_OPT_MYIP; | 513 | args->flags |= CEPH_OPT_MYIP; |
486 | break; | 514 | break; |
487 | 515 | ||
516 | case Opt_fsid: | ||
517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
518 | if (err == 0) | ||
519 | args->flags |= CEPH_OPT_FSID; | ||
520 | break; | ||
488 | case Opt_snapdirname: | 521 | case Opt_snapdirname: |
489 | kfree(args->snapdir_name); | 522 | kfree(args->snapdir_name); |
490 | args->snapdir_name = kstrndup(argstr[0].from, | 523 | args->snapdir_name = kstrndup(argstr[0].from, |
@@ -515,6 +548,9 @@ static struct ceph_mount_args *parse_mount_args(int flags, char *options, | |||
515 | case Opt_osdkeepalivetimeout: | 548 | case Opt_osdkeepalivetimeout: |
516 | args->osd_keepalive_timeout = intval; | 549 | args->osd_keepalive_timeout = intval; |
517 | break; | 550 | break; |
551 | case Opt_osd_idle_ttl: | ||
552 | args->osd_idle_ttl = intval; | ||
553 | break; | ||
518 | case Opt_mount_timeout: | 554 | case Opt_mount_timeout: |
519 | args->mount_timeout = intval; | 555 | args->mount_timeout = intval; |
520 | break; | 556 | break; |
@@ -630,7 +666,6 @@ static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | |||
630 | 666 | ||
631 | /* caps */ | 667 | /* caps */ |
632 | client->min_caps = args->max_readdir; | 668 | client->min_caps = args->max_readdir; |
633 | ceph_adjust_min_caps(client->min_caps); | ||
634 | 669 | ||
635 | /* subsystems */ | 670 | /* subsystems */ |
636 | err = ceph_monc_init(&client->monc, client); | 671 | err = ceph_monc_init(&client->monc, client); |
@@ -680,8 +715,6 @@ static void ceph_destroy_client(struct ceph_client *client) | |||
680 | 715 | ||
681 | ceph_monc_stop(&client->monc); | 716 | ceph_monc_stop(&client->monc); |
682 | 717 | ||
683 | ceph_adjust_min_caps(-client->min_caps); | ||
684 | |||
685 | ceph_debugfs_client_cleanup(client); | 718 | ceph_debugfs_client_cleanup(client); |
686 | destroy_workqueue(client->wb_wq); | 719 | destroy_workqueue(client->wb_wq); |
687 | destroy_workqueue(client->pg_inv_wq); | 720 | destroy_workqueue(client->pg_inv_wq); |
@@ -706,13 +739,13 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
706 | { | 739 | { |
707 | if (client->have_fsid) { | 740 | if (client->have_fsid) { |
708 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 741 | if (ceph_fsid_compare(&client->fsid, fsid)) { |
709 | pr_err("bad fsid, had " FSID_FORMAT " got " FSID_FORMAT, | 742 | pr_err("bad fsid, had %pU got %pU", |
710 | PR_FSID(&client->fsid), PR_FSID(fsid)); | 743 | &client->fsid, fsid); |
711 | return -1; | 744 | return -1; |
712 | } | 745 | } |
713 | } else { | 746 | } else { |
714 | pr_info("client%lld fsid " FSID_FORMAT "\n", | 747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, |
715 | client->monc.auth->global_id, PR_FSID(fsid)); | 748 | fsid); |
716 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); |
717 | ceph_debugfs_client_init(client); | 750 | ceph_debugfs_client_init(client); |
718 | client->have_fsid = true; | 751 | client->have_fsid = true; |
@@ -1043,8 +1076,6 @@ static int __init init_ceph(void) | |||
1043 | if (ret) | 1076 | if (ret) |
1044 | goto out_msgr; | 1077 | goto out_msgr; |
1045 | 1078 | ||
1046 | ceph_caps_init(); | ||
1047 | |||
1048 | ret = register_filesystem(&ceph_fs_type); | 1079 | ret = register_filesystem(&ceph_fs_type); |
1049 | if (ret) | 1080 | if (ret) |
1050 | goto out_icache; | 1081 | goto out_icache; |
@@ -1069,7 +1100,6 @@ static void __exit exit_ceph(void) | |||
1069 | { | 1100 | { |
1070 | dout("exit_ceph\n"); | 1101 | dout("exit_ceph\n"); |
1071 | unregister_filesystem(&ceph_fs_type); | 1102 | unregister_filesystem(&ceph_fs_type); |
1072 | ceph_caps_finalize(); | ||
1073 | destroy_caches(); | 1103 | destroy_caches(); |
1074 | ceph_msgr_exit(); | 1104 | ceph_msgr_exit(); |
1075 | ceph_debugfs_cleanup(); | 1105 | ceph_debugfs_cleanup(); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 10a4a406e887..2482d696f0de 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -31,6 +31,12 @@ | |||
31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
32 | 32 | ||
33 | /* | 33 | /* |
34 | * Supported features | ||
35 | */ | ||
36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | ||
37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
38 | |||
39 | /* | ||
34 | * mount options | 40 | * mount options |
35 | */ | 41 | */ |
36 | #define CEPH_OPT_FSID (1<<0) | 42 | #define CEPH_OPT_FSID (1<<0) |
@@ -560,11 +566,13 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) | |||
560 | /* what the mds thinks we want */ | 566 | /* what the mds thinks we want */ |
561 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); | 567 | extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci); |
562 | 568 | ||
563 | extern void ceph_caps_init(void); | 569 | extern void ceph_caps_init(struct ceph_mds_client *mdsc); |
564 | extern void ceph_caps_finalize(void); | 570 | extern void ceph_caps_finalize(struct ceph_mds_client *mdsc); |
565 | extern void ceph_adjust_min_caps(int delta); | 571 | extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta); |
566 | extern int ceph_reserve_caps(struct ceph_cap_reservation *ctx, int need); | 572 | extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, |
567 | extern int ceph_unreserve_caps(struct ceph_cap_reservation *ctx); | 573 | struct ceph_cap_reservation *ctx, int need); |
574 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | ||
575 | struct ceph_cap_reservation *ctx); | ||
568 | extern void ceph_reservation_status(struct ceph_client *client, | 576 | extern void ceph_reservation_status(struct ceph_client *client, |
569 | int *total, int *avail, int *used, | 577 | int *total, int *avail, int *used, |
570 | int *reserved, int *min); | 578 | int *reserved, int *min); |
@@ -738,13 +746,6 @@ extern struct kmem_cache *ceph_file_cachep; | |||
738 | extern const char *ceph_msg_type_name(int type); | 746 | extern const char *ceph_msg_type_name(int type); |
739 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | 747 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); |
740 | 748 | ||
741 | #define FSID_FORMAT "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-" \ | ||
742 | "%02x%02x%02x%02x%02x%02x" | ||
743 | #define PR_FSID(f) (f)->fsid[0], (f)->fsid[1], (f)->fsid[2], (f)->fsid[3], \ | ||
744 | (f)->fsid[4], (f)->fsid[5], (f)->fsid[6], (f)->fsid[7], \ | ||
745 | (f)->fsid[8], (f)->fsid[9], (f)->fsid[10], (f)->fsid[11], \ | ||
746 | (f)->fsid[12], (f)->fsid[13], (f)->fsid[14], (f)->fsid[15] | ||
747 | |||
748 | /* inode.c */ | 749 | /* inode.c */ |
749 | extern const struct inode_operations ceph_file_iops; | 750 | extern const struct inode_operations ceph_file_iops; |
750 | 751 | ||
@@ -806,13 +807,16 @@ static inline void ceph_remove_cap(struct ceph_cap *cap) | |||
806 | __ceph_remove_cap(cap); | 807 | __ceph_remove_cap(cap); |
807 | spin_unlock(&inode->i_lock); | 808 | spin_unlock(&inode->i_lock); |
808 | } | 809 | } |
809 | extern void ceph_put_cap(struct ceph_cap *cap); | 810 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
811 | struct ceph_cap *cap); | ||
810 | 812 | ||
811 | extern void ceph_queue_caps_release(struct inode *inode); | 813 | extern void ceph_queue_caps_release(struct inode *inode); |
812 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); | 814 | extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); |
813 | extern int ceph_fsync(struct file *file, int datasync); | 815 | extern int ceph_fsync(struct file *file, int datasync); |
814 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | 816 | extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, |
815 | struct ceph_mds_session *session); | 817 | struct ceph_mds_session *session); |
818 | extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, | ||
819 | int mds); | ||
816 | extern int ceph_get_cap_mds(struct inode *inode); | 820 | extern int ceph_get_cap_mds(struct inode *inode); |
817 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); | 821 | extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); |
818 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | 822 | extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); |
@@ -857,7 +861,7 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages); | |||
857 | /* dir.c */ | 861 | /* dir.c */ |
858 | extern const struct file_operations ceph_dir_fops; | 862 | extern const struct file_operations ceph_dir_fops; |
859 | extern const struct inode_operations ceph_dir_iops; | 863 | extern const struct inode_operations ceph_dir_iops; |
860 | extern struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, | 864 | extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, |
861 | ceph_snapdir_dentry_ops; | 865 | ceph_snapdir_dentry_ops; |
862 | 866 | ||
863 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); | 867 | extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); |
@@ -888,6 +892,14 @@ extern void ceph_debugfs_cleanup(void); | |||
888 | extern int ceph_debugfs_client_init(struct ceph_client *client); | 892 | extern int ceph_debugfs_client_init(struct ceph_client *client); |
889 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | 893 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); |
890 | 894 | ||
895 | /* locks.c */ | ||
896 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | ||
897 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | ||
898 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); | ||
899 | extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, | ||
900 | int p_locks, int f_locks); | ||
901 | extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); | ||
902 | |||
891 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | 903 | static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) |
892 | { | 904 | { |
893 | if (dentry && dentry->d_parent) | 905 | if (dentry && dentry->d_parent) |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 68aeebc69681..097a2654c00f 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -337,6 +337,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | static int __build_xattrs(struct inode *inode) | 339 | static int __build_xattrs(struct inode *inode) |
340 | __releases(inode->i_lock) | ||
341 | __acquires(inode->i_lock) | ||
340 | { | 342 | { |
341 | u32 namelen; | 343 | u32 namelen; |
342 | u32 numattr = 0; | 344 | u32 numattr = 0; |
diff --git a/fs/char_dev.c b/fs/char_dev.c index d6db933df2b2..f80a4f25123c 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/cdev.h> | 20 | #include <linux/cdev.h> |
21 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
22 | #include <linux/backing-dev.h> | 22 | #include <linux/backing-dev.h> |
23 | #include <linux/tty.h> | ||
23 | 24 | ||
24 | #include "internal.h" | 25 | #include "internal.h" |
25 | 26 | ||
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 5739fd7f88b4..917b7d449bb2 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -2,7 +2,6 @@ config CIFS | |||
2 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" | 2 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" |
3 | depends on INET | 3 | depends on INET |
4 | select NLS | 4 | select NLS |
5 | select SLOW_WORK | ||
6 | help | 5 | help |
7 | This is the client VFS module for the Common Internet File System | 6 | This is the client VFS module for the Common Internet File System |
8 | (CIFS) protocol which is the successor to the Server Message Block | 7 | (CIFS) protocol which is the successor to the Server Message Block |
@@ -71,14 +70,14 @@ config CIFS_WEAK_PW_HASH | |||
71 | If unsure, say N. | 70 | If unsure, say N. |
72 | 71 | ||
73 | config CIFS_UPCALL | 72 | config CIFS_UPCALL |
74 | bool "Kerberos/SPNEGO advanced session setup" | 73 | bool "Kerberos/SPNEGO advanced session setup" |
75 | depends on CIFS && KEYS | 74 | depends on CIFS && KEYS |
76 | help | 75 | select DNS_RESOLVER |
77 | Enables an upcall mechanism for CIFS which accesses | 76 | help |
78 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | 77 | Enables an upcall mechanism for CIFS which accesses userspace helper |
79 | Kerberos tickets which are needed to mount to certain secure servers | 78 | utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets |
80 | (for which more secure Kerberos authentication is required). If | 79 | which are needed to mount to certain secure servers (for which more |
81 | unsure, say N. | 80 | secure Kerberos authentication is required). If unsure, say N. |
82 | 81 | ||
83 | config CIFS_XATTR | 82 | config CIFS_XATTR |
84 | bool "CIFS extended attributes" | 83 | bool "CIFS extended attributes" |
@@ -122,6 +121,7 @@ config CIFS_DEBUG2 | |||
122 | config CIFS_DFS_UPCALL | 121 | config CIFS_DFS_UPCALL |
123 | bool "DFS feature support" | 122 | bool "DFS feature support" |
124 | depends on CIFS && KEYS | 123 | depends on CIFS && KEYS |
124 | select DNS_RESOLVER | ||
125 | help | 125 | help |
126 | Distributed File System (DFS) support is used to access shares | 126 | Distributed File System (DFS) support is used to access shares |
127 | transparently in an enterprise name space, even if the share | 127 | transparently in an enterprise name space, even if the share |
diff --git a/fs/cifs/README b/fs/cifs/README index a727b7cb075f..a7081eeeb85d 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -568,8 +568,9 @@ module can be displayed via modinfo. | |||
568 | Misc /proc/fs/cifs Flags and Debug Info | 568 | Misc /proc/fs/cifs Flags and Debug Info |
569 | ======================================= | 569 | ======================================= |
570 | Informational pseudo-files: | 570 | Informational pseudo-files: |
571 | DebugData Displays information about active CIFS sessions | 571 | DebugData Displays information about active CIFS sessions and |
572 | and shares, as well as the cifs.ko version. | 572 | shares, features enabled as well as the cifs.ko |
573 | version. | ||
573 | Stats Lists summary resource usage information as well as per | 574 | Stats Lists summary resource usage information as well as per |
574 | share statistics, if CONFIG_CIFS_STATS in enabled | 575 | share statistics, if CONFIG_CIFS_STATS in enabled |
575 | in the kernel configuration. | 576 | in the kernel configuration. |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 4fce6e61b34e..eb1ba493489f 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -119,6 +119,31 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
119 | "Display Internal CIFS Data Structures for Debugging\n" | 119 | "Display Internal CIFS Data Structures for Debugging\n" |
120 | "---------------------------------------------------\n"); | 120 | "---------------------------------------------------\n"); |
121 | seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); | 121 | seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); |
122 | seq_printf(m, "Features: "); | ||
123 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
124 | seq_printf(m, "dfs"); | ||
125 | seq_putc(m, ' '); | ||
126 | #endif | ||
127 | #ifdef CONFIG_CIFS_FSCACHE | ||
128 | seq_printf(m, "fscache"); | ||
129 | seq_putc(m, ' '); | ||
130 | #endif | ||
131 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | ||
132 | seq_printf(m, "lanman"); | ||
133 | seq_putc(m, ' '); | ||
134 | #endif | ||
135 | #ifdef CONFIG_CIFS_POSIX | ||
136 | seq_printf(m, "posix"); | ||
137 | seq_putc(m, ' '); | ||
138 | #endif | ||
139 | #ifdef CONFIG_CIFS_UPCALL | ||
140 | seq_printf(m, "spnego"); | ||
141 | seq_putc(m, ' '); | ||
142 | #endif | ||
143 | #ifdef CONFIG_CIFS_XATTR | ||
144 | seq_printf(m, "xattr"); | ||
145 | #endif | ||
146 | seq_putc(m, '\n'); | ||
122 | seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); | 147 | seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); |
123 | seq_printf(m, "Servers:"); | 148 | seq_printf(m, "Servers:"); |
124 | 149 | ||
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index dc1ed50ea06e..d6ced7aa23cf 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -141,7 +141,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); | 143 | rc = dns_resolve_server_name_to_ip(*devname, &srvIP); |
144 | if (rc != 0) { | 144 | if (rc < 0) { |
145 | cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", | 145 | cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", |
146 | __func__, *devname, rc); | 146 | __func__, *devname, rc); |
147 | goto compose_mount_options_err; | 147 | goto compose_mount_options_err; |
@@ -150,8 +150,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, | |||
150 | * assuming that we have 'unc=' and 'ip=' in | 150 | * assuming that we have 'unc=' and 'ip=' in |
151 | * the original sb_mountdata | 151 | * the original sb_mountdata |
152 | */ | 152 | */ |
153 | md_len = strlen(sb_mountdata) + strlen(srvIP) + | 153 | md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12; |
154 | strlen(ref->node_name) + 12; | ||
155 | mountdata = kzalloc(md_len+1, GFP_KERNEL); | 154 | mountdata = kzalloc(md_len+1, GFP_KERNEL); |
156 | if (mountdata == NULL) { | 155 | if (mountdata == NULL) { |
157 | rc = -ENOMEM; | 156 | rc = -ENOMEM; |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 6effccff85a5..87044906cd1f 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -84,6 +84,9 @@ struct key_type cifs_spnego_key_type = { | |||
84 | /* strlen of ";uid=0x" */ | 84 | /* strlen of ";uid=0x" */ |
85 | #define UID_KEY_LEN 7 | 85 | #define UID_KEY_LEN 7 |
86 | 86 | ||
87 | /* strlen of ";creduid=0x" */ | ||
88 | #define CREDUID_KEY_LEN 11 | ||
89 | |||
87 | /* strlen of ";user=" */ | 90 | /* strlen of ";user=" */ |
88 | #define USER_KEY_LEN 6 | 91 | #define USER_KEY_LEN 6 |
89 | 92 | ||
@@ -107,6 +110,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
107 | IP_KEY_LEN + INET6_ADDRSTRLEN + | 110 | IP_KEY_LEN + INET6_ADDRSTRLEN + |
108 | MAX_MECH_STR_LEN + | 111 | MAX_MECH_STR_LEN + |
109 | UID_KEY_LEN + (sizeof(uid_t) * 2) + | 112 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
113 | CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + | ||
110 | USER_KEY_LEN + strlen(sesInfo->userName) + | 114 | USER_KEY_LEN + strlen(sesInfo->userName) + |
111 | PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; | 115 | PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; |
112 | 116 | ||
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8a2cf129e535..b7431afdd76d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -45,7 +45,6 @@ | |||
45 | #include "cifs_fs_sb.h" | 45 | #include "cifs_fs_sb.h" |
46 | #include <linux/mm.h> | 46 | #include <linux/mm.h> |
47 | #include <linux/key-type.h> | 47 | #include <linux/key-type.h> |
48 | #include "dns_resolve.h" | ||
49 | #include "cifs_spnego.h" | 48 | #include "cifs_spnego.h" |
50 | #include "fscache.h" | 49 | #include "fscache.h" |
51 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ | 50 | #define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ |
@@ -330,8 +329,10 @@ cifs_destroy_inode(struct inode *inode) | |||
330 | } | 329 | } |
331 | 330 | ||
332 | static void | 331 | static void |
333 | cifs_clear_inode(struct inode *inode) | 332 | cifs_evict_inode(struct inode *inode) |
334 | { | 333 | { |
334 | truncate_inode_pages(&inode->i_data, 0); | ||
335 | end_writeback(inode); | ||
335 | cifs_fscache_release_inode_cookie(inode); | 336 | cifs_fscache_release_inode_cookie(inode); |
336 | } | 337 | } |
337 | 338 | ||
@@ -480,14 +481,13 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data) | |||
480 | return 0; | 481 | return 0; |
481 | } | 482 | } |
482 | 483 | ||
483 | void cifs_drop_inode(struct inode *inode) | 484 | static int cifs_drop_inode(struct inode *inode) |
484 | { | 485 | { |
485 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 486 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
486 | 487 | ||
487 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) | 488 | /* no serverino => unconditional eviction */ |
488 | return generic_drop_inode(inode); | 489 | return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) || |
489 | 490 | generic_drop_inode(inode); | |
490 | return generic_delete_inode(inode); | ||
491 | } | 491 | } |
492 | 492 | ||
493 | static const struct super_operations cifs_super_ops = { | 493 | static const struct super_operations cifs_super_ops = { |
@@ -496,7 +496,7 @@ static const struct super_operations cifs_super_ops = { | |||
496 | .alloc_inode = cifs_alloc_inode, | 496 | .alloc_inode = cifs_alloc_inode, |
497 | .destroy_inode = cifs_destroy_inode, | 497 | .destroy_inode = cifs_destroy_inode, |
498 | .drop_inode = cifs_drop_inode, | 498 | .drop_inode = cifs_drop_inode, |
499 | .clear_inode = cifs_clear_inode, | 499 | .evict_inode = cifs_evict_inode, |
500 | /* .delete_inode = cifs_delete_inode, */ /* Do not need above | 500 | /* .delete_inode = cifs_delete_inode, */ /* Do not need above |
501 | function unless later we add lazy close of inodes or unless the | 501 | function unless later we add lazy close of inodes or unless the |
502 | kernel forgets to call us with the same number of releases (closes) | 502 | kernel forgets to call us with the same number of releases (closes) |
@@ -934,27 +934,13 @@ init_cifs(void) | |||
934 | if (rc) | 934 | if (rc) |
935 | goto out_unregister_filesystem; | 935 | goto out_unregister_filesystem; |
936 | #endif | 936 | #endif |
937 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
938 | rc = cifs_init_dns_resolver(); | ||
939 | if (rc) | ||
940 | goto out_unregister_key_type; | ||
941 | #endif | ||
942 | rc = slow_work_register_user(THIS_MODULE); | ||
943 | if (rc) | ||
944 | goto out_unregister_resolver_key; | ||
945 | 937 | ||
946 | return 0; | 938 | return 0; |
947 | 939 | ||
948 | out_unregister_resolver_key: | ||
949 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
950 | cifs_exit_dns_resolver(); | ||
951 | out_unregister_key_type: | ||
952 | #endif | ||
953 | #ifdef CONFIG_CIFS_UPCALL | 940 | #ifdef CONFIG_CIFS_UPCALL |
954 | unregister_key_type(&cifs_spnego_key_type); | ||
955 | out_unregister_filesystem: | 941 | out_unregister_filesystem: |
956 | #endif | ||
957 | unregister_filesystem(&cifs_fs_type); | 942 | unregister_filesystem(&cifs_fs_type); |
943 | #endif | ||
958 | out_destroy_request_bufs: | 944 | out_destroy_request_bufs: |
959 | cifs_destroy_request_bufs(); | 945 | cifs_destroy_request_bufs(); |
960 | out_destroy_mids: | 946 | out_destroy_mids: |
@@ -976,7 +962,6 @@ exit_cifs(void) | |||
976 | cifs_fscache_unregister(); | 962 | cifs_fscache_unregister(); |
977 | #ifdef CONFIG_CIFS_DFS_UPCALL | 963 | #ifdef CONFIG_CIFS_DFS_UPCALL |
978 | cifs_dfs_release_automount_timer(); | 964 | cifs_dfs_release_automount_timer(); |
979 | cifs_exit_dns_resolver(); | ||
980 | #endif | 965 | #endif |
981 | #ifdef CONFIG_CIFS_UPCALL | 966 | #ifdef CONFIG_CIFS_UPCALL |
982 | unregister_key_type(&cifs_spnego_key_type); | 967 | unregister_key_type(&cifs_spnego_key_type); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 59906146ad36..0cdfb8c32ac6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -22,7 +22,7 @@ | |||
22 | #include <linux/in.h> | 22 | #include <linux/in.h> |
23 | #include <linux/in6.h> | 23 | #include <linux/in6.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/slow-work.h> | 25 | #include <linux/workqueue.h> |
26 | #include "cifs_fs_sb.h" | 26 | #include "cifs_fs_sb.h" |
27 | #include "cifsacl.h" | 27 | #include "cifsacl.h" |
28 | /* | 28 | /* |
@@ -356,7 +356,7 @@ struct cifsFileInfo { | |||
356 | atomic_t count; /* reference count */ | 356 | atomic_t count; /* reference count */ |
357 | struct mutex fh_mutex; /* prevents reopen race after dead ses*/ | 357 | struct mutex fh_mutex; /* prevents reopen race after dead ses*/ |
358 | struct cifs_search_info srch_inf; | 358 | struct cifs_search_info srch_inf; |
359 | struct slow_work oplock_break; /* slow_work job for oplock breaks */ | 359 | struct work_struct oplock_break; /* work for oplock breaks */ |
360 | }; | 360 | }; |
361 | 361 | ||
362 | /* Take a reference on the file private data */ | 362 | /* Take a reference on the file private data */ |
@@ -728,6 +728,10 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ | |||
728 | GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ | 728 | GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */ |
729 | GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ | 729 | GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/ |
730 | 730 | ||
731 | void cifs_oplock_break(struct work_struct *work); | ||
732 | void cifs_oplock_break_get(struct cifsFileInfo *cfile); | ||
733 | void cifs_oplock_break_put(struct cifsFileInfo *cfile); | ||
734 | |||
731 | extern const struct slow_work_ops cifs_oplock_break_ops; | 735 | extern const struct slow_work_ops cifs_oplock_break_ops; |
732 | 736 | ||
733 | #endif /* _CIFS_GLOB_H */ | 737 | #endif /* _CIFS_GLOB_H */ |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 2eaebbd31132..1f5450814087 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -86,8 +86,8 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); | |||
86 | extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); | 86 | extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); |
87 | extern int decode_negTokenInit(unsigned char *security_blob, int length, | 87 | extern int decode_negTokenInit(unsigned char *security_blob, int length, |
88 | struct TCP_Server_Info *server); | 88 | struct TCP_Server_Info *server); |
89 | extern int cifs_convert_address(struct sockaddr *dst, char *src); | 89 | extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); |
90 | extern int cifs_fill_sockaddr(struct sockaddr *dst, char *src, | 90 | extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, |
91 | unsigned short int port); | 91 | unsigned short int port); |
92 | extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); | 92 | extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); |
93 | extern void header_assemble(struct smb_hdr *, char /* command */ , | 93 | extern void header_assemble(struct smb_hdr *, char /* command */ , |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2a43a0aca965..95c2ea67edfb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -1543,6 +1543,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1543 | if (volume_info->UNCip && volume_info->UNC) { | 1543 | if (volume_info->UNCip && volume_info->UNC) { |
1544 | rc = cifs_fill_sockaddr((struct sockaddr *)&addr, | 1544 | rc = cifs_fill_sockaddr((struct sockaddr *)&addr, |
1545 | volume_info->UNCip, | 1545 | volume_info->UNCip, |
1546 | strlen(volume_info->UNCip), | ||
1546 | volume_info->port); | 1547 | volume_info->port); |
1547 | if (!rc) { | 1548 | if (!rc) { |
1548 | /* we failed translating address */ | 1549 | /* we failed translating address */ |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index a7de5e9fff11..578d88c5b46e 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -157,7 +157,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, | |||
157 | mutex_init(&pCifsFile->lock_mutex); | 157 | mutex_init(&pCifsFile->lock_mutex); |
158 | INIT_LIST_HEAD(&pCifsFile->llist); | 158 | INIT_LIST_HEAD(&pCifsFile->llist); |
159 | atomic_set(&pCifsFile->count, 1); | 159 | atomic_set(&pCifsFile->count, 1); |
160 | slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops); | 160 | INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); |
161 | 161 | ||
162 | write_lock(&GlobalSMBSeslock); | 162 | write_lock(&GlobalSMBSeslock); |
163 | list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); | 163 | list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 3ad7f4300c45..0eb87026cad3 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -4,6 +4,8 @@ | |||
4 | * Copyright (c) 2007 Igor Mammedov | 4 | * Copyright (c) 2007 Igor Mammedov |
5 | * Author(s): Igor Mammedov (niallain@gmail.com) | 5 | * Author(s): Igor Mammedov (niallain@gmail.com) |
6 | * Steve French (sfrench@us.ibm.com) | 6 | * Steve French (sfrench@us.ibm.com) |
7 | * Wang Lei (wang840925@gmail.com) | ||
8 | * David Howells (dhowells@redhat.com) | ||
7 | * | 9 | * |
8 | * Contains the CIFS DFS upcall routines used for hostname to | 10 | * Contains the CIFS DFS upcall routines used for hostname to |
9 | * IP address translation. | 11 | * IP address translation. |
@@ -24,214 +26,73 @@ | |||
24 | */ | 26 | */ |
25 | 27 | ||
26 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
27 | #include <linux/keyctl.h> | 29 | #include <linux/dns_resolver.h> |
28 | #include <linux/key-type.h> | ||
29 | #include <keys/user-type.h> | ||
30 | #include "dns_resolve.h" | 30 | #include "dns_resolve.h" |
31 | #include "cifsglob.h" | 31 | #include "cifsglob.h" |
32 | #include "cifsproto.h" | 32 | #include "cifsproto.h" |
33 | #include "cifs_debug.h" | 33 | #include "cifs_debug.h" |
34 | 34 | ||
35 | static const struct cred *dns_resolver_cache; | 35 | /** |
36 | 36 | * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address. | |
37 | /* Checks if supplied name is IP address | 37 | * @unc: UNC path specifying the server |
38 | * returns: | 38 | * @ip_addr: Where to return the IP address. |
39 | * 1 - name is IP | 39 | * |
40 | * 0 - name is not IP | 40 | * The IP address will be returned in string form, and the caller is |
41 | */ | 41 | * responsible for freeing it. |
42 | static int | 42 | * |
43 | is_ip(char *name) | 43 | * Returns length of result on success, -ve on error. |
44 | { | ||
45 | struct sockaddr_storage ss; | ||
46 | |||
47 | return cifs_convert_address((struct sockaddr *)&ss, name); | ||
48 | } | ||
49 | |||
50 | static int | ||
51 | dns_resolver_instantiate(struct key *key, const void *data, | ||
52 | size_t datalen) | ||
53 | { | ||
54 | int rc = 0; | ||
55 | char *ip; | ||
56 | |||
57 | ip = kmalloc(datalen + 1, GFP_KERNEL); | ||
58 | if (!ip) | ||
59 | return -ENOMEM; | ||
60 | |||
61 | memcpy(ip, data, datalen); | ||
62 | ip[datalen] = '\0'; | ||
63 | |||
64 | /* make sure this looks like an address */ | ||
65 | if (!is_ip(ip)) { | ||
66 | kfree(ip); | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | |||
70 | key->type_data.x[0] = datalen; | ||
71 | key->payload.data = ip; | ||
72 | |||
73 | return rc; | ||
74 | } | ||
75 | |||
76 | static void | ||
77 | dns_resolver_destroy(struct key *key) | ||
78 | { | ||
79 | kfree(key->payload.data); | ||
80 | } | ||
81 | |||
82 | struct key_type key_type_dns_resolver = { | ||
83 | .name = "dns_resolver", | ||
84 | .def_datalen = sizeof(struct in_addr), | ||
85 | .describe = user_describe, | ||
86 | .instantiate = dns_resolver_instantiate, | ||
87 | .destroy = dns_resolver_destroy, | ||
88 | .match = user_match, | ||
89 | }; | ||
90 | |||
91 | /* Resolves server name to ip address. | ||
92 | * input: | ||
93 | * unc - server UNC | ||
94 | * output: | ||
95 | * *ip_addr - pointer to server ip, caller responcible for freeing it. | ||
96 | * return 0 on success | ||
97 | */ | 44 | */ |
98 | int | 45 | int |
99 | dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | 46 | dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) |
100 | { | 47 | { |
101 | const struct cred *saved_cred; | 48 | struct sockaddr_storage ss; |
102 | int rc = -EAGAIN; | 49 | const char *hostname, *sep; |
103 | struct key *rkey = ERR_PTR(-EAGAIN); | ||
104 | char *name; | 50 | char *name; |
105 | char *data = NULL; | 51 | int len, rc; |
106 | int len; | ||
107 | 52 | ||
108 | if (!ip_addr || !unc) | 53 | if (!ip_addr || !unc) |
109 | return -EINVAL; | 54 | return -EINVAL; |
110 | 55 | ||
111 | /* search for server name delimiter */ | ||
112 | len = strlen(unc); | 56 | len = strlen(unc); |
113 | if (len < 3) { | 57 | if (len < 3) { |
114 | cFYI(1, "%s: unc is too short: %s", __func__, unc); | 58 | cFYI(1, "%s: unc is too short: %s", __func__, unc); |
115 | return -EINVAL; | 59 | return -EINVAL; |
116 | } | 60 | } |
117 | len -= 2; | ||
118 | name = memchr(unc+2, '\\', len); | ||
119 | if (!name) { | ||
120 | cFYI(1, "%s: probably server name is whole unc: %s", | ||
121 | __func__, unc); | ||
122 | } else { | ||
123 | len = (name - unc) - 2/* leading // */; | ||
124 | } | ||
125 | |||
126 | name = kmalloc(len+1, GFP_KERNEL); | ||
127 | if (!name) { | ||
128 | rc = -ENOMEM; | ||
129 | return rc; | ||
130 | } | ||
131 | memcpy(name, unc+2, len); | ||
132 | name[len] = 0; | ||
133 | |||
134 | if (is_ip(name)) { | ||
135 | cFYI(1, "%s: it is IP, skipping dns upcall: %s", | ||
136 | __func__, name); | ||
137 | data = name; | ||
138 | goto skip_upcall; | ||
139 | } | ||
140 | 61 | ||
141 | saved_cred = override_creds(dns_resolver_cache); | 62 | /* Discount leading slashes for cifs */ |
142 | rkey = request_key(&key_type_dns_resolver, name, ""); | 63 | len -= 2; |
143 | revert_creds(saved_cred); | 64 | hostname = unc + 2; |
144 | if (!IS_ERR(rkey)) { | ||
145 | if (!(rkey->perm & KEY_USR_VIEW)) { | ||
146 | down_read(&rkey->sem); | ||
147 | rkey->perm |= KEY_USR_VIEW; | ||
148 | up_read(&rkey->sem); | ||
149 | } | ||
150 | len = rkey->type_data.x[0]; | ||
151 | data = rkey->payload.data; | ||
152 | } else { | ||
153 | cERROR(1, "%s: unable to resolve: %s", __func__, name); | ||
154 | goto out; | ||
155 | } | ||
156 | |||
157 | skip_upcall: | ||
158 | if (data) { | ||
159 | *ip_addr = kmalloc(len + 1, GFP_KERNEL); | ||
160 | if (*ip_addr) { | ||
161 | memcpy(*ip_addr, data, len + 1); | ||
162 | if (!IS_ERR(rkey)) | ||
163 | cFYI(1, "%s: resolved: %s to %s", __func__, | ||
164 | name, | ||
165 | *ip_addr | ||
166 | ); | ||
167 | rc = 0; | ||
168 | } else { | ||
169 | rc = -ENOMEM; | ||
170 | } | ||
171 | if (!IS_ERR(rkey)) | ||
172 | key_put(rkey); | ||
173 | } | ||
174 | 65 | ||
175 | out: | 66 | /* Search for server name delimiter */ |
176 | kfree(name); | 67 | sep = memchr(hostname, '\\', len); |
68 | if (sep) | ||
69 | len = sep - unc; | ||
70 | else | ||
71 | cFYI(1, "%s: probably server name is whole unc: %s", | ||
72 | __func__, unc); | ||
73 | |||
74 | /* Try to interpret hostname as an IPv4 or IPv6 address */ | ||
75 | rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len); | ||
76 | if (rc > 0) | ||
77 | goto name_is_IP_address; | ||
78 | |||
79 | /* Perform the upcall */ | ||
80 | rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); | ||
81 | if (rc < 0) | ||
82 | cERROR(1, "%s: unable to resolve: %*.*s", | ||
83 | __func__, len, len, hostname); | ||
84 | else | ||
85 | cFYI(1, "%s: resolved: %*.*s to %s", | ||
86 | __func__, len, len, hostname, *ip_addr); | ||
177 | return rc; | 87 | return rc; |
178 | } | ||
179 | 88 | ||
180 | int __init cifs_init_dns_resolver(void) | 89 | name_is_IP_address: |
181 | { | 90 | name = kmalloc(len + 1, GFP_KERNEL); |
182 | struct cred *cred; | 91 | if (!name) |
183 | struct key *keyring; | ||
184 | int ret; | ||
185 | |||
186 | printk(KERN_NOTICE "Registering the %s key type\n", | ||
187 | key_type_dns_resolver.name); | ||
188 | |||
189 | /* create an override credential set with a special thread keyring in | ||
190 | * which DNS requests are cached | ||
191 | * | ||
192 | * this is used to prevent malicious redirections from being installed | ||
193 | * with add_key(). | ||
194 | */ | ||
195 | cred = prepare_kernel_cred(NULL); | ||
196 | if (!cred) | ||
197 | return -ENOMEM; | 92 | return -ENOMEM; |
198 | 93 | memcpy(name, hostname, len); | |
199 | keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred, | 94 | name[len] = 0; |
200 | (KEY_POS_ALL & ~KEY_POS_SETATTR) | | 95 | cFYI(1, "%s: unc is IP, skipping dns upcall: %s", __func__, name); |
201 | KEY_USR_VIEW | KEY_USR_READ, | 96 | *ip_addr = name; |
202 | KEY_ALLOC_NOT_IN_QUOTA); | ||
203 | if (IS_ERR(keyring)) { | ||
204 | ret = PTR_ERR(keyring); | ||
205 | goto failed_put_cred; | ||
206 | } | ||
207 | |||
208 | ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); | ||
209 | if (ret < 0) | ||
210 | goto failed_put_key; | ||
211 | |||
212 | ret = register_key_type(&key_type_dns_resolver); | ||
213 | if (ret < 0) | ||
214 | goto failed_put_key; | ||
215 | |||
216 | /* instruct request_key() to use this special keyring as a cache for | ||
217 | * the results it looks up */ | ||
218 | cred->thread_keyring = keyring; | ||
219 | cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; | ||
220 | dns_resolver_cache = cred; | ||
221 | return 0; | 97 | return 0; |
222 | |||
223 | failed_put_key: | ||
224 | key_put(keyring); | ||
225 | failed_put_cred: | ||
226 | put_cred(cred); | ||
227 | return ret; | ||
228 | } | ||
229 | |||
230 | void cifs_exit_dns_resolver(void) | ||
231 | { | ||
232 | key_revoke(dns_resolver_cache->thread_keyring); | ||
233 | unregister_key_type(&key_type_dns_resolver); | ||
234 | put_cred(dns_resolver_cache); | ||
235 | printk(KERN_NOTICE "Unregistered %s key type\n", | ||
236 | key_type_dns_resolver.name); | ||
237 | } | 98 | } |
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index 5d7f291df162..d3f5d27f4d06 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h | |||
@@ -24,8 +24,6 @@ | |||
24 | #define _DNS_RESOLVE_H | 24 | #define _DNS_RESOLVE_H |
25 | 25 | ||
26 | #ifdef __KERNEL__ | 26 | #ifdef __KERNEL__ |
27 | extern int __init cifs_init_dns_resolver(void); | ||
28 | extern void cifs_exit_dns_resolver(void); | ||
29 | extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); | 27 | extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr); |
30 | #endif /* KERNEL */ | 28 | #endif /* KERNEL */ |
31 | 29 | ||
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fa04a00d126d..db11fdef0e92 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -2307,8 +2307,7 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset) | |||
2307 | cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); | 2307 | cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); |
2308 | } | 2308 | } |
2309 | 2309 | ||
2310 | static void | 2310 | void cifs_oplock_break(struct work_struct *work) |
2311 | cifs_oplock_break(struct slow_work *work) | ||
2312 | { | 2311 | { |
2313 | struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, | 2312 | struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, |
2314 | oplock_break); | 2313 | oplock_break); |
@@ -2345,33 +2344,30 @@ cifs_oplock_break(struct slow_work *work) | |||
2345 | LOCKING_ANDX_OPLOCK_RELEASE, false); | 2344 | LOCKING_ANDX_OPLOCK_RELEASE, false); |
2346 | cFYI(1, "Oplock release rc = %d", rc); | 2345 | cFYI(1, "Oplock release rc = %d", rc); |
2347 | } | 2346 | } |
2347 | |||
2348 | /* | ||
2349 | * We might have kicked in before is_valid_oplock_break() | ||
2350 | * finished grabbing reference for us. Make sure it's done by | ||
2351 | * waiting for GlobalSMSSeslock. | ||
2352 | */ | ||
2353 | write_lock(&GlobalSMBSeslock); | ||
2354 | write_unlock(&GlobalSMBSeslock); | ||
2355 | |||
2356 | cifs_oplock_break_put(cfile); | ||
2348 | } | 2357 | } |
2349 | 2358 | ||
2350 | static int | 2359 | void cifs_oplock_break_get(struct cifsFileInfo *cfile) |
2351 | cifs_oplock_break_get(struct slow_work *work) | ||
2352 | { | 2360 | { |
2353 | struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, | ||
2354 | oplock_break); | ||
2355 | mntget(cfile->mnt); | 2361 | mntget(cfile->mnt); |
2356 | cifsFileInfo_get(cfile); | 2362 | cifsFileInfo_get(cfile); |
2357 | return 0; | ||
2358 | } | 2363 | } |
2359 | 2364 | ||
2360 | static void | 2365 | void cifs_oplock_break_put(struct cifsFileInfo *cfile) |
2361 | cifs_oplock_break_put(struct slow_work *work) | ||
2362 | { | 2366 | { |
2363 | struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, | ||
2364 | oplock_break); | ||
2365 | mntput(cfile->mnt); | 2367 | mntput(cfile->mnt); |
2366 | cifsFileInfo_put(cfile); | 2368 | cifsFileInfo_put(cfile); |
2367 | } | 2369 | } |
2368 | 2370 | ||
2369 | const struct slow_work_ops cifs_oplock_break_ops = { | ||
2370 | .get_ref = cifs_oplock_break_get, | ||
2371 | .put_ref = cifs_oplock_break_put, | ||
2372 | .execute = cifs_oplock_break, | ||
2373 | }; | ||
2374 | |||
2375 | const struct address_space_operations cifs_addr_ops = { | 2371 | const struct address_space_operations cifs_addr_ops = { |
2376 | .readpage = cifs_readpage, | 2372 | .readpage = cifs_readpage, |
2377 | .readpages = cifs_readpages, | 2373 | .readpages = cifs_readpages, |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a15b3a9bbff4..4bc47e5b5f29 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -732,15 +732,9 @@ cifs_find_inode(struct inode *inode, void *opaque) | |||
732 | if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) | 732 | if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT)) |
733 | return 0; | 733 | return 0; |
734 | 734 | ||
735 | /* | 735 | /* if it's not a directory or has no dentries, then flag it */ |
736 | * uh oh -- it's a directory. We can't use it since hardlinked dirs are | 736 | if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) |
737 | * verboten. Disable serverino and return it as if it were found, the | ||
738 | * caller can discard it, generate a uniqueid and retry the find | ||
739 | */ | ||
740 | if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) { | ||
741 | fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; | 737 | fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; |
742 | cifs_autodisable_serverino(CIFS_SB(inode->i_sb)); | ||
743 | } | ||
744 | 738 | ||
745 | return 1; | 739 | return 1; |
746 | } | 740 | } |
@@ -754,6 +748,27 @@ cifs_init_inode(struct inode *inode, void *opaque) | |||
754 | return 0; | 748 | return 0; |
755 | } | 749 | } |
756 | 750 | ||
751 | /* | ||
752 | * walk dentry list for an inode and report whether it has aliases that | ||
753 | * are hashed. We use this to determine if a directory inode can actually | ||
754 | * be used. | ||
755 | */ | ||
756 | static bool | ||
757 | inode_has_hashed_dentries(struct inode *inode) | ||
758 | { | ||
759 | struct dentry *dentry; | ||
760 | |||
761 | spin_lock(&dcache_lock); | ||
762 | list_for_each_entry(dentry, &inode->i_dentry, d_alias) { | ||
763 | if (!d_unhashed(dentry) || IS_ROOT(dentry)) { | ||
764 | spin_unlock(&dcache_lock); | ||
765 | return true; | ||
766 | } | ||
767 | } | ||
768 | spin_unlock(&dcache_lock); | ||
769 | return false; | ||
770 | } | ||
771 | |||
757 | /* Given fattrs, get a corresponding inode */ | 772 | /* Given fattrs, get a corresponding inode */ |
758 | struct inode * | 773 | struct inode * |
759 | cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) | 774 | cifs_iget(struct super_block *sb, struct cifs_fattr *fattr) |
@@ -769,12 +784,16 @@ retry_iget5_locked: | |||
769 | 784 | ||
770 | inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); | 785 | inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); |
771 | if (inode) { | 786 | if (inode) { |
772 | /* was there a problematic inode number collision? */ | 787 | /* was there a potentially problematic inode collision? */ |
773 | if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { | 788 | if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) { |
774 | iput(inode); | ||
775 | fattr->cf_uniqueid = iunique(sb, ROOT_I); | ||
776 | fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; | 789 | fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION; |
777 | goto retry_iget5_locked; | 790 | |
791 | if (inode_has_hashed_dentries(inode)) { | ||
792 | cifs_autodisable_serverino(CIFS_SB(sb)); | ||
793 | iput(inode); | ||
794 | fattr->cf_uniqueid = iunique(sb, ROOT_I); | ||
795 | goto retry_iget5_locked; | ||
796 | } | ||
778 | } | 797 | } |
779 | 798 | ||
780 | cifs_fattr_to_inode(inode, fattr); | 799 | cifs_fattr_to_inode(inode, fattr); |
@@ -1679,26 +1698,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) | |||
1679 | return rc; | 1698 | return rc; |
1680 | } | 1699 | } |
1681 | 1700 | ||
1682 | static int cifs_vmtruncate(struct inode *inode, loff_t offset) | 1701 | static void cifs_setsize(struct inode *inode, loff_t offset) |
1683 | { | 1702 | { |
1684 | loff_t oldsize; | 1703 | loff_t oldsize; |
1685 | int err; | ||
1686 | 1704 | ||
1687 | spin_lock(&inode->i_lock); | 1705 | spin_lock(&inode->i_lock); |
1688 | err = inode_newsize_ok(inode, offset); | ||
1689 | if (err) { | ||
1690 | spin_unlock(&inode->i_lock); | ||
1691 | goto out; | ||
1692 | } | ||
1693 | |||
1694 | oldsize = inode->i_size; | 1706 | oldsize = inode->i_size; |
1695 | i_size_write(inode, offset); | 1707 | i_size_write(inode, offset); |
1696 | spin_unlock(&inode->i_lock); | 1708 | spin_unlock(&inode->i_lock); |
1709 | |||
1697 | truncate_pagecache(inode, oldsize, offset); | 1710 | truncate_pagecache(inode, oldsize, offset); |
1698 | if (inode->i_op->truncate) | ||
1699 | inode->i_op->truncate(inode); | ||
1700 | out: | ||
1701 | return err; | ||
1702 | } | 1711 | } |
1703 | 1712 | ||
1704 | static int | 1713 | static int |
@@ -1771,7 +1780,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1771 | 1780 | ||
1772 | if (rc == 0) { | 1781 | if (rc == 0) { |
1773 | cifsInode->server_eof = attrs->ia_size; | 1782 | cifsInode->server_eof = attrs->ia_size; |
1774 | rc = cifs_vmtruncate(inode, attrs->ia_size); | 1783 | cifs_setsize(inode, attrs->ia_size); |
1775 | cifs_truncate_page(inode->i_mapping, inode->i_size); | 1784 | cifs_truncate_page(inode->i_mapping, inode->i_size); |
1776 | } | 1785 | } |
1777 | 1786 | ||
@@ -1796,14 +1805,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
1796 | 1805 | ||
1797 | xid = GetXid(); | 1806 | xid = GetXid(); |
1798 | 1807 | ||
1799 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { | 1808 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) |
1800 | /* check if we have permission to change attrs */ | 1809 | attrs->ia_valid |= ATTR_FORCE; |
1801 | rc = inode_change_ok(inode, attrs); | 1810 | |
1802 | if (rc < 0) | 1811 | rc = inode_change_ok(inode, attrs); |
1803 | goto out; | 1812 | if (rc < 0) |
1804 | else | 1813 | goto out; |
1805 | rc = 0; | ||
1806 | } | ||
1807 | 1814 | ||
1808 | full_path = build_path_from_dentry(direntry); | 1815 | full_path = build_path_from_dentry(direntry); |
1809 | if (full_path == NULL) { | 1816 | if (full_path == NULL) { |
@@ -1889,18 +1896,24 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
1889 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1896 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
1890 | } | 1897 | } |
1891 | 1898 | ||
1892 | if (!rc) { | 1899 | if (rc) |
1893 | rc = inode_setattr(inode, attrs); | 1900 | goto out; |
1894 | 1901 | ||
1895 | /* force revalidate when any of these times are set since some | 1902 | if ((attrs->ia_valid & ATTR_SIZE) && |
1896 | of the fs types (eg ext3, fat) do not have fine enough | 1903 | attrs->ia_size != i_size_read(inode)) |
1897 | time granularity to match protocol, and we do not have a | 1904 | truncate_setsize(inode, attrs->ia_size); |
1898 | a way (yet) to query the server fs's time granularity (and | 1905 | |
1899 | whether it rounds times down). | 1906 | setattr_copy(inode, attrs); |
1900 | */ | 1907 | mark_inode_dirty(inode); |
1901 | if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))) | 1908 | |
1902 | cifsInode->time = 0; | 1909 | /* force revalidate when any of these times are set since some |
1903 | } | 1910 | of the fs types (eg ext3, fat) do not have fine enough |
1911 | time granularity to match protocol, and we do not have a | ||
1912 | a way (yet) to query the server fs's time granularity (and | ||
1913 | whether it rounds times down). | ||
1914 | */ | ||
1915 | if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)) | ||
1916 | cifsInode->time = 0; | ||
1904 | out: | 1917 | out: |
1905 | kfree(args); | 1918 | kfree(args); |
1906 | kfree(full_path); | 1919 | kfree(full_path); |
@@ -1925,14 +1938,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
1925 | cFYI(1, "setattr on file %s attrs->iavalid 0x%x", | 1938 | cFYI(1, "setattr on file %s attrs->iavalid 0x%x", |
1926 | direntry->d_name.name, attrs->ia_valid); | 1939 | direntry->d_name.name, attrs->ia_valid); |
1927 | 1940 | ||
1928 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { | 1941 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) |
1929 | /* check if we have permission to change attrs */ | 1942 | attrs->ia_valid |= ATTR_FORCE; |
1930 | rc = inode_change_ok(inode, attrs); | 1943 | |
1931 | if (rc < 0) { | 1944 | rc = inode_change_ok(inode, attrs); |
1932 | FreeXid(xid); | 1945 | if (rc < 0) { |
1933 | return rc; | 1946 | FreeXid(xid); |
1934 | } else | 1947 | return rc; |
1935 | rc = 0; | ||
1936 | } | 1948 | } |
1937 | 1949 | ||
1938 | full_path = build_path_from_dentry(direntry); | 1950 | full_path = build_path_from_dentry(direntry); |
@@ -2040,8 +2052,17 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
2040 | 2052 | ||
2041 | /* do not need local check to inode_check_ok since the server does | 2053 | /* do not need local check to inode_check_ok since the server does |
2042 | that */ | 2054 | that */ |
2043 | if (!rc) | 2055 | if (rc) |
2044 | rc = inode_setattr(inode, attrs); | 2056 | goto cifs_setattr_exit; |
2057 | |||
2058 | if ((attrs->ia_valid & ATTR_SIZE) && | ||
2059 | attrs->ia_size != i_size_read(inode)) | ||
2060 | truncate_setsize(inode, attrs->ia_size); | ||
2061 | |||
2062 | setattr_copy(inode, attrs); | ||
2063 | mark_inode_dirty(inode); | ||
2064 | return 0; | ||
2065 | |||
2045 | cifs_setattr_exit: | 2066 | cifs_setattr_exit: |
2046 | kfree(full_path); | 2067 | kfree(full_path); |
2047 | FreeXid(xid); | 2068 | FreeXid(xid); |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1394aa37f26c..3ccadc1326d6 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -498,7 +498,6 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
498 | struct cifsTconInfo *tcon; | 498 | struct cifsTconInfo *tcon; |
499 | struct cifsInodeInfo *pCifsInode; | 499 | struct cifsInodeInfo *pCifsInode; |
500 | struct cifsFileInfo *netfile; | 500 | struct cifsFileInfo *netfile; |
501 | int rc; | ||
502 | 501 | ||
503 | cFYI(1, "Checking for oplock break or dnotify response"); | 502 | cFYI(1, "Checking for oplock break or dnotify response"); |
504 | if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && | 503 | if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && |
@@ -583,13 +582,18 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
583 | pCifsInode->clientCanCacheAll = false; | 582 | pCifsInode->clientCanCacheAll = false; |
584 | if (pSMB->OplockLevel == 0) | 583 | if (pSMB->OplockLevel == 0) |
585 | pCifsInode->clientCanCacheRead = false; | 584 | pCifsInode->clientCanCacheRead = false; |
586 | rc = slow_work_enqueue(&netfile->oplock_break); | 585 | |
587 | if (rc) { | 586 | /* |
588 | cERROR(1, "failed to enqueue oplock " | 587 | * cifs_oplock_break_put() can't be called |
589 | "break: %d\n", rc); | 588 | * from here. Get reference after queueing |
590 | } else { | 589 | * succeeded. cifs_oplock_break() will |
591 | netfile->oplock_break_cancelled = false; | 590 | * synchronize using GlobalSMSSeslock. |
592 | } | 591 | */ |
592 | if (queue_work(system_nrt_wq, | ||
593 | &netfile->oplock_break)) | ||
594 | cifs_oplock_break_get(netfile); | ||
595 | netfile->oplock_break_cancelled = false; | ||
596 | |||
593 | read_unlock(&GlobalSMBSeslock); | 597 | read_unlock(&GlobalSMBSeslock); |
594 | read_unlock(&cifs_tcp_ses_lock); | 598 | read_unlock(&cifs_tcp_ses_lock); |
595 | return true; | 599 | return true; |
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index c6721ee26dbc..f97851119e6c 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c | |||
@@ -140,17 +140,18 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { | |||
140 | * Returns 0 on failure. | 140 | * Returns 0 on failure. |
141 | */ | 141 | */ |
142 | static int | 142 | static int |
143 | cifs_inet_pton(const int address_family, const char *cp, void *dst) | 143 | cifs_inet_pton(const int address_family, const char *cp, int len, void *dst) |
144 | { | 144 | { |
145 | int ret = 0; | 145 | int ret = 0; |
146 | 146 | ||
147 | /* calculate length by finding first slash or NULL */ | 147 | /* calculate length by finding first slash or NULL */ |
148 | if (address_family == AF_INET) | 148 | if (address_family == AF_INET) |
149 | ret = in4_pton(cp, -1 /* len */, dst, '\\', NULL); | 149 | ret = in4_pton(cp, len, dst, '\\', NULL); |
150 | else if (address_family == AF_INET6) | 150 | else if (address_family == AF_INET6) |
151 | ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); | 151 | ret = in6_pton(cp, len, dst , '\\', NULL); |
152 | 152 | ||
153 | cFYI(DBG2, "address conversion returned %d for %s", ret, cp); | 153 | cFYI(DBG2, "address conversion returned %d for %*.*s", |
154 | ret, len, len, cp); | ||
154 | if (ret > 0) | 155 | if (ret > 0) |
155 | ret = 1; | 156 | ret = 1; |
156 | return ret; | 157 | return ret; |
@@ -165,37 +166,39 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst) | |||
165 | * Returns 0 on failure. | 166 | * Returns 0 on failure. |
166 | */ | 167 | */ |
167 | int | 168 | int |
168 | cifs_convert_address(struct sockaddr *dst, char *src) | 169 | cifs_convert_address(struct sockaddr *dst, const char *src, int len) |
169 | { | 170 | { |
170 | int rc; | 171 | int rc, alen, slen; |
171 | char *pct, *endp; | 172 | const char *pct; |
173 | char *endp, scope_id[13]; | ||
172 | struct sockaddr_in *s4 = (struct sockaddr_in *) dst; | 174 | struct sockaddr_in *s4 = (struct sockaddr_in *) dst; |
173 | struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; | 175 | struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; |
174 | 176 | ||
175 | /* IPv4 address */ | 177 | /* IPv4 address */ |
176 | if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) { | 178 | if (cifs_inet_pton(AF_INET, src, len, &s4->sin_addr.s_addr)) { |
177 | s4->sin_family = AF_INET; | 179 | s4->sin_family = AF_INET; |
178 | return 1; | 180 | return 1; |
179 | } | 181 | } |
180 | 182 | ||
181 | /* temporarily terminate string */ | 183 | /* attempt to exclude the scope ID from the address part */ |
182 | pct = strchr(src, '%'); | 184 | pct = memchr(src, '%', len); |
183 | if (pct) | 185 | alen = pct ? pct - src : len; |
184 | *pct = '\0'; | ||
185 | |||
186 | rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr); | ||
187 | |||
188 | /* repair temp termination (if any) and make pct point to scopeid */ | ||
189 | if (pct) | ||
190 | *pct++ = '%'; | ||
191 | 186 | ||
187 | rc = cifs_inet_pton(AF_INET6, src, alen, &s6->sin6_addr.s6_addr); | ||
192 | if (!rc) | 188 | if (!rc) |
193 | return rc; | 189 | return rc; |
194 | 190 | ||
195 | s6->sin6_family = AF_INET6; | 191 | s6->sin6_family = AF_INET6; |
196 | if (pct) { | 192 | if (pct) { |
193 | /* grab the scope ID */ | ||
194 | slen = len - (alen + 1); | ||
195 | if (slen <= 0 || slen > 12) | ||
196 | return 0; | ||
197 | memcpy(scope_id, pct + 1, slen); | ||
198 | scope_id[slen] = '\0'; | ||
199 | |||
197 | s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); | 200 | s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); |
198 | if (!*pct || *endp) | 201 | if (endp != scope_id + slen) |
199 | return 0; | 202 | return 0; |
200 | } | 203 | } |
201 | 204 | ||
@@ -203,10 +206,10 @@ cifs_convert_address(struct sockaddr *dst, char *src) | |||
203 | } | 206 | } |
204 | 207 | ||
205 | int | 208 | int |
206 | cifs_fill_sockaddr(struct sockaddr *dst, char *src, | 209 | cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, |
207 | const unsigned short int port) | 210 | const unsigned short int port) |
208 | { | 211 | { |
209 | if (!cifs_convert_address(dst, src)) | 212 | if (!cifs_convert_address(dst, src, len)) |
210 | return 0; | 213 | return 0; |
211 | 214 | ||
212 | switch (dst->sa_family) { | 215 | switch (dst->sa_family) { |
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index d97f9935a028..6526e6f21ecf 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include "coda_int.h" | 35 | #include "coda_int.h" |
36 | 36 | ||
37 | /* VFS super_block ops */ | 37 | /* VFS super_block ops */ |
38 | static void coda_clear_inode(struct inode *); | 38 | static void coda_evict_inode(struct inode *); |
39 | static void coda_put_super(struct super_block *); | 39 | static void coda_put_super(struct super_block *); |
40 | static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); | 40 | static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); |
41 | 41 | ||
@@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations = | |||
93 | { | 93 | { |
94 | .alloc_inode = coda_alloc_inode, | 94 | .alloc_inode = coda_alloc_inode, |
95 | .destroy_inode = coda_destroy_inode, | 95 | .destroy_inode = coda_destroy_inode, |
96 | .clear_inode = coda_clear_inode, | 96 | .evict_inode = coda_evict_inode, |
97 | .put_super = coda_put_super, | 97 | .put_super = coda_put_super, |
98 | .statfs = coda_statfs, | 98 | .statfs = coda_statfs, |
99 | .remount_fs = coda_remount, | 99 | .remount_fs = coda_remount, |
@@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb) | |||
224 | printk("Coda: Bye bye.\n"); | 224 | printk("Coda: Bye bye.\n"); |
225 | } | 225 | } |
226 | 226 | ||
227 | static void coda_clear_inode(struct inode *inode) | 227 | static void coda_evict_inode(struct inode *inode) |
228 | { | 228 | { |
229 | truncate_inode_pages(&inode->i_data, 0); | ||
230 | end_writeback(inode); | ||
229 | coda_cache_clear_inode(inode); | 231 | coda_cache_clear_inode(inode); |
230 | } | 232 | } |
231 | 233 | ||
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 66b9cf79c5ba..de89645777c7 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, | |||
177 | nbytes = req->uc_outSize; /* don't have more space! */ | 177 | nbytes = req->uc_outSize; /* don't have more space! */ |
178 | } | 178 | } |
179 | if (copy_from_user(req->uc_data, buf, nbytes)) { | 179 | if (copy_from_user(req->uc_data, buf, nbytes)) { |
180 | req->uc_flags |= REQ_ABORT; | 180 | req->uc_flags |= CODA_REQ_ABORT; |
181 | wake_up(&req->uc_sleep); | 181 | wake_up(&req->uc_sleep); |
182 | retval = -EFAULT; | 182 | retval = -EFAULT; |
183 | goto out; | 183 | goto out; |
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, | |||
254 | retval = -EFAULT; | 254 | retval = -EFAULT; |
255 | 255 | ||
256 | /* If request was not a signal, enqueue and don't free */ | 256 | /* If request was not a signal, enqueue and don't free */ |
257 | if (!(req->uc_flags & REQ_ASYNC)) { | 257 | if (!(req->uc_flags & CODA_REQ_ASYNC)) { |
258 | req->uc_flags |= REQ_READ; | 258 | req->uc_flags |= CODA_REQ_READ; |
259 | list_add_tail(&(req->uc_chain), &vcp->vc_processing); | 259 | list_add_tail(&(req->uc_chain), &vcp->vc_processing); |
260 | goto out; | 260 | goto out; |
261 | } | 261 | } |
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file) | |||
315 | list_del(&req->uc_chain); | 315 | list_del(&req->uc_chain); |
316 | 316 | ||
317 | /* Async requests need to be freed here */ | 317 | /* Async requests need to be freed here */ |
318 | if (req->uc_flags & REQ_ASYNC) { | 318 | if (req->uc_flags & CODA_REQ_ASYNC) { |
319 | CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); | 319 | CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); |
320 | kfree(req); | 320 | kfree(req); |
321 | continue; | 321 | continue; |
322 | } | 322 | } |
323 | req->uc_flags |= REQ_ABORT; | 323 | req->uc_flags |= CODA_REQ_ABORT; |
324 | wake_up(&req->uc_sleep); | 324 | wake_up(&req->uc_sleep); |
325 | } | 325 | } |
326 | 326 | ||
327 | list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) { | 327 | list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) { |
328 | list_del(&req->uc_chain); | 328 | list_del(&req->uc_chain); |
329 | 329 | ||
330 | req->uc_flags |= REQ_ABORT; | 330 | req->uc_flags |= CODA_REQ_ABORT; |
331 | wake_up(&req->uc_sleep); | 331 | wake_up(&req->uc_sleep); |
332 | } | 332 | } |
333 | 333 | ||
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f09c5ed76f6c..b8893ab6f9e6 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c | |||
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old) | |||
604 | (((r)->uc_opcode != CODA_CLOSE && \ | 604 | (((r)->uc_opcode != CODA_CLOSE && \ |
605 | (r)->uc_opcode != CODA_STORE && \ | 605 | (r)->uc_opcode != CODA_STORE && \ |
606 | (r)->uc_opcode != CODA_RELEASE) || \ | 606 | (r)->uc_opcode != CODA_RELEASE) || \ |
607 | (r)->uc_flags & REQ_READ)) | 607 | (r)->uc_flags & CODA_REQ_READ)) |
608 | 608 | ||
609 | static inline void coda_waitfor_upcall(struct upc_req *req) | 609 | static inline void coda_waitfor_upcall(struct upc_req *req) |
610 | { | 610 | { |
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req) | |||
624 | set_current_state(TASK_UNINTERRUPTIBLE); | 624 | set_current_state(TASK_UNINTERRUPTIBLE); |
625 | 625 | ||
626 | /* got a reply */ | 626 | /* got a reply */ |
627 | if (req->uc_flags & (REQ_WRITE | REQ_ABORT)) | 627 | if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT)) |
628 | break; | 628 | break; |
629 | 629 | ||
630 | if (blocked && time_after(jiffies, timeout) && | 630 | if (blocked && time_after(jiffies, timeout) && |
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp, | |||
708 | coda_waitfor_upcall(req); | 708 | coda_waitfor_upcall(req); |
709 | 709 | ||
710 | /* Op went through, interrupt or not... */ | 710 | /* Op went through, interrupt or not... */ |
711 | if (req->uc_flags & REQ_WRITE) { | 711 | if (req->uc_flags & CODA_REQ_WRITE) { |
712 | out = (union outputArgs *)req->uc_data; | 712 | out = (union outputArgs *)req->uc_data; |
713 | /* here we map positive Venus errors to kernel errors */ | 713 | /* here we map positive Venus errors to kernel errors */ |
714 | error = -out->oh.result; | 714 | error = -out->oh.result; |
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp, | |||
717 | } | 717 | } |
718 | 718 | ||
719 | error = -EINTR; | 719 | error = -EINTR; |
720 | if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) { | 720 | if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) { |
721 | printk(KERN_WARNING "coda: Unexpected interruption.\n"); | 721 | printk(KERN_WARNING "coda: Unexpected interruption.\n"); |
722 | goto exit; | 722 | goto exit; |
723 | } | 723 | } |
724 | 724 | ||
725 | /* Interrupted before venus read it. */ | 725 | /* Interrupted before venus read it. */ |
726 | if (!(req->uc_flags & REQ_READ)) | 726 | if (!(req->uc_flags & CODA_REQ_READ)) |
727 | goto exit; | 727 | goto exit; |
728 | 728 | ||
729 | /* Venus saw the upcall, make sure we can send interrupt signal */ | 729 | /* Venus saw the upcall, make sure we can send interrupt signal */ |
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp, | |||
747 | sig_inputArgs->ih.opcode = CODA_SIGNAL; | 747 | sig_inputArgs->ih.opcode = CODA_SIGNAL; |
748 | sig_inputArgs->ih.unique = req->uc_unique; | 748 | sig_inputArgs->ih.unique = req->uc_unique; |
749 | 749 | ||
750 | sig_req->uc_flags = REQ_ASYNC; | 750 | sig_req->uc_flags = CODA_REQ_ASYNC; |
751 | sig_req->uc_opcode = sig_inputArgs->ih.opcode; | 751 | sig_req->uc_opcode = sig_inputArgs->ih.opcode; |
752 | sig_req->uc_unique = sig_inputArgs->ih.unique; | 752 | sig_req->uc_unique = sig_inputArgs->ih.unique; |
753 | sig_req->uc_inSize = sizeof(struct coda_in_hdr); | 753 | sig_req->uc_inSize = sizeof(struct coda_in_hdr); |
diff --git a/fs/compat.c b/fs/compat.c index 6490d2134ff3..e6d5d70cf3cf 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -8,13 +8,14 @@ | |||
8 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) | 8 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) |
9 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) | 9 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) |
10 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs | 10 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs |
11 | * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) | 11 | * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) |
12 | * | 12 | * |
13 | * This program is free software; you can redistribute it and/or modify | 13 | * This program is free software; you can redistribute it and/or modify |
14 | * it under the terms of the GNU General Public License version 2 as | 14 | * it under the terms of the GNU General Public License version 2 as |
15 | * published by the Free Software Foundation. | 15 | * published by the Free Software Foundation. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/stddef.h> | ||
18 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
19 | #include <linux/linkage.h> | 20 | #include <linux/linkage.h> |
20 | #include <linux/compat.h> | 21 | #include <linux/compat.h> |
@@ -266,7 +267,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta | |||
266 | error = user_path(pathname, &path); | 267 | error = user_path(pathname, &path); |
267 | if (!error) { | 268 | if (!error) { |
268 | struct kstatfs tmp; | 269 | struct kstatfs tmp; |
269 | error = vfs_statfs(path.dentry, &tmp); | 270 | error = vfs_statfs(&path, &tmp); |
270 | if (!error) | 271 | if (!error) |
271 | error = put_compat_statfs(buf, &tmp); | 272 | error = put_compat_statfs(buf, &tmp); |
272 | path_put(&path); | 273 | path_put(&path); |
@@ -284,7 +285,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user | |||
284 | file = fget(fd); | 285 | file = fget(fd); |
285 | if (!file) | 286 | if (!file) |
286 | goto out; | 287 | goto out; |
287 | error = vfs_statfs(file->f_path.dentry, &tmp); | 288 | error = vfs_statfs(&file->f_path, &tmp); |
288 | if (!error) | 289 | if (!error) |
289 | error = put_compat_statfs(buf, &tmp); | 290 | error = put_compat_statfs(buf, &tmp); |
290 | fput(file); | 291 | fput(file); |
@@ -334,7 +335,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s | |||
334 | error = user_path(pathname, &path); | 335 | error = user_path(pathname, &path); |
335 | if (!error) { | 336 | if (!error) { |
336 | struct kstatfs tmp; | 337 | struct kstatfs tmp; |
337 | error = vfs_statfs(path.dentry, &tmp); | 338 | error = vfs_statfs(&path, &tmp); |
338 | if (!error) | 339 | if (!error) |
339 | error = put_compat_statfs64(buf, &tmp); | 340 | error = put_compat_statfs64(buf, &tmp); |
340 | path_put(&path); | 341 | path_put(&path); |
@@ -355,7 +356,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c | |||
355 | file = fget(fd); | 356 | file = fget(fd); |
356 | if (!file) | 357 | if (!file) |
357 | goto out; | 358 | goto out; |
358 | error = vfs_statfs(file->f_path.dentry, &tmp); | 359 | error = vfs_statfs(&file->f_path, &tmp); |
359 | if (!error) | 360 | if (!error) |
360 | error = put_compat_statfs64(buf, &tmp); | 361 | error = put_compat_statfs64(buf, &tmp); |
361 | fput(file); | 362 | fput(file); |
@@ -378,7 +379,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) | |||
378 | sb = user_get_super(new_decode_dev(dev)); | 379 | sb = user_get_super(new_decode_dev(dev)); |
379 | if (!sb) | 380 | if (!sb) |
380 | return -EINVAL; | 381 | return -EINVAL; |
381 | err = vfs_statfs(sb->s_root, &sbuf); | 382 | err = statfs_by_dentry(sb->s_root, &sbuf); |
382 | drop_super(sb); | 383 | drop_super(sb); |
383 | if (err) | 384 | if (err) |
384 | return err; | 385 | return err; |
@@ -891,8 +892,6 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name, | |||
891 | return retval; | 892 | return retval; |
892 | } | 893 | } |
893 | 894 | ||
894 | #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) | ||
895 | |||
896 | struct compat_old_linux_dirent { | 895 | struct compat_old_linux_dirent { |
897 | compat_ulong_t d_ino; | 896 | compat_ulong_t d_ino; |
898 | compat_ulong_t d_offset; | 897 | compat_ulong_t d_offset; |
@@ -981,7 +980,8 @@ static int compat_filldir(void *__buf, const char *name, int namlen, | |||
981 | struct compat_linux_dirent __user * dirent; | 980 | struct compat_linux_dirent __user * dirent; |
982 | struct compat_getdents_callback *buf = __buf; | 981 | struct compat_getdents_callback *buf = __buf; |
983 | compat_ulong_t d_ino; | 982 | compat_ulong_t d_ino; |
984 | int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(compat_long_t)); | 983 | int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + |
984 | namlen + 2, sizeof(compat_long_t)); | ||
985 | 985 | ||
986 | buf->error = -EINVAL; /* only used if we fail.. */ | 986 | buf->error = -EINVAL; /* only used if we fail.. */ |
987 | if (reclen > buf->count) | 987 | if (reclen > buf->count) |
@@ -1068,8 +1068,8 @@ static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t | |||
1068 | { | 1068 | { |
1069 | struct linux_dirent64 __user *dirent; | 1069 | struct linux_dirent64 __user *dirent; |
1070 | struct compat_getdents_callback64 *buf = __buf; | 1070 | struct compat_getdents_callback64 *buf = __buf; |
1071 | int jj = NAME_OFFSET(dirent); | 1071 | int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, |
1072 | int reclen = ALIGN(jj + namlen + 1, sizeof(u64)); | 1072 | sizeof(u64)); |
1073 | u64 off; | 1073 | u64 off; |
1074 | 1074 | ||
1075 | buf->error = -EINVAL; /* only used if we fail.. */ | 1075 | buf->error = -EINVAL; /* only used if we fail.. */ |
@@ -1193,11 +1193,10 @@ out: | |||
1193 | if (iov != iovstack) | 1193 | if (iov != iovstack) |
1194 | kfree(iov); | 1194 | kfree(iov); |
1195 | if ((ret + (type == READ)) > 0) { | 1195 | if ((ret + (type == READ)) > 0) { |
1196 | struct dentry *dentry = file->f_path.dentry; | ||
1197 | if (type == READ) | 1196 | if (type == READ) |
1198 | fsnotify_access(dentry); | 1197 | fsnotify_access(file); |
1199 | else | 1198 | else |
1200 | fsnotify_modify(dentry); | 1199 | fsnotify_modify(file); |
1201 | } | 1200 | } |
1202 | return ret; | 1201 | return ret; |
1203 | } | 1202 | } |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 641640dc7ae5..70227e0dc01d 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) | 4 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) |
5 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) | 5 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) |
6 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs | 6 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs |
7 | * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) | 7 | * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) |
8 | * | 8 | * |
9 | * These routines maintain argument size conversion between 32bit and 64bit | 9 | * These routines maintain argument size conversion between 32bit and 64bit |
10 | * ioctls. | 10 | * ioctls. |
@@ -131,23 +131,6 @@ static int w_long(unsigned int fd, unsigned int cmd, | |||
131 | return err; | 131 | return err; |
132 | } | 132 | } |
133 | 133 | ||
134 | static int rw_long(unsigned int fd, unsigned int cmd, | ||
135 | compat_ulong_t __user *argp) | ||
136 | { | ||
137 | mm_segment_t old_fs = get_fs(); | ||
138 | int err; | ||
139 | unsigned long val; | ||
140 | |||
141 | if(get_user(val, argp)) | ||
142 | return -EFAULT; | ||
143 | set_fs (KERNEL_DS); | ||
144 | err = sys_ioctl(fd, cmd, (unsigned long)&val); | ||
145 | set_fs (old_fs); | ||
146 | if (!err && put_user(val, argp)) | ||
147 | return -EFAULT; | ||
148 | return err; | ||
149 | } | ||
150 | |||
151 | struct compat_video_event { | 134 | struct compat_video_event { |
152 | int32_t type; | 135 | int32_t type; |
153 | compat_time_t timestamp; | 136 | compat_time_t timestamp; |
@@ -594,15 +577,12 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, | |||
594 | return err; | 577 | return err; |
595 | } | 578 | } |
596 | 579 | ||
597 | static int ioc_settimeout(unsigned int fd, unsigned int cmd, | ||
598 | compat_ulong_t __user *argp) | ||
599 | { | ||
600 | return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp); | ||
601 | } | ||
602 | |||
603 | /* Bluetooth ioctls */ | 580 | /* Bluetooth ioctls */ |
604 | #define HCIUARTSETPROTO _IOW('U', 200, int) | 581 | #define HCIUARTSETPROTO _IOW('U', 200, int) |
605 | #define HCIUARTGETPROTO _IOR('U', 201, int) | 582 | #define HCIUARTGETPROTO _IOR('U', 201, int) |
583 | #define HCIUARTGETDEVICE _IOR('U', 202, int) | ||
584 | #define HCIUARTSETFLAGS _IOW('U', 203, int) | ||
585 | #define HCIUARTGETFLAGS _IOR('U', 204, int) | ||
606 | 586 | ||
607 | #define BNEPCONNADD _IOW('B', 200, int) | 587 | #define BNEPCONNADD _IOW('B', 200, int) |
608 | #define BNEPCONNDEL _IOW('B', 201, int) | 588 | #define BNEPCONNDEL _IOW('B', 201, int) |
@@ -966,6 +946,7 @@ COMPATIBLE_IOCTL(TIOCGPGRP) | |||
966 | COMPATIBLE_IOCTL(TIOCGPTN) | 946 | COMPATIBLE_IOCTL(TIOCGPTN) |
967 | COMPATIBLE_IOCTL(TIOCSPTLCK) | 947 | COMPATIBLE_IOCTL(TIOCSPTLCK) |
968 | COMPATIBLE_IOCTL(TIOCSERGETLSR) | 948 | COMPATIBLE_IOCTL(TIOCSERGETLSR) |
949 | COMPATIBLE_IOCTL(TIOCSIG) | ||
969 | #ifdef TCGETS2 | 950 | #ifdef TCGETS2 |
970 | COMPATIBLE_IOCTL(TCGETS2) | 951 | COMPATIBLE_IOCTL(TCGETS2) |
971 | COMPATIBLE_IOCTL(TCSETS2) | 952 | COMPATIBLE_IOCTL(TCSETS2) |
@@ -1281,13 +1262,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5) | |||
1281 | COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) | 1262 | COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) |
1282 | COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) | 1263 | COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) |
1283 | COMPATIBLE_IOCTL(OSS_GETVERSION) | 1264 | COMPATIBLE_IOCTL(OSS_GETVERSION) |
1284 | /* AUTOFS */ | ||
1285 | COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC) | ||
1286 | COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) | ||
1287 | COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) | ||
1288 | COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) | ||
1289 | COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) | ||
1290 | COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) | ||
1291 | /* Raw devices */ | 1265 | /* Raw devices */ |
1292 | COMPATIBLE_IOCTL(RAW_SETBIND) | 1266 | COMPATIBLE_IOCTL(RAW_SETBIND) |
1293 | COMPATIBLE_IOCTL(RAW_GETBIND) | 1267 | COMPATIBLE_IOCTL(RAW_GETBIND) |
@@ -1328,6 +1302,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL) | |||
1328 | COMPATIBLE_IOCTL(HCISETLINKMODE) | 1302 | COMPATIBLE_IOCTL(HCISETLINKMODE) |
1329 | COMPATIBLE_IOCTL(HCISETACLMTU) | 1303 | COMPATIBLE_IOCTL(HCISETACLMTU) |
1330 | COMPATIBLE_IOCTL(HCISETSCOMTU) | 1304 | COMPATIBLE_IOCTL(HCISETSCOMTU) |
1305 | COMPATIBLE_IOCTL(HCIBLOCKADDR) | ||
1306 | COMPATIBLE_IOCTL(HCIUNBLOCKADDR) | ||
1331 | COMPATIBLE_IOCTL(HCIINQUIRY) | 1307 | COMPATIBLE_IOCTL(HCIINQUIRY) |
1332 | COMPATIBLE_IOCTL(HCIUARTSETPROTO) | 1308 | COMPATIBLE_IOCTL(HCIUARTSETPROTO) |
1333 | COMPATIBLE_IOCTL(HCIUARTGETPROTO) | 1309 | COMPATIBLE_IOCTL(HCIUARTGETPROTO) |
@@ -1552,9 +1528,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd, | |||
1552 | case RAW_GETBIND: | 1528 | case RAW_GETBIND: |
1553 | return raw_ioctl(fd, cmd, argp); | 1529 | return raw_ioctl(fd, cmd, argp); |
1554 | #endif | 1530 | #endif |
1555 | #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int) | ||
1556 | case AUTOFS_IOC_SETTIMEOUT32: | ||
1557 | return ioc_settimeout(fd, cmd, argp); | ||
1558 | /* One SMB ioctl needs translations. */ | 1531 | /* One SMB ioctl needs translations. */ |
1559 | #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) | 1532 | #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) |
1560 | case SMB_IOC_GETMOUNTUID_32: | 1533 | case SMB_IOC_GETMOUNTUID_32: |
@@ -1609,9 +1582,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd, | |||
1609 | case KDSKBMETA: | 1582 | case KDSKBMETA: |
1610 | case KDSKBLED: | 1583 | case KDSKBLED: |
1611 | case KDSETLED: | 1584 | case KDSETLED: |
1612 | /* AUTOFS */ | ||
1613 | case AUTOFS_IOC_READY: | ||
1614 | case AUTOFS_IOC_FAIL: | ||
1615 | /* NBD */ | 1585 | /* NBD */ |
1616 | case NBD_SET_SOCK: | 1586 | case NBD_SET_SOCK: |
1617 | case NBD_SET_BLKSIZE: | 1587 | case NBD_SET_BLKSIZE: |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index dd3634e4c967..a53b130b366c 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex); | |||
39 | #define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1) | 39 | #define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1) |
40 | #define OFFSET(x) ((x)->i_ino) | 40 | #define OFFSET(x) ((x)->i_ino) |
41 | 41 | ||
42 | 42 | static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode) | |
43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) | ||
44 | { | ||
45 | struct cramfs_inode *cramfs_inode = opaque; | ||
46 | return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1; | ||
47 | } | ||
48 | |||
49 | static int cramfs_iget5_set(struct inode *inode, void *opaque) | ||
50 | { | 43 | { |
51 | struct cramfs_inode *cramfs_inode = opaque; | 44 | static struct timespec zerotime; |
52 | inode->i_ino = CRAMINO(cramfs_inode); | 45 | inode->i_mode = cramfs_inode->mode; |
53 | return 0; | 46 | inode->i_uid = cramfs_inode->uid; |
47 | inode->i_size = cramfs_inode->size; | ||
48 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
49 | inode->i_gid = cramfs_inode->gid; | ||
50 | /* Struct copy intentional */ | ||
51 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
52 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
53 | but it's the best we can do without reading the directory | ||
54 | contents. 1 yields the right result in GNU find, even | ||
55 | without -noleaf option. */ | ||
56 | if (S_ISREG(inode->i_mode)) { | ||
57 | inode->i_fop = &generic_ro_fops; | ||
58 | inode->i_data.a_ops = &cramfs_aops; | ||
59 | } else if (S_ISDIR(inode->i_mode)) { | ||
60 | inode->i_op = &cramfs_dir_inode_operations; | ||
61 | inode->i_fop = &cramfs_directory_operations; | ||
62 | } else if (S_ISLNK(inode->i_mode)) { | ||
63 | inode->i_op = &page_symlink_inode_operations; | ||
64 | inode->i_data.a_ops = &cramfs_aops; | ||
65 | } else { | ||
66 | init_special_inode(inode, inode->i_mode, | ||
67 | old_decode_dev(cramfs_inode->size)); | ||
68 | } | ||
54 | } | 69 | } |
55 | 70 | ||
56 | static struct inode *get_cramfs_inode(struct super_block *sb, | 71 | static struct inode *get_cramfs_inode(struct super_block *sb, |
57 | struct cramfs_inode * cramfs_inode) | 72 | struct cramfs_inode * cramfs_inode) |
58 | { | 73 | { |
59 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), | 74 | struct inode *inode; |
60 | cramfs_iget5_test, cramfs_iget5_set, | 75 | if (CRAMINO(cramfs_inode) == 1) { |
61 | cramfs_inode); | 76 | inode = new_inode(sb); |
62 | static struct timespec zerotime; | 77 | if (inode) { |
63 | 78 | inode->i_ino = 1; | |
64 | if (inode && (inode->i_state & I_NEW)) { | 79 | setup_inode(inode, cramfs_inode); |
65 | inode->i_mode = cramfs_inode->mode; | 80 | } |
66 | inode->i_uid = cramfs_inode->uid; | 81 | } else { |
67 | inode->i_size = cramfs_inode->size; | 82 | inode = iget_locked(sb, CRAMINO(cramfs_inode)); |
68 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | 83 | if (inode) { |
69 | inode->i_gid = cramfs_inode->gid; | 84 | setup_inode(inode, cramfs_inode); |
70 | /* Struct copy intentional */ | 85 | unlock_new_inode(inode); |
71 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
72 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
73 | but it's the best we can do without reading the directory | ||
74 | contents. 1 yields the right result in GNU find, even | ||
75 | without -noleaf option. */ | ||
76 | if (S_ISREG(inode->i_mode)) { | ||
77 | inode->i_fop = &generic_ro_fops; | ||
78 | inode->i_data.a_ops = &cramfs_aops; | ||
79 | } else if (S_ISDIR(inode->i_mode)) { | ||
80 | inode->i_op = &cramfs_dir_inode_operations; | ||
81 | inode->i_fop = &cramfs_directory_operations; | ||
82 | } else if (S_ISLNK(inode->i_mode)) { | ||
83 | inode->i_op = &page_symlink_inode_operations; | ||
84 | inode->i_data.a_ops = &cramfs_aops; | ||
85 | } else { | ||
86 | init_special_inode(inode, inode->i_mode, | ||
87 | old_decode_dev(cramfs_inode->size)); | ||
88 | } | 86 | } |
89 | unlock_new_inode(inode); | ||
90 | } | 87 | } |
91 | return inode; | 88 | return inode; |
92 | } | 89 | } |
93 | 90 | ||
94 | static void cramfs_drop_inode(struct inode *inode) | ||
95 | { | ||
96 | if (inode->i_ino == 1) | ||
97 | generic_delete_inode(inode); | ||
98 | else | ||
99 | generic_drop_inode(inode); | ||
100 | } | ||
101 | |||
102 | /* | 91 | /* |
103 | * We have our own block cache: don't fill up the buffer cache | 92 | * We have our own block cache: don't fill up the buffer cache |
104 | * with the rom-image, because the way the filesystem is set | 93 | * with the rom-image, because the way the filesystem is set |
@@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = { | |||
542 | .put_super = cramfs_put_super, | 531 | .put_super = cramfs_put_super, |
543 | .remount_fs = cramfs_remount, | 532 | .remount_fs = cramfs_remount, |
544 | .statfs = cramfs_statfs, | 533 | .statfs = cramfs_statfs, |
545 | .drop_inode = cramfs_drop_inode, | ||
546 | }; | 534 | }; |
547 | 535 | ||
548 | static int cramfs_get_sb(struct file_system_type *fs_type, | 536 | static int cramfs_get_sb(struct file_system_type *fs_type, |
diff --git a/fs/dcache.c b/fs/dcache.c index 86d4db15473e..9f2c13417969 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -536,7 +536,7 @@ restart: | |||
536 | */ | 536 | */ |
537 | static void prune_dcache(int count) | 537 | static void prune_dcache(int count) |
538 | { | 538 | { |
539 | struct super_block *sb, *n; | 539 | struct super_block *sb, *p = NULL; |
540 | int w_count; | 540 | int w_count; |
541 | int unused = dentry_stat.nr_unused; | 541 | int unused = dentry_stat.nr_unused; |
542 | int prune_ratio; | 542 | int prune_ratio; |
@@ -550,7 +550,7 @@ static void prune_dcache(int count) | |||
550 | else | 550 | else |
551 | prune_ratio = unused / count; | 551 | prune_ratio = unused / count; |
552 | spin_lock(&sb_lock); | 552 | spin_lock(&sb_lock); |
553 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { | 553 | list_for_each_entry(sb, &super_blocks, s_list) { |
554 | if (list_empty(&sb->s_instances)) | 554 | if (list_empty(&sb->s_instances)) |
555 | continue; | 555 | continue; |
556 | if (sb->s_nr_dentry_unused == 0) | 556 | if (sb->s_nr_dentry_unused == 0) |
@@ -590,14 +590,16 @@ static void prune_dcache(int count) | |||
590 | up_read(&sb->s_umount); | 590 | up_read(&sb->s_umount); |
591 | } | 591 | } |
592 | spin_lock(&sb_lock); | 592 | spin_lock(&sb_lock); |
593 | /* lock was dropped, must reset next */ | 593 | if (p) |
594 | list_safe_reset_next(sb, n, s_list); | 594 | __put_super(p); |
595 | count -= pruned; | 595 | count -= pruned; |
596 | __put_super(sb); | 596 | p = sb; |
597 | /* more work left to do? */ | 597 | /* more work left to do? */ |
598 | if (count <= 0) | 598 | if (count <= 0) |
599 | break; | 599 | break; |
600 | } | 600 | } |
601 | if (p) | ||
602 | __put_super(p); | ||
601 | spin_unlock(&sb_lock); | 603 | spin_unlock(&sb_lock); |
602 | spin_unlock(&dcache_lock); | 604 | spin_unlock(&dcache_lock); |
603 | } | 605 | } |
@@ -2049,16 +2051,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, | |||
2049 | /* | 2051 | /* |
2050 | * Write full pathname from the root of the filesystem into the buffer. | 2052 | * Write full pathname from the root of the filesystem into the buffer. |
2051 | */ | 2053 | */ |
2052 | char *dentry_path(struct dentry *dentry, char *buf, int buflen) | 2054 | char *__dentry_path(struct dentry *dentry, char *buf, int buflen) |
2053 | { | 2055 | { |
2054 | char *end = buf + buflen; | 2056 | char *end = buf + buflen; |
2055 | char *retval; | 2057 | char *retval; |
2056 | 2058 | ||
2057 | spin_lock(&dcache_lock); | ||
2058 | prepend(&end, &buflen, "\0", 1); | 2059 | prepend(&end, &buflen, "\0", 1); |
2059 | if (d_unlinked(dentry) && | ||
2060 | (prepend(&end, &buflen, "//deleted", 9) != 0)) | ||
2061 | goto Elong; | ||
2062 | if (buflen < 1) | 2060 | if (buflen < 1) |
2063 | goto Elong; | 2061 | goto Elong; |
2064 | /* Get '/' right */ | 2062 | /* Get '/' right */ |
@@ -2076,7 +2074,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen) | |||
2076 | retval = end; | 2074 | retval = end; |
2077 | dentry = parent; | 2075 | dentry = parent; |
2078 | } | 2076 | } |
2077 | return retval; | ||
2078 | Elong: | ||
2079 | return ERR_PTR(-ENAMETOOLONG); | ||
2080 | } | ||
2081 | EXPORT_SYMBOL(__dentry_path); | ||
2082 | |||
2083 | char *dentry_path(struct dentry *dentry, char *buf, int buflen) | ||
2084 | { | ||
2085 | char *p = NULL; | ||
2086 | char *retval; | ||
2087 | |||
2088 | spin_lock(&dcache_lock); | ||
2089 | if (d_unlinked(dentry)) { | ||
2090 | p = buf + buflen; | ||
2091 | if (prepend(&p, &buflen, "//deleted", 10) != 0) | ||
2092 | goto Elong; | ||
2093 | buflen++; | ||
2094 | } | ||
2095 | retval = __dentry_path(dentry, buf, buflen); | ||
2079 | spin_unlock(&dcache_lock); | 2096 | spin_unlock(&dcache_lock); |
2097 | if (!IS_ERR(retval) && p) | ||
2098 | *p = '/'; /* restore '/' overriden with '\0' */ | ||
2080 | return retval; | 2099 | return retval; |
2081 | Elong: | 2100 | Elong: |
2082 | spin_unlock(&dcache_lock); | 2101 | spin_unlock(&dcache_lock); |
diff --git a/fs/direct-io.c b/fs/direct-io.c index a10cb91cadea..51f270b479b6 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -1136,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1136 | return ret; | 1136 | return ret; |
1137 | } | 1137 | } |
1138 | 1138 | ||
1139 | /* | ||
1140 | * This is a library function for use by filesystem drivers. | ||
1141 | * | ||
1142 | * The locking rules are governed by the flags parameter: | ||
1143 | * - if the flags value contains DIO_LOCKING we use a fancy locking | ||
1144 | * scheme for dumb filesystems. | ||
1145 | * For writes this function is called under i_mutex and returns with | ||
1146 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | ||
1147 | * taken and dropped again before returning. | ||
1148 | * For reads and writes i_alloc_sem is taken in shared mode and released | ||
1149 | * on I/O completion (which may happen asynchronously after returning to | ||
1150 | * the caller). | ||
1151 | * | ||
1152 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | ||
1153 | * internal locking but rather rely on the filesystem to synchronize | ||
1154 | * direct I/O reads/writes versus each other and truncate. | ||
1155 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1156 | * entry and are never taken. | ||
1157 | */ | ||
1139 | ssize_t | 1158 | ssize_t |
1140 | __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, | 1159 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
1141 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1160 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1142 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1161 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1143 | dio_submit_t submit_io, int flags) | 1162 | dio_submit_t submit_io, int flags) |
@@ -1233,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, | |||
1233 | out: | 1252 | out: |
1234 | return retval; | 1253 | return retval; |
1235 | } | 1254 | } |
1236 | EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc); | ||
1237 | |||
1238 | /* | ||
1239 | * This is a library function for use by filesystem drivers. | ||
1240 | * | ||
1241 | * The locking rules are governed by the flags parameter: | ||
1242 | * - if the flags value contains DIO_LOCKING we use a fancy locking | ||
1243 | * scheme for dumb filesystems. | ||
1244 | * For writes this function is called under i_mutex and returns with | ||
1245 | * i_mutex held, for reads, i_mutex is not held on entry, but it is | ||
1246 | * taken and dropped again before returning. | ||
1247 | * For reads and writes i_alloc_sem is taken in shared mode and released | ||
1248 | * on I/O completion (which may happen asynchronously after returning to | ||
1249 | * the caller). | ||
1250 | * | ||
1251 | * - if the flags value does NOT contain DIO_LOCKING we don't use any | ||
1252 | * internal locking but rather rely on the filesystem to synchronize | ||
1253 | * direct I/O reads/writes versus each other and truncate. | ||
1254 | * For reads and writes both i_mutex and i_alloc_sem are not held on | ||
1255 | * entry and are never taken. | ||
1256 | */ | ||
1257 | ssize_t | ||
1258 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | ||
1259 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | ||
1260 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | ||
1261 | dio_submit_t submit_io, int flags) | ||
1262 | { | ||
1263 | ssize_t retval; | ||
1264 | |||
1265 | retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, | ||
1266 | offset, nr_segs, get_block, end_io, submit_io, flags); | ||
1267 | /* | ||
1268 | * In case of error extending write may have instantiated a few | ||
1269 | * blocks outside i_size. Trim these off again for DIO_LOCKING. | ||
1270 | * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in | ||
1271 | * their own manner. This is a further example of where the old | ||
1272 | * truncate sequence is inadequate. | ||
1273 | * | ||
1274 | * NOTE: filesystems with their own locking have to handle this | ||
1275 | * on their own. | ||
1276 | */ | ||
1277 | if (flags & DIO_LOCKING) { | ||
1278 | if (unlikely((rw & WRITE) && retval < 0)) { | ||
1279 | loff_t isize = i_size_read(inode); | ||
1280 | loff_t end = offset + iov_length(iov, nr_segs); | ||
1281 | |||
1282 | if (end > isize) | ||
1283 | vmtruncate(inode, isize); | ||
1284 | } | ||
1285 | } | ||
1286 | |||
1287 | return retval; | ||
1288 | } | ||
1289 | EXPORT_SYMBOL(__blockdev_direct_IO); | 1255 | EXPORT_SYMBOL(__blockdev_direct_IO); |
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index c0d35c620526..37a34c2c622a 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
@@ -248,7 +248,7 @@ static struct connection *assoc2con(int assoc_id) | |||
248 | 248 | ||
249 | for (i = 0 ; i < CONN_HASH_SIZE; i++) { | 249 | for (i = 0 ; i < CONN_HASH_SIZE; i++) { |
250 | hlist_for_each_entry(con, h, &connection_hash[i], list) { | 250 | hlist_for_each_entry(con, h, &connection_hash[i], list) { |
251 | if (con && con->sctp_assoc == assoc_id) { | 251 | if (con->sctp_assoc == assoc_id) { |
252 | mutex_unlock(&connections_lock); | 252 | mutex_unlock(&connections_lock); |
253 | return con; | 253 | return con; |
254 | } | 254 | } |
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 2c6ad518100d..ef17e0169da1 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
@@ -81,24 +81,11 @@ static struct genl_ops dlm_nl_ops = { | |||
81 | 81 | ||
82 | int __init dlm_netlink_init(void) | 82 | int __init dlm_netlink_init(void) |
83 | { | 83 | { |
84 | int rv; | 84 | return genl_register_family_with_ops(&family, &dlm_nl_ops, 1); |
85 | |||
86 | rv = genl_register_family(&family); | ||
87 | if (rv) | ||
88 | return rv; | ||
89 | |||
90 | rv = genl_register_ops(&family, &dlm_nl_ops); | ||
91 | if (rv < 0) | ||
92 | goto err; | ||
93 | return 0; | ||
94 | err: | ||
95 | genl_unregister_family(&family); | ||
96 | return rv; | ||
97 | } | 85 | } |
98 | 86 | ||
99 | void dlm_netlink_exit(void) | 87 | void dlm_netlink_exit(void) |
100 | { | 88 | { |
101 | genl_unregister_ops(&family, &dlm_nl_ops); | ||
102 | genl_unregister_family(&family); | 89 | genl_unregister_family(&family); |
103 | } | 90 | } |
104 | 91 | ||
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 83c4f600786a..2195c213ab2f 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) | |||
18 | 18 | ||
19 | spin_lock(&inode_lock); | 19 | spin_lock(&inode_lock); |
20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 20 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
21 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 21 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
22 | continue; | 22 | continue; |
23 | if (inode->i_mapping->nrpages == 0) | 23 | if (inode->i_mapping->nrpages == 0) |
24 | continue; | 24 | continue; |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 1cc087635a5e..a2e3b562e65d 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -762,7 +762,7 @@ ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, | |||
762 | 762 | ||
763 | /** | 763 | /** |
764 | * ecryptfs_init_crypt_ctx | 764 | * ecryptfs_init_crypt_ctx |
765 | * @crypt_stat: Uninitilized crypt stats structure | 765 | * @crypt_stat: Uninitialized crypt stats structure |
766 | * | 766 | * |
767 | * Initialize the crypto context. | 767 | * Initialize the crypto context. |
768 | * | 768 | * |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index e8fcf4e2ed7d..622c95140802 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -199,7 +199,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | |||
199 | "the persistent file for the dentry with name " | 199 | "the persistent file for the dentry with name " |
200 | "[%s]; rc = [%d]\n", __func__, | 200 | "[%s]; rc = [%d]\n", __func__, |
201 | ecryptfs_dentry->d_name.name, rc); | 201 | ecryptfs_dentry->d_name.name, rc); |
202 | goto out; | 202 | goto out_free; |
203 | } | 203 | } |
204 | } | 204 | } |
205 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) | 205 | if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) |
@@ -207,7 +207,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | |||
207 | rc = -EPERM; | 207 | rc = -EPERM; |
208 | printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " | 208 | printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " |
209 | "file must hence be opened RO\n", __func__); | 209 | "file must hence be opened RO\n", __func__); |
210 | goto out; | 210 | goto out_free; |
211 | } | 211 | } |
212 | ecryptfs_set_file_lower( | 212 | ecryptfs_set_file_lower( |
213 | file, ecryptfs_inode_to_private(inode)->lower_file); | 213 | file, ecryptfs_inode_to_private(inode)->lower_file); |
@@ -292,12 +292,40 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) | |||
292 | return rc; | 292 | return rc; |
293 | } | 293 | } |
294 | 294 | ||
295 | static int ecryptfs_ioctl(struct inode *inode, struct file *file, | 295 | static long |
296 | unsigned int cmd, unsigned long arg); | 296 | ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
297 | { | ||
298 | struct file *lower_file = NULL; | ||
299 | long rc = -ENOTTY; | ||
300 | |||
301 | if (ecryptfs_file_to_private(file)) | ||
302 | lower_file = ecryptfs_file_to_lower(file); | ||
303 | if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl) | ||
304 | rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); | ||
305 | return rc; | ||
306 | } | ||
307 | |||
308 | #ifdef CONFIG_COMPAT | ||
309 | static long | ||
310 | ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
311 | { | ||
312 | struct file *lower_file = NULL; | ||
313 | long rc = -ENOIOCTLCMD; | ||
314 | |||
315 | if (ecryptfs_file_to_private(file)) | ||
316 | lower_file = ecryptfs_file_to_lower(file); | ||
317 | if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl) | ||
318 | rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); | ||
319 | return rc; | ||
320 | } | ||
321 | #endif | ||
297 | 322 | ||
298 | const struct file_operations ecryptfs_dir_fops = { | 323 | const struct file_operations ecryptfs_dir_fops = { |
299 | .readdir = ecryptfs_readdir, | 324 | .readdir = ecryptfs_readdir, |
300 | .ioctl = ecryptfs_ioctl, | 325 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
326 | #ifdef CONFIG_COMPAT | ||
327 | .compat_ioctl = ecryptfs_compat_ioctl, | ||
328 | #endif | ||
301 | .open = ecryptfs_open, | 329 | .open = ecryptfs_open, |
302 | .flush = ecryptfs_flush, | 330 | .flush = ecryptfs_flush, |
303 | .release = ecryptfs_release, | 331 | .release = ecryptfs_release, |
@@ -313,7 +341,10 @@ const struct file_operations ecryptfs_main_fops = { | |||
313 | .write = do_sync_write, | 341 | .write = do_sync_write, |
314 | .aio_write = generic_file_aio_write, | 342 | .aio_write = generic_file_aio_write, |
315 | .readdir = ecryptfs_readdir, | 343 | .readdir = ecryptfs_readdir, |
316 | .ioctl = ecryptfs_ioctl, | 344 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
345 | #ifdef CONFIG_COMPAT | ||
346 | .compat_ioctl = ecryptfs_compat_ioctl, | ||
347 | #endif | ||
317 | .mmap = generic_file_mmap, | 348 | .mmap = generic_file_mmap, |
318 | .open = ecryptfs_open, | 349 | .open = ecryptfs_open, |
319 | .flush = ecryptfs_flush, | 350 | .flush = ecryptfs_flush, |
@@ -322,20 +353,3 @@ const struct file_operations ecryptfs_main_fops = { | |||
322 | .fasync = ecryptfs_fasync, | 353 | .fasync = ecryptfs_fasync, |
323 | .splice_read = generic_file_splice_read, | 354 | .splice_read = generic_file_splice_read, |
324 | }; | 355 | }; |
325 | |||
326 | static int | ||
327 | ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | ||
328 | unsigned long arg) | ||
329 | { | ||
330 | int rc = 0; | ||
331 | struct file *lower_file = NULL; | ||
332 | |||
333 | if (ecryptfs_file_to_private(file)) | ||
334 | lower_file = ecryptfs_file_to_lower(file); | ||
335 | if (lower_file && lower_file->f_op && lower_file->f_op->ioctl) | ||
336 | rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode), | ||
337 | lower_file, cmd, arg); | ||
338 | else | ||
339 | rc = -ENOTTY; | ||
340 | return rc; | ||
341 | } | ||
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 31ef5252f0fe..6c55113e7222 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -264,7 +264,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | |||
264 | printk(KERN_ERR "%s: Out of memory whilst attempting " | 264 | printk(KERN_ERR "%s: Out of memory whilst attempting " |
265 | "to allocate ecryptfs_dentry_info struct\n", | 265 | "to allocate ecryptfs_dentry_info struct\n", |
266 | __func__); | 266 | __func__); |
267 | goto out_dput; | 267 | goto out_put; |
268 | } | 268 | } |
269 | ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); | 269 | ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); |
270 | ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); | 270 | ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); |
@@ -339,14 +339,85 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | |||
339 | out_free_kmem: | 339 | out_free_kmem: |
340 | kmem_cache_free(ecryptfs_header_cache_2, page_virt); | 340 | kmem_cache_free(ecryptfs_header_cache_2, page_virt); |
341 | goto out; | 341 | goto out; |
342 | out_dput: | 342 | out_put: |
343 | dput(lower_dentry); | 343 | dput(lower_dentry); |
344 | mntput(lower_mnt); | ||
344 | d_drop(ecryptfs_dentry); | 345 | d_drop(ecryptfs_dentry); |
345 | out: | 346 | out: |
346 | return rc; | 347 | return rc; |
347 | } | 348 | } |
348 | 349 | ||
349 | /** | 350 | /** |
351 | * ecryptfs_new_lower_dentry | ||
352 | * @ename: The name of the new dentry. | ||
353 | * @lower_dir_dentry: Parent directory of the new dentry. | ||
354 | * @nd: nameidata from last lookup. | ||
355 | * | ||
356 | * Create a new dentry or get it from lower parent dir. | ||
357 | */ | ||
358 | static struct dentry * | ||
359 | ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry, | ||
360 | struct nameidata *nd) | ||
361 | { | ||
362 | struct dentry *new_dentry; | ||
363 | struct dentry *tmp; | ||
364 | struct inode *lower_dir_inode; | ||
365 | |||
366 | lower_dir_inode = lower_dir_dentry->d_inode; | ||
367 | |||
368 | tmp = d_alloc(lower_dir_dentry, name); | ||
369 | if (!tmp) | ||
370 | return ERR_PTR(-ENOMEM); | ||
371 | |||
372 | mutex_lock(&lower_dir_inode->i_mutex); | ||
373 | new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd); | ||
374 | mutex_unlock(&lower_dir_inode->i_mutex); | ||
375 | |||
376 | if (!new_dentry) | ||
377 | new_dentry = tmp; | ||
378 | else | ||
379 | dput(tmp); | ||
380 | |||
381 | return new_dentry; | ||
382 | } | ||
383 | |||
384 | |||
385 | /** | ||
386 | * ecryptfs_lookup_one_lower | ||
387 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up | ||
388 | * @lower_dir_dentry: lower parent directory | ||
389 | * | ||
390 | * Get the lower dentry from vfs. If lower dentry does not exist yet, | ||
391 | * create it. | ||
392 | */ | ||
393 | static struct dentry * | ||
394 | ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry, | ||
395 | struct dentry *lower_dir_dentry) | ||
396 | { | ||
397 | struct nameidata nd; | ||
398 | struct vfsmount *lower_mnt; | ||
399 | struct qstr *name; | ||
400 | int err; | ||
401 | |||
402 | name = &ecryptfs_dentry->d_name; | ||
403 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( | ||
404 | ecryptfs_dentry->d_parent)); | ||
405 | err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd); | ||
406 | mntput(lower_mnt); | ||
407 | |||
408 | if (!err) { | ||
409 | /* we dont need the mount */ | ||
410 | mntput(nd.path.mnt); | ||
411 | return nd.path.dentry; | ||
412 | } | ||
413 | if (err != -ENOENT) | ||
414 | return ERR_PTR(err); | ||
415 | |||
416 | /* create a new lower dentry */ | ||
417 | return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd); | ||
418 | } | ||
419 | |||
420 | /** | ||
350 | * ecryptfs_lookup | 421 | * ecryptfs_lookup |
351 | * @ecryptfs_dir_inode: The eCryptfs directory inode | 422 | * @ecryptfs_dir_inode: The eCryptfs directory inode |
352 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up | 423 | * @ecryptfs_dentry: The eCryptfs dentry that we are looking up |
@@ -373,14 +444,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
373 | goto out_d_drop; | 444 | goto out_d_drop; |
374 | } | 445 | } |
375 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 446 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
376 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | 447 | |
377 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, | 448 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, |
378 | lower_dir_dentry, | 449 | lower_dir_dentry); |
379 | ecryptfs_dentry->d_name.len); | ||
380 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | ||
381 | if (IS_ERR(lower_dentry)) { | 450 | if (IS_ERR(lower_dentry)) { |
382 | rc = PTR_ERR(lower_dentry); | 451 | rc = PTR_ERR(lower_dentry); |
383 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 452 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " |
384 | "[%d] on lower_dentry = [%s]\n", __func__, rc, | 453 | "[%d] on lower_dentry = [%s]\n", __func__, rc, |
385 | encrypted_and_encoded_name); | 454 | encrypted_and_encoded_name); |
386 | goto out_d_drop; | 455 | goto out_d_drop; |
@@ -402,14 +471,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
402 | "filename; rc = [%d]\n", __func__, rc); | 471 | "filename; rc = [%d]\n", __func__, rc); |
403 | goto out_d_drop; | 472 | goto out_d_drop; |
404 | } | 473 | } |
405 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | 474 | lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry, |
406 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, | 475 | lower_dir_dentry); |
407 | lower_dir_dentry, | ||
408 | encrypted_and_encoded_name_size - 1); | ||
409 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | ||
410 | if (IS_ERR(lower_dentry)) { | 476 | if (IS_ERR(lower_dentry)) { |
411 | rc = PTR_ERR(lower_dentry); | 477 | rc = PTR_ERR(lower_dentry); |
412 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 478 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned " |
413 | "[%d] on lower_dentry = [%s]\n", __func__, rc, | 479 | "[%d] on lower_dentry = [%s]\n", __func__, rc, |
414 | encrypted_and_encoded_name); | 480 | encrypted_and_encoded_name); |
415 | goto out_d_drop; | 481 | goto out_d_drop; |
@@ -804,10 +870,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
804 | size_t num_zeros = (PAGE_CACHE_SIZE | 870 | size_t num_zeros = (PAGE_CACHE_SIZE |
805 | - (ia->ia_size & ~PAGE_CACHE_MASK)); | 871 | - (ia->ia_size & ~PAGE_CACHE_MASK)); |
806 | 872 | ||
873 | |||
874 | /* | ||
875 | * XXX(truncate) this should really happen at the begginning | ||
876 | * of ->setattr. But the code is too messy to that as part | ||
877 | * of a larger patch. ecryptfs is also totally missing out | ||
878 | * on the inode_change_ok check at the beginning of | ||
879 | * ->setattr while would include this. | ||
880 | */ | ||
881 | rc = inode_newsize_ok(inode, ia->ia_size); | ||
882 | if (rc) | ||
883 | goto out; | ||
884 | |||
807 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 885 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { |
808 | rc = simple_setsize(inode, ia->ia_size); | 886 | truncate_setsize(inode, ia->ia_size); |
809 | if (rc) | ||
810 | goto out; | ||
811 | lower_ia->ia_size = ia->ia_size; | 887 | lower_ia->ia_size = ia->ia_size; |
812 | lower_ia->ia_valid |= ATTR_SIZE; | 888 | lower_ia->ia_valid |= ATTR_SIZE; |
813 | goto out; | 889 | goto out; |
@@ -830,7 +906,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
830 | goto out; | 906 | goto out; |
831 | } | 907 | } |
832 | } | 908 | } |
833 | simple_setsize(inode, ia->ia_size); | 909 | truncate_setsize(inode, ia->ia_size); |
834 | rc = ecryptfs_write_inode_size_to_metadata(inode); | 910 | rc = ecryptfs_write_inode_size_to_metadata(inode); |
835 | if (rc) { | 911 | if (rc) { |
836 | printk(KERN_ERR "Problem with " | 912 | printk(KERN_ERR "Problem with " |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 46c4dd8dfcc3..bcb68c0cb1f0 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -274,7 +274,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, | |||
274 | struct user_namespace *user_ns, struct pid *pid, | 274 | struct user_namespace *user_ns, struct pid *pid, |
275 | u32 seq) | 275 | u32 seq) |
276 | { | 276 | { |
277 | struct ecryptfs_daemon *daemon; | 277 | struct ecryptfs_daemon *uninitialized_var(daemon); |
278 | struct ecryptfs_msg_ctx *msg_ctx; | 278 | struct ecryptfs_msg_ctx *msg_ctx; |
279 | size_t msg_size; | 279 | size_t msg_size; |
280 | struct nsproxy *nsproxy; | 280 | struct nsproxy *nsproxy; |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 0435886e4a9f..f7fc286a3aa9 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -118,11 +118,15 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) | |||
118 | */ | 118 | */ |
119 | static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 119 | static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
120 | { | 120 | { |
121 | return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf); | 121 | struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); |
122 | |||
123 | if (!lower_dentry->d_sb->s_op->statfs) | ||
124 | return -ENOSYS; | ||
125 | return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); | ||
122 | } | 126 | } |
123 | 127 | ||
124 | /** | 128 | /** |
125 | * ecryptfs_clear_inode | 129 | * ecryptfs_evict_inode |
126 | * @inode - The ecryptfs inode | 130 | * @inode - The ecryptfs inode |
127 | * | 131 | * |
128 | * Called by iput() when the inode reference count reached zero | 132 | * Called by iput() when the inode reference count reached zero |
@@ -131,8 +135,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
131 | * on the inode free list. We use this to drop out reference to the | 135 | * on the inode free list. We use this to drop out reference to the |
132 | * lower inode. | 136 | * lower inode. |
133 | */ | 137 | */ |
134 | static void ecryptfs_clear_inode(struct inode *inode) | 138 | static void ecryptfs_evict_inode(struct inode *inode) |
135 | { | 139 | { |
140 | truncate_inode_pages(&inode->i_data, 0); | ||
141 | end_writeback(inode); | ||
136 | iput(ecryptfs_inode_to_lower(inode)); | 142 | iput(ecryptfs_inode_to_lower(inode)); |
137 | } | 143 | } |
138 | 144 | ||
@@ -184,6 +190,6 @@ const struct super_operations ecryptfs_sops = { | |||
184 | .drop_inode = generic_delete_inode, | 190 | .drop_inode = generic_delete_inode, |
185 | .statfs = ecryptfs_statfs, | 191 | .statfs = ecryptfs_statfs, |
186 | .remount_fs = NULL, | 192 | .remount_fs = NULL, |
187 | .clear_inode = ecryptfs_clear_inode, | 193 | .evict_inode = ecryptfs_evict_inode, |
188 | .show_options = ecryptfs_show_options | 194 | .show_options = ecryptfs_show_options |
189 | }; | 195 | }; |
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/stat.h> | 29 | #include <linux/stat.h> |
30 | #include <linux/fcntl.h> | 30 | #include <linux/fcntl.h> |
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/swap.h> | 31 | #include <linux/swap.h> |
33 | #include <linux/string.h> | 32 | #include <linux/string.h> |
34 | #include <linux/init.h> | 33 | #include <linux/init.h> |
@@ -129,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) | |||
129 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) | 128 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) |
130 | goto exit; | 129 | goto exit; |
131 | 130 | ||
132 | fsnotify_open(file->f_path.dentry); | 131 | fsnotify_open(file); |
133 | 132 | ||
134 | error = -ENOEXEC; | 133 | error = -ENOEXEC; |
135 | if(file->f_op) { | 134 | if(file->f_op) { |
@@ -653,6 +652,7 @@ int setup_arg_pages(struct linux_binprm *bprm, | |||
653 | else | 652 | else |
654 | stack_base = vma->vm_start - stack_expand; | 653 | stack_base = vma->vm_start - stack_expand; |
655 | #endif | 654 | #endif |
655 | current->mm->start_stack = bprm->p; | ||
656 | ret = expand_stack(vma, stack_base); | 656 | ret = expand_stack(vma, stack_base); |
657 | if (ret) | 657 | if (ret) |
658 | ret = -EFAULT; | 658 | ret = -EFAULT; |
@@ -683,7 +683,7 @@ struct file *open_exec(const char *name) | |||
683 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) | 683 | if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) |
684 | goto exit; | 684 | goto exit; |
685 | 685 | ||
686 | fsnotify_open(file->f_path.dentry); | 686 | fsnotify_open(file); |
687 | 687 | ||
688 | err = deny_write_access(file); | 688 | err = deny_write_access(file); |
689 | if (err) | 689 | if (err) |
@@ -1891,13 +1891,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1891 | */ | 1891 | */ |
1892 | clear_thread_flag(TIF_SIGPENDING); | 1892 | clear_thread_flag(TIF_SIGPENDING); |
1893 | 1893 | ||
1894 | /* | ||
1895 | * lock_kernel() because format_corename() is controlled by sysctl, which | ||
1896 | * uses lock_kernel() | ||
1897 | */ | ||
1898 | lock_kernel(); | ||
1899 | ispipe = format_corename(corename, signr); | 1894 | ispipe = format_corename(corename, signr); |
1900 | unlock_kernel(); | ||
1901 | 1895 | ||
1902 | if (ispipe) { | 1896 | if (ispipe) { |
1903 | int dump_count; | 1897 | int dump_count; |
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 22721b2fd890..2dc925fa1010 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi, | |||
256 | } | 256 | } |
257 | 257 | ||
258 | /* inode.c */ | 258 | /* inode.c */ |
259 | void exofs_truncate(struct inode *inode); | ||
260 | int exofs_setattr(struct dentry *, struct iattr *); | 259 | int exofs_setattr(struct dentry *, struct iattr *); |
261 | int exofs_write_begin(struct file *file, struct address_space *mapping, | 260 | int exofs_write_begin(struct file *file, struct address_space *mapping, |
262 | loff_t pos, unsigned len, unsigned flags, | 261 | loff_t pos, unsigned len, unsigned flags, |
@@ -264,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, | |||
264 | extern struct inode *exofs_iget(struct super_block *, unsigned long); | 263 | extern struct inode *exofs_iget(struct super_block *, unsigned long); |
265 | struct inode *exofs_new_inode(struct inode *, int); | 264 | struct inode *exofs_new_inode(struct inode *, int); |
266 | extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); | 265 | extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); |
267 | extern void exofs_delete_inode(struct inode *); | 266 | extern void exofs_evict_inode(struct inode *); |
268 | 267 | ||
269 | /* dir.c: */ | 268 | /* dir.c: */ |
270 | int exofs_add_link(struct dentry *, struct inode *); | 269 | int exofs_add_link(struct dentry *, struct inode *); |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index aa1fd1a372cf..68cb23e3bb98 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -91,6 +91,5 @@ const struct file_operations exofs_file_operations = { | |||
91 | }; | 91 | }; |
92 | 92 | ||
93 | const struct inode_operations exofs_file_inode_operations = { | 93 | const struct inode_operations exofs_file_inode_operations = { |
94 | .truncate = exofs_truncate, | ||
95 | .setattr = exofs_setattr, | 94 | .setattr = exofs_setattr, |
96 | }; | 95 | }; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index fbf9f34554e0..eb7368ebd8cd 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -694,6 +694,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc) | |||
694 | return write_exec(&pcol); | 694 | return write_exec(&pcol); |
695 | } | 695 | } |
696 | 696 | ||
697 | /* i_mutex held using inode->i_size directly */ | ||
698 | static void _write_failed(struct inode *inode, loff_t to) | ||
699 | { | ||
700 | if (to > inode->i_size) | ||
701 | truncate_pagecache(inode, to, inode->i_size); | ||
702 | } | ||
703 | |||
697 | int exofs_write_begin(struct file *file, struct address_space *mapping, | 704 | int exofs_write_begin(struct file *file, struct address_space *mapping, |
698 | loff_t pos, unsigned len, unsigned flags, | 705 | loff_t pos, unsigned len, unsigned flags, |
699 | struct page **pagep, void **fsdata) | 706 | struct page **pagep, void **fsdata) |
@@ -707,7 +714,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, | |||
707 | fsdata); | 714 | fsdata); |
708 | if (ret) { | 715 | if (ret) { |
709 | EXOFS_DBGMSG("simple_write_begin faild\n"); | 716 | EXOFS_DBGMSG("simple_write_begin faild\n"); |
710 | return ret; | 717 | goto out; |
711 | } | 718 | } |
712 | 719 | ||
713 | page = *pagep; | 720 | page = *pagep; |
@@ -722,6 +729,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, | |||
722 | EXOFS_DBGMSG("__readpage_filler faild\n"); | 729 | EXOFS_DBGMSG("__readpage_filler faild\n"); |
723 | } | 730 | } |
724 | } | 731 | } |
732 | out: | ||
733 | if (unlikely(ret)) | ||
734 | _write_failed(mapping->host, pos + len); | ||
725 | 735 | ||
726 | return ret; | 736 | return ret; |
727 | } | 737 | } |
@@ -747,6 +757,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping, | |||
747 | int ret; | 757 | int ret; |
748 | 758 | ||
749 | ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); | 759 | ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); |
760 | if (unlikely(ret)) | ||
761 | _write_failed(inode, pos + len); | ||
762 | |||
763 | /* TODO: once simple_write_end marks inode dirty remove */ | ||
750 | if (i_size != inode->i_size) | 764 | if (i_size != inode->i_size) |
751 | mark_inode_dirty(inode); | 765 | mark_inode_dirty(inode); |
752 | return ret; | 766 | return ret; |
@@ -803,87 +817,55 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode) | |||
803 | return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); | 817 | return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); |
804 | } | 818 | } |
805 | 819 | ||
806 | /* | ||
807 | * get_block_t - Fill in a buffer_head | ||
808 | * An OSD takes care of block allocation so we just fake an allocation by | ||
809 | * putting in the inode's sector_t in the buffer_head. | ||
810 | * TODO: What about the case of create==0 and @iblock does not exist in the | ||
811 | * object? | ||
812 | */ | ||
813 | static int exofs_get_block(struct inode *inode, sector_t iblock, | ||
814 | struct buffer_head *bh_result, int create) | ||
815 | { | ||
816 | map_bh(bh_result, inode->i_sb, iblock); | ||
817 | return 0; | ||
818 | } | ||
819 | |||
820 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | 820 | const struct osd_attr g_attr_logical_length = ATTR_DEF( |
821 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | 821 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
822 | 822 | ||
823 | static int _do_truncate(struct inode *inode) | 823 | static int _do_truncate(struct inode *inode, loff_t newsize) |
824 | { | 824 | { |
825 | struct exofs_i_info *oi = exofs_i(inode); | 825 | struct exofs_i_info *oi = exofs_i(inode); |
826 | loff_t isize = i_size_read(inode); | ||
827 | int ret; | 826 | int ret; |
828 | 827 | ||
829 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 828 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
830 | 829 | ||
831 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | 830 | ret = exofs_oi_truncate(oi, (u64)newsize); |
831 | if (likely(!ret)) | ||
832 | truncate_setsize(inode, newsize); | ||
832 | 833 | ||
833 | ret = exofs_oi_truncate(oi, (u64)isize); | 834 | EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n", |
834 | EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); | 835 | inode->i_ino, newsize, ret); |
835 | return ret; | 836 | return ret; |
836 | } | 837 | } |
837 | 838 | ||
838 | /* | 839 | /* |
839 | * Truncate a file to the specified size - all we have to do is set the size | 840 | * Set inode attributes - update size attribute on OSD if needed, |
840 | * attribute. We make sure the object exists first. | 841 | * otherwise just call generic functions. |
841 | */ | ||
842 | void exofs_truncate(struct inode *inode) | ||
843 | { | ||
844 | struct exofs_i_info *oi = exofs_i(inode); | ||
845 | int ret; | ||
846 | |||
847 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | ||
848 | || S_ISLNK(inode->i_mode))) | ||
849 | return; | ||
850 | if (exofs_inode_is_fast_symlink(inode)) | ||
851 | return; | ||
852 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
853 | return; | ||
854 | |||
855 | /* if we are about to truncate an object, and it hasn't been | ||
856 | * created yet, wait | ||
857 | */ | ||
858 | if (unlikely(wait_obj_created(oi))) | ||
859 | goto fail; | ||
860 | |||
861 | ret = _do_truncate(inode); | ||
862 | if (ret) | ||
863 | goto fail; | ||
864 | |||
865 | out: | ||
866 | mark_inode_dirty(inode); | ||
867 | return; | ||
868 | fail: | ||
869 | make_bad_inode(inode); | ||
870 | goto out; | ||
871 | } | ||
872 | |||
873 | /* | ||
874 | * Set inode attributes - just call generic functions. | ||
875 | */ | 842 | */ |
876 | int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | 843 | int exofs_setattr(struct dentry *dentry, struct iattr *iattr) |
877 | { | 844 | { |
878 | struct inode *inode = dentry->d_inode; | 845 | struct inode *inode = dentry->d_inode; |
879 | int error; | 846 | int error; |
880 | 847 | ||
848 | /* if we are about to modify an object, and it hasn't been | ||
849 | * created yet, wait | ||
850 | */ | ||
851 | error = wait_obj_created(exofs_i(inode)); | ||
852 | if (unlikely(error)) | ||
853 | return error; | ||
854 | |||
881 | error = inode_change_ok(inode, iattr); | 855 | error = inode_change_ok(inode, iattr); |
882 | if (error) | 856 | if (unlikely(error)) |
883 | return error; | 857 | return error; |
884 | 858 | ||
885 | error = inode_setattr(inode, iattr); | 859 | if ((iattr->ia_valid & ATTR_SIZE) && |
886 | return error; | 860 | iattr->ia_size != i_size_read(inode)) { |
861 | error = _do_truncate(inode, iattr->ia_size); | ||
862 | if (unlikely(error)) | ||
863 | return error; | ||
864 | } | ||
865 | |||
866 | setattr_copy(inode, iattr); | ||
867 | mark_inode_dirty(inode); | ||
868 | return 0; | ||
887 | } | 869 | } |
888 | 870 | ||
889 | static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( | 871 | static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( |
@@ -1320,7 +1302,7 @@ static void delete_done(struct exofs_io_state *ios, void *p) | |||
1320 | * from the OSD here. We make sure the object was created before we try and | 1302 | * from the OSD here. We make sure the object was created before we try and |
1321 | * delete it. | 1303 | * delete it. |
1322 | */ | 1304 | */ |
1323 | void exofs_delete_inode(struct inode *inode) | 1305 | void exofs_evict_inode(struct inode *inode) |
1324 | { | 1306 | { |
1325 | struct exofs_i_info *oi = exofs_i(inode); | 1307 | struct exofs_i_info *oi = exofs_i(inode); |
1326 | struct super_block *sb = inode->i_sb; | 1308 | struct super_block *sb = inode->i_sb; |
@@ -1330,30 +1312,27 @@ void exofs_delete_inode(struct inode *inode) | |||
1330 | 1312 | ||
1331 | truncate_inode_pages(&inode->i_data, 0); | 1313 | truncate_inode_pages(&inode->i_data, 0); |
1332 | 1314 | ||
1333 | if (is_bad_inode(inode)) | 1315 | /* TODO: should do better here */ |
1316 | if (inode->i_nlink || is_bad_inode(inode)) | ||
1334 | goto no_delete; | 1317 | goto no_delete; |
1335 | 1318 | ||
1336 | mark_inode_dirty(inode); | ||
1337 | exofs_update_inode(inode, inode_needs_sync(inode)); | ||
1338 | |||
1339 | inode->i_size = 0; | 1319 | inode->i_size = 0; |
1340 | if (inode->i_blocks) | 1320 | end_writeback(inode); |
1341 | exofs_truncate(inode); | ||
1342 | 1321 | ||
1343 | clear_inode(inode); | 1322 | /* if we are deleting an obj that hasn't been created yet, wait */ |
1323 | if (!obj_created(oi)) { | ||
1324 | BUG_ON(!obj_2bcreated(oi)); | ||
1325 | wait_event(oi->i_wq, obj_created(oi)); | ||
1326 | /* ignore the error attempt a remove anyway */ | ||
1327 | } | ||
1344 | 1328 | ||
1329 | /* Now Remove the OSD objects */ | ||
1345 | ret = exofs_get_io_state(&sbi->layout, &ios); | 1330 | ret = exofs_get_io_state(&sbi->layout, &ios); |
1346 | if (unlikely(ret)) { | 1331 | if (unlikely(ret)) { |
1347 | EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); | 1332 | EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); |
1348 | return; | 1333 | return; |
1349 | } | 1334 | } |
1350 | 1335 | ||
1351 | /* if we are deleting an obj that hasn't been created yet, wait */ | ||
1352 | if (!obj_created(oi)) { | ||
1353 | BUG_ON(!obj_2bcreated(oi)); | ||
1354 | wait_event(oi->i_wq, obj_created(oi)); | ||
1355 | } | ||
1356 | |||
1357 | ios->obj.id = exofs_oi_objno(oi); | 1336 | ios->obj.id = exofs_oi_objno(oi); |
1358 | ios->done = delete_done; | 1337 | ios->done = delete_done; |
1359 | ios->private = sbi; | 1338 | ios->private = sbi; |
@@ -1369,5 +1348,5 @@ void exofs_delete_inode(struct inode *inode) | |||
1369 | return; | 1348 | return; |
1370 | 1349 | ||
1371 | no_delete: | 1350 | no_delete: |
1372 | clear_inode(inode); | 1351 | end_writeback(inode); |
1373 | } | 1352 | } |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 5a1960500c8a..6550bf70e41d 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -579,7 +579,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) | |||
579 | } else { | 579 | } else { |
580 | bio = master_dev->bio; | 580 | bio = master_dev->bio; |
581 | /* FIXME: bio_set_dir() */ | 581 | /* FIXME: bio_set_dir() */ |
582 | bio->bi_rw |= (1 << BIO_RW); | 582 | bio->bi_rw |= REQ_WRITE; |
583 | } | 583 | } |
584 | 584 | ||
585 | osd_req_write(or, &ios->obj, per_dev->offset, bio, | 585 | osd_req_write(or, &ios->obj, per_dev->offset, bio, |
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 50cb1745e29c..047e92fa3af8 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -742,7 +742,7 @@ static const struct super_operations exofs_sops = { | |||
742 | .alloc_inode = exofs_alloc_inode, | 742 | .alloc_inode = exofs_alloc_inode, |
743 | .destroy_inode = exofs_destroy_inode, | 743 | .destroy_inode = exofs_destroy_inode, |
744 | .write_inode = exofs_write_inode, | 744 | .write_inode = exofs_write_inode, |
745 | .delete_inode = exofs_delete_inode, | 745 | .evict_inode = exofs_evict_inode, |
746 | .put_super = exofs_put_super, | 746 | .put_super = exofs_put_super, |
747 | .write_super = exofs_write_super, | 747 | .write_super = exofs_write_super, |
748 | .sync_fs = exofs_sync_fs, | 748 | .sync_fs = exofs_sync_fs, |
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index e8766a396776..c6c684b44ea1 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c | |||
@@ -571,7 +571,7 @@ do_more: | |||
571 | error_return: | 571 | error_return: |
572 | brelse(bitmap_bh); | 572 | brelse(bitmap_bh); |
573 | release_blocks(sb, freed); | 573 | release_blocks(sb, freed); |
574 | dquot_free_block(inode, freed); | 574 | dquot_free_block_nodirty(inode, freed); |
575 | } | 575 | } |
576 | 576 | ||
577 | /** | 577 | /** |
@@ -1418,7 +1418,8 @@ allocated: | |||
1418 | 1418 | ||
1419 | *errp = 0; | 1419 | *errp = 0; |
1420 | brelse(bitmap_bh); | 1420 | brelse(bitmap_bh); |
1421 | dquot_free_block(inode, *count-num); | 1421 | dquot_free_block_nodirty(inode, *count-num); |
1422 | mark_inode_dirty(inode); | ||
1422 | *count = num; | 1423 | *count = num; |
1423 | return ret_block; | 1424 | return ret_block; |
1424 | 1425 | ||
@@ -1428,8 +1429,10 @@ out: | |||
1428 | /* | 1429 | /* |
1429 | * Undo the block allocation | 1430 | * Undo the block allocation |
1430 | */ | 1431 | */ |
1431 | if (!performed_allocation) | 1432 | if (!performed_allocation) { |
1432 | dquot_free_block(inode, *count); | 1433 | dquot_free_block_nodirty(inode, *count); |
1434 | mark_inode_dirty(inode); | ||
1435 | } | ||
1433 | brelse(bitmap_bh); | 1436 | brelse(bitmap_bh); |
1434 | return 0; | 1437 | return 0; |
1435 | } | 1438 | } |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 7516957273ed..764109886ec0 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -448,6 +448,11 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) | |||
448 | return res; | 448 | return res; |
449 | } | 449 | } |
450 | 450 | ||
451 | static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len) | ||
452 | { | ||
453 | return __block_write_begin(page, pos, len, ext2_get_block); | ||
454 | } | ||
455 | |||
451 | /* Releases the page */ | 456 | /* Releases the page */ |
452 | void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, | 457 | void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, |
453 | struct page *page, struct inode *inode, int update_times) | 458 | struct page *page, struct inode *inode, int update_times) |
@@ -458,8 +463,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, | |||
458 | int err; | 463 | int err; |
459 | 464 | ||
460 | lock_page(page); | 465 | lock_page(page); |
461 | err = __ext2_write_begin(NULL, page->mapping, pos, len, | 466 | err = ext2_prepare_chunk(page, pos, len); |
462 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
463 | BUG_ON(err); | 467 | BUG_ON(err); |
464 | de->inode = cpu_to_le32(inode->i_ino); | 468 | de->inode = cpu_to_le32(inode->i_ino); |
465 | ext2_set_de_type(de, inode); | 469 | ext2_set_de_type(de, inode); |
@@ -542,8 +546,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
542 | got_it: | 546 | got_it: |
543 | pos = page_offset(page) + | 547 | pos = page_offset(page) + |
544 | (char*)de - (char*)page_address(page); | 548 | (char*)de - (char*)page_address(page); |
545 | err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0, | 549 | err = ext2_prepare_chunk(page, pos, rec_len); |
546 | &page, NULL); | ||
547 | if (err) | 550 | if (err) |
548 | goto out_unlock; | 551 | goto out_unlock; |
549 | if (de->inode) { | 552 | if (de->inode) { |
@@ -576,8 +579,7 @@ out_unlock: | |||
576 | */ | 579 | */ |
577 | int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | 580 | int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) |
578 | { | 581 | { |
579 | struct address_space *mapping = page->mapping; | 582 | struct inode *inode = page->mapping->host; |
580 | struct inode *inode = mapping->host; | ||
581 | char *kaddr = page_address(page); | 583 | char *kaddr = page_address(page); |
582 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); | 584 | unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); |
583 | unsigned to = ((char *)dir - kaddr) + | 585 | unsigned to = ((char *)dir - kaddr) + |
@@ -601,8 +603,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) | |||
601 | from = (char*)pde - (char*)page_address(page); | 603 | from = (char*)pde - (char*)page_address(page); |
602 | pos = page_offset(page) + from; | 604 | pos = page_offset(page) + from; |
603 | lock_page(page); | 605 | lock_page(page); |
604 | err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0, | 606 | err = ext2_prepare_chunk(page, pos, to - from); |
605 | &page, NULL); | ||
606 | BUG_ON(err); | 607 | BUG_ON(err); |
607 | if (pde) | 608 | if (pde) |
608 | pde->rec_len = ext2_rec_len_to_disk(to - from); | 609 | pde->rec_len = ext2_rec_len_to_disk(to - from); |
@@ -621,8 +622,7 @@ out: | |||
621 | */ | 622 | */ |
622 | int ext2_make_empty(struct inode *inode, struct inode *parent) | 623 | int ext2_make_empty(struct inode *inode, struct inode *parent) |
623 | { | 624 | { |
624 | struct address_space *mapping = inode->i_mapping; | 625 | struct page *page = grab_cache_page(inode->i_mapping, 0); |
625 | struct page *page = grab_cache_page(mapping, 0); | ||
626 | unsigned chunk_size = ext2_chunk_size(inode); | 626 | unsigned chunk_size = ext2_chunk_size(inode); |
627 | struct ext2_dir_entry_2 * de; | 627 | struct ext2_dir_entry_2 * de; |
628 | int err; | 628 | int err; |
@@ -631,8 +631,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) | |||
631 | if (!page) | 631 | if (!page) |
632 | return -ENOMEM; | 632 | return -ENOMEM; |
633 | 633 | ||
634 | err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0, | 634 | err = ext2_prepare_chunk(page, 0, chunk_size); |
635 | &page, NULL); | ||
636 | if (err) { | 635 | if (err) { |
637 | unlock_page(page); | 636 | unlock_page(page); |
638 | goto fail; | 637 | goto fail; |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 52b34f1d2738..416daa62242c 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -119,7 +119,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); | |||
119 | /* inode.c */ | 119 | /* inode.c */ |
120 | extern struct inode *ext2_iget (struct super_block *, unsigned long); | 120 | extern struct inode *ext2_iget (struct super_block *, unsigned long); |
121 | extern int ext2_write_inode (struct inode *, struct writeback_control *); | 121 | extern int ext2_write_inode (struct inode *, struct writeback_control *); |
122 | extern void ext2_delete_inode (struct inode *); | 122 | extern void ext2_evict_inode(struct inode *); |
123 | extern int ext2_sync_inode (struct inode *); | 123 | extern int ext2_sync_inode (struct inode *); |
124 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); | 124 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); |
125 | extern int ext2_setattr (struct dentry *, struct iattr *); | 125 | extern int ext2_setattr (struct dentry *, struct iattr *); |
@@ -127,9 +127,6 @@ extern void ext2_set_inode_flags(struct inode *inode); | |||
127 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 127 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
128 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 128 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
129 | u64 start, u64 len); | 129 | u64 start, u64 len); |
130 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | ||
131 | loff_t pos, unsigned len, unsigned flags, | ||
132 | struct page **pagep, void **fsdata); | ||
133 | 130 | ||
134 | /* ioctl.c */ | 131 | /* ioctl.c */ |
135 | extern long ext2_ioctl(struct file *, unsigned int, unsigned long); | 132 | extern long ext2_ioctl(struct file *, unsigned int, unsigned long); |
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 938dbc739d00..ad70479aabff 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -118,19 +118,14 @@ void ext2_free_inode (struct inode * inode) | |||
118 | * Note: we must free any quota before locking the superblock, | 118 | * Note: we must free any quota before locking the superblock, |
119 | * as writing the quota to disk may need the lock as well. | 119 | * as writing the quota to disk may need the lock as well. |
120 | */ | 120 | */ |
121 | if (!is_bad_inode(inode)) { | 121 | /* Quota is already initialized in iput() */ |
122 | /* Quota is already initialized in iput() */ | 122 | ext2_xattr_delete_inode(inode); |
123 | ext2_xattr_delete_inode(inode); | 123 | dquot_free_inode(inode); |
124 | dquot_free_inode(inode); | 124 | dquot_drop(inode); |
125 | dquot_drop(inode); | ||
126 | } | ||
127 | 125 | ||
128 | es = EXT2_SB(sb)->s_es; | 126 | es = EXT2_SB(sb)->s_es; |
129 | is_directory = S_ISDIR(inode->i_mode); | 127 | is_directory = S_ISDIR(inode->i_mode); |
130 | 128 | ||
131 | /* Do this BEFORE marking the inode not in use or returning an error */ | ||
132 | clear_inode (inode); | ||
133 | |||
134 | if (ino < EXT2_FIRST_INO(sb) || | 129 | if (ino < EXT2_FIRST_INO(sb) || |
135 | ino > le32_to_cpu(es->s_inodes_count)) { | 130 | ino > le32_to_cpu(es->s_inodes_count)) { |
136 | ext2_error (sb, "ext2_free_inode", | 131 | ext2_error (sb, "ext2_free_inode", |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 3675088cb88c..940c96168868 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -69,26 +69,42 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to) | |||
69 | /* | 69 | /* |
70 | * Called at the last iput() if i_nlink is zero. | 70 | * Called at the last iput() if i_nlink is zero. |
71 | */ | 71 | */ |
72 | void ext2_delete_inode (struct inode * inode) | 72 | void ext2_evict_inode(struct inode * inode) |
73 | { | 73 | { |
74 | if (!is_bad_inode(inode)) | 74 | struct ext2_block_alloc_info *rsv; |
75 | int want_delete = 0; | ||
76 | |||
77 | if (!inode->i_nlink && !is_bad_inode(inode)) { | ||
78 | want_delete = 1; | ||
75 | dquot_initialize(inode); | 79 | dquot_initialize(inode); |
80 | } else { | ||
81 | dquot_drop(inode); | ||
82 | } | ||
83 | |||
76 | truncate_inode_pages(&inode->i_data, 0); | 84 | truncate_inode_pages(&inode->i_data, 0); |
77 | 85 | ||
78 | if (is_bad_inode(inode)) | 86 | if (want_delete) { |
79 | goto no_delete; | 87 | /* set dtime */ |
80 | EXT2_I(inode)->i_dtime = get_seconds(); | 88 | EXT2_I(inode)->i_dtime = get_seconds(); |
81 | mark_inode_dirty(inode); | 89 | mark_inode_dirty(inode); |
82 | __ext2_write_inode(inode, inode_needs_sync(inode)); | 90 | __ext2_write_inode(inode, inode_needs_sync(inode)); |
91 | /* truncate to 0 */ | ||
92 | inode->i_size = 0; | ||
93 | if (inode->i_blocks) | ||
94 | ext2_truncate_blocks(inode, 0); | ||
95 | } | ||
83 | 96 | ||
84 | inode->i_size = 0; | 97 | invalidate_inode_buffers(inode); |
85 | if (inode->i_blocks) | 98 | end_writeback(inode); |
86 | ext2_truncate_blocks(inode, 0); | ||
87 | ext2_free_inode (inode); | ||
88 | 99 | ||
89 | return; | 100 | ext2_discard_reservation(inode); |
90 | no_delete: | 101 | rsv = EXT2_I(inode)->i_block_alloc_info; |
91 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 102 | EXT2_I(inode)->i_block_alloc_info = NULL; |
103 | if (unlikely(rsv)) | ||
104 | kfree(rsv); | ||
105 | |||
106 | if (want_delete) | ||
107 | ext2_free_inode(inode); | ||
92 | } | 108 | } |
93 | 109 | ||
94 | typedef struct { | 110 | typedef struct { |
@@ -423,6 +439,8 @@ static int ext2_alloc_blocks(struct inode *inode, | |||
423 | failed_out: | 439 | failed_out: |
424 | for (i = 0; i <index; i++) | 440 | for (i = 0; i <index; i++) |
425 | ext2_free_blocks(inode, new_blocks[i], 1); | 441 | ext2_free_blocks(inode, new_blocks[i], 1); |
442 | if (index) | ||
443 | mark_inode_dirty(inode); | ||
426 | return ret; | 444 | return ret; |
427 | } | 445 | } |
428 | 446 | ||
@@ -765,14 +783,6 @@ ext2_readpages(struct file *file, struct address_space *mapping, | |||
765 | return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); | 783 | return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); |
766 | } | 784 | } |
767 | 785 | ||
768 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | ||
769 | loff_t pos, unsigned len, unsigned flags, | ||
770 | struct page **pagep, void **fsdata) | ||
771 | { | ||
772 | return block_write_begin_newtrunc(file, mapping, pos, len, flags, | ||
773 | pagep, fsdata, ext2_get_block); | ||
774 | } | ||
775 | |||
776 | static int | 786 | static int |
777 | ext2_write_begin(struct file *file, struct address_space *mapping, | 787 | ext2_write_begin(struct file *file, struct address_space *mapping, |
778 | loff_t pos, unsigned len, unsigned flags, | 788 | loff_t pos, unsigned len, unsigned flags, |
@@ -780,8 +790,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping, | |||
780 | { | 790 | { |
781 | int ret; | 791 | int ret; |
782 | 792 | ||
783 | *pagep = NULL; | 793 | ret = block_write_begin(mapping, pos, len, flags, pagep, |
784 | ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata); | 794 | ext2_get_block); |
785 | if (ret < 0) | 795 | if (ret < 0) |
786 | ext2_write_failed(mapping, pos + len); | 796 | ext2_write_failed(mapping, pos + len); |
787 | return ret; | 797 | return ret; |
@@ -806,13 +816,8 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping, | |||
806 | { | 816 | { |
807 | int ret; | 817 | int ret; |
808 | 818 | ||
809 | /* | 819 | ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, |
810 | * Dir-in-pagecache still uses ext2_write_begin. Would have to rework | 820 | ext2_get_block); |
811 | * directory handling code to pass around offsets rather than struct | ||
812 | * pages in order to make this work easily. | ||
813 | */ | ||
814 | ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep, | ||
815 | fsdata, ext2_get_block); | ||
816 | if (ret < 0) | 821 | if (ret < 0) |
817 | ext2_write_failed(mapping, pos + len); | 822 | ext2_write_failed(mapping, pos + len); |
818 | return ret; | 823 | return ret; |
@@ -838,7 +843,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
838 | struct inode *inode = mapping->host; | 843 | struct inode *inode = mapping->host; |
839 | ssize_t ret; | 844 | ssize_t ret; |
840 | 845 | ||
841 | ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, | 846 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, |
842 | iov, offset, nr_segs, ext2_get_block, NULL); | 847 | iov, offset, nr_segs, ext2_get_block, NULL); |
843 | if (ret < 0 && (rw & WRITE)) | 848 | if (ret < 0 && (rw & WRITE)) |
844 | ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); | 849 | ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); |
@@ -1006,8 +1011,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) | |||
1006 | else if (block_to_free == nr - count) | 1011 | else if (block_to_free == nr - count) |
1007 | count++; | 1012 | count++; |
1008 | else { | 1013 | else { |
1009 | mark_inode_dirty(inode); | ||
1010 | ext2_free_blocks (inode, block_to_free, count); | 1014 | ext2_free_blocks (inode, block_to_free, count); |
1015 | mark_inode_dirty(inode); | ||
1011 | free_this: | 1016 | free_this: |
1012 | block_to_free = nr; | 1017 | block_to_free = nr; |
1013 | count = 1; | 1018 | count = 1; |
@@ -1015,8 +1020,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) | |||
1015 | } | 1020 | } |
1016 | } | 1021 | } |
1017 | if (count > 0) { | 1022 | if (count > 0) { |
1018 | mark_inode_dirty(inode); | ||
1019 | ext2_free_blocks (inode, block_to_free, count); | 1023 | ext2_free_blocks (inode, block_to_free, count); |
1024 | mark_inode_dirty(inode); | ||
1020 | } | 1025 | } |
1021 | } | 1026 | } |
1022 | 1027 | ||
@@ -1169,15 +1174,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset) | |||
1169 | __ext2_truncate_blocks(inode, offset); | 1174 | __ext2_truncate_blocks(inode, offset); |
1170 | } | 1175 | } |
1171 | 1176 | ||
1172 | int ext2_setsize(struct inode *inode, loff_t newsize) | 1177 | static int ext2_setsize(struct inode *inode, loff_t newsize) |
1173 | { | 1178 | { |
1174 | loff_t oldsize; | ||
1175 | int error; | 1179 | int error; |
1176 | 1180 | ||
1177 | error = inode_newsize_ok(inode, newsize); | ||
1178 | if (error) | ||
1179 | return error; | ||
1180 | |||
1181 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1181 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
1182 | S_ISLNK(inode->i_mode))) | 1182 | S_ISLNK(inode->i_mode))) |
1183 | return -EINVAL; | 1183 | return -EINVAL; |
@@ -1197,10 +1197,7 @@ int ext2_setsize(struct inode *inode, loff_t newsize) | |||
1197 | if (error) | 1197 | if (error) |
1198 | return error; | 1198 | return error; |
1199 | 1199 | ||
1200 | oldsize = inode->i_size; | 1200 | truncate_setsize(inode, newsize); |
1201 | i_size_write(inode, newsize); | ||
1202 | truncate_pagecache(inode, oldsize, newsize); | ||
1203 | |||
1204 | __ext2_truncate_blocks(inode, newsize); | 1201 | __ext2_truncate_blocks(inode, newsize); |
1205 | 1202 | ||
1206 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 1203 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
@@ -1557,7 +1554,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) | |||
1557 | if (error) | 1554 | if (error) |
1558 | return error; | 1555 | return error; |
1559 | } | 1556 | } |
1560 | generic_setattr(inode, iattr); | 1557 | setattr_copy(inode, iattr); |
1561 | if (iattr->ia_valid & ATTR_MODE) | 1558 | if (iattr->ia_valid & ATTR_MODE) |
1562 | error = ext2_acl_chmod(inode); | 1559 | error = ext2_acl_chmod(inode); |
1563 | mark_inode_dirty(inode); | 1560 | mark_inode_dirty(inode); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7ff43f4a59cd..1ec602673ea8 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -195,17 +195,6 @@ static void destroy_inodecache(void) | |||
195 | kmem_cache_destroy(ext2_inode_cachep); | 195 | kmem_cache_destroy(ext2_inode_cachep); |
196 | } | 196 | } |
197 | 197 | ||
198 | static void ext2_clear_inode(struct inode *inode) | ||
199 | { | ||
200 | struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; | ||
201 | |||
202 | dquot_drop(inode); | ||
203 | ext2_discard_reservation(inode); | ||
204 | EXT2_I(inode)->i_block_alloc_info = NULL; | ||
205 | if (unlikely(rsv)) | ||
206 | kfree(rsv); | ||
207 | } | ||
208 | |||
209 | static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) | 198 | static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) |
210 | { | 199 | { |
211 | struct super_block *sb = vfs->mnt_sb; | 200 | struct super_block *sb = vfs->mnt_sb; |
@@ -299,13 +288,12 @@ static const struct super_operations ext2_sops = { | |||
299 | .alloc_inode = ext2_alloc_inode, | 288 | .alloc_inode = ext2_alloc_inode, |
300 | .destroy_inode = ext2_destroy_inode, | 289 | .destroy_inode = ext2_destroy_inode, |
301 | .write_inode = ext2_write_inode, | 290 | .write_inode = ext2_write_inode, |
302 | .delete_inode = ext2_delete_inode, | 291 | .evict_inode = ext2_evict_inode, |
303 | .put_super = ext2_put_super, | 292 | .put_super = ext2_put_super, |
304 | .write_super = ext2_write_super, | 293 | .write_super = ext2_write_super, |
305 | .sync_fs = ext2_sync_fs, | 294 | .sync_fs = ext2_sync_fs, |
306 | .statfs = ext2_statfs, | 295 | .statfs = ext2_statfs, |
307 | .remount_fs = ext2_remount, | 296 | .remount_fs = ext2_remount, |
308 | .clear_inode = ext2_clear_inode, | ||
309 | .show_options = ext2_show_options, | 297 | .show_options = ext2_show_options, |
310 | #ifdef CONFIG_QUOTA | 298 | #ifdef CONFIG_QUOTA |
311 | .quota_read = ext2_quota_read, | 299 | .quota_read = ext2_quota_read, |
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 7c3915780b19..8c29ae15129e 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
@@ -674,6 +674,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, | |||
674 | new_bh = sb_getblk(sb, block); | 674 | new_bh = sb_getblk(sb, block); |
675 | if (!new_bh) { | 675 | if (!new_bh) { |
676 | ext2_free_blocks(inode, block, 1); | 676 | ext2_free_blocks(inode, block, 1); |
677 | mark_inode_dirty(inode); | ||
677 | error = -EIO; | 678 | error = -EIO; |
678 | goto cleanup; | 679 | goto cleanup; |
679 | } | 680 | } |
@@ -703,8 +704,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, | |||
703 | * written (only some dirty data were not) so we just proceed | 704 | * written (only some dirty data were not) so we just proceed |
704 | * as if nothing happened and cleanup the unused block */ | 705 | * as if nothing happened and cleanup the unused block */ |
705 | if (error && error != -ENOSPC) { | 706 | if (error && error != -ENOSPC) { |
706 | if (new_bh && new_bh != old_bh) | 707 | if (new_bh && new_bh != old_bh) { |
707 | dquot_free_block(inode, 1); | 708 | dquot_free_block_nodirty(inode, 1); |
709 | mark_inode_dirty(inode); | ||
710 | } | ||
708 | goto cleanup; | 711 | goto cleanup; |
709 | } | 712 | } |
710 | } else | 713 | } else |
@@ -727,6 +730,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, | |||
727 | mb_cache_entry_free(ce); | 730 | mb_cache_entry_free(ce); |
728 | ea_bdebug(old_bh, "freeing"); | 731 | ea_bdebug(old_bh, "freeing"); |
729 | ext2_free_blocks(inode, old_bh->b_blocknr, 1); | 732 | ext2_free_blocks(inode, old_bh->b_blocknr, 1); |
733 | mark_inode_dirty(inode); | ||
730 | /* We let our caller release old_bh, so we | 734 | /* We let our caller release old_bh, so we |
731 | * need to duplicate the buffer before. */ | 735 | * need to duplicate the buffer before. */ |
732 | get_bh(old_bh); | 736 | get_bh(old_bh); |
@@ -736,7 +740,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, | |||
736 | le32_add_cpu(&HDR(old_bh)->h_refcount, -1); | 740 | le32_add_cpu(&HDR(old_bh)->h_refcount, -1); |
737 | if (ce) | 741 | if (ce) |
738 | mb_cache_entry_release(ce); | 742 | mb_cache_entry_release(ce); |
739 | dquot_free_block(inode, 1); | 743 | dquot_free_block_nodirty(inode, 1); |
744 | mark_inode_dirty(inode); | ||
740 | mark_buffer_dirty(old_bh); | 745 | mark_buffer_dirty(old_bh); |
741 | ea_bdebug(old_bh, "refcount now=%d", | 746 | ea_bdebug(old_bh, "refcount now=%d", |
742 | le32_to_cpu(HDR(old_bh)->h_refcount)); | 747 | le32_to_cpu(HDR(old_bh)->h_refcount)); |
@@ -799,7 +804,7 @@ ext2_xattr_delete_inode(struct inode *inode) | |||
799 | mark_buffer_dirty(bh); | 804 | mark_buffer_dirty(bh); |
800 | if (IS_SYNC(inode)) | 805 | if (IS_SYNC(inode)) |
801 | sync_dirty_buffer(bh); | 806 | sync_dirty_buffer(bh); |
802 | dquot_free_block(inode, 1); | 807 | dquot_free_block_nodirty(inode, 1); |
803 | } | 808 | } |
804 | EXT2_I(inode)->i_file_acl = 0; | 809 | EXT2_I(inode)->i_file_acl = 0; |
805 | 810 | ||
@@ -838,7 +843,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh) | |||
838 | ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); | 843 | ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); |
839 | if (!ce) | 844 | if (!ce) |
840 | return -ENOMEM; | 845 | return -ENOMEM; |
841 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); | 846 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); |
842 | if (error) { | 847 | if (error) { |
843 | mb_cache_entry_free(ce); | 848 | mb_cache_entry_free(ce); |
844 | if (error == -EBUSY) { | 849 | if (error == -EBUSY) { |
@@ -912,8 +917,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) | |||
912 | return NULL; /* never share */ | 917 | return NULL; /* never share */ |
913 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | 918 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); |
914 | again: | 919 | again: |
915 | ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, | 920 | ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev, |
916 | inode->i_sb->s_bdev, hash); | 921 | hash); |
917 | while (ce) { | 922 | while (ce) { |
918 | struct buffer_head *bh; | 923 | struct buffer_head *bh; |
919 | 924 | ||
@@ -945,7 +950,7 @@ again: | |||
945 | unlock_buffer(bh); | 950 | unlock_buffer(bh); |
946 | brelse(bh); | 951 | brelse(bh); |
947 | } | 952 | } |
948 | ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); | 953 | ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); |
949 | } | 954 | } |
950 | return NULL; | 955 | return NULL; |
951 | } | 956 | } |
@@ -1021,9 +1026,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, | |||
1021 | int __init | 1026 | int __init |
1022 | init_ext2_xattr(void) | 1027 | init_ext2_xattr(void) |
1023 | { | 1028 | { |
1024 | ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, | 1029 | ext2_xattr_cache = mb_cache_create("ext2_xattr", 6); |
1025 | sizeof(struct mb_cache_entry) + | ||
1026 | sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); | ||
1027 | if (!ext2_xattr_cache) | 1030 | if (!ext2_xattr_cache) |
1028 | return -ENOMEM; | 1031 | return -ENOMEM; |
1029 | return 0; | 1032 | return 0; |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index 522b15498f45..e8c6ba0e4a3e 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -31,6 +31,7 @@ config EXT3_FS | |||
31 | config EXT3_DEFAULTS_TO_ORDERED | 31 | config EXT3_DEFAULTS_TO_ORDERED |
32 | bool "Default to 'data=ordered' in ext3" | 32 | bool "Default to 'data=ordered' in ext3" |
33 | depends on EXT3_FS | 33 | depends on EXT3_FS |
34 | default y | ||
34 | help | 35 | help |
35 | The journal mode options for ext3 have different tradeoffs | 36 | The journal mode options for ext3 have different tradeoffs |
36 | between when data is guaranteed to be on disk and | 37 | between when data is guaranteed to be on disk and |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 498021eb88fb..4ab72db3559e 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -119,20 +119,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) | |||
119 | ino = inode->i_ino; | 119 | ino = inode->i_ino; |
120 | ext3_debug ("freeing inode %lu\n", ino); | 120 | ext3_debug ("freeing inode %lu\n", ino); |
121 | 121 | ||
122 | /* | ||
123 | * Note: we must free any quota before locking the superblock, | ||
124 | * as writing the quota to disk may need the lock as well. | ||
125 | */ | ||
126 | dquot_initialize(inode); | ||
127 | ext3_xattr_delete_inode(handle, inode); | ||
128 | dquot_free_inode(inode); | ||
129 | dquot_drop(inode); | ||
130 | |||
131 | is_directory = S_ISDIR(inode->i_mode); | 122 | is_directory = S_ISDIR(inode->i_mode); |
132 | 123 | ||
133 | /* Do this BEFORE marking the inode not in use or returning an error */ | ||
134 | clear_inode (inode); | ||
135 | |||
136 | es = EXT3_SB(sb)->s_es; | 124 | es = EXT3_SB(sb)->s_es; |
137 | if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 125 | if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
138 | ext3_error (sb, "ext3_free_inode", | 126 | ext3_error (sb, "ext3_free_inode", |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 735f0190ec2a..5e0faf4cda79 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -190,18 +190,28 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode) | |||
190 | } | 190 | } |
191 | 191 | ||
192 | /* | 192 | /* |
193 | * Called at the last iput() if i_nlink is zero. | 193 | * Called at inode eviction from icache |
194 | */ | 194 | */ |
195 | void ext3_delete_inode (struct inode * inode) | 195 | void ext3_evict_inode (struct inode *inode) |
196 | { | 196 | { |
197 | struct ext3_block_alloc_info *rsv; | ||
197 | handle_t *handle; | 198 | handle_t *handle; |
199 | int want_delete = 0; | ||
198 | 200 | ||
199 | if (!is_bad_inode(inode)) | 201 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
200 | dquot_initialize(inode); | 202 | dquot_initialize(inode); |
203 | want_delete = 1; | ||
204 | } | ||
201 | 205 | ||
202 | truncate_inode_pages(&inode->i_data, 0); | 206 | truncate_inode_pages(&inode->i_data, 0); |
203 | 207 | ||
204 | if (is_bad_inode(inode)) | 208 | ext3_discard_reservation(inode); |
209 | rsv = EXT3_I(inode)->i_block_alloc_info; | ||
210 | EXT3_I(inode)->i_block_alloc_info = NULL; | ||
211 | if (unlikely(rsv)) | ||
212 | kfree(rsv); | ||
213 | |||
214 | if (!want_delete) | ||
205 | goto no_delete; | 215 | goto no_delete; |
206 | 216 | ||
207 | handle = start_transaction(inode); | 217 | handle = start_transaction(inode); |
@@ -238,15 +248,22 @@ void ext3_delete_inode (struct inode * inode) | |||
238 | * having errors), but we can't free the inode if the mark_dirty | 248 | * having errors), but we can't free the inode if the mark_dirty |
239 | * fails. | 249 | * fails. |
240 | */ | 250 | */ |
241 | if (ext3_mark_inode_dirty(handle, inode)) | 251 | if (ext3_mark_inode_dirty(handle, inode)) { |
242 | /* If that failed, just do the required in-core inode clear. */ | 252 | /* If that failed, just dquot_drop() and be done with that */ |
243 | clear_inode(inode); | 253 | dquot_drop(inode); |
244 | else | 254 | end_writeback(inode); |
255 | } else { | ||
256 | ext3_xattr_delete_inode(handle, inode); | ||
257 | dquot_free_inode(inode); | ||
258 | dquot_drop(inode); | ||
259 | end_writeback(inode); | ||
245 | ext3_free_inode(handle, inode); | 260 | ext3_free_inode(handle, inode); |
261 | } | ||
246 | ext3_journal_stop(handle); | 262 | ext3_journal_stop(handle); |
247 | return; | 263 | return; |
248 | no_delete: | 264 | no_delete: |
249 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 265 | end_writeback(inode); |
266 | dquot_drop(inode); | ||
250 | } | 267 | } |
251 | 268 | ||
252 | typedef struct { | 269 | typedef struct { |
@@ -1149,9 +1166,25 @@ static int walk_page_buffers( handle_t *handle, | |||
1149 | static int do_journal_get_write_access(handle_t *handle, | 1166 | static int do_journal_get_write_access(handle_t *handle, |
1150 | struct buffer_head *bh) | 1167 | struct buffer_head *bh) |
1151 | { | 1168 | { |
1169 | int dirty = buffer_dirty(bh); | ||
1170 | int ret; | ||
1171 | |||
1152 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1172 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1153 | return 0; | 1173 | return 0; |
1154 | return ext3_journal_get_write_access(handle, bh); | 1174 | /* |
1175 | * __block_prepare_write() could have dirtied some buffers. Clean | ||
1176 | * the dirty bit as jbd2_journal_get_write_access() could complain | ||
1177 | * otherwise about fs integrity issues. Setting of the dirty bit | ||
1178 | * by __block_prepare_write() isn't a real problem here as we clear | ||
1179 | * the bit before releasing a page lock and thus writeback cannot | ||
1180 | * ever write the buffer. | ||
1181 | */ | ||
1182 | if (dirty) | ||
1183 | clear_buffer_dirty(bh); | ||
1184 | ret = ext3_journal_get_write_access(handle, bh); | ||
1185 | if (!ret && dirty) | ||
1186 | ret = ext3_journal_dirty_metadata(handle, bh); | ||
1187 | return ret; | ||
1155 | } | 1188 | } |
1156 | 1189 | ||
1157 | /* | 1190 | /* |
@@ -1196,8 +1229,7 @@ retry: | |||
1196 | ret = PTR_ERR(handle); | 1229 | ret = PTR_ERR(handle); |
1197 | goto out; | 1230 | goto out; |
1198 | } | 1231 | } |
1199 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 1232 | ret = __block_write_begin(page, pos, len, ext3_get_block); |
1200 | ext3_get_block); | ||
1201 | if (ret) | 1233 | if (ret) |
1202 | goto write_begin_failed; | 1234 | goto write_begin_failed; |
1203 | 1235 | ||
@@ -1625,10 +1657,7 @@ static int ext3_writeback_writepage(struct page *page, | |||
1625 | goto out_fail; | 1657 | goto out_fail; |
1626 | } | 1658 | } |
1627 | 1659 | ||
1628 | if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) | 1660 | ret = block_write_full_page(page, ext3_get_block, wbc); |
1629 | ret = nobh_writepage(page, ext3_get_block, wbc); | ||
1630 | else | ||
1631 | ret = block_write_full_page(page, ext3_get_block, wbc); | ||
1632 | 1661 | ||
1633 | err = ext3_journal_stop(handle); | 1662 | err = ext3_journal_stop(handle); |
1634 | if (!ret) | 1663 | if (!ret) |
@@ -1785,6 +1814,17 @@ retry: | |||
1785 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 1814 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
1786 | offset, nr_segs, | 1815 | offset, nr_segs, |
1787 | ext3_get_block, NULL); | 1816 | ext3_get_block, NULL); |
1817 | /* | ||
1818 | * In case of error extending write may have instantiated a few | ||
1819 | * blocks outside i_size. Trim these off again. | ||
1820 | */ | ||
1821 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
1822 | loff_t isize = i_size_read(inode); | ||
1823 | loff_t end = offset + iov_length(iov, nr_segs); | ||
1824 | |||
1825 | if (end > isize) | ||
1826 | vmtruncate(inode, isize); | ||
1827 | } | ||
1788 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1828 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
1789 | goto retry; | 1829 | goto retry; |
1790 | 1830 | ||
@@ -1922,17 +1962,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
1922 | length = blocksize - (offset & (blocksize - 1)); | 1962 | length = blocksize - (offset & (blocksize - 1)); |
1923 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 1963 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
1924 | 1964 | ||
1925 | /* | ||
1926 | * For "nobh" option, we can only work if we don't need to | ||
1927 | * read-in the page - otherwise we create buffers to do the IO. | ||
1928 | */ | ||
1929 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | ||
1930 | ext3_should_writeback_data(inode) && PageUptodate(page)) { | ||
1931 | zero_user(page, offset, length); | ||
1932 | set_page_dirty(page); | ||
1933 | goto unlock; | ||
1934 | } | ||
1935 | |||
1936 | if (!page_has_buffers(page)) | 1965 | if (!page_has_buffers(page)) |
1937 | create_empty_buffers(page, blocksize, 0); | 1966 | create_empty_buffers(page, blocksize, 0); |
1938 | 1967 | ||
@@ -2284,27 +2313,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2284 | depth); | 2313 | depth); |
2285 | 2314 | ||
2286 | /* | 2315 | /* |
2287 | * We've probably journalled the indirect block several | ||
2288 | * times during the truncate. But it's no longer | ||
2289 | * needed and we now drop it from the transaction via | ||
2290 | * journal_revoke(). | ||
2291 | * | ||
2292 | * That's easy if it's exclusively part of this | ||
2293 | * transaction. But if it's part of the committing | ||
2294 | * transaction then journal_forget() will simply | ||
2295 | * brelse() it. That means that if the underlying | ||
2296 | * block is reallocated in ext3_get_block(), | ||
2297 | * unmap_underlying_metadata() will find this block | ||
2298 | * and will try to get rid of it. damn, damn. | ||
2299 | * | ||
2300 | * If this block has already been committed to the | ||
2301 | * journal, a revoke record will be written. And | ||
2302 | * revoke records must be emitted *before* clearing | ||
2303 | * this block's bit in the bitmaps. | ||
2304 | */ | ||
2305 | ext3_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
2306 | |||
2307 | /* | ||
2308 | * Everything below this this pointer has been | 2316 | * Everything below this this pointer has been |
2309 | * released. Now let this top-of-subtree go. | 2317 | * released. Now let this top-of-subtree go. |
2310 | * | 2318 | * |
@@ -2327,6 +2335,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, | |||
2327 | truncate_restart_transaction(handle, inode); | 2335 | truncate_restart_transaction(handle, inode); |
2328 | } | 2336 | } |
2329 | 2337 | ||
2338 | /* | ||
2339 | * We've probably journalled the indirect block several | ||
2340 | * times during the truncate. But it's no longer | ||
2341 | * needed and we now drop it from the transaction via | ||
2342 | * journal_revoke(). | ||
2343 | * | ||
2344 | * That's easy if it's exclusively part of this | ||
2345 | * transaction. But if it's part of the committing | ||
2346 | * transaction then journal_forget() will simply | ||
2347 | * brelse() it. That means that if the underlying | ||
2348 | * block is reallocated in ext3_get_block(), | ||
2349 | * unmap_underlying_metadata() will find this block | ||
2350 | * and will try to get rid of it. damn, damn. Thus | ||
2351 | * we don't allow a block to be reallocated until | ||
2352 | * a transaction freeing it has fully committed. | ||
2353 | * | ||
2354 | * We also have to make sure journal replay after a | ||
2355 | * crash does not overwrite non-journaled data blocks | ||
2356 | * with old metadata when the block got reallocated for | ||
2357 | * data. Thus we have to store a revoke record for a | ||
2358 | * block in the same transaction in which we free the | ||
2359 | * block. | ||
2360 | */ | ||
2361 | ext3_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
2362 | |||
2330 | ext3_free_blocks(handle, inode, nr, 1); | 2363 | ext3_free_blocks(handle, inode, nr, 1); |
2331 | 2364 | ||
2332 | if (parent_bh) { | 2365 | if (parent_bh) { |
@@ -2554,7 +2587,7 @@ out_stop: | |||
2554 | * If this was a simple ftruncate(), and the file will remain alive | 2587 | * If this was a simple ftruncate(), and the file will remain alive |
2555 | * then we need to clear up the orphan record which we created above. | 2588 | * then we need to clear up the orphan record which we created above. |
2556 | * However, if this was a real unlink then we were called by | 2589 | * However, if this was a real unlink then we were called by |
2557 | * ext3_delete_inode(), and we allow that function to clean up the | 2590 | * ext3_evict_inode(), and we allow that function to clean up the |
2558 | * orphan info for us. | 2591 | * orphan info for us. |
2559 | */ | 2592 | */ |
2560 | if (inode->i_nlink) | 2593 | if (inode->i_nlink) |
@@ -3198,9 +3231,17 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) | |||
3198 | ext3_journal_stop(handle); | 3231 | ext3_journal_stop(handle); |
3199 | } | 3232 | } |
3200 | 3233 | ||
3201 | rc = inode_setattr(inode, attr); | 3234 | if ((attr->ia_valid & ATTR_SIZE) && |
3235 | attr->ia_size != i_size_read(inode)) { | ||
3236 | rc = vmtruncate(inode, attr->ia_size); | ||
3237 | if (rc) | ||
3238 | goto err_out; | ||
3239 | } | ||
3240 | |||
3241 | setattr_copy(inode, attr); | ||
3242 | mark_inode_dirty(inode); | ||
3202 | 3243 | ||
3203 | if (!rc && (ia_valid & ATTR_MODE)) | 3244 | if (ia_valid & ATTR_MODE) |
3204 | rc = ext3_acl_chmod(inode); | 3245 | rc = ext3_acl_chmod(inode); |
3205 | 3246 | ||
3206 | err_out: | 3247 | err_out: |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ee184084ca42..2b35ddb70d65 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -1447,7 +1447,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, | |||
1447 | struct inode *inode) | 1447 | struct inode *inode) |
1448 | { | 1448 | { |
1449 | struct inode *dir = dentry->d_parent->d_inode; | 1449 | struct inode *dir = dentry->d_parent->d_inode; |
1450 | unsigned long offset; | ||
1451 | struct buffer_head * bh; | 1450 | struct buffer_head * bh; |
1452 | struct ext3_dir_entry_2 *de; | 1451 | struct ext3_dir_entry_2 *de; |
1453 | struct super_block * sb; | 1452 | struct super_block * sb; |
@@ -1469,7 +1468,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, | |||
1469 | ext3_mark_inode_dirty(handle, dir); | 1468 | ext3_mark_inode_dirty(handle, dir); |
1470 | } | 1469 | } |
1471 | blocks = dir->i_size >> sb->s_blocksize_bits; | 1470 | blocks = dir->i_size >> sb->s_blocksize_bits; |
1472 | for (block = 0, offset = 0; block < blocks; block++) { | 1471 | for (block = 0; block < blocks; block++) { |
1473 | bh = ext3_bread(handle, dir, block, 0, &retval); | 1472 | bh = ext3_bread(handle, dir, block, 0, &retval); |
1474 | if(!bh) | 1473 | if(!bh) |
1475 | return retval; | 1474 | return retval; |
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 54351ac7cef9..0ccd7b12b73c 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c | |||
@@ -964,7 +964,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, | |||
964 | ext3_fsblk_t n_blocks_count) | 964 | ext3_fsblk_t n_blocks_count) |
965 | { | 965 | { |
966 | ext3_fsblk_t o_blocks_count; | 966 | ext3_fsblk_t o_blocks_count; |
967 | unsigned long o_groups_count; | ||
968 | ext3_grpblk_t last; | 967 | ext3_grpblk_t last; |
969 | ext3_grpblk_t add; | 968 | ext3_grpblk_t add; |
970 | struct buffer_head * bh; | 969 | struct buffer_head * bh; |
@@ -976,7 +975,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, | |||
976 | * yet: we're going to revalidate es->s_blocks_count after | 975 | * yet: we're going to revalidate es->s_blocks_count after |
977 | * taking the s_resize_lock below. */ | 976 | * taking the s_resize_lock below. */ |
978 | o_blocks_count = le32_to_cpu(es->s_blocks_count); | 977 | o_blocks_count = le32_to_cpu(es->s_blocks_count); |
979 | o_groups_count = EXT3_SB(sb)->s_groups_count; | ||
980 | 978 | ||
981 | if (test_opt(sb, DEBUG)) | 979 | if (test_opt(sb, DEBUG)) |
982 | printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", | 980 | printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6c953bb255e7..5dbf4dba03c4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -527,17 +527,6 @@ static void destroy_inodecache(void) | |||
527 | kmem_cache_destroy(ext3_inode_cachep); | 527 | kmem_cache_destroy(ext3_inode_cachep); |
528 | } | 528 | } |
529 | 529 | ||
530 | static void ext3_clear_inode(struct inode *inode) | ||
531 | { | ||
532 | struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; | ||
533 | |||
534 | dquot_drop(inode); | ||
535 | ext3_discard_reservation(inode); | ||
536 | EXT3_I(inode)->i_block_alloc_info = NULL; | ||
537 | if (unlikely(rsv)) | ||
538 | kfree(rsv); | ||
539 | } | ||
540 | |||
541 | static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) | 530 | static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) |
542 | { | 531 | { |
543 | #if defined(CONFIG_QUOTA) | 532 | #if defined(CONFIG_QUOTA) |
@@ -661,9 +650,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
661 | */ | 650 | */ |
662 | seq_puts(seq, ",barrier="); | 651 | seq_puts(seq, ",barrier="); |
663 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); | 652 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); |
664 | if (test_opt(sb, NOBH)) | ||
665 | seq_puts(seq, ",nobh"); | ||
666 | |||
667 | seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); | 653 | seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); |
668 | if (test_opt(sb, DATA_ERR_ABORT)) | 654 | if (test_opt(sb, DATA_ERR_ABORT)) |
669 | seq_puts(seq, ",data_err=abort"); | 655 | seq_puts(seq, ",data_err=abort"); |
@@ -783,14 +769,13 @@ static const struct super_operations ext3_sops = { | |||
783 | .destroy_inode = ext3_destroy_inode, | 769 | .destroy_inode = ext3_destroy_inode, |
784 | .write_inode = ext3_write_inode, | 770 | .write_inode = ext3_write_inode, |
785 | .dirty_inode = ext3_dirty_inode, | 771 | .dirty_inode = ext3_dirty_inode, |
786 | .delete_inode = ext3_delete_inode, | 772 | .evict_inode = ext3_evict_inode, |
787 | .put_super = ext3_put_super, | 773 | .put_super = ext3_put_super, |
788 | .sync_fs = ext3_sync_fs, | 774 | .sync_fs = ext3_sync_fs, |
789 | .freeze_fs = ext3_freeze, | 775 | .freeze_fs = ext3_freeze, |
790 | .unfreeze_fs = ext3_unfreeze, | 776 | .unfreeze_fs = ext3_unfreeze, |
791 | .statfs = ext3_statfs, | 777 | .statfs = ext3_statfs, |
792 | .remount_fs = ext3_remount, | 778 | .remount_fs = ext3_remount, |
793 | .clear_inode = ext3_clear_inode, | ||
794 | .show_options = ext3_show_options, | 779 | .show_options = ext3_show_options, |
795 | #ifdef CONFIG_QUOTA | 780 | #ifdef CONFIG_QUOTA |
796 | .quota_read = ext3_quota_read, | 781 | .quota_read = ext3_quota_read, |
@@ -1255,10 +1240,12 @@ set_qf_format: | |||
1255 | *n_blocks_count = option; | 1240 | *n_blocks_count = option; |
1256 | break; | 1241 | break; |
1257 | case Opt_nobh: | 1242 | case Opt_nobh: |
1258 | set_opt(sbi->s_mount_opt, NOBH); | 1243 | ext3_msg(sb, KERN_WARNING, |
1244 | "warning: ignoring deprecated nobh option"); | ||
1259 | break; | 1245 | break; |
1260 | case Opt_bh: | 1246 | case Opt_bh: |
1261 | clear_opt(sbi->s_mount_opt, NOBH); | 1247 | ext3_msg(sb, KERN_WARNING, |
1248 | "warning: ignoring deprecated bh option"); | ||
1262 | break; | 1249 | break; |
1263 | default: | 1250 | default: |
1264 | ext3_msg(sb, KERN_ERR, | 1251 | ext3_msg(sb, KERN_ERR, |
@@ -2001,14 +1988,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
2001 | break; | 1988 | break; |
2002 | } | 1989 | } |
2003 | 1990 | ||
2004 | if (test_opt(sb, NOBH)) { | ||
2005 | if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { | ||
2006 | ext3_msg(sb, KERN_WARNING, | ||
2007 | "warning: ignoring nobh option - " | ||
2008 | "it is supported only with writeback mode"); | ||
2009 | clear_opt(sbi->s_mount_opt, NOBH); | ||
2010 | } | ||
2011 | } | ||
2012 | /* | 1991 | /* |
2013 | * The journal_load will have done any necessary log recovery, | 1992 | * The journal_load will have done any necessary log recovery, |
2014 | * so we can safely mount the rest of the filesystem now. | 1993 | * so we can safely mount the rest of the filesystem now. |
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 71fb8d65e54c..e69dc6dfaa89 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c | |||
@@ -1139,7 +1139,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh) | |||
1139 | ea_bdebug(bh, "out of memory"); | 1139 | ea_bdebug(bh, "out of memory"); |
1140 | return; | 1140 | return; |
1141 | } | 1141 | } |
1142 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); | 1142 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); |
1143 | if (error) { | 1143 | if (error) { |
1144 | mb_cache_entry_free(ce); | 1144 | mb_cache_entry_free(ce); |
1145 | if (error == -EBUSY) { | 1145 | if (error == -EBUSY) { |
@@ -1211,8 +1211,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header, | |||
1211 | return NULL; /* never share */ | 1211 | return NULL; /* never share */ |
1212 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | 1212 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); |
1213 | again: | 1213 | again: |
1214 | ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, | 1214 | ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev, |
1215 | inode->i_sb->s_bdev, hash); | 1215 | hash); |
1216 | while (ce) { | 1216 | while (ce) { |
1217 | struct buffer_head *bh; | 1217 | struct buffer_head *bh; |
1218 | 1218 | ||
@@ -1237,7 +1237,7 @@ again: | |||
1237 | return bh; | 1237 | return bh; |
1238 | } | 1238 | } |
1239 | brelse(bh); | 1239 | brelse(bh); |
1240 | ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); | 1240 | ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); |
1241 | } | 1241 | } |
1242 | return NULL; | 1242 | return NULL; |
1243 | } | 1243 | } |
@@ -1313,9 +1313,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header, | |||
1313 | int __init | 1313 | int __init |
1314 | init_ext3_xattr(void) | 1314 | init_ext3_xattr(void) |
1315 | { | 1315 | { |
1316 | ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, | 1316 | ext3_xattr_cache = mb_cache_create("ext3_xattr", 6); |
1317 | sizeof(struct mb_cache_entry) + | ||
1318 | sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); | ||
1319 | if (!ext3_xattr_cache) | 1317 | if (!ext3_xattr_cache) |
1320 | return -ENOMEM; | 1318 | return -ENOMEM; |
1321 | return 0; | 1319 | return 0; |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index feaf498feaa6..5e2ed4504ead 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, | |||
204 | return error; | 204 | return error; |
205 | else { | 205 | else { |
206 | inode->i_mode = mode; | 206 | inode->i_mode = mode; |
207 | inode->i_ctime = ext4_current_time(inode); | ||
207 | ext4_mark_inode_dirty(handle, inode); | 208 | ext4_mark_inode_dirty(handle, inode); |
208 | if (error == 0) | 209 | if (error == 0) |
209 | acl = NULL; | 210 | acl = NULL; |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 95b7594c76f9..bd30799a43ed 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -377,14 +377,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
377 | ext4_grpblk_t bit; | 377 | ext4_grpblk_t bit; |
378 | unsigned int i; | 378 | unsigned int i; |
379 | struct ext4_group_desc *desc; | 379 | struct ext4_group_desc *desc; |
380 | struct ext4_super_block *es; | 380 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
381 | struct ext4_sb_info *sbi; | ||
382 | int err = 0, ret, blk_free_count; | 381 | int err = 0, ret, blk_free_count; |
383 | ext4_grpblk_t blocks_freed; | 382 | ext4_grpblk_t blocks_freed; |
384 | struct ext4_group_info *grp; | 383 | struct ext4_group_info *grp; |
385 | 384 | ||
386 | sbi = EXT4_SB(sb); | ||
387 | es = sbi->s_es; | ||
388 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | 385 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); |
389 | 386 | ||
390 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 387 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -477,7 +474,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | |||
477 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | 474 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); |
478 | if (!err) | 475 | if (!err) |
479 | err = ret; | 476 | err = ret; |
480 | sb->s_dirt = 1; | ||
481 | 477 | ||
482 | error_return: | 478 | error_return: |
483 | brelse(bitmap_bh); | 479 | brelse(bitmap_bh); |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 5b6973fbf1bd..3db5084db9bd 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, | |||
229 | 229 | ||
230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || | 230 | if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || |
231 | (start_blk + count < start_blk) || | 231 | (start_blk + count < start_blk) || |
232 | (start_blk + count > ext4_blocks_count(sbi->s_es))) | 232 | (start_blk + count > ext4_blocks_count(sbi->s_es))) { |
233 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
233 | return 0; | 234 | return 0; |
235 | } | ||
234 | while (n) { | 236 | while (n) { |
235 | entry = rb_entry(n, struct ext4_system_zone, node); | 237 | entry = rb_entry(n, struct ext4_system_zone, node); |
236 | if (start_blk + count - 1 < entry->start_blk) | 238 | if (start_blk + count - 1 < entry->start_blk) |
237 | n = n->rb_left; | 239 | n = n->rb_left; |
238 | else if (start_blk >= (entry->start_blk + entry->count)) | 240 | else if (start_blk >= (entry->start_blk + entry->count)) |
239 | n = n->rb_right; | 241 | n = n->rb_right; |
240 | else | 242 | else { |
243 | sbi->s_es->s_last_error_block = cpu_to_le64(start_blk); | ||
241 | return 0; | 244 | return 0; |
245 | } | ||
242 | } | 246 | } |
243 | return 1; | 247 | return 1; |
244 | } | 248 | } |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ea5e6cb7e2a5..374510f72baa 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
61 | } | 61 | } |
62 | 62 | ||
63 | 63 | ||
64 | int ext4_check_dir_entry(const char *function, struct inode *dir, | 64 | int __ext4_check_dir_entry(const char *function, unsigned int line, |
65 | struct ext4_dir_entry_2 *de, | 65 | struct inode *dir, |
66 | struct buffer_head *bh, | 66 | struct ext4_dir_entry_2 *de, |
67 | unsigned int offset) | 67 | struct buffer_head *bh, |
68 | unsigned int offset) | ||
68 | { | 69 | { |
69 | const char *error_msg = NULL; | 70 | const char *error_msg = NULL; |
70 | const int rlen = ext4_rec_len_from_disk(de->rec_len, | 71 | const int rlen = ext4_rec_len_from_disk(de->rec_len, |
@@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, | |||
83 | error_msg = "inode out of bounds"; | 84 | error_msg = "inode out of bounds"; |
84 | 85 | ||
85 | if (error_msg != NULL) | 86 | if (error_msg != NULL) |
86 | ext4_error_inode(function, dir, | 87 | ext4_error_inode(dir, function, line, bh->b_blocknr, |
87 | "bad entry in directory: %s - block=%llu" | 88 | "bad entry in directory: %s - " |
88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", | 89 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", |
89 | error_msg, (unsigned long long) bh->b_blocknr, | 90 | error_msg, (unsigned) (offset%bh->b_size), offset, |
90 | (unsigned) (offset%bh->b_size), offset, | ||
91 | le32_to_cpu(de->inode), | 91 | le32_to_cpu(de->inode), |
92 | rlen, de->name_len); | 92 | rlen, de->name_len); |
93 | return error_msg == NULL ? 1 : 0; | 93 | return error_msg == NULL ? 1 : 0; |
@@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp, | |||
121 | * We don't set the inode dirty flag since it's not | 121 | * We don't set the inode dirty flag since it's not |
122 | * critical that it get flushed back to the disk. | 122 | * critical that it get flushed back to the disk. |
123 | */ | 123 | */ |
124 | ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); | 124 | ext4_clear_inode_flag(filp->f_path.dentry->d_inode, |
125 | EXT4_INODE_INDEX); | ||
125 | } | 126 | } |
126 | stored = 0; | 127 | stored = 0; |
127 | offset = filp->f_pos & (sb->s_blocksize - 1); | 128 | offset = filp->f_pos & (sb->s_blocksize - 1); |
@@ -193,7 +194,7 @@ revalidate: | |||
193 | while (!error && filp->f_pos < inode->i_size | 194 | while (!error && filp->f_pos < inode->i_size |
194 | && offset < sb->s_blocksize) { | 195 | && offset < sb->s_blocksize) { |
195 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 196 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
196 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, | 197 | if (!ext4_check_dir_entry(inode, de, |
197 | bh, offset)) { | 198 | bh, offset)) { |
198 | /* | 199 | /* |
199 | * On error, skip the f_pos to the next block | 200 | * On error, skip the f_pos to the next block |
@@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
343 | struct dir_private_info *info; | 344 | struct dir_private_info *info; |
344 | int len; | 345 | int len; |
345 | 346 | ||
346 | info = (struct dir_private_info *) dir_file->private_data; | 347 | info = dir_file->private_data; |
347 | p = &info->root.rb_node; | 348 | p = &info->root.rb_node; |
348 | 349 | ||
349 | /* Create and allocate the fname structure */ | 350 | /* Create and allocate the fname structure */ |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 19a4de57128a..889ec9d5e6ad 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -57,10 +57,13 @@ | |||
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
60 | ext4_error_inode(__func__, (inode), (fmt), ## a) | 60 | ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a) |
61 | |||
62 | #define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \ | ||
63 | ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a) | ||
61 | 64 | ||
62 | #define EXT4_ERROR_FILE(file, fmt, a...) \ | 65 | #define EXT4_ERROR_FILE(file, fmt, a...) \ |
63 | ext4_error_file(__func__, (file), (fmt), ## a) | 66 | ext4_error_file(__func__, __LINE__, (file), (fmt), ## a) |
64 | 67 | ||
65 | /* data type for block offset of block group */ | 68 | /* data type for block offset of block group */ |
66 | typedef int ext4_grpblk_t; | 69 | typedef int ext4_grpblk_t; |
@@ -167,13 +170,15 @@ struct mpage_da_data { | |||
167 | }; | 170 | }; |
168 | #define EXT4_IO_UNWRITTEN 0x1 | 171 | #define EXT4_IO_UNWRITTEN 0x1 |
169 | typedef struct ext4_io_end { | 172 | typedef struct ext4_io_end { |
170 | struct list_head list; /* per-file finished AIO list */ | 173 | struct list_head list; /* per-file finished IO list */ |
171 | struct inode *inode; /* file being written to */ | 174 | struct inode *inode; /* file being written to */ |
172 | unsigned int flag; /* unwritten or not */ | 175 | unsigned int flag; /* unwritten or not */ |
173 | struct page *page; /* page struct for buffer write */ | 176 | struct page *page; /* page struct for buffer write */ |
174 | loff_t offset; /* offset in the file */ | 177 | loff_t offset; /* offset in the file */ |
175 | ssize_t size; /* size of the extent */ | 178 | ssize_t size; /* size of the extent */ |
176 | struct work_struct work; /* data work queue */ | 179 | struct work_struct work; /* data work queue */ |
180 | struct kiocb *iocb; /* iocb struct for AIO */ | ||
181 | int result; /* error value for AIO */ | ||
177 | } ext4_io_end_t; | 182 | } ext4_io_end_t; |
178 | 183 | ||
179 | /* | 184 | /* |
@@ -460,7 +465,7 @@ struct ext4_new_group_data { | |||
460 | }; | 465 | }; |
461 | 466 | ||
462 | /* | 467 | /* |
463 | * Flags used by ext4_get_blocks() | 468 | * Flags used by ext4_map_blocks() |
464 | */ | 469 | */ |
465 | /* Allocate any needed blocks and/or convert an unitialized | 470 | /* Allocate any needed blocks and/or convert an unitialized |
466 | extent to be an initialized ext4 */ | 471 | extent to be an initialized ext4 */ |
@@ -873,7 +878,6 @@ struct ext4_inode_info { | |||
873 | #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ | 878 | #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ |
874 | #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ | 879 | #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ |
875 | #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ | 880 | #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ |
876 | #define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ | ||
877 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ | 881 | #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ |
878 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 882 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
879 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 883 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
@@ -982,7 +986,7 @@ struct ext4_super_block { | |||
982 | __le32 s_last_orphan; /* start of list of inodes to delete */ | 986 | __le32 s_last_orphan; /* start of list of inodes to delete */ |
983 | __le32 s_hash_seed[4]; /* HTREE hash seed */ | 987 | __le32 s_hash_seed[4]; /* HTREE hash seed */ |
984 | __u8 s_def_hash_version; /* Default hash version to use */ | 988 | __u8 s_def_hash_version; /* Default hash version to use */ |
985 | __u8 s_reserved_char_pad; | 989 | __u8 s_jnl_backup_type; |
986 | __le16 s_desc_size; /* size of group descriptor */ | 990 | __le16 s_desc_size; /* size of group descriptor */ |
987 | /*100*/ __le32 s_default_mount_opts; | 991 | /*100*/ __le32 s_default_mount_opts; |
988 | __le32 s_first_meta_bg; /* First metablock block group */ | 992 | __le32 s_first_meta_bg; /* First metablock block group */ |
@@ -1000,12 +1004,34 @@ struct ext4_super_block { | |||
1000 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 1004 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
1001 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 1005 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
1002 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ | 1006 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
1003 | __u8 s_reserved_char_pad2; | 1007 | __u8 s_reserved_char_pad; |
1004 | __le16 s_reserved_pad; | 1008 | __le16 s_reserved_pad; |
1005 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ | 1009 | __le64 s_kbytes_written; /* nr of lifetime kilobytes written */ |
1006 | __u32 s_reserved[160]; /* Padding to the end of the block */ | 1010 | __le32 s_snapshot_inum; /* Inode number of active snapshot */ |
1011 | __le32 s_snapshot_id; /* sequential ID of active snapshot */ | ||
1012 | __le64 s_snapshot_r_blocks_count; /* reserved blocks for active | ||
1013 | snapshot's future use */ | ||
1014 | __le32 s_snapshot_list; /* inode number of the head of the | ||
1015 | on-disk snapshot list */ | ||
1016 | #define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count) | ||
1017 | __le32 s_error_count; /* number of fs errors */ | ||
1018 | __le32 s_first_error_time; /* first time an error happened */ | ||
1019 | __le32 s_first_error_ino; /* inode involved in first error */ | ||
1020 | __le64 s_first_error_block; /* block involved of first error */ | ||
1021 | __u8 s_first_error_func[32]; /* function where the error happened */ | ||
1022 | __le32 s_first_error_line; /* line number where error happened */ | ||
1023 | __le32 s_last_error_time; /* most recent time of an error */ | ||
1024 | __le32 s_last_error_ino; /* inode involved in last error */ | ||
1025 | __le32 s_last_error_line; /* line number where error happened */ | ||
1026 | __le64 s_last_error_block; /* block involved of last error */ | ||
1027 | __u8 s_last_error_func[32]; /* function where the error happened */ | ||
1028 | #define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) | ||
1029 | __u8 s_mount_opts[64]; | ||
1030 | __le32 s_reserved[112]; /* Padding to the end of the block */ | ||
1007 | }; | 1031 | }; |
1008 | 1032 | ||
1033 | #define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) | ||
1034 | |||
1009 | #ifdef __KERNEL__ | 1035 | #ifdef __KERNEL__ |
1010 | 1036 | ||
1011 | /* | 1037 | /* |
@@ -1143,6 +1169,9 @@ struct ext4_sb_info { | |||
1143 | 1169 | ||
1144 | /* workqueue for dio unwritten */ | 1170 | /* workqueue for dio unwritten */ |
1145 | struct workqueue_struct *dio_unwritten_wq; | 1171 | struct workqueue_struct *dio_unwritten_wq; |
1172 | |||
1173 | /* timer for periodic error stats printing */ | ||
1174 | struct timer_list s_err_report; | ||
1146 | }; | 1175 | }; |
1147 | 1176 | ||
1148 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1177 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1313,6 +1342,10 @@ EXT4_INODE_BIT_FNS(state, state_flags) | |||
1313 | #define EXT4_DEFM_JMODE_DATA 0x0020 | 1342 | #define EXT4_DEFM_JMODE_DATA 0x0020 |
1314 | #define EXT4_DEFM_JMODE_ORDERED 0x0040 | 1343 | #define EXT4_DEFM_JMODE_ORDERED 0x0040 |
1315 | #define EXT4_DEFM_JMODE_WBACK 0x0060 | 1344 | #define EXT4_DEFM_JMODE_WBACK 0x0060 |
1345 | #define EXT4_DEFM_NOBARRIER 0x0100 | ||
1346 | #define EXT4_DEFM_BLOCK_VALIDITY 0x0200 | ||
1347 | #define EXT4_DEFM_DISCARD 0x0400 | ||
1348 | #define EXT4_DEFM_NODELALLOC 0x0800 | ||
1316 | 1349 | ||
1317 | /* | 1350 | /* |
1318 | * Default journal batch times | 1351 | * Default journal batch times |
@@ -1379,6 +1412,43 @@ struct ext4_dir_entry_2 { | |||
1379 | #define EXT4_MAX_REC_LEN ((1<<16)-1) | 1412 | #define EXT4_MAX_REC_LEN ((1<<16)-1) |
1380 | 1413 | ||
1381 | /* | 1414 | /* |
1415 | * If we ever get support for fs block sizes > page_size, we'll need | ||
1416 | * to remove the #if statements in the next two functions... | ||
1417 | */ | ||
1418 | static inline unsigned int | ||
1419 | ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | ||
1420 | { | ||
1421 | unsigned len = le16_to_cpu(dlen); | ||
1422 | |||
1423 | #if (PAGE_CACHE_SIZE >= 65536) | ||
1424 | if (len == EXT4_MAX_REC_LEN || len == 0) | ||
1425 | return blocksize; | ||
1426 | return (len & 65532) | ((len & 3) << 16); | ||
1427 | #else | ||
1428 | return len; | ||
1429 | #endif | ||
1430 | } | ||
1431 | |||
1432 | static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | ||
1433 | { | ||
1434 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | ||
1435 | BUG(); | ||
1436 | #if (PAGE_CACHE_SIZE >= 65536) | ||
1437 | if (len < 65536) | ||
1438 | return cpu_to_le16(len); | ||
1439 | if (len == blocksize) { | ||
1440 | if (blocksize == 65536) | ||
1441 | return cpu_to_le16(EXT4_MAX_REC_LEN); | ||
1442 | else | ||
1443 | return cpu_to_le16(0); | ||
1444 | } | ||
1445 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | ||
1446 | #else | ||
1447 | return cpu_to_le16(len); | ||
1448 | #endif | ||
1449 | } | ||
1450 | |||
1451 | /* | ||
1382 | * Hash Tree Directory indexing | 1452 | * Hash Tree Directory indexing |
1383 | * (c) Daniel Phillips, 2001 | 1453 | * (c) Daniel Phillips, 2001 |
1384 | */ | 1454 | */ |
@@ -1510,9 +1580,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb, | |||
1510 | ext4_init_block_bitmap(sb, NULL, group, desc) | 1580 | ext4_init_block_bitmap(sb, NULL, group, desc) |
1511 | 1581 | ||
1512 | /* dir.c */ | 1582 | /* dir.c */ |
1513 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1583 | extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, |
1514 | struct ext4_dir_entry_2 *, | 1584 | struct ext4_dir_entry_2 *, |
1515 | struct buffer_head *, unsigned int); | 1585 | struct buffer_head *, unsigned int); |
1586 | #define ext4_check_dir_entry(dir, de, bh, offset) \ | ||
1587 | __ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset)) | ||
1516 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | 1588 | extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, |
1517 | __u32 minor_hash, | 1589 | __u32 minor_hash, |
1518 | struct ext4_dir_entry_2 *dirent); | 1590 | struct ext4_dir_entry_2 *dirent); |
@@ -1571,7 +1643,8 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *); | |||
1571 | extern int ext4_setattr(struct dentry *, struct iattr *); | 1643 | extern int ext4_setattr(struct dentry *, struct iattr *); |
1572 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1644 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1573 | struct kstat *stat); | 1645 | struct kstat *stat); |
1574 | extern void ext4_delete_inode(struct inode *); | 1646 | extern void ext4_evict_inode(struct inode *); |
1647 | extern void ext4_clear_inode(struct inode *); | ||
1575 | extern int ext4_sync_inode(handle_t *, struct inode *); | 1648 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1576 | extern void ext4_dirty_inode(struct inode *); | 1649 | extern void ext4_dirty_inode(struct inode *); |
1577 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1650 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
@@ -1601,8 +1674,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); | |||
1601 | extern int ext4_ext_migrate(struct inode *); | 1674 | extern int ext4_ext_migrate(struct inode *); |
1602 | 1675 | ||
1603 | /* namei.c */ | 1676 | /* namei.c */ |
1604 | extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize); | ||
1605 | extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize); | ||
1606 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1677 | extern int ext4_orphan_add(handle_t *, struct inode *); |
1607 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1678 | extern int ext4_orphan_del(handle_t *, struct inode *); |
1608 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | 1679 | extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, |
@@ -1616,25 +1687,38 @@ extern int ext4_group_extend(struct super_block *sb, | |||
1616 | ext4_fsblk_t n_blocks_count); | 1687 | ext4_fsblk_t n_blocks_count); |
1617 | 1688 | ||
1618 | /* super.c */ | 1689 | /* super.c */ |
1619 | extern void __ext4_error(struct super_block *, const char *, const char *, ...) | 1690 | extern void __ext4_error(struct super_block *, const char *, unsigned int, |
1620 | __attribute__ ((format (printf, 3, 4))); | 1691 | const char *, ...) |
1621 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) | 1692 | __attribute__ ((format (printf, 4, 5))); |
1622 | extern void ext4_error_inode(const char *, struct inode *, const char *, ...) | 1693 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, \ |
1623 | __attribute__ ((format (printf, 3, 4))); | 1694 | __LINE__, ## message) |
1624 | extern void ext4_error_file(const char *, struct file *, const char *, ...) | 1695 | extern void ext4_error_inode(struct inode *, const char *, unsigned int, |
1625 | __attribute__ ((format (printf, 3, 4))); | 1696 | ext4_fsblk_t, const char *, ...) |
1626 | extern void __ext4_std_error(struct super_block *, const char *, int); | 1697 | __attribute__ ((format (printf, 5, 6))); |
1627 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) | 1698 | extern void ext4_error_file(struct file *, const char *, unsigned int, |
1628 | __attribute__ ((format (printf, 3, 4))); | 1699 | const char *, ...) |
1629 | extern void __ext4_warning(struct super_block *, const char *, | 1700 | __attribute__ ((format (printf, 4, 5))); |
1701 | extern void __ext4_std_error(struct super_block *, const char *, | ||
1702 | unsigned int, int); | ||
1703 | extern void __ext4_abort(struct super_block *, const char *, unsigned int, | ||
1704 | const char *, ...) | ||
1705 | __attribute__ ((format (printf, 4, 5))); | ||
1706 | #define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ | ||
1707 | __LINE__, ## message) | ||
1708 | extern void __ext4_warning(struct super_block *, const char *, unsigned int, | ||
1630 | const char *, ...) | 1709 | const char *, ...) |
1631 | __attribute__ ((format (printf, 3, 4))); | 1710 | __attribute__ ((format (printf, 4, 5))); |
1632 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) | 1711 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \ |
1712 | __LINE__, ## message) | ||
1633 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | 1713 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) |
1634 | __attribute__ ((format (printf, 3, 4))); | 1714 | __attribute__ ((format (printf, 3, 4))); |
1635 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, | 1715 | extern void __ext4_grp_locked_error(const char *, unsigned int, \ |
1636 | const char *, const char *, ...) | 1716 | struct super_block *, ext4_group_t, \ |
1637 | __attribute__ ((format (printf, 4, 5))); | 1717 | unsigned long, ext4_fsblk_t, \ |
1718 | const char *, ...) | ||
1719 | __attribute__ ((format (printf, 7, 8))); | ||
1720 | #define ext4_grp_locked_error(sb, grp, message...) \ | ||
1721 | __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) | ||
1638 | extern void ext4_update_dynamic_rev(struct super_block *sb); | 1722 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1639 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1723 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1640 | __u32 compat); | 1724 | __u32 compat); |
@@ -1768,7 +1852,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi) | |||
1768 | #define ext4_std_error(sb, errno) \ | 1852 | #define ext4_std_error(sb, errno) \ |
1769 | do { \ | 1853 | do { \ |
1770 | if ((errno)) \ | 1854 | if ((errno)) \ |
1771 | __ext4_std_error((sb), __func__, (errno)); \ | 1855 | __ext4_std_error((sb), __func__, __LINE__, (errno)); \ |
1772 | } while (0) | 1856 | } while (0) |
1773 | 1857 | ||
1774 | #ifdef CONFIG_SMP | 1858 | #ifdef CONFIG_SMP |
@@ -1860,6 +1944,12 @@ static inline void ext4_unlock_group(struct super_block *sb, | |||
1860 | spin_unlock(ext4_group_lock_ptr(sb, group)); | 1944 | spin_unlock(ext4_group_lock_ptr(sb, group)); |
1861 | } | 1945 | } |
1862 | 1946 | ||
1947 | static inline void ext4_mark_super_dirty(struct super_block *sb) | ||
1948 | { | ||
1949 | if (EXT4_SB(sb)->s_journal == NULL) | ||
1950 | sb->s_dirt =1; | ||
1951 | } | ||
1952 | |||
1863 | /* | 1953 | /* |
1864 | * Inodes and files operations | 1954 | * Inodes and files operations |
1865 | */ | 1955 | */ |
@@ -1905,9 +1995,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
1905 | ssize_t len); | 1995 | ssize_t len); |
1906 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | 1996 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, |
1907 | struct ext4_map_blocks *map, int flags); | 1997 | struct ext4_map_blocks *map, int flags); |
1908 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, | ||
1909 | sector_t block, unsigned int max_blocks, | ||
1910 | struct buffer_head *bh, int flags); | ||
1911 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1998 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1912 | __u64 start, __u64 len); | 1999 | __u64 start, __u64 len); |
1913 | /* move_extent.c */ | 2000 | /* move_extent.c */ |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 53d2764d71ca..6e272ef6ba96 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -6,29 +6,29 @@ | |||
6 | 6 | ||
7 | #include <trace/events/ext4.h> | 7 | #include <trace/events/ext4.h> |
8 | 8 | ||
9 | int __ext4_journal_get_undo_access(const char *where, handle_t *handle, | 9 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, |
10 | struct buffer_head *bh) | 10 | handle_t *handle, struct buffer_head *bh) |
11 | { | 11 | { |
12 | int err = 0; | 12 | int err = 0; |
13 | 13 | ||
14 | if (ext4_handle_valid(handle)) { | 14 | if (ext4_handle_valid(handle)) { |
15 | err = jbd2_journal_get_undo_access(handle, bh); | 15 | err = jbd2_journal_get_undo_access(handle, bh); |
16 | if (err) | 16 | if (err) |
17 | ext4_journal_abort_handle(where, __func__, bh, | 17 | ext4_journal_abort_handle(where, line, __func__, bh, |
18 | handle, err); | 18 | handle, err); |
19 | } | 19 | } |
20 | return err; | 20 | return err; |
21 | } | 21 | } |
22 | 22 | ||
23 | int __ext4_journal_get_write_access(const char *where, handle_t *handle, | 23 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
24 | struct buffer_head *bh) | 24 | handle_t *handle, struct buffer_head *bh) |
25 | { | 25 | { |
26 | int err = 0; | 26 | int err = 0; |
27 | 27 | ||
28 | if (ext4_handle_valid(handle)) { | 28 | if (ext4_handle_valid(handle)) { |
29 | err = jbd2_journal_get_write_access(handle, bh); | 29 | err = jbd2_journal_get_write_access(handle, bh); |
30 | if (err) | 30 | if (err) |
31 | ext4_journal_abort_handle(where, __func__, bh, | 31 | ext4_journal_abort_handle(where, line, __func__, bh, |
32 | handle, err); | 32 | handle, err); |
33 | } | 33 | } |
34 | return err; | 34 | return err; |
@@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, | |||
46 | * If the handle isn't valid we're not journaling, but we still need to | 46 | * If the handle isn't valid we're not journaling, but we still need to |
47 | * call into ext4_journal_revoke() to put the buffer head. | 47 | * call into ext4_journal_revoke() to put the buffer head. |
48 | */ | 48 | */ |
49 | int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | 49 | int __ext4_forget(const char *where, unsigned int line, handle_t *handle, |
50 | struct inode *inode, struct buffer_head *bh, | 50 | int is_metadata, struct inode *inode, |
51 | ext4_fsblk_t blocknr) | 51 | struct buffer_head *bh, ext4_fsblk_t blocknr) |
52 | { | 52 | { |
53 | int err; | 53 | int err; |
54 | 54 | ||
@@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | |||
79 | BUFFER_TRACE(bh, "call jbd2_journal_forget"); | 79 | BUFFER_TRACE(bh, "call jbd2_journal_forget"); |
80 | err = jbd2_journal_forget(handle, bh); | 80 | err = jbd2_journal_forget(handle, bh); |
81 | if (err) | 81 | if (err) |
82 | ext4_journal_abort_handle(where, __func__, bh, | 82 | ext4_journal_abort_handle(where, line, __func__, |
83 | handle, err); | 83 | bh, handle, err); |
84 | return err; | 84 | return err; |
85 | } | 85 | } |
86 | return 0; | 86 | return 0; |
@@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | |||
92 | BUFFER_TRACE(bh, "call jbd2_journal_revoke"); | 92 | BUFFER_TRACE(bh, "call jbd2_journal_revoke"); |
93 | err = jbd2_journal_revoke(handle, blocknr, bh); | 93 | err = jbd2_journal_revoke(handle, blocknr, bh); |
94 | if (err) { | 94 | if (err) { |
95 | ext4_journal_abort_handle(where, __func__, bh, handle, err); | 95 | ext4_journal_abort_handle(where, line, __func__, |
96 | ext4_abort(inode->i_sb, __func__, | 96 | bh, handle, err); |
97 | __ext4_abort(inode->i_sb, where, line, | ||
97 | "error %d when attempting revoke", err); | 98 | "error %d when attempting revoke", err); |
98 | } | 99 | } |
99 | BUFFER_TRACE(bh, "exit"); | 100 | BUFFER_TRACE(bh, "exit"); |
100 | return err; | 101 | return err; |
101 | } | 102 | } |
102 | 103 | ||
103 | int __ext4_journal_get_create_access(const char *where, | 104 | int __ext4_journal_get_create_access(const char *where, unsigned int line, |
104 | handle_t *handle, struct buffer_head *bh) | 105 | handle_t *handle, struct buffer_head *bh) |
105 | { | 106 | { |
106 | int err = 0; | 107 | int err = 0; |
@@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where, | |||
108 | if (ext4_handle_valid(handle)) { | 109 | if (ext4_handle_valid(handle)) { |
109 | err = jbd2_journal_get_create_access(handle, bh); | 110 | err = jbd2_journal_get_create_access(handle, bh); |
110 | if (err) | 111 | if (err) |
111 | ext4_journal_abort_handle(where, __func__, bh, | 112 | ext4_journal_abort_handle(where, line, __func__, |
112 | handle, err); | 113 | bh, handle, err); |
113 | } | 114 | } |
114 | return err; | 115 | return err; |
115 | } | 116 | } |
116 | 117 | ||
117 | int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | 118 | int __ext4_handle_dirty_metadata(const char *where, unsigned int line, |
118 | struct inode *inode, struct buffer_head *bh) | 119 | handle_t *handle, struct inode *inode, |
120 | struct buffer_head *bh) | ||
119 | { | 121 | { |
120 | int err = 0; | 122 | int err = 0; |
121 | 123 | ||
122 | if (ext4_handle_valid(handle)) { | 124 | if (ext4_handle_valid(handle)) { |
123 | err = jbd2_journal_dirty_metadata(handle, bh); | 125 | err = jbd2_journal_dirty_metadata(handle, bh); |
124 | if (err) | 126 | if (err) |
125 | ext4_journal_abort_handle(where, __func__, bh, | 127 | ext4_journal_abort_handle(where, line, __func__, |
126 | handle, err); | 128 | bh, handle, err); |
127 | } else { | 129 | } else { |
128 | if (inode) | 130 | if (inode) |
129 | mark_buffer_dirty_inode(bh, inode); | 131 | mark_buffer_dirty_inode(bh, inode); |
@@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | |||
132 | if (inode && inode_needs_sync(inode)) { | 134 | if (inode && inode_needs_sync(inode)) { |
133 | sync_dirty_buffer(bh); | 135 | sync_dirty_buffer(bh); |
134 | if (buffer_req(bh) && !buffer_uptodate(bh)) { | 136 | if (buffer_req(bh) && !buffer_uptodate(bh)) { |
135 | ext4_error(inode->i_sb, | 137 | struct ext4_super_block *es; |
136 | "IO error syncing inode, " | 138 | |
137 | "inode=%lu, block=%llu", | 139 | es = EXT4_SB(inode->i_sb)->s_es; |
138 | inode->i_ino, | 140 | es->s_last_error_block = |
139 | (unsigned long long) bh->b_blocknr); | 141 | cpu_to_le64(bh->b_blocknr); |
142 | ext4_error_inode(inode, where, line, | ||
143 | bh->b_blocknr, | ||
144 | "IO error syncing itable block"); | ||
140 | err = -EIO; | 145 | err = -EIO; |
141 | } | 146 | } |
142 | } | 147 | } |
143 | } | 148 | } |
144 | return err; | 149 | return err; |
145 | } | 150 | } |
151 | |||
152 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | ||
153 | handle_t *handle, struct super_block *sb) | ||
154 | { | ||
155 | struct buffer_head *bh = EXT4_SB(sb)->s_sbh; | ||
156 | int err = 0; | ||
157 | |||
158 | if (ext4_handle_valid(handle)) { | ||
159 | err = jbd2_journal_dirty_metadata(handle, bh); | ||
160 | if (err) | ||
161 | ext4_journal_abort_handle(where, line, __func__, | ||
162 | bh, handle, err); | ||
163 | } else | ||
164 | sb->s_dirt = 1; | ||
165 | return err; | ||
166 | } | ||
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dade0c024797..b0bd792c58c5 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); | |||
122 | /* | 122 | /* |
123 | * Wrapper functions with which ext4 calls into JBD. | 123 | * Wrapper functions with which ext4 calls into JBD. |
124 | */ | 124 | */ |
125 | void ext4_journal_abort_handle(const char *caller, const char *err_fn, | 125 | void ext4_journal_abort_handle(const char *caller, unsigned int line, |
126 | const char *err_fn, | ||
126 | struct buffer_head *bh, handle_t *handle, int err); | 127 | struct buffer_head *bh, handle_t *handle, int err); |
127 | 128 | ||
128 | int __ext4_journal_get_undo_access(const char *where, handle_t *handle, | 129 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, |
129 | struct buffer_head *bh); | 130 | handle_t *handle, struct buffer_head *bh); |
130 | 131 | ||
131 | int __ext4_journal_get_write_access(const char *where, handle_t *handle, | 132 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
132 | struct buffer_head *bh); | 133 | handle_t *handle, struct buffer_head *bh); |
133 | 134 | ||
134 | int __ext4_forget(const char *where, handle_t *handle, int is_metadata, | 135 | int __ext4_forget(const char *where, unsigned int line, handle_t *handle, |
135 | struct inode *inode, struct buffer_head *bh, | 136 | int is_metadata, struct inode *inode, |
136 | ext4_fsblk_t blocknr); | 137 | struct buffer_head *bh, ext4_fsblk_t blocknr); |
137 | 138 | ||
138 | int __ext4_journal_get_create_access(const char *where, | 139 | int __ext4_journal_get_create_access(const char *where, unsigned int line, |
139 | handle_t *handle, struct buffer_head *bh); | 140 | handle_t *handle, struct buffer_head *bh); |
140 | 141 | ||
141 | int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, | 142 | int __ext4_handle_dirty_metadata(const char *where, unsigned int line, |
142 | struct inode *inode, struct buffer_head *bh); | 143 | handle_t *handle, struct inode *inode, |
144 | struct buffer_head *bh); | ||
145 | |||
146 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | ||
147 | handle_t *handle, struct super_block *sb); | ||
143 | 148 | ||
144 | #define ext4_journal_get_undo_access(handle, bh) \ | 149 | #define ext4_journal_get_undo_access(handle, bh) \ |
145 | __ext4_journal_get_undo_access(__func__, (handle), (bh)) | 150 | __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh)) |
146 | #define ext4_journal_get_write_access(handle, bh) \ | 151 | #define ext4_journal_get_write_access(handle, bh) \ |
147 | __ext4_journal_get_write_access(__func__, (handle), (bh)) | 152 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) |
148 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ | 153 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ |
149 | __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ | 154 | __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \ |
150 | (block_nr)) | 155 | (bh), (block_nr)) |
151 | #define ext4_journal_get_create_access(handle, bh) \ | 156 | #define ext4_journal_get_create_access(handle, bh) \ |
152 | __ext4_journal_get_create_access(__func__, (handle), (bh)) | 157 | __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh)) |
153 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ | 158 | #define ext4_handle_dirty_metadata(handle, inode, bh) \ |
154 | __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) | 159 | __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ |
160 | (bh)) | ||
161 | #define ext4_handle_dirty_super(handle, sb) \ | ||
162 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) | ||
155 | 163 | ||
156 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); | 164 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); |
157 | int __ext4_journal_stop(const char *where, handle_t *handle); | 165 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); |
158 | 166 | ||
159 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) | 167 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) |
160 | 168 | ||
@@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) | |||
207 | } | 215 | } |
208 | 216 | ||
209 | #define ext4_journal_stop(handle) \ | 217 | #define ext4_journal_stop(handle) \ |
210 | __ext4_journal_stop(__func__, (handle)) | 218 | __ext4_journal_stop(__func__, __LINE__, (handle)) |
211 | 219 | ||
212 | static inline handle_t *ext4_journal_current_handle(void) | 220 | static inline handle_t *ext4_journal_current_handle(void) |
213 | { | 221 | { |
@@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode) | |||
308 | * This function controls whether or not we should try to go down the | 316 | * This function controls whether or not we should try to go down the |
309 | * dioread_nolock code paths, which makes it safe to avoid taking | 317 | * dioread_nolock code paths, which makes it safe to avoid taking |
310 | * i_mutex for direct I/O reads. This only works for extent-based | 318 | * i_mutex for direct I/O reads. This only works for extent-based |
311 | * files, and it doesn't work for nobh or if data journaling is | 319 | * files, and it doesn't work if data journaling is enabled, since the |
312 | * enabled, since the dioread_nolock code uses b_private to pass | 320 | * dioread_nolock code uses b_private to pass information back to the |
313 | * information back to the I/O completion handler, and this conflicts | 321 | * I/O completion handler, and this conflicts with the jbd's use of |
314 | * with the jbd's use of b_private. | 322 | * b_private. |
315 | */ | 323 | */ |
316 | static inline int ext4_should_dioread_nolock(struct inode *inode) | 324 | static inline int ext4_should_dioread_nolock(struct inode *inode) |
317 | { | 325 | { |
318 | if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) | 326 | if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) |
319 | return 0; | 327 | return 0; |
320 | if (test_opt(inode->i_sb, NOBH)) | ||
321 | return 0; | ||
322 | if (!S_ISREG(inode->i_mode)) | 328 | if (!S_ISREG(inode->i_mode)) |
323 | return 0; | 329 | return 0; |
324 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 330 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 377309c1af65..06328d3e5717 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -401,9 +401,9 @@ static int ext4_valid_extent_entries(struct inode *inode, | |||
401 | return 1; | 401 | return 1; |
402 | } | 402 | } |
403 | 403 | ||
404 | static int __ext4_ext_check(const char *function, struct inode *inode, | 404 | static int __ext4_ext_check(const char *function, unsigned int line, |
405 | struct ext4_extent_header *eh, | 405 | struct inode *inode, struct ext4_extent_header *eh, |
406 | int depth) | 406 | int depth) |
407 | { | 407 | { |
408 | const char *error_msg; | 408 | const char *error_msg; |
409 | int max = 0; | 409 | int max = 0; |
@@ -436,7 +436,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode, | |||
436 | return 0; | 436 | return 0; |
437 | 437 | ||
438 | corrupted: | 438 | corrupted: |
439 | ext4_error_inode(function, inode, | 439 | ext4_error_inode(inode, function, line, 0, |
440 | "bad header/extent: %s - magic %x, " | 440 | "bad header/extent: %s - magic %x, " |
441 | "entries %u, max %u(%u), depth %u(%u)", | 441 | "entries %u, max %u(%u), depth %u(%u)", |
442 | error_msg, le16_to_cpu(eh->eh_magic), | 442 | error_msg, le16_to_cpu(eh->eh_magic), |
@@ -447,7 +447,7 @@ corrupted: | |||
447 | } | 447 | } |
448 | 448 | ||
449 | #define ext4_ext_check(inode, eh, depth) \ | 449 | #define ext4_ext_check(inode, eh, depth) \ |
450 | __ext4_ext_check(__func__, inode, eh, depth) | 450 | __ext4_ext_check(__func__, __LINE__, inode, eh, depth) |
451 | 451 | ||
452 | int ext4_ext_check_inode(struct inode *inode) | 452 | int ext4_ext_check_inode(struct inode *inode) |
453 | { | 453 | { |
@@ -1083,7 +1083,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1083 | { | 1083 | { |
1084 | struct ext4_ext_path *curp = path; | 1084 | struct ext4_ext_path *curp = path; |
1085 | struct ext4_extent_header *neh; | 1085 | struct ext4_extent_header *neh; |
1086 | struct ext4_extent_idx *fidx; | ||
1087 | struct buffer_head *bh; | 1086 | struct buffer_head *bh; |
1088 | ext4_fsblk_t newblock; | 1087 | ext4_fsblk_t newblock; |
1089 | int err = 0; | 1088 | int err = 0; |
@@ -1144,10 +1143,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1144 | ext4_idx_store_pblock(curp->p_idx, newblock); | 1143 | ext4_idx_store_pblock(curp->p_idx, newblock); |
1145 | 1144 | ||
1146 | neh = ext_inode_hdr(inode); | 1145 | neh = ext_inode_hdr(inode); |
1147 | fidx = EXT_FIRST_INDEX(neh); | ||
1148 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", | 1146 | ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", |
1149 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), | 1147 | le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), |
1150 | le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); | 1148 | le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), |
1149 | idx_pblock(EXT_FIRST_INDEX(neh))); | ||
1151 | 1150 | ||
1152 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); | 1151 | neh->eh_depth = cpu_to_le16(path->p_depth + 1); |
1153 | err = ext4_ext_dirty(handle, inode, curp); | 1152 | err = ext4_ext_dirty(handle, inode, curp); |
@@ -2937,7 +2936,7 @@ fix_extent_len: | |||
2937 | * One of more index blocks maybe needed if the extent tree grow after | 2936 | * One of more index blocks maybe needed if the extent tree grow after |
2938 | * the unintialized extent split. To prevent ENOSPC occur at the IO | 2937 | * the unintialized extent split. To prevent ENOSPC occur at the IO |
2939 | * complete, we need to split the uninitialized extent before DIO submit | 2938 | * complete, we need to split the uninitialized extent before DIO submit |
2940 | * the IO. The uninitilized extent called at this time will be split | 2939 | * the IO. The uninitialized extent called at this time will be split |
2941 | * into three uninitialized extent(at most). After IO complete, the part | 2940 | * into three uninitialized extent(at most). After IO complete, the part |
2942 | * being filled will be convert to initialized by the end_io callback function | 2941 | * being filled will be convert to initialized by the end_io callback function |
2943 | * via ext4_convert_unwritten_extents(). | 2942 | * via ext4_convert_unwritten_extents(). |
@@ -2954,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2954 | struct ext4_extent *ex1 = NULL; | 2953 | struct ext4_extent *ex1 = NULL; |
2955 | struct ext4_extent *ex2 = NULL; | 2954 | struct ext4_extent *ex2 = NULL; |
2956 | struct ext4_extent *ex3 = NULL; | 2955 | struct ext4_extent *ex3 = NULL; |
2957 | struct ext4_extent_header *eh; | ||
2958 | ext4_lblk_t ee_block, eof_block; | 2956 | ext4_lblk_t ee_block, eof_block; |
2959 | unsigned int allocated, ee_len, depth; | 2957 | unsigned int allocated, ee_len, depth; |
2960 | ext4_fsblk_t newblock; | 2958 | ext4_fsblk_t newblock; |
@@ -2971,7 +2969,6 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2971 | eof_block = map->m_lblk + map->m_len; | 2969 | eof_block = map->m_lblk + map->m_len; |
2972 | 2970 | ||
2973 | depth = ext_depth(inode); | 2971 | depth = ext_depth(inode); |
2974 | eh = path[depth].p_hdr; | ||
2975 | ex = path[depth].p_ext; | 2972 | ex = path[depth].p_ext; |
2976 | ee_block = le32_to_cpu(ex->ee_block); | 2973 | ee_block = le32_to_cpu(ex->ee_block); |
2977 | ee_len = ext4_ext_get_actual_len(ex); | 2974 | ee_len = ext4_ext_get_actual_len(ex); |
@@ -3058,7 +3055,6 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3058 | err = PTR_ERR(path); | 3055 | err = PTR_ERR(path); |
3059 | goto out; | 3056 | goto out; |
3060 | } | 3057 | } |
3061 | eh = path[depth].p_hdr; | ||
3062 | ex = path[depth].p_ext; | 3058 | ex = path[depth].p_ext; |
3063 | if (ex2 != &newex) | 3059 | if (ex2 != &newex) |
3064 | ex2 = ex; | 3060 | ex2 = ex; |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 5313ae4cda2d..ee92b66d4558 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
71 | size_t length = iov_length(iov, nr_segs); | 71 | size_t length = iov_length(iov, nr_segs); |
72 | 72 | ||
73 | if (pos > sbi->s_bitmap_maxbytes) | 73 | if ((pos > sbi->s_bitmap_maxbytes || |
74 | (pos == sbi->s_bitmap_maxbytes && length > 0))) | ||
74 | return -EFBIG; | 75 | return -EFBIG; |
75 | 76 | ||
76 | if (pos + length > sbi->s_bitmap_maxbytes) { | 77 | if (pos + length > sbi->s_bitmap_maxbytes) { |
@@ -123,7 +124,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
123 | if (!IS_ERR(cp)) { | 124 | if (!IS_ERR(cp)) { |
124 | memcpy(sbi->s_es->s_last_mounted, cp, | 125 | memcpy(sbi->s_es->s_last_mounted, cp, |
125 | sizeof(sbi->s_es->s_last_mounted)); | 126 | sizeof(sbi->s_es->s_last_mounted)); |
126 | sb->s_dirt = 1; | 127 | ext4_mark_super_dirty(sb); |
127 | } | 128 | } |
128 | } | 129 | } |
129 | return dquot_file_open(inode, filp); | 130 | return dquot_file_open(inode, filp); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 25c4b3173fd9..45853e0d1f21 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -222,7 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
222 | is_directory = S_ISDIR(inode->i_mode); | 222 | is_directory = S_ISDIR(inode->i_mode); |
223 | 223 | ||
224 | /* Do this BEFORE marking the inode not in use or returning an error */ | 224 | /* Do this BEFORE marking the inode not in use or returning an error */ |
225 | clear_inode(inode); | 225 | ext4_clear_inode(inode); |
226 | 226 | ||
227 | es = EXT4_SB(sb)->s_es; | 227 | es = EXT4_SB(sb)->s_es; |
228 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 228 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
@@ -279,7 +279,7 @@ out: | |||
279 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 279 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
280 | if (!fatal) | 280 | if (!fatal) |
281 | fatal = err; | 281 | fatal = err; |
282 | sb->s_dirt = 1; | 282 | ext4_mark_super_dirty(sb); |
283 | } else | 283 | } else |
284 | ext4_error(sb, "bit already cleared for inode %lu", ino); | 284 | ext4_error(sb, "bit already cleared for inode %lu", ino); |
285 | 285 | ||
@@ -965,7 +965,7 @@ got: | |||
965 | percpu_counter_dec(&sbi->s_freeinodes_counter); | 965 | percpu_counter_dec(&sbi->s_freeinodes_counter); |
966 | if (S_ISDIR(mode)) | 966 | if (S_ISDIR(mode)) |
967 | percpu_counter_inc(&sbi->s_dirs_counter); | 967 | percpu_counter_inc(&sbi->s_dirs_counter); |
968 | sb->s_dirt = 1; | 968 | ext4_mark_super_dirty(sb); |
969 | 969 | ||
970 | if (sbi->s_log_groups_per_flex) { | 970 | if (sbi->s_log_groups_per_flex) { |
971 | flex_group = ext4_flex_group(sbi, group); | 971 | flex_group = ext4_flex_group(sbi, group); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0afc8c1d8cf3..4b8debeb3965 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
167 | /* | 167 | /* |
168 | * Called at the last iput() if i_nlink is zero. | 168 | * Called at the last iput() if i_nlink is zero. |
169 | */ | 169 | */ |
170 | void ext4_delete_inode(struct inode *inode) | 170 | void ext4_evict_inode(struct inode *inode) |
171 | { | 171 | { |
172 | handle_t *handle; | 172 | handle_t *handle; |
173 | int err; | 173 | int err; |
174 | 174 | ||
175 | if (inode->i_nlink) { | ||
176 | truncate_inode_pages(&inode->i_data, 0); | ||
177 | goto no_delete; | ||
178 | } | ||
179 | |||
175 | if (!is_bad_inode(inode)) | 180 | if (!is_bad_inode(inode)) |
176 | dquot_initialize(inode); | 181 | dquot_initialize(inode); |
177 | 182 | ||
@@ -221,6 +226,7 @@ void ext4_delete_inode(struct inode *inode) | |||
221 | "couldn't extend journal (err %d)", err); | 226 | "couldn't extend journal (err %d)", err); |
222 | stop_handle: | 227 | stop_handle: |
223 | ext4_journal_stop(handle); | 228 | ext4_journal_stop(handle); |
229 | ext4_orphan_del(NULL, inode); | ||
224 | goto no_delete; | 230 | goto no_delete; |
225 | } | 231 | } |
226 | } | 232 | } |
@@ -245,13 +251,13 @@ void ext4_delete_inode(struct inode *inode) | |||
245 | */ | 251 | */ |
246 | if (ext4_mark_inode_dirty(handle, inode)) | 252 | if (ext4_mark_inode_dirty(handle, inode)) |
247 | /* If that failed, just do the required in-core inode clear. */ | 253 | /* If that failed, just do the required in-core inode clear. */ |
248 | clear_inode(inode); | 254 | ext4_clear_inode(inode); |
249 | else | 255 | else |
250 | ext4_free_inode(handle, inode); | 256 | ext4_free_inode(handle, inode); |
251 | ext4_journal_stop(handle); | 257 | ext4_journal_stop(handle); |
252 | return; | 258 | return; |
253 | no_delete: | 259 | no_delete: |
254 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 260 | ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ |
255 | } | 261 | } |
256 | 262 | ||
257 | typedef struct { | 263 | typedef struct { |
@@ -337,9 +343,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
337 | return n; | 343 | return n; |
338 | } | 344 | } |
339 | 345 | ||
340 | static int __ext4_check_blockref(const char *function, struct inode *inode, | 346 | static int __ext4_check_blockref(const char *function, unsigned int line, |
347 | struct inode *inode, | ||
341 | __le32 *p, unsigned int max) | 348 | __le32 *p, unsigned int max) |
342 | { | 349 | { |
350 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
343 | __le32 *bref = p; | 351 | __le32 *bref = p; |
344 | unsigned int blk; | 352 | unsigned int blk; |
345 | 353 | ||
@@ -348,8 +356,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
348 | if (blk && | 356 | if (blk && |
349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 357 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
350 | blk, 1))) { | 358 | blk, 1))) { |
351 | ext4_error_inode(function, inode, | 359 | es->s_last_error_block = cpu_to_le64(blk); |
352 | "invalid block reference %u", blk); | 360 | ext4_error_inode(inode, function, line, blk, |
361 | "invalid block"); | ||
353 | return -EIO; | 362 | return -EIO; |
354 | } | 363 | } |
355 | } | 364 | } |
@@ -358,11 +367,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
358 | 367 | ||
359 | 368 | ||
360 | #define ext4_check_indirect_blockref(inode, bh) \ | 369 | #define ext4_check_indirect_blockref(inode, bh) \ |
361 | __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ | 370 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
371 | (__le32 *)(bh)->b_data, \ | ||
362 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) | 372 | EXT4_ADDR_PER_BLOCK((inode)->i_sb)) |
363 | 373 | ||
364 | #define ext4_check_inode_blockref(inode) \ | 374 | #define ext4_check_inode_blockref(inode) \ |
365 | __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ | 375 | __ext4_check_blockref(__func__, __LINE__, inode, \ |
376 | EXT4_I(inode)->i_data, \ | ||
366 | EXT4_NDIR_BLOCKS) | 377 | EXT4_NDIR_BLOCKS) |
367 | 378 | ||
368 | /** | 379 | /** |
@@ -1128,20 +1139,24 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1128 | ext4_discard_preallocations(inode); | 1139 | ext4_discard_preallocations(inode); |
1129 | } | 1140 | } |
1130 | 1141 | ||
1131 | static int check_block_validity(struct inode *inode, const char *func, | 1142 | static int __check_block_validity(struct inode *inode, const char *func, |
1143 | unsigned int line, | ||
1132 | struct ext4_map_blocks *map) | 1144 | struct ext4_map_blocks *map) |
1133 | { | 1145 | { |
1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, | 1146 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1135 | map->m_len)) { | 1147 | map->m_len)) { |
1136 | ext4_error_inode(func, inode, | 1148 | ext4_error_inode(inode, func, line, map->m_pblk, |
1137 | "lblock %lu mapped to illegal pblock %llu " | 1149 | "lblock %lu mapped to illegal pblock " |
1138 | "(length %d)", (unsigned long) map->m_lblk, | 1150 | "(length %d)", (unsigned long) map->m_lblk, |
1139 | map->m_pblk, map->m_len); | 1151 | map->m_len); |
1140 | return -EIO; | 1152 | return -EIO; |
1141 | } | 1153 | } |
1142 | return 0; | 1154 | return 0; |
1143 | } | 1155 | } |
1144 | 1156 | ||
1157 | #define check_block_validity(inode, map) \ | ||
1158 | __check_block_validity((inode), __func__, __LINE__, (map)) | ||
1159 | |||
1145 | /* | 1160 | /* |
1146 | * Return the number of contiguous dirty pages in a given inode | 1161 | * Return the number of contiguous dirty pages in a given inode |
1147 | * starting at page frame idx. | 1162 | * starting at page frame idx. |
@@ -1244,7 +1259,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1244 | up_read((&EXT4_I(inode)->i_data_sem)); | 1259 | up_read((&EXT4_I(inode)->i_data_sem)); |
1245 | 1260 | ||
1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1261 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1247 | int ret = check_block_validity(inode, __func__, map); | 1262 | int ret = check_block_validity(inode, map); |
1248 | if (ret != 0) | 1263 | if (ret != 0) |
1249 | return ret; | 1264 | return ret; |
1250 | } | 1265 | } |
@@ -1324,9 +1339,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
1324 | 1339 | ||
1325 | up_write((&EXT4_I(inode)->i_data_sem)); | 1340 | up_write((&EXT4_I(inode)->i_data_sem)); |
1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 1341 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1327 | int ret = check_block_validity(inode, | 1342 | int ret = check_block_validity(inode, map); |
1328 | "ext4_map_blocks_after_alloc", | ||
1329 | map); | ||
1330 | if (ret != 0) | 1343 | if (ret != 0) |
1331 | return ret; | 1344 | return ret; |
1332 | } | 1345 | } |
@@ -1519,9 +1532,25 @@ static int walk_page_buffers(handle_t *handle, | |||
1519 | static int do_journal_get_write_access(handle_t *handle, | 1532 | static int do_journal_get_write_access(handle_t *handle, |
1520 | struct buffer_head *bh) | 1533 | struct buffer_head *bh) |
1521 | { | 1534 | { |
1535 | int dirty = buffer_dirty(bh); | ||
1536 | int ret; | ||
1537 | |||
1522 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1523 | return 0; | 1539 | return 0; |
1524 | return ext4_journal_get_write_access(handle, bh); | 1540 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | ||
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | ||
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | ||
1544 | * by __block_prepare_write() isn't a real problem here as we clear | ||
1545 | * the bit before releasing a page lock and thus writeback cannot | ||
1546 | * ever write the buffer. | ||
1547 | */ | ||
1548 | if (dirty) | ||
1549 | clear_buffer_dirty(bh); | ||
1550 | ret = ext4_journal_get_write_access(handle, bh); | ||
1551 | if (!ret && dirty) | ||
1552 | ret = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
1553 | return ret; | ||
1525 | } | 1554 | } |
1526 | 1555 | ||
1527 | /* | 1556 | /* |
@@ -1578,11 +1607,9 @@ retry: | |||
1578 | *pagep = page; | 1607 | *pagep = page; |
1579 | 1608 | ||
1580 | if (ext4_should_dioread_nolock(inode)) | 1609 | if (ext4_should_dioread_nolock(inode)) |
1581 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1610 | ret = __block_write_begin(page, pos, len, ext4_get_block_write); |
1582 | fsdata, ext4_get_block_write); | ||
1583 | else | 1611 | else |
1584 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, | 1612 | ret = __block_write_begin(page, pos, len, ext4_get_block); |
1585 | fsdata, ext4_get_block); | ||
1586 | 1613 | ||
1587 | if (!ret && ext4_should_journal_data(inode)) { | 1614 | if (!ret && ext4_should_journal_data(inode)) { |
1588 | ret = walk_page_buffers(handle, page_buffers(page), | 1615 | ret = walk_page_buffers(handle, page_buffers(page), |
@@ -1593,7 +1620,7 @@ retry: | |||
1593 | unlock_page(page); | 1620 | unlock_page(page); |
1594 | page_cache_release(page); | 1621 | page_cache_release(page); |
1595 | /* | 1622 | /* |
1596 | * block_write_begin may have instantiated a few blocks | 1623 | * __block_write_begin may have instantiated a few blocks |
1597 | * outside i_size. Trim these off again. Don't need | 1624 | * outside i_size. Trim these off again. Don't need |
1598 | * i_size_read because we hold i_mutex. | 1625 | * i_size_read because we hold i_mutex. |
1599 | * | 1626 | * |
@@ -2194,7 +2221,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2194 | BUG_ON(!handle); | 2221 | BUG_ON(!handle); |
2195 | 2222 | ||
2196 | /* | 2223 | /* |
2197 | * Call ext4_get_blocks() to allocate any delayed allocation | 2224 | * Call ext4_map_blocks() to allocate any delayed allocation |
2198 | * blocks, or to convert an uninitialized extent to be | 2225 | * blocks, or to convert an uninitialized extent to be |
2199 | * initialized (in the case where we have written into | 2226 | * initialized (in the case where we have written into |
2200 | * one or more preallocated blocks). | 2227 | * one or more preallocated blocks). |
@@ -2203,7 +2230,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2203 | * indicate that we are on the delayed allocation path. This | 2230 | * indicate that we are on the delayed allocation path. This |
2204 | * affects functions in many different parts of the allocation | 2231 | * affects functions in many different parts of the allocation |
2205 | * call path. This flag exists primarily because we don't | 2232 | * call path. This flag exists primarily because we don't |
2206 | * want to change *many* call functions, so ext4_get_blocks() | 2233 | * want to change *many* call functions, so ext4_map_blocks() |
2207 | * will set the magic i_delalloc_reserved_flag once the | 2234 | * will set the magic i_delalloc_reserved_flag once the |
2208 | * inode's allocation semaphore is taken. | 2235 | * inode's allocation semaphore is taken. |
2209 | * | 2236 | * |
@@ -2221,6 +2248,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2221 | 2248 | ||
2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | 2249 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2223 | if (blks < 0) { | 2250 | if (blks < 0) { |
2251 | struct super_block *sb = mpd->inode->i_sb; | ||
2252 | |||
2224 | err = blks; | 2253 | err = blks; |
2225 | /* | 2254 | /* |
2226 | * If get block returns with error we simply | 2255 | * If get block returns with error we simply |
@@ -2231,7 +2260,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2231 | return 0; | 2260 | return 0; |
2232 | 2261 | ||
2233 | if (err == -ENOSPC && | 2262 | if (err == -ENOSPC && |
2234 | ext4_count_free_blocks(mpd->inode->i_sb)) { | 2263 | ext4_count_free_blocks(sb)) { |
2235 | mpd->retval = err; | 2264 | mpd->retval = err; |
2236 | return 0; | 2265 | return 0; |
2237 | } | 2266 | } |
@@ -2243,16 +2272,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2243 | * writepage and writepages will again try to write | 2272 | * writepage and writepages will again try to write |
2244 | * the same. | 2273 | * the same. |
2245 | */ | 2274 | */ |
2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2275 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { |
2247 | "delayed block allocation failed for inode %lu at " | 2276 | ext4_msg(sb, KERN_CRIT, |
2248 | "logical offset %llu with max blocks %zd with " | 2277 | "delayed block allocation failed for inode %lu " |
2249 | "error %d", mpd->inode->i_ino, | 2278 | "at logical offset %llu with max blocks %zd " |
2250 | (unsigned long long) next, | 2279 | "with error %d", mpd->inode->i_ino, |
2251 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2280 | (unsigned long long) next, |
2252 | printk(KERN_CRIT "This should not happen!! " | 2281 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2253 | "Data will be lost\n"); | 2282 | ext4_msg(sb, KERN_CRIT, |
2254 | if (err == -ENOSPC) { | 2283 | "This should not happen!! Data will be lost\n"); |
2255 | ext4_print_free_blocks(mpd->inode); | 2284 | if (err == -ENOSPC) |
2285 | ext4_print_free_blocks(mpd->inode); | ||
2256 | } | 2286 | } |
2257 | /* invalidate all the pages */ | 2287 | /* invalidate all the pages */ |
2258 | ext4_da_block_invalidatepages(mpd, next, | 2288 | ext4_da_block_invalidatepages(mpd, next, |
@@ -2320,7 +2350,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2320 | * XXX Don't go larger than mballoc is willing to allocate | 2350 | * XXX Don't go larger than mballoc is willing to allocate |
2321 | * This is a stopgap solution. We eventually need to fold | 2351 | * This is a stopgap solution. We eventually need to fold |
2322 | * mpage_da_submit_io() into this function and then call | 2352 | * mpage_da_submit_io() into this function and then call |
2323 | * ext4_get_blocks() multiple times in a loop | 2353 | * ext4_map_blocks() multiple times in a loop |
2324 | */ | 2354 | */ |
2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | 2355 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) |
2326 | goto flush_it; | 2356 | goto flush_it; |
@@ -2553,18 +2583,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2553 | /* | 2583 | /* |
2554 | * This function is used as a standard get_block_t calback function | 2584 | * This function is used as a standard get_block_t calback function |
2555 | * when there is no desire to allocate any blocks. It is used as a | 2585 | * when there is no desire to allocate any blocks. It is used as a |
2556 | * callback function for block_prepare_write(), nobh_writepage(), and | 2586 | * callback function for block_prepare_write() and block_write_full_page(). |
2557 | * block_write_full_page(). These functions should only try to map a | 2587 | * These functions should only try to map a single block at a time. |
2558 | * single block at a time. | ||
2559 | * | 2588 | * |
2560 | * Since this function doesn't do block allocations even if the caller | 2589 | * Since this function doesn't do block allocations even if the caller |
2561 | * requests it by passing in create=1, it is critically important that | 2590 | * requests it by passing in create=1, it is critically important that |
2562 | * any caller checks to make sure that any buffer heads are returned | 2591 | * any caller checks to make sure that any buffer heads are returned |
2563 | * by this function are either all already mapped or marked for | 2592 | * by this function are either all already mapped or marked for |
2564 | * delayed allocation before calling nobh_writepage() or | 2593 | * delayed allocation before calling block_write_full_page(). Otherwise, |
2565 | * block_write_full_page(). Otherwise, b_blocknr could be left | 2594 | * b_blocknr could be left unitialized, and the page write functions will |
2566 | * unitialized, and the page write functions will be taken by | 2595 | * be taken by surprise. |
2567 | * surprise. | ||
2568 | */ | 2596 | */ |
2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2597 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2570 | struct buffer_head *bh_result, int create) | 2598 | struct buffer_head *bh_result, int create) |
@@ -2749,9 +2777,7 @@ static int ext4_writepage(struct page *page, | |||
2749 | return __ext4_journalled_writepage(page, len); | 2777 | return __ext4_journalled_writepage(page, len); |
2750 | } | 2778 | } |
2751 | 2779 | ||
2752 | if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) | 2780 | if (page_bufs && buffer_uninit(page_bufs)) { |
2753 | ret = nobh_writepage(page, noalloc_get_block_write, wbc); | ||
2754 | else if (page_bufs && buffer_uninit(page_bufs)) { | ||
2755 | ext4_set_bh_endio(page_bufs, inode); | 2781 | ext4_set_bh_endio(page_bufs, inode); |
2756 | ret = block_write_full_page_endio(page, noalloc_get_block_write, | 2782 | ret = block_write_full_page_endio(page, noalloc_get_block_write, |
2757 | wbc, ext4_end_io_buffer_write); | 2783 | wbc, ext4_end_io_buffer_write); |
@@ -3146,13 +3172,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3146 | int ret, retries = 0; | 3172 | int ret, retries = 0; |
3147 | struct page *page; | 3173 | struct page *page; |
3148 | pgoff_t index; | 3174 | pgoff_t index; |
3149 | unsigned from, to; | ||
3150 | struct inode *inode = mapping->host; | 3175 | struct inode *inode = mapping->host; |
3151 | handle_t *handle; | 3176 | handle_t *handle; |
3152 | 3177 | ||
3153 | index = pos >> PAGE_CACHE_SHIFT; | 3178 | index = pos >> PAGE_CACHE_SHIFT; |
3154 | from = pos & (PAGE_CACHE_SIZE - 1); | ||
3155 | to = from + len; | ||
3156 | 3179 | ||
3157 | if (ext4_nonda_switch(inode->i_sb)) { | 3180 | if (ext4_nonda_switch(inode->i_sb)) { |
3158 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | 3181 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; |
@@ -3185,8 +3208,7 @@ retry: | |||
3185 | } | 3208 | } |
3186 | *pagep = page; | 3209 | *pagep = page; |
3187 | 3210 | ||
3188 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 3211 | ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); |
3189 | ext4_da_get_block_prep); | ||
3190 | if (ret < 0) { | 3212 | if (ret < 0) { |
3191 | unlock_page(page); | 3213 | unlock_page(page); |
3192 | ext4_journal_stop(handle); | 3214 | ext4_journal_stop(handle); |
@@ -3545,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
3545 | 3567 | ||
3546 | retry: | 3568 | retry: |
3547 | if (rw == READ && ext4_should_dioread_nolock(inode)) | 3569 | if (rw == READ && ext4_should_dioread_nolock(inode)) |
3548 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 3570 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3549 | inode->i_sb->s_bdev, iov, | 3571 | inode->i_sb->s_bdev, iov, |
3550 | offset, nr_segs, | 3572 | offset, nr_segs, |
3551 | ext4_get_block, NULL); | 3573 | ext4_get_block, NULL, NULL, 0); |
3552 | else | 3574 | else { |
3553 | ret = blockdev_direct_IO(rw, iocb, inode, | 3575 | ret = blockdev_direct_IO(rw, iocb, inode, |
3554 | inode->i_sb->s_bdev, iov, | 3576 | inode->i_sb->s_bdev, iov, |
3555 | offset, nr_segs, | 3577 | offset, nr_segs, |
3556 | ext4_get_block, NULL); | 3578 | ext4_get_block, NULL); |
3579 | |||
3580 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
3581 | loff_t isize = i_size_read(inode); | ||
3582 | loff_t end = offset + iov_length(iov, nr_segs); | ||
3583 | |||
3584 | if (end > isize) | ||
3585 | vmtruncate(inode, isize); | ||
3586 | } | ||
3587 | } | ||
3557 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3588 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3558 | goto retry; | 3589 | goto retry; |
3559 | 3590 | ||
@@ -3668,6 +3699,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) | |||
3668 | return ret; | 3699 | return ret; |
3669 | } | 3700 | } |
3670 | 3701 | ||
3702 | if (io->iocb) | ||
3703 | aio_complete(io->iocb, io->result, 0); | ||
3671 | /* clear the DIO AIO unwritten flag */ | 3704 | /* clear the DIO AIO unwritten flag */ |
3672 | io->flag = 0; | 3705 | io->flag = 0; |
3673 | return ret; | 3706 | return ret; |
@@ -3767,6 +3800,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | |||
3767 | io->offset = 0; | 3800 | io->offset = 0; |
3768 | io->size = 0; | 3801 | io->size = 0; |
3769 | io->page = NULL; | 3802 | io->page = NULL; |
3803 | io->iocb = NULL; | ||
3804 | io->result = 0; | ||
3770 | INIT_WORK(&io->work, ext4_end_io_work); | 3805 | INIT_WORK(&io->work, ext4_end_io_work); |
3771 | INIT_LIST_HEAD(&io->list); | 3806 | INIT_LIST_HEAD(&io->list); |
3772 | } | 3807 | } |
@@ -3796,12 +3831,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3796 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3831 | if (io_end->flag != EXT4_IO_UNWRITTEN){ |
3797 | ext4_free_io_end(io_end); | 3832 | ext4_free_io_end(io_end); |
3798 | iocb->private = NULL; | 3833 | iocb->private = NULL; |
3799 | goto out; | 3834 | out: |
3835 | if (is_async) | ||
3836 | aio_complete(iocb, ret, 0); | ||
3837 | return; | ||
3800 | } | 3838 | } |
3801 | 3839 | ||
3802 | io_end->offset = offset; | 3840 | io_end->offset = offset; |
3803 | io_end->size = size; | 3841 | io_end->size = size; |
3804 | io_end->flag = EXT4_IO_UNWRITTEN; | 3842 | if (is_async) { |
3843 | io_end->iocb = iocb; | ||
3844 | io_end->result = ret; | ||
3845 | } | ||
3805 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | 3846 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; |
3806 | 3847 | ||
3807 | /* queue the work to convert unwritten extents to written */ | 3848 | /* queue the work to convert unwritten extents to written */ |
@@ -3813,9 +3854,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3813 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 3854 | list_add_tail(&io_end->list, &ei->i_completed_io_list); |
3814 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 3855 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
3815 | iocb->private = NULL; | 3856 | iocb->private = NULL; |
3816 | out: | ||
3817 | if (is_async) | ||
3818 | aio_complete(iocb, ret, 0); | ||
3819 | } | 3857 | } |
3820 | 3858 | ||
3821 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | 3859 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) |
@@ -3941,7 +3979,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3941 | return -ENOMEM; | 3979 | return -ENOMEM; |
3942 | /* | 3980 | /* |
3943 | * we save the io structure for current async | 3981 | * we save the io structure for current async |
3944 | * direct IO, so that later ext4_get_blocks() | 3982 | * direct IO, so that later ext4_map_blocks() |
3945 | * could flag the io structure whether there | 3983 | * could flag the io structure whether there |
3946 | * is a unwritten extents needs to be converted | 3984 | * is a unwritten extents needs to be converted |
3947 | * when IO is completed. | 3985 | * when IO is completed. |
@@ -4132,17 +4170,6 @@ int ext4_block_truncate_page(handle_t *handle, | |||
4132 | length = blocksize - (offset & (blocksize - 1)); | 4170 | length = blocksize - (offset & (blocksize - 1)); |
4133 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 4171 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
4134 | 4172 | ||
4135 | /* | ||
4136 | * For "nobh" option, we can only work if we don't need to | ||
4137 | * read-in the page - otherwise we create buffers to do the IO. | ||
4138 | */ | ||
4139 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | ||
4140 | ext4_should_writeback_data(inode) && PageUptodate(page)) { | ||
4141 | zero_user(page, offset, length); | ||
4142 | set_page_dirty(page); | ||
4143 | goto unlock; | ||
4144 | } | ||
4145 | |||
4146 | if (!page_has_buffers(page)) | 4173 | if (!page_has_buffers(page)) |
4147 | create_empty_buffers(page, blocksize, 0); | 4174 | create_empty_buffers(page, blocksize, 0); |
4148 | 4175 | ||
@@ -4492,9 +4519,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4492 | * (should be rare). | 4519 | * (should be rare). |
4493 | */ | 4520 | */ |
4494 | if (!bh) { | 4521 | if (!bh) { |
4495 | EXT4_ERROR_INODE(inode, | 4522 | EXT4_ERROR_INODE_BLOCK(inode, nr, |
4496 | "Read failure block=%llu", | 4523 | "Read failure"); |
4497 | (unsigned long long) nr); | ||
4498 | continue; | 4524 | continue; |
4499 | } | 4525 | } |
4500 | 4526 | ||
@@ -4506,27 +4532,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4506 | depth); | 4532 | depth); |
4507 | 4533 | ||
4508 | /* | 4534 | /* |
4509 | * We've probably journalled the indirect block several | ||
4510 | * times during the truncate. But it's no longer | ||
4511 | * needed and we now drop it from the transaction via | ||
4512 | * jbd2_journal_revoke(). | ||
4513 | * | ||
4514 | * That's easy if it's exclusively part of this | ||
4515 | * transaction. But if it's part of the committing | ||
4516 | * transaction then jbd2_journal_forget() will simply | ||
4517 | * brelse() it. That means that if the underlying | ||
4518 | * block is reallocated in ext4_get_block(), | ||
4519 | * unmap_underlying_metadata() will find this block | ||
4520 | * and will try to get rid of it. damn, damn. | ||
4521 | * | ||
4522 | * If this block has already been committed to the | ||
4523 | * journal, a revoke record will be written. And | ||
4524 | * revoke records must be emitted *before* clearing | ||
4525 | * this block's bit in the bitmaps. | ||
4526 | */ | ||
4527 | ext4_forget(handle, 1, inode, bh, bh->b_blocknr); | ||
4528 | |||
4529 | /* | ||
4530 | * Everything below this this pointer has been | 4535 | * Everything below this this pointer has been |
4531 | * released. Now let this top-of-subtree go. | 4536 | * released. Now let this top-of-subtree go. |
4532 | * | 4537 | * |
@@ -4550,8 +4555,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4550 | blocks_for_truncate(inode)); | 4555 | blocks_for_truncate(inode)); |
4551 | } | 4556 | } |
4552 | 4557 | ||
4558 | /* | ||
4559 | * The forget flag here is critical because if | ||
4560 | * we are journaling (and not doing data | ||
4561 | * journaling), we have to make sure a revoke | ||
4562 | * record is written to prevent the journal | ||
4563 | * replay from overwriting the (former) | ||
4564 | * indirect block if it gets reallocated as a | ||
4565 | * data block. This must happen in the same | ||
4566 | * transaction where the data blocks are | ||
4567 | * actually freed. | ||
4568 | */ | ||
4553 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4569 | ext4_free_blocks(handle, inode, 0, nr, 1, |
4554 | EXT4_FREE_BLOCKS_METADATA); | 4570 | EXT4_FREE_BLOCKS_METADATA| |
4571 | EXT4_FREE_BLOCKS_FORGET); | ||
4555 | 4572 | ||
4556 | if (parent_bh) { | 4573 | if (parent_bh) { |
4557 | /* | 4574 | /* |
@@ -4809,8 +4826,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4809 | 4826 | ||
4810 | bh = sb_getblk(sb, block); | 4827 | bh = sb_getblk(sb, block); |
4811 | if (!bh) { | 4828 | if (!bh) { |
4812 | EXT4_ERROR_INODE(inode, "unable to read inode block - " | 4829 | EXT4_ERROR_INODE_BLOCK(inode, block, |
4813 | "block %llu", block); | 4830 | "unable to read itable block"); |
4814 | return -EIO; | 4831 | return -EIO; |
4815 | } | 4832 | } |
4816 | if (!buffer_uptodate(bh)) { | 4833 | if (!buffer_uptodate(bh)) { |
@@ -4908,8 +4925,8 @@ make_io: | |||
4908 | submit_bh(READ_META, bh); | 4925 | submit_bh(READ_META, bh); |
4909 | wait_on_buffer(bh); | 4926 | wait_on_buffer(bh); |
4910 | if (!buffer_uptodate(bh)) { | 4927 | if (!buffer_uptodate(bh)) { |
4911 | EXT4_ERROR_INODE(inode, "unable to read inode " | 4928 | EXT4_ERROR_INODE_BLOCK(inode, block, |
4912 | "block %llu", block); | 4929 | "unable to read itable block"); |
4913 | brelse(bh); | 4930 | brelse(bh); |
4914 | return -EIO; | 4931 | return -EIO; |
4915 | } | 4932 | } |
@@ -4980,7 +4997,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | |||
4980 | /* we are using combined 48 bit field */ | 4997 | /* we are using combined 48 bit field */ |
4981 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | | 4998 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | |
4982 | le32_to_cpu(raw_inode->i_blocks_lo); | 4999 | le32_to_cpu(raw_inode->i_blocks_lo); |
4983 | if (ei->i_flags & EXT4_HUGE_FILE_FL) { | 5000 | if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { |
4984 | /* i_blocks represent file system block size */ | 5001 | /* i_blocks represent file system block size */ |
4985 | return i_blocks << (inode->i_blkbits - 9); | 5002 | return i_blocks << (inode->i_blkbits - 9); |
4986 | } else { | 5003 | } else { |
@@ -5076,7 +5093,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5076 | transaction_t *transaction; | 5093 | transaction_t *transaction; |
5077 | tid_t tid; | 5094 | tid_t tid; |
5078 | 5095 | ||
5079 | spin_lock(&journal->j_state_lock); | 5096 | read_lock(&journal->j_state_lock); |
5080 | if (journal->j_running_transaction) | 5097 | if (journal->j_running_transaction) |
5081 | transaction = journal->j_running_transaction; | 5098 | transaction = journal->j_running_transaction; |
5082 | else | 5099 | else |
@@ -5085,7 +5102,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5085 | tid = transaction->t_tid; | 5102 | tid = transaction->t_tid; |
5086 | else | 5103 | else |
5087 | tid = journal->j_commit_sequence; | 5104 | tid = journal->j_commit_sequence; |
5088 | spin_unlock(&journal->j_state_lock); | 5105 | read_unlock(&journal->j_state_lock); |
5089 | ei->i_sync_tid = tid; | 5106 | ei->i_sync_tid = tid; |
5090 | ei->i_datasync_tid = tid; | 5107 | ei->i_datasync_tid = tid; |
5091 | } | 5108 | } |
@@ -5130,7 +5147,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5130 | ei->i_file_acl); | 5147 | ei->i_file_acl); |
5131 | ret = -EIO; | 5148 | ret = -EIO; |
5132 | goto bad_inode; | 5149 | goto bad_inode; |
5133 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 5150 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
5134 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 5151 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
5135 | (S_ISLNK(inode->i_mode) && | 5152 | (S_ISLNK(inode->i_mode) && |
5136 | !ext4_inode_is_fast_symlink(inode))) | 5153 | !ext4_inode_is_fast_symlink(inode))) |
@@ -5410,9 +5427,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5410 | if (wbc->sync_mode == WB_SYNC_ALL) | 5427 | if (wbc->sync_mode == WB_SYNC_ALL) |
5411 | sync_dirty_buffer(iloc.bh); | 5428 | sync_dirty_buffer(iloc.bh); |
5412 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5429 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5413 | EXT4_ERROR_INODE(inode, | 5430 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
5414 | "IO error syncing inode (block=%llu)", | 5431 | "IO error syncing inode"); |
5415 | (unsigned long long) iloc.bh->b_blocknr); | ||
5416 | err = -EIO; | 5432 | err = -EIO; |
5417 | } | 5433 | } |
5418 | brelse(iloc.bh); | 5434 | brelse(iloc.bh); |
@@ -5487,10 +5503,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5487 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | 5503 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5488 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5504 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5489 | 5505 | ||
5490 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5506 | if (attr->ia_size > sbi->s_bitmap_maxbytes) |
5491 | error = -EFBIG; | 5507 | return -EFBIG; |
5492 | goto err_out; | ||
5493 | } | ||
5494 | } | 5508 | } |
5495 | } | 5509 | } |
5496 | 5510 | ||
@@ -5533,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5533 | ext4_truncate(inode); | 5547 | ext4_truncate(inode); |
5534 | } | 5548 | } |
5535 | 5549 | ||
5536 | rc = inode_setattr(inode, attr); | 5550 | if ((attr->ia_valid & ATTR_SIZE) && |
5551 | attr->ia_size != i_size_read(inode)) | ||
5552 | rc = vmtruncate(inode, attr->ia_size); | ||
5553 | |||
5554 | if (!rc) { | ||
5555 | setattr_copy(inode, attr); | ||
5556 | mark_inode_dirty(inode); | ||
5557 | } | ||
5537 | 5558 | ||
5538 | /* If inode_setattr's call to ext4_truncate failed to get a | 5559 | /* |
5539 | * transaction handle at all, we need to clean up the in-core | 5560 | * If the call to ext4_truncate failed to get a transaction handle at |
5540 | * orphan list manually. */ | 5561 | * all, we need to clean up the in-core orphan list manually. |
5562 | */ | ||
5541 | if (inode->i_nlink) | 5563 | if (inode->i_nlink) |
5542 | ext4_orphan_del(NULL, inode); | 5564 | ext4_orphan_del(NULL, inode); |
5543 | 5565 | ||
@@ -5692,7 +5714,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
5692 | * Calculate the journal credits for a chunk of data modification. | 5714 | * Calculate the journal credits for a chunk of data modification. |
5693 | * | 5715 | * |
5694 | * This is called from DIO, fallocate or whoever calling | 5716 | * This is called from DIO, fallocate or whoever calling |
5695 | * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. | 5717 | * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. |
5696 | * | 5718 | * |
5697 | * journal buffers for data blocks are not included here, as DIO | 5719 | * journal buffers for data blocks are not included here, as DIO |
5698 | * and fallocate do no need to journal data buffers. | 5720 | * and fallocate do no need to journal data buffers. |
@@ -5758,7 +5780,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5758 | { | 5780 | { |
5759 | struct ext4_inode *raw_inode; | 5781 | struct ext4_inode *raw_inode; |
5760 | struct ext4_xattr_ibody_header *header; | 5782 | struct ext4_xattr_ibody_header *header; |
5761 | struct ext4_xattr_entry *entry; | ||
5762 | 5783 | ||
5763 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) | 5784 | if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) |
5764 | return 0; | 5785 | return 0; |
@@ -5766,7 +5787,6 @@ static int ext4_expand_extra_isize(struct inode *inode, | |||
5766 | raw_inode = ext4_raw_inode(&iloc); | 5787 | raw_inode = ext4_raw_inode(&iloc); |
5767 | 5788 | ||
5768 | header = IHDR(inode, raw_inode); | 5789 | header = IHDR(inode, raw_inode); |
5769 | entry = IFIRST(header); | ||
5770 | 5790 | ||
5771 | /* No extended attributes present */ | 5791 | /* No extended attributes present */ |
5772 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || | 5792 | if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 12b3bc026a68..4b4ad4b7ce57 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -446,10 +446,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
446 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 446 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
447 | blocknr += first + i; | 447 | blocknr += first + i; |
448 | ext4_grp_locked_error(sb, e4b->bd_group, | 448 | ext4_grp_locked_error(sb, e4b->bd_group, |
449 | __func__, "double-free of inode" | 449 | inode ? inode->i_ino : 0, |
450 | " %lu's block %llu(bit %u in group %u)", | 450 | blocknr, |
451 | inode ? inode->i_ino : 0, blocknr, | 451 | "freeing block already freed " |
452 | first + i, e4b->bd_group); | 452 | "(bit %u)", |
453 | first + i); | ||
453 | } | 454 | } |
454 | mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); | 455 | mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); |
455 | } | 456 | } |
@@ -712,9 +713,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
712 | grp->bb_fragments = fragments; | 713 | grp->bb_fragments = fragments; |
713 | 714 | ||
714 | if (free != grp->bb_free) { | 715 | if (free != grp->bb_free) { |
715 | ext4_grp_locked_error(sb, group, __func__, | 716 | ext4_grp_locked_error(sb, group, 0, 0, |
716 | "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", | 717 | "%u blocks in bitmap, %u in gd", |
717 | group, free, grp->bb_free); | 718 | free, grp->bb_free); |
718 | /* | 719 | /* |
719 | * If we intent to continue, we consider group descritor | 720 | * If we intent to continue, we consider group descritor |
720 | * corrupt and update bb_free using bitmap value | 721 | * corrupt and update bb_free using bitmap value |
@@ -1296,10 +1297,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1296 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); | 1297 | blocknr = ext4_group_first_block_no(sb, e4b->bd_group); |
1297 | blocknr += block; | 1298 | blocknr += block; |
1298 | ext4_grp_locked_error(sb, e4b->bd_group, | 1299 | ext4_grp_locked_error(sb, e4b->bd_group, |
1299 | __func__, "double-free of inode" | 1300 | inode ? inode->i_ino : 0, |
1300 | " %lu's block %llu(bit %u in group %u)", | 1301 | blocknr, |
1301 | inode ? inode->i_ino : 0, blocknr, block, | 1302 | "freeing already freed block " |
1302 | e4b->bd_group); | 1303 | "(bit %u)", block); |
1303 | } | 1304 | } |
1304 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1305 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); |
1305 | e4b->bd_info->bb_counters[order]++; | 1306 | e4b->bd_info->bb_counters[order]++; |
@@ -1788,8 +1789,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1788 | * free blocks even though group info says we | 1789 | * free blocks even though group info says we |
1789 | * we have free blocks | 1790 | * we have free blocks |
1790 | */ | 1791 | */ |
1791 | ext4_grp_locked_error(sb, e4b->bd_group, | 1792 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
1792 | __func__, "%d free blocks as per " | 1793 | "%d free blocks as per " |
1793 | "group info. But bitmap says 0", | 1794 | "group info. But bitmap says 0", |
1794 | free); | 1795 | free); |
1795 | break; | 1796 | break; |
@@ -1798,8 +1799,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1798 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | 1799 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); |
1799 | BUG_ON(ex.fe_len <= 0); | 1800 | BUG_ON(ex.fe_len <= 0); |
1800 | if (free < ex.fe_len) { | 1801 | if (free < ex.fe_len) { |
1801 | ext4_grp_locked_error(sb, e4b->bd_group, | 1802 | ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, |
1802 | __func__, "%d free blocks as per " | 1803 | "%d free blocks as per " |
1803 | "group info. But got %d blocks", | 1804 | "group info. But got %d blocks", |
1804 | free, ex.fe_len); | 1805 | free, ex.fe_len); |
1805 | /* | 1806 | /* |
@@ -1821,8 +1822,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1821 | 1822 | ||
1822 | /* | 1823 | /* |
1823 | * This is a special case for storages like raid5 | 1824 | * This is a special case for storages like raid5 |
1824 | * we try to find stripe-aligned chunks for stripe-size requests | 1825 | * we try to find stripe-aligned chunks for stripe-size-multiple requests |
1825 | * XXX should do so at least for multiples of stripe size as well | ||
1826 | */ | 1826 | */ |
1827 | static noinline_for_stack | 1827 | static noinline_for_stack |
1828 | void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | 1828 | void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, |
@@ -1999,7 +1999,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1999 | ext4_group_t ngroups, group, i; | 1999 | ext4_group_t ngroups, group, i; |
2000 | int cr; | 2000 | int cr; |
2001 | int err = 0; | 2001 | int err = 0; |
2002 | int bsbits; | ||
2003 | struct ext4_sb_info *sbi; | 2002 | struct ext4_sb_info *sbi; |
2004 | struct super_block *sb; | 2003 | struct super_block *sb; |
2005 | struct ext4_buddy e4b; | 2004 | struct ext4_buddy e4b; |
@@ -2041,8 +2040,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
2041 | ac->ac_2order = i - 1; | 2040 | ac->ac_2order = i - 1; |
2042 | } | 2041 | } |
2043 | 2042 | ||
2044 | bsbits = ac->ac_sb->s_blocksize_bits; | ||
2045 | |||
2046 | /* if stream allocation is enabled, use global goal */ | 2043 | /* if stream allocation is enabled, use global goal */ |
2047 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { | 2044 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
2048 | /* TBD: may be hot point */ | 2045 | /* TBD: may be hot point */ |
@@ -2094,8 +2091,8 @@ repeat: | |||
2094 | ac->ac_groups_scanned++; | 2091 | ac->ac_groups_scanned++; |
2095 | if (cr == 0) | 2092 | if (cr == 0) |
2096 | ext4_mb_simple_scan_group(ac, &e4b); | 2093 | ext4_mb_simple_scan_group(ac, &e4b); |
2097 | else if (cr == 1 && | 2094 | else if (cr == 1 && sbi->s_stripe && |
2098 | ac->ac_g_ex.fe_len == sbi->s_stripe) | 2095 | !(ac->ac_g_ex.fe_len % sbi->s_stripe)) |
2099 | ext4_mb_scan_aligned(ac, &e4b); | 2096 | ext4_mb_scan_aligned(ac, &e4b); |
2100 | else | 2097 | else |
2101 | ext4_mb_complex_scan_group(ac, &e4b); | 2098 | ext4_mb_complex_scan_group(ac, &e4b); |
@@ -2221,7 +2218,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | |||
2221 | 2218 | ||
2222 | rc = seq_open(file, &ext4_mb_seq_groups_ops); | 2219 | rc = seq_open(file, &ext4_mb_seq_groups_ops); |
2223 | if (rc == 0) { | 2220 | if (rc == 0) { |
2224 | struct seq_file *m = (struct seq_file *)file->private_data; | 2221 | struct seq_file *m = file->private_data; |
2225 | m->private = sb; | 2222 | m->private = sb; |
2226 | } | 2223 | } |
2227 | return rc; | 2224 | return rc; |
@@ -2560,6 +2557,22 @@ int ext4_mb_release(struct super_block *sb) | |||
2560 | return 0; | 2557 | return 0; |
2561 | } | 2558 | } |
2562 | 2559 | ||
2560 | static inline void ext4_issue_discard(struct super_block *sb, | ||
2561 | ext4_group_t block_group, ext4_grpblk_t block, int count) | ||
2562 | { | ||
2563 | int ret; | ||
2564 | ext4_fsblk_t discard_block; | ||
2565 | |||
2566 | discard_block = block + ext4_group_first_block_no(sb, block_group); | ||
2567 | trace_ext4_discard_blocks(sb, | ||
2568 | (unsigned long long) discard_block, count); | ||
2569 | ret = sb_issue_discard(sb, discard_block, count); | ||
2570 | if (ret == EOPNOTSUPP) { | ||
2571 | ext4_warning(sb, "discard not supported, disabling"); | ||
2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2573 | } | ||
2574 | } | ||
2575 | |||
2563 | /* | 2576 | /* |
2564 | * This function is called by the jbd2 layer once the commit has finished, | 2577 | * This function is called by the jbd2 layer once the commit has finished, |
2565 | * so we know we can free the blocks that were released with that commit. | 2578 | * so we know we can free the blocks that were released with that commit. |
@@ -2579,22 +2592,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2579 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2592 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2580 | entry->count, entry->group, entry); | 2593 | entry->count, entry->group, entry); |
2581 | 2594 | ||
2582 | if (test_opt(sb, DISCARD)) { | 2595 | if (test_opt(sb, DISCARD)) |
2583 | int ret; | 2596 | ext4_issue_discard(sb, entry->group, |
2584 | ext4_fsblk_t discard_block; | 2597 | entry->start_blk, entry->count); |
2585 | |||
2586 | discard_block = entry->start_blk + | ||
2587 | ext4_group_first_block_no(sb, entry->group); | ||
2588 | trace_ext4_discard_blocks(sb, | ||
2589 | (unsigned long long)discard_block, | ||
2590 | entry->count); | ||
2591 | ret = sb_issue_discard(sb, discard_block, entry->count); | ||
2592 | if (ret == EOPNOTSUPP) { | ||
2593 | ext4_warning(sb, | ||
2594 | "discard not supported, disabling"); | ||
2595 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2596 | } | ||
2597 | } | ||
2598 | 2598 | ||
2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2600 | /* we expect to find existing buddy because it's pinned */ | 2600 | /* we expect to find existing buddy because it's pinned */ |
@@ -2704,7 +2704,7 @@ void exit_ext4_mballoc(void) | |||
2704 | 2704 | ||
2705 | 2705 | ||
2706 | /* | 2706 | /* |
2707 | * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps | 2707 | * Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps |
2708 | * Returns 0 if success or error code | 2708 | * Returns 0 if success or error code |
2709 | */ | 2709 | */ |
2710 | static noinline_for_stack int | 2710 | static noinline_for_stack int |
@@ -2712,7 +2712,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2712 | handle_t *handle, unsigned int reserv_blks) | 2712 | handle_t *handle, unsigned int reserv_blks) |
2713 | { | 2713 | { |
2714 | struct buffer_head *bitmap_bh = NULL; | 2714 | struct buffer_head *bitmap_bh = NULL; |
2715 | struct ext4_super_block *es; | ||
2716 | struct ext4_group_desc *gdp; | 2715 | struct ext4_group_desc *gdp; |
2717 | struct buffer_head *gdp_bh; | 2716 | struct buffer_head *gdp_bh; |
2718 | struct ext4_sb_info *sbi; | 2717 | struct ext4_sb_info *sbi; |
@@ -2725,8 +2724,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2725 | 2724 | ||
2726 | sb = ac->ac_sb; | 2725 | sb = ac->ac_sb; |
2727 | sbi = EXT4_SB(sb); | 2726 | sbi = EXT4_SB(sb); |
2728 | es = sbi->s_es; | ||
2729 | |||
2730 | 2727 | ||
2731 | err = -EIO; | 2728 | err = -EIO; |
2732 | bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); | 2729 | bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); |
@@ -2812,7 +2809,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2812 | err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); | 2809 | err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); |
2813 | 2810 | ||
2814 | out_err: | 2811 | out_err: |
2815 | sb->s_dirt = 1; | 2812 | ext4_mark_super_dirty(sb); |
2816 | brelse(bitmap_bh); | 2813 | brelse(bitmap_bh); |
2817 | return err; | 2814 | return err; |
2818 | } | 2815 | } |
@@ -2850,7 +2847,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2850 | int bsbits, max; | 2847 | int bsbits, max; |
2851 | ext4_lblk_t end; | 2848 | ext4_lblk_t end; |
2852 | loff_t size, orig_size, start_off; | 2849 | loff_t size, orig_size, start_off; |
2853 | ext4_lblk_t start, orig_start; | 2850 | ext4_lblk_t start; |
2854 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 2851 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
2855 | struct ext4_prealloc_space *pa; | 2852 | struct ext4_prealloc_space *pa; |
2856 | 2853 | ||
@@ -2881,6 +2878,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2881 | size = size << bsbits; | 2878 | size = size << bsbits; |
2882 | if (size < i_size_read(ac->ac_inode)) | 2879 | if (size < i_size_read(ac->ac_inode)) |
2883 | size = i_size_read(ac->ac_inode); | 2880 | size = i_size_read(ac->ac_inode); |
2881 | orig_size = size; | ||
2884 | 2882 | ||
2885 | /* max size of free chunks */ | 2883 | /* max size of free chunks */ |
2886 | max = 2 << bsbits; | 2884 | max = 2 << bsbits; |
@@ -2922,8 +2920,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
2922 | start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; | 2920 | start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; |
2923 | size = ac->ac_o_ex.fe_len << bsbits; | 2921 | size = ac->ac_o_ex.fe_len << bsbits; |
2924 | } | 2922 | } |
2925 | orig_size = size = size >> bsbits; | 2923 | size = size >> bsbits; |
2926 | orig_start = start = start_off >> bsbits; | 2924 | start = start_off >> bsbits; |
2927 | 2925 | ||
2928 | /* don't cover already allocated blocks in selected range */ | 2926 | /* don't cover already allocated blocks in selected range */ |
2929 | if (ar->pleft && start <= ar->lleft) { | 2927 | if (ar->pleft && start <= ar->lleft) { |
@@ -3547,7 +3545,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3547 | ext4_group_t group; | 3545 | ext4_group_t group; |
3548 | ext4_grpblk_t bit; | 3546 | ext4_grpblk_t bit; |
3549 | unsigned long long grp_blk_start; | 3547 | unsigned long long grp_blk_start; |
3550 | sector_t start; | ||
3551 | int err = 0; | 3548 | int err = 0; |
3552 | int free = 0; | 3549 | int free = 0; |
3553 | 3550 | ||
@@ -3567,10 +3564,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3567 | if (bit >= end) | 3564 | if (bit >= end) |
3568 | break; | 3565 | break; |
3569 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); | 3566 | next = mb_find_next_bit(bitmap_bh->b_data, end, bit); |
3570 | start = ext4_group_first_block_no(sb, group) + bit; | ||
3571 | mb_debug(1, " free preallocated %u/%u in group %u\n", | 3567 | mb_debug(1, " free preallocated %u/%u in group %u\n", |
3572 | (unsigned) start, (unsigned) next - bit, | 3568 | (unsigned) ext4_group_first_block_no(sb, group) + bit, |
3573 | (unsigned) group); | 3569 | (unsigned) next - bit, (unsigned) group); |
3574 | free += next - bit; | 3570 | free += next - bit; |
3575 | 3571 | ||
3576 | if (ac) { | 3572 | if (ac) { |
@@ -3581,7 +3577,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3581 | trace_ext4_mballoc_discard(ac); | 3577 | trace_ext4_mballoc_discard(ac); |
3582 | } | 3578 | } |
3583 | 3579 | ||
3584 | trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, | 3580 | trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit, |
3585 | next - bit); | 3581 | next - bit); |
3586 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3582 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3587 | bit = next + 1; | 3583 | bit = next + 1; |
@@ -3591,8 +3587,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3591 | pa, (unsigned long) pa->pa_lstart, | 3587 | pa, (unsigned long) pa->pa_lstart, |
3592 | (unsigned long) pa->pa_pstart, | 3588 | (unsigned long) pa->pa_pstart, |
3593 | (unsigned long) pa->pa_len); | 3589 | (unsigned long) pa->pa_len); |
3594 | ext4_grp_locked_error(sb, group, | 3590 | ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u", |
3595 | __func__, "free %u, pa_free %u", | ||
3596 | free, pa->pa_free); | 3591 | free, pa->pa_free); |
3597 | /* | 3592 | /* |
3598 | * pa is already deleted so we use the value obtained | 3593 | * pa is already deleted so we use the value obtained |
@@ -3613,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3613 | ext4_group_t group; | 3608 | ext4_group_t group; |
3614 | ext4_grpblk_t bit; | 3609 | ext4_grpblk_t bit; |
3615 | 3610 | ||
3616 | trace_ext4_mb_release_group_pa(ac, pa); | 3611 | trace_ext4_mb_release_group_pa(sb, ac, pa); |
3617 | BUG_ON(pa->pa_deleted == 0); | 3612 | BUG_ON(pa->pa_deleted == 0); |
3618 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3613 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3619 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3614 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
@@ -3889,6 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3889 | struct super_block *sb = ac->ac_sb; | 3884 | struct super_block *sb = ac->ac_sb; |
3890 | ext4_group_t ngroups, i; | 3885 | ext4_group_t ngroups, i; |
3891 | 3886 | ||
3887 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
3888 | return; | ||
3889 | |||
3892 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3890 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
3893 | " Allocation context details:\n"); | 3891 | " Allocation context details:\n"); |
3894 | printk(KERN_ERR "EXT4-fs: status %d flags %d\n", | 3892 | printk(KERN_ERR "EXT4-fs: status %d flags %d\n", |
@@ -4255,7 +4253,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4255 | * to usual allocation | 4253 | * to usual allocation |
4256 | */ | 4254 | */ |
4257 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4255 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4258 | struct ext4_allocation_request *ar, int *errp) | 4256 | struct ext4_allocation_request *ar, int *errp) |
4259 | { | 4257 | { |
4260 | int freed; | 4258 | int freed; |
4261 | struct ext4_allocation_context *ac = NULL; | 4259 | struct ext4_allocation_context *ac = NULL; |
@@ -4299,7 +4297,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4299 | inquota = ar->len; | 4297 | inquota = ar->len; |
4300 | if (ar->len == 0) { | 4298 | if (ar->len == 0) { |
4301 | *errp = -EDQUOT; | 4299 | *errp = -EDQUOT; |
4302 | goto out3; | 4300 | goto out; |
4303 | } | 4301 | } |
4304 | } | 4302 | } |
4305 | 4303 | ||
@@ -4307,13 +4305,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4307 | if (!ac) { | 4305 | if (!ac) { |
4308 | ar->len = 0; | 4306 | ar->len = 0; |
4309 | *errp = -ENOMEM; | 4307 | *errp = -ENOMEM; |
4310 | goto out1; | 4308 | goto out; |
4311 | } | 4309 | } |
4312 | 4310 | ||
4313 | *errp = ext4_mb_initialize_context(ac, ar); | 4311 | *errp = ext4_mb_initialize_context(ac, ar); |
4314 | if (*errp) { | 4312 | if (*errp) { |
4315 | ar->len = 0; | 4313 | ar->len = 0; |
4316 | goto out2; | 4314 | goto out; |
4317 | } | 4315 | } |
4318 | 4316 | ||
4319 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; | 4317 | ac->ac_op = EXT4_MB_HISTORY_PREALLOC; |
@@ -4322,7 +4320,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4322 | ext4_mb_normalize_request(ac, ar); | 4320 | ext4_mb_normalize_request(ac, ar); |
4323 | repeat: | 4321 | repeat: |
4324 | /* allocate space in core */ | 4322 | /* allocate space in core */ |
4325 | ext4_mb_regular_allocator(ac); | 4323 | *errp = ext4_mb_regular_allocator(ac); |
4324 | if (*errp) | ||
4325 | goto errout; | ||
4326 | 4326 | ||
4327 | /* as we've just preallocated more space than | 4327 | /* as we've just preallocated more space than |
4328 | * user requested orinally, we store allocated | 4328 | * user requested orinally, we store allocated |
@@ -4333,7 +4333,7 @@ repeat: | |||
4333 | } | 4333 | } |
4334 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4334 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4335 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); | 4335 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4336 | if (*errp == -EAGAIN) { | 4336 | if (*errp == -EAGAIN) { |
4337 | /* | 4337 | /* |
4338 | * drop the reference that we took | 4338 | * drop the reference that we took |
4339 | * in ext4_mb_use_best_found | 4339 | * in ext4_mb_use_best_found |
@@ -4344,12 +4344,10 @@ repeat: | |||
4344 | ac->ac_b_ex.fe_len = 0; | 4344 | ac->ac_b_ex.fe_len = 0; |
4345 | ac->ac_status = AC_STATUS_CONTINUE; | 4345 | ac->ac_status = AC_STATUS_CONTINUE; |
4346 | goto repeat; | 4346 | goto repeat; |
4347 | } else if (*errp) { | 4347 | } else if (*errp) |
4348 | errout: | ||
4348 | ext4_discard_allocated_blocks(ac); | 4349 | ext4_discard_allocated_blocks(ac); |
4349 | ac->ac_b_ex.fe_len = 0; | 4350 | else { |
4350 | ar->len = 0; | ||
4351 | ext4_mb_show_ac(ac); | ||
4352 | } else { | ||
4353 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); | 4351 | block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); |
4354 | ar->len = ac->ac_b_ex.fe_len; | 4352 | ar->len = ac->ac_b_ex.fe_len; |
4355 | } | 4353 | } |
@@ -4358,19 +4356,19 @@ repeat: | |||
4358 | if (freed) | 4356 | if (freed) |
4359 | goto repeat; | 4357 | goto repeat; |
4360 | *errp = -ENOSPC; | 4358 | *errp = -ENOSPC; |
4359 | } | ||
4360 | |||
4361 | if (*errp) { | ||
4361 | ac->ac_b_ex.fe_len = 0; | 4362 | ac->ac_b_ex.fe_len = 0; |
4362 | ar->len = 0; | 4363 | ar->len = 0; |
4363 | ext4_mb_show_ac(ac); | 4364 | ext4_mb_show_ac(ac); |
4364 | } | 4365 | } |
4365 | |||
4366 | ext4_mb_release_context(ac); | 4366 | ext4_mb_release_context(ac); |
4367 | 4367 | out: | |
4368 | out2: | 4368 | if (ac) |
4369 | kmem_cache_free(ext4_ac_cachep, ac); | 4369 | kmem_cache_free(ext4_ac_cachep, ac); |
4370 | out1: | ||
4371 | if (inquota && ar->len < inquota) | 4370 | if (inquota && ar->len < inquota) |
4372 | dquot_free_block(ar->inode, inquota - ar->len); | 4371 | dquot_free_block(ar->inode, inquota - ar->len); |
4373 | out3: | ||
4374 | if (!ar->len) { | 4372 | if (!ar->len) { |
4375 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) | 4373 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) |
4376 | /* release all the reserved blocks if non delalloc */ | 4374 | /* release all the reserved blocks if non delalloc */ |
@@ -4402,6 +4400,7 @@ static noinline_for_stack int | |||
4402 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | 4400 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, |
4403 | struct ext4_free_data *new_entry) | 4401 | struct ext4_free_data *new_entry) |
4404 | { | 4402 | { |
4403 | ext4_group_t group = e4b->bd_group; | ||
4405 | ext4_grpblk_t block; | 4404 | ext4_grpblk_t block; |
4406 | struct ext4_free_data *entry; | 4405 | struct ext4_free_data *entry; |
4407 | struct ext4_group_info *db = e4b->bd_info; | 4406 | struct ext4_group_info *db = e4b->bd_info; |
@@ -4434,9 +4433,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4434 | else if (block >= (entry->start_blk + entry->count)) | 4433 | else if (block >= (entry->start_blk + entry->count)) |
4435 | n = &(*n)->rb_right; | 4434 | n = &(*n)->rb_right; |
4436 | else { | 4435 | else { |
4437 | ext4_grp_locked_error(sb, e4b->bd_group, __func__, | 4436 | ext4_grp_locked_error(sb, group, 0, |
4438 | "Double free of blocks %d (%d %d)", | 4437 | ext4_group_first_block_no(sb, group) + block, |
4439 | block, entry->start_blk, entry->count); | 4438 | "Block already on to-be-freed list"); |
4440 | return 0; | 4439 | return 0; |
4441 | } | 4440 | } |
4442 | } | 4441 | } |
@@ -4494,7 +4493,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4494 | struct super_block *sb = inode->i_sb; | 4493 | struct super_block *sb = inode->i_sb; |
4495 | struct ext4_allocation_context *ac = NULL; | 4494 | struct ext4_allocation_context *ac = NULL; |
4496 | struct ext4_group_desc *gdp; | 4495 | struct ext4_group_desc *gdp; |
4497 | struct ext4_super_block *es; | ||
4498 | unsigned long freed = 0; | 4496 | unsigned long freed = 0; |
4499 | unsigned int overflow; | 4497 | unsigned int overflow; |
4500 | ext4_grpblk_t bit; | 4498 | ext4_grpblk_t bit; |
@@ -4513,7 +4511,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4513 | } | 4511 | } |
4514 | 4512 | ||
4515 | sbi = EXT4_SB(sb); | 4513 | sbi = EXT4_SB(sb); |
4516 | es = EXT4_SB(sb)->s_es; | ||
4517 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && | 4514 | if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && |
4518 | !ext4_data_block_valid(sbi, block, count)) { | 4515 | !ext4_data_block_valid(sbi, block, count)) { |
4519 | ext4_error(sb, "Freeing blocks not in datazone - " | 4516 | ext4_error(sb, "Freeing blocks not in datazone - " |
@@ -4647,6 +4644,8 @@ do_more: | |||
4647 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4644 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4648 | mb_free_blocks(inode, &e4b, bit, count); | 4645 | mb_free_blocks(inode, &e4b, bit, count); |
4649 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4646 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4647 | if (test_opt(sb, DISCARD)) | ||
4648 | ext4_issue_discard(sb, block_group, bit, count); | ||
4650 | } | 4649 | } |
4651 | 4650 | ||
4652 | ret = ext4_free_blks_count(sb, gdp) + count; | 4651 | ret = ext4_free_blks_count(sb, gdp) + count; |
@@ -4680,7 +4679,7 @@ do_more: | |||
4680 | put_bh(bitmap_bh); | 4679 | put_bh(bitmap_bh); |
4681 | goto do_more; | 4680 | goto do_more; |
4682 | } | 4681 | } |
4683 | sb->s_dirt = 1; | 4682 | ext4_mark_super_dirty(sb); |
4684 | error_return: | 4683 | error_return: |
4685 | if (freed) | 4684 | if (freed) |
4686 | dquot_free_block(inode, freed); | 4685 | dquot_free_block(inode, freed); |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6f3a27ec30bf..1765c2c50a9b 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
376 | * We have the extent map build with the tmp inode. | 376 | * We have the extent map build with the tmp inode. |
377 | * Now copy the i_data across | 377 | * Now copy the i_data across |
378 | */ | 378 | */ |
379 | ei->i_flags |= EXT4_EXTENTS_FL; | 379 | ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS); |
380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); | 380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); |
381 | 381 | ||
382 | /* | 382 | /* |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 52abfa12762a..5f1ed9fc913c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
148 | */ | 148 | */ |
149 | static int | 149 | static int |
150 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, | 150 | mext_check_null_inode(struct inode *inode1, struct inode *inode2, |
151 | const char *function) | 151 | const char *function, unsigned int line) |
152 | { | 152 | { |
153 | int ret = 0; | 153 | int ret = 0; |
154 | 154 | ||
155 | if (inode1 == NULL) { | 155 | if (inode1 == NULL) { |
156 | __ext4_error(inode2->i_sb, function, | 156 | __ext4_error(inode2->i_sb, function, line, |
157 | "Both inodes should not be NULL: " | 157 | "Both inodes should not be NULL: " |
158 | "inode1 NULL inode2 %lu", inode2->i_ino); | 158 | "inode1 NULL inode2 %lu", inode2->i_ino); |
159 | ret = -EIO; | 159 | ret = -EIO; |
160 | } else if (inode2 == NULL) { | 160 | } else if (inode2 == NULL) { |
161 | __ext4_error(inode1->i_sb, function, | 161 | __ext4_error(inode1->i_sb, function, line, |
162 | "Both inodes should not be NULL: " | 162 | "Both inodes should not be NULL: " |
163 | "inode1 %lu inode2 NULL", inode1->i_ino); | 163 | "inode1 %lu inode2 NULL", inode1->i_ino); |
164 | ret = -EIO; | 164 | ret = -EIO; |
@@ -1084,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) | |||
1084 | 1084 | ||
1085 | BUG_ON(inode1 == NULL && inode2 == NULL); | 1085 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1086 | 1086 | ||
1087 | ret = mext_check_null_inode(inode1, inode2, __func__); | 1087 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); |
1088 | if (ret < 0) | 1088 | if (ret < 0) |
1089 | goto out; | 1089 | goto out; |
1090 | 1090 | ||
@@ -1121,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) | |||
1121 | 1121 | ||
1122 | BUG_ON(inode1 == NULL && inode2 == NULL); | 1122 | BUG_ON(inode1 == NULL && inode2 == NULL); |
1123 | 1123 | ||
1124 | ret = mext_check_null_inode(inode1, inode2, __func__); | 1124 | ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); |
1125 | if (ret < 0) | 1125 | if (ret < 0) |
1126 | goto out; | 1126 | goto out; |
1127 | 1127 | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a43e6617b351..314c0d3b3fa9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, | |||
179 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 179 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
180 | struct inode *inode); | 180 | struct inode *inode); |
181 | 181 | ||
182 | unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | ||
183 | { | ||
184 | unsigned len = le16_to_cpu(dlen); | ||
185 | |||
186 | if (len == EXT4_MAX_REC_LEN || len == 0) | ||
187 | return blocksize; | ||
188 | return (len & 65532) | ((len & 3) << 16); | ||
189 | } | ||
190 | |||
191 | __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | ||
192 | { | ||
193 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | ||
194 | BUG(); | ||
195 | if (len < 65536) | ||
196 | return cpu_to_le16(len); | ||
197 | if (len == blocksize) { | ||
198 | if (blocksize == 65536) | ||
199 | return cpu_to_le16(EXT4_MAX_REC_LEN); | ||
200 | else | ||
201 | return cpu_to_le16(0); | ||
202 | } | ||
203 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | ||
204 | } | ||
205 | |||
206 | /* | 182 | /* |
207 | * p is at least 6 bytes before the end of page | 183 | * p is at least 6 bytes before the end of page |
208 | */ | 184 | */ |
@@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
605 | dir->i_sb->s_blocksize - | 581 | dir->i_sb->s_blocksize - |
606 | EXT4_DIR_REC_LEN(0)); | 582 | EXT4_DIR_REC_LEN(0)); |
607 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { | 583 | for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { |
608 | if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, | 584 | if (!ext4_check_dir_entry(dir, de, bh, |
609 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 585 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
610 | +((char *)de - bh->b_data))) { | 586 | +((char *)de - bh->b_data))) { |
611 | /* On error, skip the f_pos to the next block. */ | 587 | /* On error, skip the f_pos to the next block. */ |
@@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh, | |||
844 | if ((char *) de + namelen <= dlimit && | 820 | if ((char *) de + namelen <= dlimit && |
845 | ext4_match (namelen, name, de)) { | 821 | ext4_match (namelen, name, de)) { |
846 | /* found a match - just to be sure, do a full check */ | 822 | /* found a match - just to be sure, do a full check */ |
847 | if (!ext4_check_dir_entry("ext4_find_entry", | 823 | if (!ext4_check_dir_entry(dir, de, bh, offset)) |
848 | dir, de, bh, offset)) | ||
849 | return -1; | 824 | return -1; |
850 | *res_dir = de; | 825 | *res_dir = de; |
851 | return 1; | 826 | return 1; |
@@ -1019,7 +994,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q | |||
1019 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) | 994 | int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) |
1020 | + ((char *) de - bh->b_data); | 995 | + ((char *) de - bh->b_data); |
1021 | 996 | ||
1022 | if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) { | 997 | if (!ext4_check_dir_entry(dir, de, bh, off)) { |
1023 | brelse(bh); | 998 | brelse(bh); |
1024 | *err = ERR_BAD_DX_DIR; | 999 | *err = ERR_BAD_DX_DIR; |
1025 | goto errout; | 1000 | goto errout; |
@@ -1088,7 +1063,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1088 | struct dentry *ext4_get_parent(struct dentry *child) | 1063 | struct dentry *ext4_get_parent(struct dentry *child) |
1089 | { | 1064 | { |
1090 | __u32 ino; | 1065 | __u32 ino; |
1091 | struct inode *inode; | ||
1092 | static const struct qstr dotdot = { | 1066 | static const struct qstr dotdot = { |
1093 | .name = "..", | 1067 | .name = "..", |
1094 | .len = 2, | 1068 | .len = 2, |
@@ -1097,7 +1071,6 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1097 | struct buffer_head *bh; | 1071 | struct buffer_head *bh; |
1098 | 1072 | ||
1099 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); | 1073 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
1100 | inode = NULL; | ||
1101 | if (!bh) | 1074 | if (!bh) |
1102 | return ERR_PTR(-ENOENT); | 1075 | return ERR_PTR(-ENOENT); |
1103 | ino = le32_to_cpu(de->inode); | 1076 | ino = le32_to_cpu(de->inode); |
@@ -1305,8 +1278,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
1305 | de = (struct ext4_dir_entry_2 *)bh->b_data; | 1278 | de = (struct ext4_dir_entry_2 *)bh->b_data; |
1306 | top = bh->b_data + blocksize - reclen; | 1279 | top = bh->b_data + blocksize - reclen; |
1307 | while ((char *) de <= top) { | 1280 | while ((char *) de <= top) { |
1308 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1281 | if (!ext4_check_dir_entry(dir, de, bh, offset)) |
1309 | bh, offset)) | ||
1310 | return -EIO; | 1282 | return -EIO; |
1311 | if (ext4_match(namelen, name, de)) | 1283 | if (ext4_match(namelen, name, de)) |
1312 | return -EEXIST; | 1284 | return -EEXIST; |
@@ -1673,7 +1645,7 @@ static int ext4_delete_entry(handle_t *handle, | |||
1673 | pde = NULL; | 1645 | pde = NULL; |
1674 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1646 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1675 | while (i < bh->b_size) { | 1647 | while (i < bh->b_size) { |
1676 | if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) | 1648 | if (!ext4_check_dir_entry(dir, de, bh, i)) |
1677 | return -EIO; | 1649 | return -EIO; |
1678 | if (de == de_del) { | 1650 | if (de == de_del) { |
1679 | BUFFER_TRACE(bh, "get_write_access"); | 1651 | BUFFER_TRACE(bh, "get_write_access"); |
@@ -1956,7 +1928,7 @@ static int empty_dir(struct inode *inode) | |||
1956 | } | 1928 | } |
1957 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 1929 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
1958 | } | 1930 | } |
1959 | if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { | 1931 | if (!ext4_check_dir_entry(inode, de, bh, offset)) { |
1960 | de = (struct ext4_dir_entry_2 *)(bh->b_data + | 1932 | de = (struct ext4_dir_entry_2 *)(bh->b_data + |
1961 | sb->s_blocksize); | 1933 | sb->s_blocksize); |
1962 | offset = (offset | (sb->s_blocksize - 1)) + 1; | 1934 | offset = (offset | (sb->s_blocksize - 1)) + 1; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6df797eb9aeb..ca5c8aa00a2f 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -921,8 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
921 | &sbi->s_flex_groups[flex_group].free_inodes); | 921 | &sbi->s_flex_groups[flex_group].free_inodes); |
922 | } | 922 | } |
923 | 923 | ||
924 | ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); | 924 | ext4_handle_dirty_super(handle, sb); |
925 | sb->s_dirt = 1; | ||
926 | 925 | ||
927 | exit_journal: | 926 | exit_journal: |
928 | mutex_unlock(&sbi->s_resize_lock); | 927 | mutex_unlock(&sbi->s_resize_lock); |
@@ -953,7 +952,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
953 | ext4_fsblk_t n_blocks_count) | 952 | ext4_fsblk_t n_blocks_count) |
954 | { | 953 | { |
955 | ext4_fsblk_t o_blocks_count; | 954 | ext4_fsblk_t o_blocks_count; |
956 | ext4_group_t o_groups_count; | ||
957 | ext4_grpblk_t last; | 955 | ext4_grpblk_t last; |
958 | ext4_grpblk_t add; | 956 | ext4_grpblk_t add; |
959 | struct buffer_head *bh; | 957 | struct buffer_head *bh; |
@@ -965,7 +963,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
965 | * yet: we're going to revalidate es->s_blocks_count after | 963 | * yet: we're going to revalidate es->s_blocks_count after |
966 | * taking the s_resize_lock below. */ | 964 | * taking the s_resize_lock below. */ |
967 | o_blocks_count = ext4_blocks_count(es); | 965 | o_blocks_count = ext4_blocks_count(es); |
968 | o_groups_count = EXT4_SB(sb)->s_groups_count; | ||
969 | 966 | ||
970 | if (test_opt(sb, DEBUG)) | 967 | if (test_opt(sb, DEBUG)) |
971 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", | 968 | printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", |
@@ -1045,13 +1042,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1045 | goto exit_put; | 1042 | goto exit_put; |
1046 | } | 1043 | } |
1047 | ext4_blocks_count_set(es, o_blocks_count + add); | 1044 | ext4_blocks_count_set(es, o_blocks_count + add); |
1048 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | ||
1049 | sb->s_dirt = 1; | ||
1050 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); | 1045 | mutex_unlock(&EXT4_SB(sb)->s_resize_lock); |
1051 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1046 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1052 | o_blocks_count + add); | 1047 | o_blocks_count + add); |
1053 | /* We add the blocks to the bitmap and set the group need init bit */ | 1048 | /* We add the blocks to the bitmap and set the group need init bit */ |
1054 | ext4_add_groupblocks(handle, sb, o_blocks_count, add); | 1049 | ext4_add_groupblocks(handle, sb, o_blocks_count, add); |
1050 | ext4_handle_dirty_super(handle, sb); | ||
1055 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | 1051 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, |
1056 | o_blocks_count + add); | 1052 | o_blocks_count + add); |
1057 | if ((err = ext4_journal_stop(handle))) | 1053 | if ((err = ext4_journal_stop(handle))) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4e8983a9811b..26147746c272 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -241,14 +241,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
241 | if (sb->s_flags & MS_RDONLY) | 241 | if (sb->s_flags & MS_RDONLY) |
242 | return ERR_PTR(-EROFS); | 242 | return ERR_PTR(-EROFS); |
243 | 243 | ||
244 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | 244 | vfs_check_frozen(sb, SB_FREEZE_TRANS); |
245 | /* Special case here: if the journal has aborted behind our | 245 | /* Special case here: if the journal has aborted behind our |
246 | * backs (eg. EIO in the commit thread), then we still need to | 246 | * backs (eg. EIO in the commit thread), then we still need to |
247 | * take the FS itself readonly cleanly. */ | 247 | * take the FS itself readonly cleanly. */ |
248 | journal = EXT4_SB(sb)->s_journal; | 248 | journal = EXT4_SB(sb)->s_journal; |
249 | if (journal) { | 249 | if (journal) { |
250 | if (is_journal_aborted(journal)) { | 250 | if (is_journal_aborted(journal)) { |
251 | ext4_abort(sb, __func__, "Detected aborted journal"); | 251 | ext4_abort(sb, "Detected aborted journal"); |
252 | return ERR_PTR(-EROFS); | 252 | return ERR_PTR(-EROFS); |
253 | } | 253 | } |
254 | return jbd2_journal_start(journal, nblocks); | 254 | return jbd2_journal_start(journal, nblocks); |
@@ -262,7 +262,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
262 | * that sync() will call the filesystem's write_super callback if | 262 | * that sync() will call the filesystem's write_super callback if |
263 | * appropriate. | 263 | * appropriate. |
264 | */ | 264 | */ |
265 | int __ext4_journal_stop(const char *where, handle_t *handle) | 265 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) |
266 | { | 266 | { |
267 | struct super_block *sb; | 267 | struct super_block *sb; |
268 | int err; | 268 | int err; |
@@ -279,12 +279,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle) | |||
279 | if (!err) | 279 | if (!err) |
280 | err = rc; | 280 | err = rc; |
281 | if (err) | 281 | if (err) |
282 | __ext4_std_error(sb, where, err); | 282 | __ext4_std_error(sb, where, line, err); |
283 | return err; | 283 | return err; |
284 | } | 284 | } |
285 | 285 | ||
286 | void ext4_journal_abort_handle(const char *caller, const char *err_fn, | 286 | void ext4_journal_abort_handle(const char *caller, unsigned int line, |
287 | struct buffer_head *bh, handle_t *handle, int err) | 287 | const char *err_fn, struct buffer_head *bh, |
288 | handle_t *handle, int err) | ||
288 | { | 289 | { |
289 | char nbuf[16]; | 290 | char nbuf[16]; |
290 | const char *errstr = ext4_decode_error(NULL, err, nbuf); | 291 | const char *errstr = ext4_decode_error(NULL, err, nbuf); |
@@ -300,12 +301,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, | |||
300 | if (is_handle_aborted(handle)) | 301 | if (is_handle_aborted(handle)) |
301 | return; | 302 | return; |
302 | 303 | ||
303 | printk(KERN_ERR "%s: aborting transaction: %s in %s\n", | 304 | printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", |
304 | caller, errstr, err_fn); | 305 | caller, line, errstr, err_fn); |
305 | 306 | ||
306 | jbd2_journal_abort_handle(handle); | 307 | jbd2_journal_abort_handle(handle); |
307 | } | 308 | } |
308 | 309 | ||
310 | static void __save_error_info(struct super_block *sb, const char *func, | ||
311 | unsigned int line) | ||
312 | { | ||
313 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
314 | |||
315 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
316 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
317 | es->s_last_error_time = cpu_to_le32(get_seconds()); | ||
318 | strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); | ||
319 | es->s_last_error_line = cpu_to_le32(line); | ||
320 | if (!es->s_first_error_time) { | ||
321 | es->s_first_error_time = es->s_last_error_time; | ||
322 | strncpy(es->s_first_error_func, func, | ||
323 | sizeof(es->s_first_error_func)); | ||
324 | es->s_first_error_line = cpu_to_le32(line); | ||
325 | es->s_first_error_ino = es->s_last_error_ino; | ||
326 | es->s_first_error_block = es->s_last_error_block; | ||
327 | } | ||
328 | /* | ||
329 | * Start the daily error reporting function if it hasn't been | ||
330 | * started already | ||
331 | */ | ||
332 | if (!es->s_error_count) | ||
333 | mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); | ||
334 | es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); | ||
335 | } | ||
336 | |||
337 | static void save_error_info(struct super_block *sb, const char *func, | ||
338 | unsigned int line) | ||
339 | { | ||
340 | __save_error_info(sb, func, line); | ||
341 | ext4_commit_super(sb, 1); | ||
342 | } | ||
343 | |||
344 | |||
309 | /* Deal with the reporting of failure conditions on a filesystem such as | 345 | /* Deal with the reporting of failure conditions on a filesystem such as |
310 | * inconsistencies detected or read IO failures. | 346 | * inconsistencies detected or read IO failures. |
311 | * | 347 | * |
@@ -323,11 +359,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, | |||
323 | 359 | ||
324 | static void ext4_handle_error(struct super_block *sb) | 360 | static void ext4_handle_error(struct super_block *sb) |
325 | { | 361 | { |
326 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
327 | |||
328 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
329 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
330 | |||
331 | if (sb->s_flags & MS_RDONLY) | 362 | if (sb->s_flags & MS_RDONLY) |
332 | return; | 363 | return; |
333 | 364 | ||
@@ -342,19 +373,19 @@ static void ext4_handle_error(struct super_block *sb) | |||
342 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 373 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
343 | sb->s_flags |= MS_RDONLY; | 374 | sb->s_flags |= MS_RDONLY; |
344 | } | 375 | } |
345 | ext4_commit_super(sb, 1); | ||
346 | if (test_opt(sb, ERRORS_PANIC)) | 376 | if (test_opt(sb, ERRORS_PANIC)) |
347 | panic("EXT4-fs (device %s): panic forced after error\n", | 377 | panic("EXT4-fs (device %s): panic forced after error\n", |
348 | sb->s_id); | 378 | sb->s_id); |
349 | } | 379 | } |
350 | 380 | ||
351 | void __ext4_error(struct super_block *sb, const char *function, | 381 | void __ext4_error(struct super_block *sb, const char *function, |
352 | const char *fmt, ...) | 382 | unsigned int line, const char *fmt, ...) |
353 | { | 383 | { |
354 | va_list args; | 384 | va_list args; |
355 | 385 | ||
356 | va_start(args, fmt); | 386 | va_start(args, fmt); |
357 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 387 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ", |
388 | sb->s_id, function, line, current->comm); | ||
358 | vprintk(fmt, args); | 389 | vprintk(fmt, args); |
359 | printk("\n"); | 390 | printk("\n"); |
360 | va_end(args); | 391 | va_end(args); |
@@ -362,14 +393,22 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
362 | ext4_handle_error(sb); | 393 | ext4_handle_error(sb); |
363 | } | 394 | } |
364 | 395 | ||
365 | void ext4_error_inode(const char *function, struct inode *inode, | 396 | void ext4_error_inode(struct inode *inode, const char *function, |
397 | unsigned int line, ext4_fsblk_t block, | ||
366 | const char *fmt, ...) | 398 | const char *fmt, ...) |
367 | { | 399 | { |
368 | va_list args; | 400 | va_list args; |
401 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | ||
369 | 402 | ||
403 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | ||
404 | es->s_last_error_block = cpu_to_le64(block); | ||
405 | save_error_info(inode->i_sb, function, line); | ||
370 | va_start(args, fmt); | 406 | va_start(args, fmt); |
371 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", | 407 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", |
372 | inode->i_sb->s_id, function, inode->i_ino, current->comm); | 408 | inode->i_sb->s_id, function, line, inode->i_ino); |
409 | if (block) | ||
410 | printk("block %llu: ", block); | ||
411 | printk("comm %s: ", current->comm); | ||
373 | vprintk(fmt, args); | 412 | vprintk(fmt, args); |
374 | printk("\n"); | 413 | printk("\n"); |
375 | va_end(args); | 414 | va_end(args); |
@@ -377,20 +416,26 @@ void ext4_error_inode(const char *function, struct inode *inode, | |||
377 | ext4_handle_error(inode->i_sb); | 416 | ext4_handle_error(inode->i_sb); |
378 | } | 417 | } |
379 | 418 | ||
380 | void ext4_error_file(const char *function, struct file *file, | 419 | void ext4_error_file(struct file *file, const char *function, |
381 | const char *fmt, ...) | 420 | unsigned int line, const char *fmt, ...) |
382 | { | 421 | { |
383 | va_list args; | 422 | va_list args; |
423 | struct ext4_super_block *es; | ||
384 | struct inode *inode = file->f_dentry->d_inode; | 424 | struct inode *inode = file->f_dentry->d_inode; |
385 | char pathname[80], *path; | 425 | char pathname[80], *path; |
386 | 426 | ||
427 | es = EXT4_SB(inode->i_sb)->s_es; | ||
428 | es->s_last_error_ino = cpu_to_le32(inode->i_ino); | ||
429 | save_error_info(inode->i_sb, function, line); | ||
387 | va_start(args, fmt); | 430 | va_start(args, fmt); |
388 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); | 431 | path = d_path(&(file->f_path), pathname, sizeof(pathname)); |
389 | if (!path) | 432 | if (!path) |
390 | path = "(unknown)"; | 433 | path = "(unknown)"; |
391 | printk(KERN_CRIT | 434 | printk(KERN_CRIT |
392 | "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", | 435 | "EXT4-fs error (device %s): %s:%d: inode #%lu " |
393 | inode->i_sb->s_id, function, inode->i_ino, current->comm, path); | 436 | "(comm %s path %s): ", |
437 | inode->i_sb->s_id, function, line, inode->i_ino, | ||
438 | current->comm, path); | ||
394 | vprintk(fmt, args); | 439 | vprintk(fmt, args); |
395 | printk("\n"); | 440 | printk("\n"); |
396 | va_end(args); | 441 | va_end(args); |
@@ -435,7 +480,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, | |||
435 | /* __ext4_std_error decodes expected errors from journaling functions | 480 | /* __ext4_std_error decodes expected errors from journaling functions |
436 | * automatically and invokes the appropriate error response. */ | 481 | * automatically and invokes the appropriate error response. */ |
437 | 482 | ||
438 | void __ext4_std_error(struct super_block *sb, const char *function, int errno) | 483 | void __ext4_std_error(struct super_block *sb, const char *function, |
484 | unsigned int line, int errno) | ||
439 | { | 485 | { |
440 | char nbuf[16]; | 486 | char nbuf[16]; |
441 | const char *errstr; | 487 | const char *errstr; |
@@ -448,8 +494,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno) | |||
448 | return; | 494 | return; |
449 | 495 | ||
450 | errstr = ext4_decode_error(sb, errno, nbuf); | 496 | errstr = ext4_decode_error(sb, errno, nbuf); |
451 | printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", | 497 | printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", |
452 | sb->s_id, function, errstr); | 498 | sb->s_id, function, line, errstr); |
499 | save_error_info(sb, function, line); | ||
453 | 500 | ||
454 | ext4_handle_error(sb); | 501 | ext4_handle_error(sb); |
455 | } | 502 | } |
@@ -464,29 +511,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno) | |||
464 | * case we take the easy way out and panic immediately. | 511 | * case we take the easy way out and panic immediately. |
465 | */ | 512 | */ |
466 | 513 | ||
467 | void ext4_abort(struct super_block *sb, const char *function, | 514 | void __ext4_abort(struct super_block *sb, const char *function, |
468 | const char *fmt, ...) | 515 | unsigned int line, const char *fmt, ...) |
469 | { | 516 | { |
470 | va_list args; | 517 | va_list args; |
471 | 518 | ||
519 | save_error_info(sb, function, line); | ||
472 | va_start(args, fmt); | 520 | va_start(args, fmt); |
473 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 521 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, |
522 | function, line); | ||
474 | vprintk(fmt, args); | 523 | vprintk(fmt, args); |
475 | printk("\n"); | 524 | printk("\n"); |
476 | va_end(args); | 525 | va_end(args); |
477 | 526 | ||
527 | if ((sb->s_flags & MS_RDONLY) == 0) { | ||
528 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | ||
529 | sb->s_flags |= MS_RDONLY; | ||
530 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | ||
531 | if (EXT4_SB(sb)->s_journal) | ||
532 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | ||
533 | save_error_info(sb, function, line); | ||
534 | } | ||
478 | if (test_opt(sb, ERRORS_PANIC)) | 535 | if (test_opt(sb, ERRORS_PANIC)) |
479 | panic("EXT4-fs panic from previous error\n"); | 536 | panic("EXT4-fs panic from previous error\n"); |
480 | |||
481 | if (sb->s_flags & MS_RDONLY) | ||
482 | return; | ||
483 | |||
484 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | ||
485 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
486 | sb->s_flags |= MS_RDONLY; | ||
487 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | ||
488 | if (EXT4_SB(sb)->s_journal) | ||
489 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | ||
490 | } | 537 | } |
491 | 538 | ||
492 | void ext4_msg (struct super_block * sb, const char *prefix, | 539 | void ext4_msg (struct super_block * sb, const char *prefix, |
@@ -502,38 +549,47 @@ void ext4_msg (struct super_block * sb, const char *prefix, | |||
502 | } | 549 | } |
503 | 550 | ||
504 | void __ext4_warning(struct super_block *sb, const char *function, | 551 | void __ext4_warning(struct super_block *sb, const char *function, |
505 | const char *fmt, ...) | 552 | unsigned int line, const char *fmt, ...) |
506 | { | 553 | { |
507 | va_list args; | 554 | va_list args; |
508 | 555 | ||
509 | va_start(args, fmt); | 556 | va_start(args, fmt); |
510 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", | 557 | printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ", |
511 | sb->s_id, function); | 558 | sb->s_id, function, line); |
512 | vprintk(fmt, args); | 559 | vprintk(fmt, args); |
513 | printk("\n"); | 560 | printk("\n"); |
514 | va_end(args); | 561 | va_end(args); |
515 | } | 562 | } |
516 | 563 | ||
517 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, | 564 | void __ext4_grp_locked_error(const char *function, unsigned int line, |
518 | const char *function, const char *fmt, ...) | 565 | struct super_block *sb, ext4_group_t grp, |
566 | unsigned long ino, ext4_fsblk_t block, | ||
567 | const char *fmt, ...) | ||
519 | __releases(bitlock) | 568 | __releases(bitlock) |
520 | __acquires(bitlock) | 569 | __acquires(bitlock) |
521 | { | 570 | { |
522 | va_list args; | 571 | va_list args; |
523 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 572 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
524 | 573 | ||
574 | es->s_last_error_ino = cpu_to_le32(ino); | ||
575 | es->s_last_error_block = cpu_to_le64(block); | ||
576 | __save_error_info(sb, function, line); | ||
525 | va_start(args, fmt); | 577 | va_start(args, fmt); |
526 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | 578 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", |
579 | sb->s_id, function, line, grp); | ||
580 | if (ino) | ||
581 | printk("inode %lu: ", ino); | ||
582 | if (block) | ||
583 | printk("block %llu:", (unsigned long long) block); | ||
527 | vprintk(fmt, args); | 584 | vprintk(fmt, args); |
528 | printk("\n"); | 585 | printk("\n"); |
529 | va_end(args); | 586 | va_end(args); |
530 | 587 | ||
531 | if (test_opt(sb, ERRORS_CONT)) { | 588 | if (test_opt(sb, ERRORS_CONT)) { |
532 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
533 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
534 | ext4_commit_super(sb, 0); | 589 | ext4_commit_super(sb, 0); |
535 | return; | 590 | return; |
536 | } | 591 | } |
592 | |||
537 | ext4_unlock_group(sb, grp); | 593 | ext4_unlock_group(sb, grp); |
538 | ext4_handle_error(sb); | 594 | ext4_handle_error(sb); |
539 | /* | 595 | /* |
@@ -660,8 +716,7 @@ static void ext4_put_super(struct super_block *sb) | |||
660 | err = jbd2_journal_destroy(sbi->s_journal); | 716 | err = jbd2_journal_destroy(sbi->s_journal); |
661 | sbi->s_journal = NULL; | 717 | sbi->s_journal = NULL; |
662 | if (err < 0) | 718 | if (err < 0) |
663 | ext4_abort(sb, __func__, | 719 | ext4_abort(sb, "Couldn't clean up the journal"); |
664 | "Couldn't clean up the journal"); | ||
665 | } | 720 | } |
666 | 721 | ||
667 | ext4_release_system_zone(sb); | 722 | ext4_release_system_zone(sb); |
@@ -813,8 +868,10 @@ static void destroy_inodecache(void) | |||
813 | kmem_cache_destroy(ext4_inode_cachep); | 868 | kmem_cache_destroy(ext4_inode_cachep); |
814 | } | 869 | } |
815 | 870 | ||
816 | static void ext4_clear_inode(struct inode *inode) | 871 | void ext4_clear_inode(struct inode *inode) |
817 | { | 872 | { |
873 | invalidate_inode_buffers(inode); | ||
874 | end_writeback(inode); | ||
818 | dquot_drop(inode); | 875 | dquot_drop(inode); |
819 | ext4_discard_preallocations(inode); | 876 | ext4_discard_preallocations(inode); |
820 | if (EXT4_JOURNAL(inode)) | 877 | if (EXT4_JOURNAL(inode)) |
@@ -946,14 +1003,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
946 | seq_puts(seq, ",journal_async_commit"); | 1003 | seq_puts(seq, ",journal_async_commit"); |
947 | else if (test_opt(sb, JOURNAL_CHECKSUM)) | 1004 | else if (test_opt(sb, JOURNAL_CHECKSUM)) |
948 | seq_puts(seq, ",journal_checksum"); | 1005 | seq_puts(seq, ",journal_checksum"); |
949 | if (test_opt(sb, NOBH)) | ||
950 | seq_puts(seq, ",nobh"); | ||
951 | if (test_opt(sb, I_VERSION)) | 1006 | if (test_opt(sb, I_VERSION)) |
952 | seq_puts(seq, ",i_version"); | 1007 | seq_puts(seq, ",i_version"); |
953 | if (!test_opt(sb, DELALLOC)) | 1008 | if (!test_opt(sb, DELALLOC) && |
1009 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | ||
954 | seq_puts(seq, ",nodelalloc"); | 1010 | seq_puts(seq, ",nodelalloc"); |
955 | 1011 | ||
956 | |||
957 | if (sbi->s_stripe) | 1012 | if (sbi->s_stripe) |
958 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1013 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
959 | /* | 1014 | /* |
@@ -977,7 +1032,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
977 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) | 1032 | if (test_opt(sb, NO_AUTO_DA_ALLOC)) |
978 | seq_puts(seq, ",noauto_da_alloc"); | 1033 | seq_puts(seq, ",noauto_da_alloc"); |
979 | 1034 | ||
980 | if (test_opt(sb, DISCARD)) | 1035 | if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) |
981 | seq_puts(seq, ",discard"); | 1036 | seq_puts(seq, ",discard"); |
982 | 1037 | ||
983 | if (test_opt(sb, NOLOAD)) | 1038 | if (test_opt(sb, NOLOAD)) |
@@ -986,6 +1041,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
986 | if (test_opt(sb, DIOREAD_NOLOCK)) | 1041 | if (test_opt(sb, DIOREAD_NOLOCK)) |
987 | seq_puts(seq, ",dioread_nolock"); | 1042 | seq_puts(seq, ",dioread_nolock"); |
988 | 1043 | ||
1044 | if (test_opt(sb, BLOCK_VALIDITY) && | ||
1045 | !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) | ||
1046 | seq_puts(seq, ",block_validity"); | ||
1047 | |||
989 | ext4_show_quota_options(seq, sb); | 1048 | ext4_show_quota_options(seq, sb); |
990 | 1049 | ||
991 | return 0; | 1050 | return 0; |
@@ -1065,6 +1124,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); | |||
1065 | static int ext4_write_info(struct super_block *sb, int type); | 1124 | static int ext4_write_info(struct super_block *sb, int type); |
1066 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, | 1125 | static int ext4_quota_on(struct super_block *sb, int type, int format_id, |
1067 | char *path); | 1126 | char *path); |
1127 | static int ext4_quota_off(struct super_block *sb, int type); | ||
1068 | static int ext4_quota_on_mount(struct super_block *sb, int type); | 1128 | static int ext4_quota_on_mount(struct super_block *sb, int type); |
1069 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 1129 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
1070 | size_t len, loff_t off); | 1130 | size_t len, loff_t off); |
@@ -1086,7 +1146,7 @@ static const struct dquot_operations ext4_quota_operations = { | |||
1086 | 1146 | ||
1087 | static const struct quotactl_ops ext4_qctl_operations = { | 1147 | static const struct quotactl_ops ext4_qctl_operations = { |
1088 | .quota_on = ext4_quota_on, | 1148 | .quota_on = ext4_quota_on, |
1089 | .quota_off = dquot_quota_off, | 1149 | .quota_off = ext4_quota_off, |
1090 | .quota_sync = dquot_quota_sync, | 1150 | .quota_sync = dquot_quota_sync, |
1091 | .get_info = dquot_get_dqinfo, | 1151 | .get_info = dquot_get_dqinfo, |
1092 | .set_info = dquot_set_dqinfo, | 1152 | .set_info = dquot_set_dqinfo, |
@@ -1100,14 +1160,13 @@ static const struct super_operations ext4_sops = { | |||
1100 | .destroy_inode = ext4_destroy_inode, | 1160 | .destroy_inode = ext4_destroy_inode, |
1101 | .write_inode = ext4_write_inode, | 1161 | .write_inode = ext4_write_inode, |
1102 | .dirty_inode = ext4_dirty_inode, | 1162 | .dirty_inode = ext4_dirty_inode, |
1103 | .delete_inode = ext4_delete_inode, | 1163 | .evict_inode = ext4_evict_inode, |
1104 | .put_super = ext4_put_super, | 1164 | .put_super = ext4_put_super, |
1105 | .sync_fs = ext4_sync_fs, | 1165 | .sync_fs = ext4_sync_fs, |
1106 | .freeze_fs = ext4_freeze, | 1166 | .freeze_fs = ext4_freeze, |
1107 | .unfreeze_fs = ext4_unfreeze, | 1167 | .unfreeze_fs = ext4_unfreeze, |
1108 | .statfs = ext4_statfs, | 1168 | .statfs = ext4_statfs, |
1109 | .remount_fs = ext4_remount, | 1169 | .remount_fs = ext4_remount, |
1110 | .clear_inode = ext4_clear_inode, | ||
1111 | .show_options = ext4_show_options, | 1170 | .show_options = ext4_show_options, |
1112 | #ifdef CONFIG_QUOTA | 1171 | #ifdef CONFIG_QUOTA |
1113 | .quota_read = ext4_quota_read, | 1172 | .quota_read = ext4_quota_read, |
@@ -1121,12 +1180,11 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1121 | .destroy_inode = ext4_destroy_inode, | 1180 | .destroy_inode = ext4_destroy_inode, |
1122 | .write_inode = ext4_write_inode, | 1181 | .write_inode = ext4_write_inode, |
1123 | .dirty_inode = ext4_dirty_inode, | 1182 | .dirty_inode = ext4_dirty_inode, |
1124 | .delete_inode = ext4_delete_inode, | 1183 | .evict_inode = ext4_evict_inode, |
1125 | .write_super = ext4_write_super, | 1184 | .write_super = ext4_write_super, |
1126 | .put_super = ext4_put_super, | 1185 | .put_super = ext4_put_super, |
1127 | .statfs = ext4_statfs, | 1186 | .statfs = ext4_statfs, |
1128 | .remount_fs = ext4_remount, | 1187 | .remount_fs = ext4_remount, |
1129 | .clear_inode = ext4_clear_inode, | ||
1130 | .show_options = ext4_show_options, | 1188 | .show_options = ext4_show_options, |
1131 | #ifdef CONFIG_QUOTA | 1189 | #ifdef CONFIG_QUOTA |
1132 | .quota_read = ext4_quota_read, | 1190 | .quota_read = ext4_quota_read, |
@@ -1624,10 +1682,12 @@ set_qf_format: | |||
1624 | *n_blocks_count = option; | 1682 | *n_blocks_count = option; |
1625 | break; | 1683 | break; |
1626 | case Opt_nobh: | 1684 | case Opt_nobh: |
1627 | set_opt(sbi->s_mount_opt, NOBH); | 1685 | ext4_msg(sb, KERN_WARNING, |
1686 | "Ignoring deprecated nobh option"); | ||
1628 | break; | 1687 | break; |
1629 | case Opt_bh: | 1688 | case Opt_bh: |
1630 | clear_opt(sbi->s_mount_opt, NOBH); | 1689 | ext4_msg(sb, KERN_WARNING, |
1690 | "Ignoring deprecated bh option"); | ||
1631 | break; | 1691 | break; |
1632 | case Opt_i_version: | 1692 | case Opt_i_version: |
1633 | set_opt(sbi->s_mount_opt, I_VERSION); | 1693 | set_opt(sbi->s_mount_opt, I_VERSION); |
@@ -2249,6 +2309,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a, | |||
2249 | { | 2309 | { |
2250 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2310 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2251 | 2311 | ||
2312 | if (!sb->s_bdev->bd_part) | ||
2313 | return snprintf(buf, PAGE_SIZE, "0\n"); | ||
2252 | return snprintf(buf, PAGE_SIZE, "%lu\n", | 2314 | return snprintf(buf, PAGE_SIZE, "%lu\n", |
2253 | (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2315 | (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
2254 | sbi->s_sectors_written_start) >> 1); | 2316 | sbi->s_sectors_written_start) >> 1); |
@@ -2259,6 +2321,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | |||
2259 | { | 2321 | { |
2260 | struct super_block *sb = sbi->s_buddy_cache->i_sb; | 2322 | struct super_block *sb = sbi->s_buddy_cache->i_sb; |
2261 | 2323 | ||
2324 | if (!sb->s_bdev->bd_part) | ||
2325 | return snprintf(buf, PAGE_SIZE, "0\n"); | ||
2262 | return snprintf(buf, PAGE_SIZE, "%llu\n", | 2326 | return snprintf(buf, PAGE_SIZE, "%llu\n", |
2263 | (unsigned long long)(sbi->s_kbytes_written + | 2327 | (unsigned long long)(sbi->s_kbytes_written + |
2264 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 2328 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
@@ -2431,6 +2495,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) | |||
2431 | return 1; | 2495 | return 1; |
2432 | } | 2496 | } |
2433 | 2497 | ||
2498 | /* | ||
2499 | * This function is called once a day if we have errors logged | ||
2500 | * on the file system | ||
2501 | */ | ||
2502 | static void print_daily_error_info(unsigned long arg) | ||
2503 | { | ||
2504 | struct super_block *sb = (struct super_block *) arg; | ||
2505 | struct ext4_sb_info *sbi; | ||
2506 | struct ext4_super_block *es; | ||
2507 | |||
2508 | sbi = EXT4_SB(sb); | ||
2509 | es = sbi->s_es; | ||
2510 | |||
2511 | if (es->s_error_count) | ||
2512 | ext4_msg(sb, KERN_NOTICE, "error count: %u", | ||
2513 | le32_to_cpu(es->s_error_count)); | ||
2514 | if (es->s_first_error_time) { | ||
2515 | printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", | ||
2516 | sb->s_id, le32_to_cpu(es->s_first_error_time), | ||
2517 | (int) sizeof(es->s_first_error_func), | ||
2518 | es->s_first_error_func, | ||
2519 | le32_to_cpu(es->s_first_error_line)); | ||
2520 | if (es->s_first_error_ino) | ||
2521 | printk(": inode %u", | ||
2522 | le32_to_cpu(es->s_first_error_ino)); | ||
2523 | if (es->s_first_error_block) | ||
2524 | printk(": block %llu", (unsigned long long) | ||
2525 | le64_to_cpu(es->s_first_error_block)); | ||
2526 | printk("\n"); | ||
2527 | } | ||
2528 | if (es->s_last_error_time) { | ||
2529 | printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", | ||
2530 | sb->s_id, le32_to_cpu(es->s_last_error_time), | ||
2531 | (int) sizeof(es->s_last_error_func), | ||
2532 | es->s_last_error_func, | ||
2533 | le32_to_cpu(es->s_last_error_line)); | ||
2534 | if (es->s_last_error_ino) | ||
2535 | printk(": inode %u", | ||
2536 | le32_to_cpu(es->s_last_error_ino)); | ||
2537 | if (es->s_last_error_block) | ||
2538 | printk(": block %llu", (unsigned long long) | ||
2539 | le64_to_cpu(es->s_last_error_block)); | ||
2540 | printk("\n"); | ||
2541 | } | ||
2542 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ | ||
2543 | } | ||
2544 | |||
2434 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) | 2545 | static int ext4_fill_super(struct super_block *sb, void *data, int silent) |
2435 | __releases(kernel_lock) | 2546 | __releases(kernel_lock) |
2436 | __acquires(kernel_lock) | 2547 | __acquires(kernel_lock) |
@@ -2448,7 +2559,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2448 | struct inode *root; | 2559 | struct inode *root; |
2449 | char *cp; | 2560 | char *cp; |
2450 | const char *descr; | 2561 | const char *descr; |
2451 | int ret = -EINVAL; | 2562 | int ret = -ENOMEM; |
2452 | int blocksize; | 2563 | int blocksize; |
2453 | unsigned int db_count; | 2564 | unsigned int db_count; |
2454 | unsigned int i; | 2565 | unsigned int i; |
@@ -2459,13 +2570,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2459 | 2570 | ||
2460 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 2571 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
2461 | if (!sbi) | 2572 | if (!sbi) |
2462 | return -ENOMEM; | 2573 | goto out_free_orig; |
2463 | 2574 | ||
2464 | sbi->s_blockgroup_lock = | 2575 | sbi->s_blockgroup_lock = |
2465 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); | 2576 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); |
2466 | if (!sbi->s_blockgroup_lock) { | 2577 | if (!sbi->s_blockgroup_lock) { |
2467 | kfree(sbi); | 2578 | kfree(sbi); |
2468 | return -ENOMEM; | 2579 | goto out_free_orig; |
2469 | } | 2580 | } |
2470 | sb->s_fs_info = sbi; | 2581 | sb->s_fs_info = sbi; |
2471 | sbi->s_mount_opt = 0; | 2582 | sbi->s_mount_opt = 0; |
@@ -2473,8 +2584,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2473 | sbi->s_resgid = EXT4_DEF_RESGID; | 2584 | sbi->s_resgid = EXT4_DEF_RESGID; |
2474 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | 2585 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; |
2475 | sbi->s_sb_block = sb_block; | 2586 | sbi->s_sb_block = sb_block; |
2476 | sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, | 2587 | if (sb->s_bdev->bd_part) |
2477 | sectors[1]); | 2588 | sbi->s_sectors_written_start = |
2589 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); | ||
2478 | 2590 | ||
2479 | unlock_kernel(); | 2591 | unlock_kernel(); |
2480 | 2592 | ||
@@ -2482,6 +2594,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2482 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | 2594 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) |
2483 | *cp = '!'; | 2595 | *cp = '!'; |
2484 | 2596 | ||
2597 | ret = -EINVAL; | ||
2485 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 2598 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
2486 | if (!blocksize) { | 2599 | if (!blocksize) { |
2487 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); | 2600 | ext4_msg(sb, KERN_ERR, "unable to set blocksize"); |
@@ -2546,6 +2659,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2546 | set_opt(sbi->s_mount_opt, ERRORS_CONT); | 2659 | set_opt(sbi->s_mount_opt, ERRORS_CONT); |
2547 | else | 2660 | else |
2548 | set_opt(sbi->s_mount_opt, ERRORS_RO); | 2661 | set_opt(sbi->s_mount_opt, ERRORS_RO); |
2662 | if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) | ||
2663 | set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); | ||
2664 | if (def_mount_opts & EXT4_DEFM_DISCARD) | ||
2665 | set_opt(sbi->s_mount_opt, DISCARD); | ||
2549 | 2666 | ||
2550 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); | 2667 | sbi->s_resuid = le16_to_cpu(es->s_def_resuid); |
2551 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); | 2668 | sbi->s_resgid = le16_to_cpu(es->s_def_resgid); |
@@ -2553,15 +2670,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2553 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | 2670 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; |
2554 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 2671 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
2555 | 2672 | ||
2556 | set_opt(sbi->s_mount_opt, BARRIER); | 2673 | if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) |
2674 | set_opt(sbi->s_mount_opt, BARRIER); | ||
2557 | 2675 | ||
2558 | /* | 2676 | /* |
2559 | * enable delayed allocation by default | 2677 | * enable delayed allocation by default |
2560 | * Use -o nodelalloc to turn it off | 2678 | * Use -o nodelalloc to turn it off |
2561 | */ | 2679 | */ |
2562 | if (!IS_EXT3_SB(sb)) | 2680 | if (!IS_EXT3_SB(sb) && |
2681 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | ||
2563 | set_opt(sbi->s_mount_opt, DELALLOC); | 2682 | set_opt(sbi->s_mount_opt, DELALLOC); |
2564 | 2683 | ||
2684 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | ||
2685 | &journal_devnum, &journal_ioprio, NULL, 0)) { | ||
2686 | ext4_msg(sb, KERN_WARNING, | ||
2687 | "failed to parse options in superblock: %s", | ||
2688 | sbi->s_es->s_mount_opts); | ||
2689 | } | ||
2565 | if (!parse_options((char *) data, sb, &journal_devnum, | 2690 | if (!parse_options((char *) data, sb, &journal_devnum, |
2566 | &journal_ioprio, NULL, 0)) | 2691 | &journal_ioprio, NULL, 0)) |
2567 | goto failed_mount; | 2692 | goto failed_mount; |
@@ -2912,18 +3037,7 @@ no_journal: | |||
2912 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | 3037 | ext4_msg(sb, KERN_ERR, "insufficient memory"); |
2913 | goto failed_mount_wq; | 3038 | goto failed_mount_wq; |
2914 | } | 3039 | } |
2915 | if (test_opt(sb, NOBH)) { | 3040 | |
2916 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | ||
2917 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " | ||
2918 | "its supported only with writeback mode"); | ||
2919 | clear_opt(sbi->s_mount_opt, NOBH); | ||
2920 | } | ||
2921 | if (test_opt(sb, DIOREAD_NOLOCK)) { | ||
2922 | ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " | ||
2923 | "not supported with nobh mode"); | ||
2924 | goto failed_mount_wq; | ||
2925 | } | ||
2926 | } | ||
2927 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 3041 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); |
2928 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3042 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
2929 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3043 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
@@ -3010,7 +3124,7 @@ no_journal: | |||
3010 | ext4_ext_init(sb); | 3124 | ext4_ext_init(sb); |
3011 | err = ext4_mb_init(sb, needs_recovery); | 3125 | err = ext4_mb_init(sb, needs_recovery); |
3012 | if (err) { | 3126 | if (err) { |
3013 | ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", | 3127 | ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", |
3014 | err); | 3128 | err); |
3015 | goto failed_mount4; | 3129 | goto failed_mount4; |
3016 | } | 3130 | } |
@@ -3043,7 +3157,14 @@ no_journal: | |||
3043 | descr = "out journal"; | 3157 | descr = "out journal"; |
3044 | 3158 | ||
3045 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " | 3159 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " |
3046 | "Opts: %s", descr, orig_data); | 3160 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, |
3161 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); | ||
3162 | |||
3163 | init_timer(&sbi->s_err_report); | ||
3164 | sbi->s_err_report.function = print_daily_error_info; | ||
3165 | sbi->s_err_report.data = (unsigned long) sb; | ||
3166 | if (es->s_error_count) | ||
3167 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ | ||
3047 | 3168 | ||
3048 | lock_kernel(); | 3169 | lock_kernel(); |
3049 | kfree(orig_data); | 3170 | kfree(orig_data); |
@@ -3093,6 +3214,7 @@ out_fail: | |||
3093 | kfree(sbi->s_blockgroup_lock); | 3214 | kfree(sbi->s_blockgroup_lock); |
3094 | kfree(sbi); | 3215 | kfree(sbi); |
3095 | lock_kernel(); | 3216 | lock_kernel(); |
3217 | out_free_orig: | ||
3096 | kfree(orig_data); | 3218 | kfree(orig_data); |
3097 | return ret; | 3219 | return ret; |
3098 | } | 3220 | } |
@@ -3110,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
3110 | journal->j_min_batch_time = sbi->s_min_batch_time; | 3232 | journal->j_min_batch_time = sbi->s_min_batch_time; |
3111 | journal->j_max_batch_time = sbi->s_max_batch_time; | 3233 | journal->j_max_batch_time = sbi->s_max_batch_time; |
3112 | 3234 | ||
3113 | spin_lock(&journal->j_state_lock); | 3235 | write_lock(&journal->j_state_lock); |
3114 | if (test_opt(sb, BARRIER)) | 3236 | if (test_opt(sb, BARRIER)) |
3115 | journal->j_flags |= JBD2_BARRIER; | 3237 | journal->j_flags |= JBD2_BARRIER; |
3116 | else | 3238 | else |
@@ -3119,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
3119 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | 3241 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; |
3120 | else | 3242 | else |
3121 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | 3243 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; |
3122 | spin_unlock(&journal->j_state_lock); | 3244 | write_unlock(&journal->j_state_lock); |
3123 | } | 3245 | } |
3124 | 3246 | ||
3125 | static journal_t *ext4_get_journal(struct super_block *sb, | 3247 | static journal_t *ext4_get_journal(struct super_block *sb, |
@@ -3327,8 +3449,17 @@ static int ext4_load_journal(struct super_block *sb, | |||
3327 | 3449 | ||
3328 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) | 3450 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) |
3329 | err = jbd2_journal_wipe(journal, !really_read_only); | 3451 | err = jbd2_journal_wipe(journal, !really_read_only); |
3330 | if (!err) | 3452 | if (!err) { |
3453 | char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); | ||
3454 | if (save) | ||
3455 | memcpy(save, ((char *) es) + | ||
3456 | EXT4_S_ERR_START, EXT4_S_ERR_LEN); | ||
3331 | err = jbd2_journal_load(journal); | 3457 | err = jbd2_journal_load(journal); |
3458 | if (save) | ||
3459 | memcpy(((char *) es) + EXT4_S_ERR_START, | ||
3460 | save, EXT4_S_ERR_LEN); | ||
3461 | kfree(save); | ||
3462 | } | ||
3332 | 3463 | ||
3333 | if (err) { | 3464 | if (err) { |
3334 | ext4_msg(sb, KERN_ERR, "error loading journal"); | 3465 | ext4_msg(sb, KERN_ERR, "error loading journal"); |
@@ -3384,10 +3515,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3384 | */ | 3515 | */ |
3385 | if (!(sb->s_flags & MS_RDONLY)) | 3516 | if (!(sb->s_flags & MS_RDONLY)) |
3386 | es->s_wtime = cpu_to_le32(get_seconds()); | 3517 | es->s_wtime = cpu_to_le32(get_seconds()); |
3387 | es->s_kbytes_written = | 3518 | if (sb->s_bdev->bd_part) |
3388 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3519 | es->s_kbytes_written = |
3520 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | ||
3389 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3521 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
3390 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 3522 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); |
3523 | else | ||
3524 | es->s_kbytes_written = | ||
3525 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | ||
3391 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3526 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
3392 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3527 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3393 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | 3528 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( |
@@ -3491,7 +3626,7 @@ int ext4_force_commit(struct super_block *sb) | |||
3491 | 3626 | ||
3492 | journal = EXT4_SB(sb)->s_journal; | 3627 | journal = EXT4_SB(sb)->s_journal; |
3493 | if (journal) { | 3628 | if (journal) { |
3494 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | 3629 | vfs_check_frozen(sb, SB_FREEZE_TRANS); |
3495 | ret = ext4_journal_force_commit(journal); | 3630 | ret = ext4_journal_force_commit(journal); |
3496 | } | 3631 | } |
3497 | 3632 | ||
@@ -3616,7 +3751,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3616 | } | 3751 | } |
3617 | 3752 | ||
3618 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) | 3753 | if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) |
3619 | ext4_abort(sb, __func__, "Abort forced by user"); | 3754 | ext4_abort(sb, "Abort forced by user"); |
3620 | 3755 | ||
3621 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 3756 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
3622 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); | 3757 | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); |
@@ -3981,6 +4116,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
3981 | return err; | 4116 | return err; |
3982 | } | 4117 | } |
3983 | 4118 | ||
4119 | static int ext4_quota_off(struct super_block *sb, int type) | ||
4120 | { | ||
4121 | /* Force all delayed allocation blocks to be allocated */ | ||
4122 | if (test_opt(sb, DELALLOC)) { | ||
4123 | down_read(&sb->s_umount); | ||
4124 | sync_filesystem(sb); | ||
4125 | up_read(&sb->s_umount); | ||
4126 | } | ||
4127 | |||
4128 | return dquot_quota_off(sb, type); | ||
4129 | } | ||
4130 | |||
3984 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 4131 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
3985 | * acquiring the locks... As quota files are never truncated and quota code | 4132 | * acquiring the locks... As quota files are never truncated and quota code |
3986 | * itself serializes the operations (and no one else should touch the files) | 4133 |
@@ -4030,7 +4177,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
4030 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); | 4177 | ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); |
4031 | int err = 0; | 4178 | int err = 0; |
4032 | int offset = off & (sb->s_blocksize - 1); | 4179 | int offset = off & (sb->s_blocksize - 1); |
4033 | int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; | ||
4034 | struct buffer_head *bh; | 4180 | struct buffer_head *bh; |
4035 | handle_t *handle = journal_current_handle(); | 4181 | handle_t *handle = journal_current_handle(); |
4036 | 4182 | ||
@@ -4055,24 +4201,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
4055 | bh = ext4_bread(handle, inode, blk, 1, &err); | 4201 | bh = ext4_bread(handle, inode, blk, 1, &err); |
4056 | if (!bh) | 4202 | if (!bh) |
4057 | goto out; | 4203 | goto out; |
4058 | if (journal_quota) { | 4204 | err = ext4_journal_get_write_access(handle, bh); |
4059 | err = ext4_journal_get_write_access(handle, bh); | 4205 | if (err) { |
4060 | if (err) { | 4206 | brelse(bh); |
4061 | brelse(bh); | 4207 | goto out; |
4062 | goto out; | ||
4063 | } | ||
4064 | } | 4208 | } |
4065 | lock_buffer(bh); | 4209 | lock_buffer(bh); |
4066 | memcpy(bh->b_data+offset, data, len); | 4210 | memcpy(bh->b_data+offset, data, len); |
4067 | flush_dcache_page(bh->b_page); | 4211 | flush_dcache_page(bh->b_page); |
4068 | unlock_buffer(bh); | 4212 | unlock_buffer(bh); |
4069 | if (journal_quota) | 4213 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
4070 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
4071 | else { | ||
4072 | /* Always do at least ordered writes for quotas */ | ||
4073 | err = ext4_jbd2_file_inode(handle, inode); | ||
4074 | mark_buffer_dirty(bh); | ||
4075 | } | ||
4076 | brelse(bh); | 4214 | brelse(bh); |
4077 | out: | 4215 | out: |
4078 | if (err) { | 4216 | if (err) { |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 04338009793a..3a8cd8dff1ad 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, | |||
458 | 458 | ||
459 | if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { | 459 | if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { |
460 | EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); | 460 | EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); |
461 | sb->s_dirt = 1; | 461 | ext4_handle_dirty_super(handle, sb); |
462 | ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); | ||
463 | } | 462 | } |
464 | } | 463 | } |
465 | 464 | ||
@@ -1418,7 +1417,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh) | |||
1418 | ea_bdebug(bh, "out of memory"); | 1417 | ea_bdebug(bh, "out of memory"); |
1419 | return; | 1418 | return; |
1420 | } | 1419 | } |
1421 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); | 1420 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); |
1422 | if (error) { | 1421 | if (error) { |
1423 | mb_cache_entry_free(ce); | 1422 | mb_cache_entry_free(ce); |
1424 | if (error == -EBUSY) { | 1423 | if (error == -EBUSY) { |
@@ -1490,8 +1489,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, | |||
1490 | return NULL; /* never share */ | 1489 | return NULL; /* never share */ |
1491 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | 1490 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); |
1492 | again: | 1491 | again: |
1493 | ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, | 1492 | ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, |
1494 | inode->i_sb->s_bdev, hash); | 1493 | hash); |
1495 | while (ce) { | 1494 | while (ce) { |
1496 | struct buffer_head *bh; | 1495 | struct buffer_head *bh; |
1497 | 1496 | ||
@@ -1515,7 +1514,7 @@ again: | |||
1515 | return bh; | 1514 | return bh; |
1516 | } | 1515 | } |
1517 | brelse(bh); | 1516 | brelse(bh); |
1518 | ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); | 1517 | ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); |
1519 | } | 1518 | } |
1520 | return NULL; | 1519 | return NULL; |
1521 | } | 1520 | } |
@@ -1591,9 +1590,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
1591 | int __init | 1590 | int __init |
1592 | init_ext4_xattr(void) | 1591 | init_ext4_xattr(void) |
1593 | { | 1592 | { |
1594 | ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, | 1593 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); |
1595 | sizeof(struct mb_cache_entry) + | ||
1596 | sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); | ||
1597 | if (!ext4_xattr_cache) | 1594 | if (!ext4_xattr_cache) |
1598 | return -ENOMEM; | 1595 | return -ENOMEM; |
1599 | return 0; | 1596 | return 0; |
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 27ac25725954..d75a77f85c28 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
@@ -306,7 +306,6 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd, | |||
306 | extern const struct file_operations fat_file_operations; | 306 | extern const struct file_operations fat_file_operations; |
307 | extern const struct inode_operations fat_file_inode_operations; | 307 | extern const struct inode_operations fat_file_inode_operations; |
308 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); | 308 | extern int fat_setattr(struct dentry * dentry, struct iattr * attr); |
309 | extern int fat_setsize(struct inode *inode, loff_t offset); | ||
310 | extern void fat_truncate_blocks(struct inode *inode, loff_t offset); | 309 | extern void fat_truncate_blocks(struct inode *inode, loff_t offset); |
311 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, | 310 | extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, |
312 | struct kstat *stat); | 311 | struct kstat *stat); |
diff --git a/fs/fat/file.c b/fs/fat/file.c index 990dfae022e5..7257752b6d5d 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c | |||
@@ -364,18 +364,6 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) | |||
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | int fat_setsize(struct inode *inode, loff_t offset) | ||
368 | { | ||
369 | int error; | ||
370 | |||
371 | error = simple_setsize(inode, offset); | ||
372 | if (error) | ||
373 | return error; | ||
374 | fat_truncate_blocks(inode, offset); | ||
375 | |||
376 | return error; | ||
377 | } | ||
378 | |||
379 | #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) | 367 | #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) |
380 | /* valid file mode bits */ | 368 | /* valid file mode bits */ |
381 | #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) | 369 | #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) |
@@ -387,21 +375,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
387 | unsigned int ia_valid; | 375 | unsigned int ia_valid; |
388 | int error; | 376 | int error; |
389 | 377 | ||
390 | /* | ||
391 | * Expand the file. Since inode_setattr() updates ->i_size | ||
392 | * before calling the ->truncate(), but FAT needs to fill the | ||
393 | * hole before it. XXX: this is no longer true with new truncate | ||
394 | * sequence. | ||
395 | */ | ||
396 | if (attr->ia_valid & ATTR_SIZE) { | ||
397 | if (attr->ia_size > inode->i_size) { | ||
398 | error = fat_cont_expand(inode, attr->ia_size); | ||
399 | if (error || attr->ia_valid == ATTR_SIZE) | ||
400 | goto out; | ||
401 | attr->ia_valid &= ~ATTR_SIZE; | ||
402 | } | ||
403 | } | ||
404 | |||
405 | /* Check for setting the inode time. */ | 378 | /* Check for setting the inode time. */ |
406 | ia_valid = attr->ia_valid; | 379 | ia_valid = attr->ia_valid; |
407 | if (ia_valid & TIMES_SET_FLAGS) { | 380 | if (ia_valid & TIMES_SET_FLAGS) { |
@@ -417,6 +390,21 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
417 | goto out; | 390 | goto out; |
418 | } | 391 | } |
419 | 392 | ||
393 | /* | ||
394 | * Expand the file. Since inode_setattr() updates ->i_size | ||
395 | * before calling the ->truncate(), but FAT needs to fill the | ||
396 | * hole before it. XXX: this is no longer true with new truncate | ||
397 | * sequence. | ||
398 | */ | ||
399 | if (attr->ia_valid & ATTR_SIZE) { | ||
400 | if (attr->ia_size > inode->i_size) { | ||
401 | error = fat_cont_expand(inode, attr->ia_size); | ||
402 | if (error || attr->ia_valid == ATTR_SIZE) | ||
403 | goto out; | ||
404 | attr->ia_valid &= ~ATTR_SIZE; | ||
405 | } | ||
406 | } | ||
407 | |||
420 | if (((attr->ia_valid & ATTR_UID) && | 408 | if (((attr->ia_valid & ATTR_UID) && |
421 | (attr->ia_uid != sbi->options.fs_uid)) || | 409 | (attr->ia_uid != sbi->options.fs_uid)) || |
422 | ((attr->ia_valid & ATTR_GID) && | 410 | ((attr->ia_valid & ATTR_GID) && |
@@ -441,12 +429,11 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) | |||
441 | } | 429 | } |
442 | 430 | ||
443 | if (attr->ia_valid & ATTR_SIZE) { | 431 | if (attr->ia_valid & ATTR_SIZE) { |
444 | error = fat_setsize(inode, attr->ia_size); | 432 | truncate_setsize(inode, attr->ia_size); |
445 | if (error) | 433 | fat_truncate_blocks(inode, attr->ia_size); |
446 | goto out; | ||
447 | } | 434 | } |
448 | 435 | ||
449 | generic_setattr(inode, attr); | 436 | setattr_copy(inode, attr); |
450 | mark_inode_dirty(inode); | 437 | mark_inode_dirty(inode); |
451 | out: | 438 | out: |
452 | return error; | 439 | return error; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7bf45aee56d7..830058057d33 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping, | |||
159 | int err; | 159 | int err; |
160 | 160 | ||
161 | *pagep = NULL; | 161 | *pagep = NULL; |
162 | err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, | 162 | err = cont_write_begin(file, mapping, pos, len, flags, |
163 | pagep, fsdata, fat_get_block, | 163 | pagep, fsdata, fat_get_block, |
164 | &MSDOS_I(mapping->host)->mmu_private); | 164 | &MSDOS_I(mapping->host)->mmu_private); |
165 | if (err < 0) | 165 | if (err < 0) |
@@ -212,8 +212,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, | |||
212 | * FAT need to use the DIO_LOCKING for avoiding the race | 212 | * FAT need to use the DIO_LOCKING for avoiding the race |
213 | * condition of fat_get_block() and ->truncate(). | 213 | * condition of fat_get_block() and ->truncate(). |
214 | */ | 214 | */ |
215 | ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, | 215 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, |
216 | iov, offset, nr_segs, fat_get_block, NULL); | 216 | iov, offset, nr_segs, fat_get_block, NULL); |
217 | if (ret < 0 && (rw & WRITE)) | 217 | if (ret < 0 && (rw & WRITE)) |
218 | fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); | 218 | fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); |
219 | 219 | ||
@@ -263,7 +263,7 @@ static const struct address_space_operations fat_aops = { | |||
263 | * check if the location is still valid and retry if it | 263 | * check if the location is still valid and retry if it |
264 | * isn't. Otherwise we do changes. | 264 | * isn't. Otherwise we do changes. |
265 | * 5. Spinlock is used to protect hash/unhash/location check/lookup | 265 | * 5. Spinlock is used to protect hash/unhash/location check/lookup |
266 | * 6. fat_clear_inode() unhashes the F-d-c entry. | 266 | * 6. fat_evict_inode() unhashes the F-d-c entry. |
267 | * 7. lookup() and readdir() do igrab() if they find a F-d-c entry | 267 | * 7. lookup() and readdir() do igrab() if they find a F-d-c entry |
268 | * and consider negative result as cache miss. | 268 | * and consider negative result as cache miss. |
269 | */ | 269 | */ |
@@ -448,16 +448,15 @@ out: | |||
448 | 448 | ||
449 | EXPORT_SYMBOL_GPL(fat_build_inode); | 449 | EXPORT_SYMBOL_GPL(fat_build_inode); |
450 | 450 | ||
451 | static void fat_delete_inode(struct inode *inode) | 451 | static void fat_evict_inode(struct inode *inode) |
452 | { | 452 | { |
453 | truncate_inode_pages(&inode->i_data, 0); | 453 | truncate_inode_pages(&inode->i_data, 0); |
454 | inode->i_size = 0; | 454 | if (!inode->i_nlink) { |
455 | fat_truncate_blocks(inode, 0); | 455 | inode->i_size = 0; |
456 | clear_inode(inode); | 456 | fat_truncate_blocks(inode, 0); |
457 | } | 457 | } |
458 | 458 | invalidate_inode_buffers(inode); | |
459 | static void fat_clear_inode(struct inode *inode) | 459 | end_writeback(inode); |
460 | { | ||
461 | fat_cache_inval_inode(inode); | 460 | fat_cache_inval_inode(inode); |
462 | fat_detach(inode); | 461 | fat_detach(inode); |
463 | } | 462 | } |
@@ -674,12 +673,11 @@ static const struct super_operations fat_sops = { | |||
674 | .alloc_inode = fat_alloc_inode, | 673 | .alloc_inode = fat_alloc_inode, |
675 | .destroy_inode = fat_destroy_inode, | 674 | .destroy_inode = fat_destroy_inode, |
676 | .write_inode = fat_write_inode, | 675 | .write_inode = fat_write_inode, |
677 | .delete_inode = fat_delete_inode, | 676 | .evict_inode = fat_evict_inode, |
678 | .put_super = fat_put_super, | 677 | .put_super = fat_put_super, |
679 | .write_super = fat_write_super, | 678 | .write_super = fat_write_super, |
680 | .sync_fs = fat_sync_fs, | 679 | .sync_fs = fat_sync_fs, |
681 | .statfs = fat_statfs, | 680 | .statfs = fat_statfs, |
682 | .clear_inode = fat_clear_inode, | ||
683 | .remount_fs = fat_remount, | 681 | .remount_fs = fat_remount, |
684 | 682 | ||
685 | .show_options = fat_show_options, | 683 | .show_options = fat_show_options, |
diff --git a/fs/fcntl.c b/fs/fcntl.c index 9d175d623aab..6769fd0f35b8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -767,11 +767,22 @@ void kill_fasync(struct fasync_struct **fp, int sig, int band) | |||
767 | } | 767 | } |
768 | EXPORT_SYMBOL(kill_fasync); | 768 | EXPORT_SYMBOL(kill_fasync); |
769 | 769 | ||
770 | static int __init fasync_init(void) | 770 | static int __init fcntl_init(void) |
771 | { | 771 | { |
772 | /* please add new bits here to ensure allocation uniqueness */ | ||
773 | BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | ||
774 | O_RDONLY | O_WRONLY | O_RDWR | | ||
775 | O_CREAT | O_EXCL | O_NOCTTY | | ||
776 | O_TRUNC | O_APPEND | O_NONBLOCK | | ||
777 | __O_SYNC | O_DSYNC | FASYNC | | ||
778 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | ||
779 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | ||
780 | FMODE_EXEC | ||
781 | )); | ||
782 | |||
772 | fasync_cache = kmem_cache_create("fasync_cache", | 783 | fasync_cache = kmem_cache_create("fasync_cache", |
773 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); | 784 | sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL); |
774 | return 0; | 785 | return 0; |
775 | } | 786 | } |
776 | 787 | ||
777 | module_init(fasync_init) | 788 | module_init(fcntl_init) |
@@ -39,28 +39,27 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | |||
39 | */ | 39 | */ |
40 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | 40 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
41 | 41 | ||
42 | static inline void * alloc_fdmem(unsigned int size) | 42 | static inline void *alloc_fdmem(unsigned int size) |
43 | { | 43 | { |
44 | if (size <= PAGE_SIZE) | 44 | void *data; |
45 | return kmalloc(size, GFP_KERNEL); | 45 | |
46 | else | 46 | data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); |
47 | return vmalloc(size); | 47 | if (data != NULL) |
48 | return data; | ||
49 | |||
50 | return vmalloc(size); | ||
48 | } | 51 | } |
49 | 52 | ||
50 | static inline void free_fdarr(struct fdtable *fdt) | 53 | static void free_fdmem(void *ptr) |
51 | { | 54 | { |
52 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) | 55 | is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr); |
53 | kfree(fdt->fd); | ||
54 | else | ||
55 | vfree(fdt->fd); | ||
56 | } | 56 | } |
57 | 57 | ||
58 | static inline void free_fdset(struct fdtable *fdt) | 58 | static void __free_fdtable(struct fdtable *fdt) |
59 | { | 59 | { |
60 | if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2)) | 60 | free_fdmem(fdt->fd); |
61 | kfree(fdt->open_fds); | 61 | free_fdmem(fdt->open_fds); |
62 | else | 62 | kfree(fdt); |
63 | vfree(fdt->open_fds); | ||
64 | } | 63 | } |
65 | 64 | ||
66 | static void free_fdtable_work(struct work_struct *work) | 65 | static void free_fdtable_work(struct work_struct *work) |
@@ -75,9 +74,8 @@ static void free_fdtable_work(struct work_struct *work) | |||
75 | spin_unlock_bh(&f->lock); | 74 | spin_unlock_bh(&f->lock); |
76 | while(fdt) { | 75 | while(fdt) { |
77 | struct fdtable *next = fdt->next; | 76 | struct fdtable *next = fdt->next; |
78 | vfree(fdt->fd); | 77 | |
79 | free_fdset(fdt); | 78 | __free_fdtable(fdt); |
80 | kfree(fdt); | ||
81 | fdt = next; | 79 | fdt = next; |
82 | } | 80 | } |
83 | } | 81 | } |
@@ -98,7 +96,7 @@ void free_fdtable_rcu(struct rcu_head *rcu) | |||
98 | container_of(fdt, struct files_struct, fdtab)); | 96 | container_of(fdt, struct files_struct, fdtab)); |
99 | return; | 97 | return; |
100 | } | 98 | } |
101 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) { | 99 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { |
102 | kfree(fdt->fd); | 100 | kfree(fdt->fd); |
103 | kfree(fdt->open_fds); | 101 | kfree(fdt->open_fds); |
104 | kfree(fdt); | 102 | kfree(fdt); |
@@ -178,13 +176,12 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
178 | fdt->open_fds = (fd_set *)data; | 176 | fdt->open_fds = (fd_set *)data; |
179 | data += nr / BITS_PER_BYTE; | 177 | data += nr / BITS_PER_BYTE; |
180 | fdt->close_on_exec = (fd_set *)data; | 178 | fdt->close_on_exec = (fd_set *)data; |
181 | INIT_RCU_HEAD(&fdt->rcu); | ||
182 | fdt->next = NULL; | 179 | fdt->next = NULL; |
183 | 180 | ||
184 | return fdt; | 181 | return fdt; |
185 | 182 | ||
186 | out_arr: | 183 | out_arr: |
187 | free_fdarr(fdt); | 184 | free_fdmem(fdt->fd); |
188 | out_fdt: | 185 | out_fdt: |
189 | kfree(fdt); | 186 | kfree(fdt); |
190 | out: | 187 | out: |
@@ -214,9 +211,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
214 | * caller and alloc_fdtable(). Cheaper to catch it here... | 211 | * caller and alloc_fdtable(). Cheaper to catch it here... |
215 | */ | 212 | */ |
216 | if (unlikely(new_fdt->max_fds <= nr)) { | 213 | if (unlikely(new_fdt->max_fds <= nr)) { |
217 | free_fdarr(new_fdt); | 214 | __free_fdtable(new_fdt); |
218 | free_fdset(new_fdt); | ||
219 | kfree(new_fdt); | ||
220 | return -EMFILE; | 215 | return -EMFILE; |
221 | } | 216 | } |
222 | /* | 217 | /* |
@@ -232,9 +227,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
232 | free_fdtable(cur_fdt); | 227 | free_fdtable(cur_fdt); |
233 | } else { | 228 | } else { |
234 | /* Somebody else expanded, so undo our attempt */ | 229 | /* Somebody else expanded, so undo our attempt */ |
235 | free_fdarr(new_fdt); | 230 | __free_fdtable(new_fdt); |
236 | free_fdset(new_fdt); | ||
237 | kfree(new_fdt); | ||
238 | } | 231 | } |
239 | return 1; | 232 | return 1; |
240 | } | 233 | } |
@@ -312,7 +305,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
312 | new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | 305 | new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; |
313 | new_fdt->open_fds = (fd_set *)&newf->open_fds_init; | 306 | new_fdt->open_fds = (fd_set *)&newf->open_fds_init; |
314 | new_fdt->fd = &newf->fd_array[0]; | 307 | new_fdt->fd = &newf->fd_array[0]; |
315 | INIT_RCU_HEAD(&new_fdt->rcu); | ||
316 | new_fdt->next = NULL; | 308 | new_fdt->next = NULL; |
317 | 309 | ||
318 | spin_lock(&oldf->file_lock); | 310 | spin_lock(&oldf->file_lock); |
@@ -325,11 +317,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
325 | while (unlikely(open_files > new_fdt->max_fds)) { | 317 | while (unlikely(open_files > new_fdt->max_fds)) { |
326 | spin_unlock(&oldf->file_lock); | 318 | spin_unlock(&oldf->file_lock); |
327 | 319 | ||
328 | if (new_fdt != &newf->fdtab) { | 320 | if (new_fdt != &newf->fdtab) |
329 | free_fdarr(new_fdt); | 321 | __free_fdtable(new_fdt); |
330 | free_fdset(new_fdt); | ||
331 | kfree(new_fdt); | ||
332 | } | ||
333 | 322 | ||
334 | new_fdt = alloc_fdtable(open_files - 1); | 323 | new_fdt = alloc_fdtable(open_files - 1); |
335 | if (!new_fdt) { | 324 | if (!new_fdt) { |
@@ -339,9 +328,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
339 | 328 | ||
340 | /* beyond sysctl_nr_open; nothing to do */ | 329 | /* beyond sysctl_nr_open; nothing to do */ |
341 | if (unlikely(new_fdt->max_fds < open_files)) { | 330 | if (unlikely(new_fdt->max_fds < open_files)) { |
342 | free_fdarr(new_fdt); | 331 | __free_fdtable(new_fdt); |
343 | free_fdset(new_fdt); | ||
344 | kfree(new_fdt); | ||
345 | *errorp = -EMFILE; | 332 | *errorp = -EMFILE; |
346 | goto out_release; | 333 | goto out_release; |
347 | } | 334 | } |
@@ -430,7 +417,6 @@ struct files_struct init_files = { | |||
430 | .fd = &init_files.fd_array[0], | 417 | .fd = &init_files.fd_array[0], |
431 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, | 418 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, |
432 | .open_fds = (fd_set *)&init_files.open_fds_init, | 419 | .open_fds = (fd_set *)&init_files.open_fds_init, |
433 | .rcu = RCU_HEAD_INIT, | ||
434 | }, | 420 | }, |
435 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | 421 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), |
436 | }; | 422 | }; |
diff --git a/fs/file_table.c b/fs/file_table.c index 5c7d10ead4ad..2fc3b3c08911 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -230,6 +230,15 @@ static void __fput(struct file *file) | |||
230 | might_sleep(); | 230 | might_sleep(); |
231 | 231 | ||
232 | fsnotify_close(file); | 232 | fsnotify_close(file); |
233 | |||
234 | /* | ||
235 | * fsnotify_create_event may have taken one or more references on this | ||
236 | * file. If it did so it left one reference for us to drop to make sure | ||
237 | * its calls to fput could not prematurely destroy the file. | ||
238 | */ | ||
239 | if (atomic_long_read(&file->f_count)) | ||
240 | return fput(file); | ||
241 | |||
233 | /* | 242 | /* |
234 | * The function eventpoll_release() should be the first called | 243 | * The function eventpoll_release() should be the first called |
235 | * in the file cleanup chain. | 244 | * in the file cleanup chain. |
@@ -289,11 +298,20 @@ struct file *fget(unsigned int fd) | |||
289 | EXPORT_SYMBOL(fget); | 298 | EXPORT_SYMBOL(fget); |
290 | 299 | ||
291 | /* | 300 | /* |
292 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | 301 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
293 | * You can use this only if it is guranteed that the current task already | 302 | * |
294 | * holds a refcnt to that file. That check has to be done at fget() only | 303 | * You can use this instead of fget if you satisfy all of the following |
295 | * and a flag is returned to be passed to the corresponding fput_light(). | 304 | * conditions: |
296 | * There must not be a cloning between an fget_light/fput_light pair. | 305 | * 1) You must call fput_light before exiting the syscall and returning control |
306 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
307 | * returning to userspace). | ||
308 | * 2) You must not call filp_close on the returned struct file * in between | ||
309 | * calls to fget_light and fput_light. | ||
310 | * 3) You must not clone the current task in between the calls to fget_light | ||
311 | * and fput_light. | ||
312 | * | ||
313 | * The fput_needed flag returned by fget_light should be passed to the | ||
314 | * corresponding fput_light. | ||
297 | */ | 315 | */ |
298 | struct file *fget_light(unsigned int fd, int *fput_needed) | 316 | struct file *fget_light(unsigned int fd, int *fput_needed) |
299 | { | 317 | { |
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 50ab5eecb99b..881aa3d217f0 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h | |||
@@ -63,7 +63,7 @@ extern void vxfs_put_fake_inode(struct inode *); | |||
63 | extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); | 63 | extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); |
64 | extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); | 64 | extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); |
65 | extern struct inode * vxfs_iget(struct super_block *, ino_t); | 65 | extern struct inode * vxfs_iget(struct super_block *, ino_t); |
66 | extern void vxfs_clear_inode(struct inode *); | 66 | extern void vxfs_evict_inode(struct inode *); |
67 | 67 | ||
68 | /* vxfs_lookup.c */ | 68 | /* vxfs_lookup.c */ |
69 | extern const struct inode_operations vxfs_dir_inode_ops; | 69 | extern const struct inode_operations vxfs_dir_inode_ops; |
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 03a6ea5e99f7..79d1b4ea13e7 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c | |||
@@ -337,15 +337,17 @@ vxfs_iget(struct super_block *sbp, ino_t ino) | |||
337 | } | 337 | } |
338 | 338 | ||
339 | /** | 339 | /** |
340 | * vxfs_clear_inode - remove inode from main memory | 340 | * vxfs_evict_inode - remove inode from main memory |
341 | * @ip: inode to discard. | 341 | * @ip: inode to discard. |
342 | * | 342 | * |
343 | * Description: | 343 | * Description: |
344 | * vxfs_clear_inode() is called on the final iput and frees the private | 344 | * vxfs_evict_inode() is called on the final iput and frees the private |
345 | * inode area. | 345 | * inode area. |
346 | */ | 346 | */ |
347 | void | 347 | void |
348 | vxfs_clear_inode(struct inode *ip) | 348 | vxfs_evict_inode(struct inode *ip) |
349 | { | 349 | { |
350 | truncate_inode_pages(&ip->i_data, 0); | ||
351 | end_writeback(ip); | ||
350 | kmem_cache_free(vxfs_inode_cachep, ip->i_private); | 352 | kmem_cache_free(vxfs_inode_cachep, ip->i_private); |
351 | } | 353 | } |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 1e8af939b3e4..dc0c041e85cb 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -61,7 +61,7 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *); | |||
61 | static int vxfs_remount(struct super_block *, int *, char *); | 61 | static int vxfs_remount(struct super_block *, int *, char *); |
62 | 62 | ||
63 | static const struct super_operations vxfs_super_ops = { | 63 | static const struct super_operations vxfs_super_ops = { |
64 | .clear_inode = vxfs_clear_inode, | 64 | .evict_inode = vxfs_evict_inode, |
65 | .put_super = vxfs_put_super, | 65 | .put_super = vxfs_put_super, |
66 | .statfs = vxfs_statfs, | 66 | .statfs = vxfs_statfs, |
67 | .remount_fs = vxfs_remount, | 67 | .remount_fs = vxfs_remount, |
@@ -135,7 +135,7 @@ static int vxfs_remount(struct super_block *sb, int *flags, char *data) | |||
135 | } | 135 | } |
136 | 136 | ||
137 | /** | 137 | /** |
138 | * vxfs_read_super - read superblock into memory and initalize filesystem | 138 | * vxfs_read_super - read superblock into memory and initialize filesystem |
139 | * @sbp: VFS superblock (to fill) | 139 | * @sbp: VFS superblock (to fill) |
140 | * @dp: fs private mount data | 140 | * @dp: fs private mount data |
141 | * @silent: do not complain loudly when sth is wrong | 141 | * @silent: do not complain loudly when sth is wrong |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d5be1693ac93..2f76c4a081a2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -26,15 +26,9 @@ | |||
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/tracepoint.h> | ||
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
31 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
32 | |||
33 | /* | ||
34 | * We don't actually have pdflush, but this one is exported though /proc... | ||
35 | */ | ||
36 | int nr_pdflush_threads; | ||
37 | |||
38 | /* | 32 | /* |
39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 33 | * Passed into wb_writeback(), essentially a subset of writeback_control |
40 | */ | 34 | */ |
@@ -50,6 +44,21 @@ struct wb_writeback_work { | |||
50 | struct completion *done; /* set if the caller waits */ | 44 | struct completion *done; /* set if the caller waits */ |
51 | }; | 45 | }; |
52 | 46 | ||
47 | /* | ||
48 | * Include the creation of the trace points after defining the | ||
49 | * wb_writeback_work structure so that the definition remains local to this | ||
50 | * file. | ||
51 | */ | ||
52 | #define CREATE_TRACE_POINTS | ||
53 | #include <trace/events/writeback.h> | ||
54 | |||
55 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
56 | |||
57 | /* | ||
58 | * We don't actually have pdflush, but this one is exported though /proc... | ||
59 | */ | ||
60 | int nr_pdflush_threads; | ||
61 | |||
53 | /** | 62 | /** |
54 | * writeback_in_progress - determine whether there is writeback in progress | 63 | * writeback_in_progress - determine whether there is writeback in progress |
55 | * @bdi: the device's backing_dev_info structure. | 64 | * @bdi: the device's backing_dev_info structure. |
@@ -65,22 +74,21 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
65 | static void bdi_queue_work(struct backing_dev_info *bdi, | 74 | static void bdi_queue_work(struct backing_dev_info *bdi, |
66 | struct wb_writeback_work *work) | 75 | struct wb_writeback_work *work) |
67 | { | 76 | { |
68 | spin_lock(&bdi->wb_lock); | 77 | trace_writeback_queue(bdi, work); |
69 | list_add_tail(&work->list, &bdi->work_list); | ||
70 | spin_unlock(&bdi->wb_lock); | ||
71 | 78 | ||
72 | /* | 79 | spin_lock_bh(&bdi->wb_lock); |
73 | * If the default thread isn't there, make sure we add it. When | 80 | list_add_tail(&work->list, &bdi->work_list); |
74 | * it gets created and wakes up, we'll run this work. | 81 | if (bdi->wb.task) { |
75 | */ | 82 | wake_up_process(bdi->wb.task); |
76 | if (unlikely(list_empty_careful(&bdi->wb_list))) | 83 | } else { |
84 | /* | ||
85 | * The bdi thread isn't there, wake up the forker thread which | ||
86 | * will create and run it. | ||
87 | */ | ||
88 | trace_writeback_nothread(bdi, work); | ||
77 | wake_up_process(default_backing_dev_info.wb.task); | 89 | wake_up_process(default_backing_dev_info.wb.task); |
78 | else { | ||
79 | struct bdi_writeback *wb = &bdi->wb; | ||
80 | |||
81 | if (wb->task) | ||
82 | wake_up_process(wb->task); | ||
83 | } | 90 | } |
91 | spin_unlock_bh(&bdi->wb_lock); | ||
84 | } | 92 | } |
85 | 93 | ||
86 | static void | 94 | static void |
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
95 | */ | 103 | */ |
96 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 104 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
97 | if (!work) { | 105 | if (!work) { |
98 | if (bdi->wb.task) | 106 | if (bdi->wb.task) { |
107 | trace_writeback_nowork(bdi); | ||
99 | wake_up_process(bdi->wb.task); | 108 | wake_up_process(bdi->wb.task); |
109 | } | ||
100 | return; | 110 | return; |
101 | } | 111 | } |
102 | 112 | ||
@@ -352,7 +362,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
352 | 362 | ||
353 | spin_lock(&inode_lock); | 363 | spin_lock(&inode_lock); |
354 | inode->i_state &= ~I_SYNC; | 364 | inode->i_state &= ~I_SYNC; |
355 | if (!(inode->i_state & (I_FREEING | I_CLEAR))) { | 365 | if (!(inode->i_state & I_FREEING)) { |
356 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 366 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { |
357 | /* | 367 | /* |
358 | * More pages get dirtied by a fast dirtier. | 368 | * More pages get dirtied by a fast dirtier. |
@@ -499,7 +509,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
499 | if (inode_dirtied_after(inode, wbc->wb_start)) | 509 | if (inode_dirtied_after(inode, wbc->wb_start)) |
500 | return 1; | 510 | return 1; |
501 | 511 | ||
502 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 512 | BUG_ON(inode->i_state & I_FREEING); |
503 | __iget(inode); | 513 | __iget(inode); |
504 | pages_skipped = wbc->pages_skipped; | 514 | pages_skipped = wbc->pages_skipped; |
505 | writeback_single_inode(inode, wbc); | 515 | writeback_single_inode(inode, wbc); |
@@ -530,7 +540,8 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
530 | { | 540 | { |
531 | int ret = 0; | 541 | int ret = 0; |
532 | 542 | ||
533 | wbc->wb_start = jiffies; /* livelock avoidance */ | 543 | if (!wbc->wb_start) |
544 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
534 | spin_lock(&inode_lock); | 545 | spin_lock(&inode_lock); |
535 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 546 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
536 | queue_io(wb, wbc->older_than_this); | 547 | queue_io(wb, wbc->older_than_this); |
@@ -559,7 +570,6 @@ static void __writeback_inodes_sb(struct super_block *sb, | |||
559 | { | 570 | { |
560 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 571 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
561 | 572 | ||
562 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
563 | spin_lock(&inode_lock); | 573 | spin_lock(&inode_lock); |
564 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 574 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
565 | queue_io(wb, wbc->older_than_this); | 575 | queue_io(wb, wbc->older_than_this); |
@@ -625,6 +635,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
625 | wbc.range_end = LLONG_MAX; | 635 | wbc.range_end = LLONG_MAX; |
626 | } | 636 | } |
627 | 637 | ||
638 | wbc.wb_start = jiffies; /* livelock avoidance */ | ||
628 | for (;;) { | 639 | for (;;) { |
629 | /* | 640 | /* |
630 | * Stop writeback when nr_pages has been consumed | 641 | * Stop writeback when nr_pages has been consumed |
@@ -642,10 +653,14 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
642 | wbc.more_io = 0; | 653 | wbc.more_io = 0; |
643 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 654 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
644 | wbc.pages_skipped = 0; | 655 | wbc.pages_skipped = 0; |
656 | |||
657 | trace_wbc_writeback_start(&wbc, wb->bdi); | ||
645 | if (work->sb) | 658 | if (work->sb) |
646 | __writeback_inodes_sb(work->sb, wb, &wbc); | 659 | __writeback_inodes_sb(work->sb, wb, &wbc); |
647 | else | 660 | else |
648 | writeback_inodes_wb(wb, &wbc); | 661 | writeback_inodes_wb(wb, &wbc); |
662 | trace_wbc_writeback_written(&wbc, wb->bdi); | ||
663 | |||
649 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 664 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
650 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 665 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
651 | 666 | ||
@@ -673,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
673 | if (!list_empty(&wb->b_more_io)) { | 688 | if (!list_empty(&wb->b_more_io)) { |
674 | inode = list_entry(wb->b_more_io.prev, | 689 | inode = list_entry(wb->b_more_io.prev, |
675 | struct inode, i_list); | 690 | struct inode, i_list); |
691 | trace_wbc_writeback_wait(&wbc, wb->bdi); | ||
676 | inode_wait_for_writeback(inode); | 692 | inode_wait_for_writeback(inode); |
677 | } | 693 | } |
678 | spin_unlock(&inode_lock); | 694 | spin_unlock(&inode_lock); |
@@ -685,17 +701,17 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
685 | * Return the next wb_writeback_work struct that hasn't been processed yet. | 701 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
686 | */ | 702 | */ |
687 | static struct wb_writeback_work * | 703 | static struct wb_writeback_work * |
688 | get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) | 704 | get_next_work_item(struct backing_dev_info *bdi) |
689 | { | 705 | { |
690 | struct wb_writeback_work *work = NULL; | 706 | struct wb_writeback_work *work = NULL; |
691 | 707 | ||
692 | spin_lock(&bdi->wb_lock); | 708 | spin_lock_bh(&bdi->wb_lock); |
693 | if (!list_empty(&bdi->work_list)) { | 709 | if (!list_empty(&bdi->work_list)) { |
694 | work = list_entry(bdi->work_list.next, | 710 | work = list_entry(bdi->work_list.next, |
695 | struct wb_writeback_work, list); | 711 | struct wb_writeback_work, list); |
696 | list_del_init(&work->list); | 712 | list_del_init(&work->list); |
697 | } | 713 | } |
698 | spin_unlock(&bdi->wb_lock); | 714 | spin_unlock_bh(&bdi->wb_lock); |
699 | return work; | 715 | return work; |
700 | } | 716 | } |
701 | 717 | ||
@@ -743,7 +759,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
743 | struct wb_writeback_work *work; | 759 | struct wb_writeback_work *work; |
744 | long wrote = 0; | 760 | long wrote = 0; |
745 | 761 | ||
746 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 762 | while ((work = get_next_work_item(bdi)) != NULL) { |
747 | /* | 763 | /* |
748 | * Override sync mode, in case we must wait for completion | 764 | * Override sync mode, in case we must wait for completion |
749 | * because this thread is exiting now. | 765 | * because this thread is exiting now. |
@@ -751,6 +767,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
751 | if (force_wait) | 767 | if (force_wait) |
752 | work->sync_mode = WB_SYNC_ALL; | 768 | work->sync_mode = WB_SYNC_ALL; |
753 | 769 | ||
770 | trace_writeback_exec(bdi, work); | ||
771 | |||
754 | wrote += wb_writeback(wb, work); | 772 | wrote += wb_writeback(wb, work); |
755 | 773 | ||
756 | /* | 774 | /* |
@@ -775,47 +793,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
775 | * Handle writeback of dirty data for the device backed by this bdi. Also | 793 | * Handle writeback of dirty data for the device backed by this bdi. Also |
776 | * wakes up periodically and does kupdated style flushing. | 794 | * wakes up periodically and does kupdated style flushing. |
777 | */ | 795 | */ |
778 | int bdi_writeback_task(struct bdi_writeback *wb) | 796 | int bdi_writeback_thread(void *data) |
779 | { | 797 | { |
780 | unsigned long last_active = jiffies; | 798 | struct bdi_writeback *wb = data; |
781 | unsigned long wait_jiffies = -1UL; | 799 | struct backing_dev_info *bdi = wb->bdi; |
782 | long pages_written; | 800 | long pages_written; |
783 | 801 | ||
802 | current->flags |= PF_FLUSHER | PF_SWAPWRITE; | ||
803 | set_freezable(); | ||
804 | wb->last_active = jiffies; | ||
805 | |||
806 | /* | ||
807 | * Our parent may run at a different priority, just set us to normal | ||
808 | */ | ||
809 | set_user_nice(current, 0); | ||
810 | |||
811 | trace_writeback_thread_start(bdi); | ||
812 | |||
784 | while (!kthread_should_stop()) { | 813 | while (!kthread_should_stop()) { |
814 | /* | ||
815 | * Remove own delayed wake-up timer, since we are already awake | ||
816 | * and we'll take care of the preriodic write-back. | ||
817 | */ | ||
818 | del_timer(&wb->wakeup_timer); | ||
819 | |||
785 | pages_written = wb_do_writeback(wb, 0); | 820 | pages_written = wb_do_writeback(wb, 0); |
786 | 821 | ||
822 | trace_writeback_pages_written(pages_written); | ||
823 | |||
787 | if (pages_written) | 824 | if (pages_written) |
788 | last_active = jiffies; | 825 | wb->last_active = jiffies; |
789 | else if (wait_jiffies != -1UL) { | ||
790 | unsigned long max_idle; | ||
791 | 826 | ||
792 | /* | 827 | set_current_state(TASK_INTERRUPTIBLE); |
793 | * Longest period of inactivity that we tolerate. If we | 828 | if (!list_empty(&bdi->work_list)) { |
794 | * see dirty data again later, the task will get | 829 | __set_current_state(TASK_RUNNING); |
795 | * recreated automatically. | 830 | continue; |
796 | */ | ||
797 | max_idle = max(5UL * 60 * HZ, wait_jiffies); | ||
798 | if (time_after(jiffies, max_idle + last_active)) | ||
799 | break; | ||
800 | } | 831 | } |
801 | 832 | ||
802 | if (dirty_writeback_interval) { | 833 | if (wb_has_dirty_io(wb) && dirty_writeback_interval) |
803 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | 834 | schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); |
804 | schedule_timeout_interruptible(wait_jiffies); | 835 | else { |
805 | } else { | 836 | /* |
806 | set_current_state(TASK_INTERRUPTIBLE); | 837 | * We have nothing to do, so can go sleep without any |
807 | if (list_empty_careful(&wb->bdi->work_list) && | 838 | * timeout and save power. When a work is queued or |
808 | !kthread_should_stop()) | 839 | * something is made dirty - we will be woken up. |
809 | schedule(); | 840 | */ |
810 | __set_current_state(TASK_RUNNING); | 841 | schedule(); |
811 | } | 842 | } |
812 | 843 | ||
813 | try_to_freeze(); | 844 | try_to_freeze(); |
814 | } | 845 | } |
815 | 846 | ||
847 | /* Flush any work that raced with us exiting */ | ||
848 | if (!list_empty(&bdi->work_list)) | ||
849 | wb_do_writeback(wb, 1); | ||
850 | |||
851 | trace_writeback_thread_stop(bdi); | ||
816 | return 0; | 852 | return 0; |
817 | } | 853 | } |
818 | 854 | ||
855 | |||
819 | /* | 856 | /* |
820 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | 857 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
821 | * the whole world. | 858 | * the whole world. |
@@ -890,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) | |||
890 | void __mark_inode_dirty(struct inode *inode, int flags) | 927 | void __mark_inode_dirty(struct inode *inode, int flags) |
891 | { | 928 | { |
892 | struct super_block *sb = inode->i_sb; | 929 | struct super_block *sb = inode->i_sb; |
930 | struct backing_dev_info *bdi = NULL; | ||
931 | bool wakeup_bdi = false; | ||
893 | 932 | ||
894 | /* | 933 | /* |
895 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 934 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -935,7 +974,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
935 | if (hlist_unhashed(&inode->i_hash)) | 974 | if (hlist_unhashed(&inode->i_hash)) |
936 | goto out; | 975 | goto out; |
937 | } | 976 | } |
938 | if (inode->i_state & (I_FREEING|I_CLEAR)) | 977 | if (inode->i_state & I_FREEING) |
939 | goto out; | 978 | goto out; |
940 | 979 | ||
941 | /* | 980 | /* |
@@ -943,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
943 | * reposition it (that would break b_dirty time-ordering). | 982 | * reposition it (that would break b_dirty time-ordering). |
944 | */ | 983 | */ |
945 | if (!was_dirty) { | 984 | if (!was_dirty) { |
946 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 985 | bdi = inode_to_bdi(inode); |
947 | struct backing_dev_info *bdi = wb->bdi; | 986 | |
948 | 987 | if (bdi_cap_writeback_dirty(bdi)) { | |
949 | if (bdi_cap_writeback_dirty(bdi) && | 988 | WARN(!test_bit(BDI_registered, &bdi->state), |
950 | !test_bit(BDI_registered, &bdi->state)) { | 989 | "bdi-%s not registered\n", bdi->name); |
951 | WARN_ON(1); | 990 | |
952 | printk(KERN_ERR "bdi-%s not registered\n", | 991 | /* |
953 | bdi->name); | 992 | * If this is the first dirty inode for this |
993 | * bdi, we have to wake-up the corresponding | ||
994 | * bdi thread to make sure background | ||
995 | * write-back happens later. | ||
996 | */ | ||
997 | if (!wb_has_dirty_io(&bdi->wb)) | ||
998 | wakeup_bdi = true; | ||
954 | } | 999 | } |
955 | 1000 | ||
956 | inode->dirtied_when = jiffies; | 1001 | inode->dirtied_when = jiffies; |
957 | list_move(&inode->i_list, &wb->b_dirty); | 1002 | list_move(&inode->i_list, &bdi->wb.b_dirty); |
958 | } | 1003 | } |
959 | } | 1004 | } |
960 | out: | 1005 | out: |
961 | spin_unlock(&inode_lock); | 1006 | spin_unlock(&inode_lock); |
1007 | |||
1008 | if (wakeup_bdi) | ||
1009 | bdi_wakeup_thread_delayed(bdi); | ||
962 | } | 1010 | } |
963 | EXPORT_SYMBOL(__mark_inode_dirty); | 1011 | EXPORT_SYMBOL(__mark_inode_dirty); |
964 | 1012 | ||
@@ -1001,7 +1049,7 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1001 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1049 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1002 | struct address_space *mapping; | 1050 | struct address_space *mapping; |
1003 | 1051 | ||
1004 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 1052 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
1005 | continue; | 1053 | continue; |
1006 | mapping = inode->i_mapping; | 1054 | mapping = inode->i_mapping; |
1007 | if (mapping->nrpages == 0) | 1055 | if (mapping->nrpages == 0) |
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig index cc94bb9563f2..3f6dfa989881 100644 --- a/fs/fscache/Kconfig +++ b/fs/fscache/Kconfig | |||
@@ -1,7 +1,6 @@ | |||
1 | 1 | ||
2 | config FSCACHE | 2 | config FSCACHE |
3 | tristate "General filesystem local caching manager" | 3 | tristate "General filesystem local caching manager" |
4 | select SLOW_WORK | ||
5 | help | 4 | help |
6 | This option enables a generic filesystem caching manager that can be | 5 | This option enables a generic filesystem caching manager that can be |
7 | used by various network and other filesystems to cache data locally. | 6 | used by various network and other filesystems to cache data locally. |
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index edd7434ab6e5..6a026441c5a6 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h | |||
@@ -82,6 +82,14 @@ extern unsigned fscache_defer_lookup; | |||
82 | extern unsigned fscache_defer_create; | 82 | extern unsigned fscache_defer_create; |
83 | extern unsigned fscache_debug; | 83 | extern unsigned fscache_debug; |
84 | extern struct kobject *fscache_root; | 84 | extern struct kobject *fscache_root; |
85 | extern struct workqueue_struct *fscache_object_wq; | ||
86 | extern struct workqueue_struct *fscache_op_wq; | ||
87 | DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); | ||
88 | |||
89 | static inline bool fscache_object_congested(void) | ||
90 | { | ||
91 | return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq); | ||
92 | } | ||
85 | 93 | ||
86 | extern int fscache_wait_bit(void *); | 94 | extern int fscache_wait_bit(void *); |
87 | extern int fscache_wait_bit_interruptible(void *); | 95 | extern int fscache_wait_bit_interruptible(void *); |
diff --git a/fs/fscache/main.c b/fs/fscache/main.c index add6bdb53f04..f9d856773f79 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/completion.h> | 16 | #include <linux/completion.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/seq_file.h> | ||
18 | #include "internal.h" | 19 | #include "internal.h" |
19 | 20 | ||
20 | MODULE_DESCRIPTION("FS Cache Manager"); | 21 | MODULE_DESCRIPTION("FS Cache Manager"); |
@@ -40,22 +41,105 @@ MODULE_PARM_DESC(fscache_debug, | |||
40 | "FS-Cache debugging mask"); | 41 | "FS-Cache debugging mask"); |
41 | 42 | ||
42 | struct kobject *fscache_root; | 43 | struct kobject *fscache_root; |
44 | struct workqueue_struct *fscache_object_wq; | ||
45 | struct workqueue_struct *fscache_op_wq; | ||
46 | |||
47 | DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); | ||
48 | |||
49 | /* these values serve as lower bounds, will be adjusted in fscache_init() */ | ||
50 | static unsigned fscache_object_max_active = 4; | ||
51 | static unsigned fscache_op_max_active = 2; | ||
52 | |||
53 | #ifdef CONFIG_SYSCTL | ||
54 | static struct ctl_table_header *fscache_sysctl_header; | ||
55 | |||
56 | static int fscache_max_active_sysctl(struct ctl_table *table, int write, | ||
57 | void __user *buffer, | ||
58 | size_t *lenp, loff_t *ppos) | ||
59 | { | ||
60 | struct workqueue_struct **wqp = table->extra1; | ||
61 | unsigned int *datap = table->data; | ||
62 | int ret; | ||
63 | |||
64 | ret = proc_dointvec(table, write, buffer, lenp, ppos); | ||
65 | if (ret == 0) | ||
66 | workqueue_set_max_active(*wqp, *datap); | ||
67 | return ret; | ||
68 | } | ||
69 | |||
70 | ctl_table fscache_sysctls[] = { | ||
71 | { | ||
72 | .procname = "object_max_active", | ||
73 | .data = &fscache_object_max_active, | ||
74 | .maxlen = sizeof(unsigned), | ||
75 | .mode = 0644, | ||
76 | .proc_handler = fscache_max_active_sysctl, | ||
77 | .extra1 = &fscache_object_wq, | ||
78 | }, | ||
79 | { | ||
80 | .procname = "operation_max_active", | ||
81 | .data = &fscache_op_max_active, | ||
82 | .maxlen = sizeof(unsigned), | ||
83 | .mode = 0644, | ||
84 | .proc_handler = fscache_max_active_sysctl, | ||
85 | .extra1 = &fscache_op_wq, | ||
86 | }, | ||
87 | {} | ||
88 | }; | ||
89 | |||
90 | ctl_table fscache_sysctls_root[] = { | ||
91 | { | ||
92 | .procname = "fscache", | ||
93 | .mode = 0555, | ||
94 | .child = fscache_sysctls, | ||
95 | }, | ||
96 | {} | ||
97 | }; | ||
98 | #endif | ||
43 | 99 | ||
44 | /* | 100 | /* |
45 | * initialise the fs caching module | 101 | * initialise the fs caching module |
46 | */ | 102 | */ |
47 | static int __init fscache_init(void) | 103 | static int __init fscache_init(void) |
48 | { | 104 | { |
105 | unsigned int nr_cpus = num_possible_cpus(); | ||
106 | unsigned int cpu; | ||
49 | int ret; | 107 | int ret; |
50 | 108 | ||
51 | ret = slow_work_register_user(THIS_MODULE); | 109 | fscache_object_max_active = |
52 | if (ret < 0) | 110 | clamp_val(nr_cpus, |
53 | goto error_slow_work; | 111 | fscache_object_max_active, WQ_UNBOUND_MAX_ACTIVE); |
112 | |||
113 | ret = -ENOMEM; | ||
114 | fscache_object_wq = alloc_workqueue("fscache_object", WQ_UNBOUND, | ||
115 | fscache_object_max_active); | ||
116 | if (!fscache_object_wq) | ||
117 | goto error_object_wq; | ||
118 | |||
119 | fscache_op_max_active = | ||
120 | clamp_val(fscache_object_max_active / 2, | ||
121 | fscache_op_max_active, WQ_UNBOUND_MAX_ACTIVE); | ||
122 | |||
123 | ret = -ENOMEM; | ||
124 | fscache_op_wq = alloc_workqueue("fscache_operation", WQ_UNBOUND, | ||
125 | fscache_op_max_active); | ||
126 | if (!fscache_op_wq) | ||
127 | goto error_op_wq; | ||
128 | |||
129 | for_each_possible_cpu(cpu) | ||
130 | init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu)); | ||
54 | 131 | ||
55 | ret = fscache_proc_init(); | 132 | ret = fscache_proc_init(); |
56 | if (ret < 0) | 133 | if (ret < 0) |
57 | goto error_proc; | 134 | goto error_proc; |
58 | 135 | ||
136 | #ifdef CONFIG_SYSCTL | ||
137 | ret = -ENOMEM; | ||
138 | fscache_sysctl_header = register_sysctl_table(fscache_sysctls_root); | ||
139 | if (!fscache_sysctl_header) | ||
140 | goto error_sysctl; | ||
141 | #endif | ||
142 | |||
59 | fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", | 143 | fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", |
60 | sizeof(struct fscache_cookie), | 144 | sizeof(struct fscache_cookie), |
61 | 0, | 145 | 0, |
@@ -78,10 +162,16 @@ static int __init fscache_init(void) | |||
78 | error_kobj: | 162 | error_kobj: |
79 | kmem_cache_destroy(fscache_cookie_jar); | 163 | kmem_cache_destroy(fscache_cookie_jar); |
80 | error_cookie_jar: | 164 | error_cookie_jar: |
165 | #ifdef CONFIG_SYSCTL | ||
166 | unregister_sysctl_table(fscache_sysctl_header); | ||
167 | error_sysctl: | ||
168 | #endif | ||
81 | fscache_proc_cleanup(); | 169 | fscache_proc_cleanup(); |
82 | error_proc: | 170 | error_proc: |
83 | slow_work_unregister_user(THIS_MODULE); | 171 | destroy_workqueue(fscache_op_wq); |
84 | error_slow_work: | 172 | error_op_wq: |
173 | destroy_workqueue(fscache_object_wq); | ||
174 | error_object_wq: | ||
85 | return ret; | 175 | return ret; |
86 | } | 176 | } |
87 | 177 | ||
@@ -96,8 +186,12 @@ static void __exit fscache_exit(void) | |||
96 | 186 | ||
97 | kobject_put(fscache_root); | 187 | kobject_put(fscache_root); |
98 | kmem_cache_destroy(fscache_cookie_jar); | 188 | kmem_cache_destroy(fscache_cookie_jar); |
189 | #ifdef CONFIG_SYSCTL | ||
190 | unregister_sysctl_table(fscache_sysctl_header); | ||
191 | #endif | ||
99 | fscache_proc_cleanup(); | 192 | fscache_proc_cleanup(); |
100 | slow_work_unregister_user(THIS_MODULE); | 193 | destroy_workqueue(fscache_op_wq); |
194 | destroy_workqueue(fscache_object_wq); | ||
101 | printk(KERN_NOTICE "FS-Cache: Unloaded\n"); | 195 | printk(KERN_NOTICE "FS-Cache: Unloaded\n"); |
102 | } | 196 | } |
103 | 197 | ||
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 4a8eb31c5338..ebe29c581380 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c | |||
@@ -34,8 +34,8 @@ struct fscache_objlist_data { | |||
34 | #define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ | 34 | #define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */ |
35 | #define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ | 35 | #define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */ |
36 | #define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ | 36 | #define FSCACHE_OBJLIST_CONFIG_NOEVENTS 0x00000800 /* show objects without no events */ |
37 | #define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */ | 37 | #define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with work */ |
38 | #define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */ | 38 | #define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without work */ |
39 | 39 | ||
40 | u8 buf[512]; /* key and aux data buffer */ | 40 | u8 buf[512]; /* key and aux data buffer */ |
41 | }; | 41 | }; |
@@ -231,12 +231,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
231 | READS, NOREADS); | 231 | READS, NOREADS); |
232 | FILTER(obj->events & obj->event_mask, | 232 | FILTER(obj->events & obj->event_mask, |
233 | EVENTS, NOEVENTS); | 233 | EVENTS, NOEVENTS); |
234 | FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW), | 234 | FILTER(work_busy(&obj->work), WORK, NOWORK); |
235 | WORK, NOWORK); | ||
236 | } | 235 | } |
237 | 236 | ||
238 | seq_printf(m, | 237 | seq_printf(m, |
239 | "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ", | 238 | "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ", |
240 | obj->debug_id, | 239 | obj->debug_id, |
241 | obj->parent ? obj->parent->debug_id : -1, | 240 | obj->parent ? obj->parent->debug_id : -1, |
242 | fscache_object_states_short[obj->state], | 241 | fscache_object_states_short[obj->state], |
@@ -249,7 +248,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
249 | obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, | 248 | obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, |
250 | obj->events, | 249 | obj->events, |
251 | obj->flags, | 250 | obj->flags, |
252 | obj->work.flags); | 251 | work_busy(&obj->work)); |
253 | 252 | ||
254 | no_cookie = true; | 253 | no_cookie = true; |
255 | keylen = auxlen = 0; | 254 | keylen = auxlen = 0; |
diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 0b589a9b4ffc..b6b897c550ac 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c | |||
@@ -14,7 +14,6 @@ | |||
14 | 14 | ||
15 | #define FSCACHE_DEBUG_LEVEL COOKIE | 15 | #define FSCACHE_DEBUG_LEVEL COOKIE |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <linux/seq_file.h> | ||
18 | #include "internal.h" | 17 | #include "internal.h" |
19 | 18 | ||
20 | const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { | 19 | const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { |
@@ -50,12 +49,8 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { | |||
50 | [FSCACHE_OBJECT_DEAD] = "DEAD", | 49 | [FSCACHE_OBJECT_DEAD] = "DEAD", |
51 | }; | 50 | }; |
52 | 51 | ||
53 | static void fscache_object_slow_work_put_ref(struct slow_work *); | 52 | static int fscache_get_object(struct fscache_object *); |
54 | static int fscache_object_slow_work_get_ref(struct slow_work *); | 53 | static void fscache_put_object(struct fscache_object *); |
55 | static void fscache_object_slow_work_execute(struct slow_work *); | ||
56 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
57 | static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *); | ||
58 | #endif | ||
59 | static void fscache_initialise_object(struct fscache_object *); | 54 | static void fscache_initialise_object(struct fscache_object *); |
60 | static void fscache_lookup_object(struct fscache_object *); | 55 | static void fscache_lookup_object(struct fscache_object *); |
61 | static void fscache_object_available(struct fscache_object *); | 56 | static void fscache_object_available(struct fscache_object *); |
@@ -64,17 +59,6 @@ static void fscache_withdraw_object(struct fscache_object *); | |||
64 | static void fscache_enqueue_dependents(struct fscache_object *); | 59 | static void fscache_enqueue_dependents(struct fscache_object *); |
65 | static void fscache_dequeue_object(struct fscache_object *); | 60 | static void fscache_dequeue_object(struct fscache_object *); |
66 | 61 | ||
67 | const struct slow_work_ops fscache_object_slow_work_ops = { | ||
68 | .owner = THIS_MODULE, | ||
69 | .get_ref = fscache_object_slow_work_get_ref, | ||
70 | .put_ref = fscache_object_slow_work_put_ref, | ||
71 | .execute = fscache_object_slow_work_execute, | ||
72 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
73 | .desc = fscache_object_slow_work_desc, | ||
74 | #endif | ||
75 | }; | ||
76 | EXPORT_SYMBOL(fscache_object_slow_work_ops); | ||
77 | |||
78 | /* | 62 | /* |
79 | * we need to notify the parent when an op completes that we had outstanding | 63 | * we need to notify the parent when an op completes that we had outstanding |
80 | * upon it | 64 | * upon it |
@@ -345,7 +329,7 @@ unsupported_event: | |||
345 | /* | 329 | /* |
346 | * execute an object | 330 | * execute an object |
347 | */ | 331 | */ |
348 | static void fscache_object_slow_work_execute(struct slow_work *work) | 332 | void fscache_object_work_func(struct work_struct *work) |
349 | { | 333 | { |
350 | struct fscache_object *object = | 334 | struct fscache_object *object = |
351 | container_of(work, struct fscache_object, work); | 335 | container_of(work, struct fscache_object, work); |
@@ -359,23 +343,9 @@ static void fscache_object_slow_work_execute(struct slow_work *work) | |||
359 | if (object->events & object->event_mask) | 343 | if (object->events & object->event_mask) |
360 | fscache_enqueue_object(object); | 344 | fscache_enqueue_object(object); |
361 | clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | 345 | clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); |
346 | fscache_put_object(object); | ||
362 | } | 347 | } |
363 | 348 | EXPORT_SYMBOL(fscache_object_work_func); | |
364 | /* | ||
365 | * describe an object for slow-work debugging | ||
366 | */ | ||
367 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
368 | static void fscache_object_slow_work_desc(struct slow_work *work, | ||
369 | struct seq_file *m) | ||
370 | { | ||
371 | struct fscache_object *object = | ||
372 | container_of(work, struct fscache_object, work); | ||
373 | |||
374 | seq_printf(m, "FSC: OBJ%x: %s", | ||
375 | object->debug_id, | ||
376 | fscache_object_states_short[object->state]); | ||
377 | } | ||
378 | #endif | ||
379 | 349 | ||
380 | /* | 350 | /* |
381 | * initialise an object | 351 | * initialise an object |
@@ -393,7 +363,6 @@ static void fscache_initialise_object(struct fscache_object *object) | |||
393 | _enter(""); | 363 | _enter(""); |
394 | ASSERT(object->cookie != NULL); | 364 | ASSERT(object->cookie != NULL); |
395 | ASSERT(object->cookie->parent != NULL); | 365 | ASSERT(object->cookie->parent != NULL); |
396 | ASSERT(list_empty(&object->work.link)); | ||
397 | 366 | ||
398 | if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | | 367 | if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | |
399 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | 368 | (1 << FSCACHE_OBJECT_EV_RELEASE) | |
@@ -671,10 +640,8 @@ static void fscache_drop_object(struct fscache_object *object) | |||
671 | object->parent = NULL; | 640 | object->parent = NULL; |
672 | } | 641 | } |
673 | 642 | ||
674 | /* this just shifts the object release to the slow work processor */ | 643 | /* this just shifts the object release to the work processor */ |
675 | fscache_stat(&fscache_n_cop_put_object); | 644 | fscache_put_object(object); |
676 | object->cache->ops->put_object(object); | ||
677 | fscache_stat_d(&fscache_n_cop_put_object); | ||
678 | 645 | ||
679 | _leave(""); | 646 | _leave(""); |
680 | } | 647 | } |
@@ -758,12 +725,10 @@ void fscache_withdrawing_object(struct fscache_cache *cache, | |||
758 | } | 725 | } |
759 | 726 | ||
760 | /* | 727 | /* |
761 | * allow the slow work item processor to get a ref on an object | 728 | * get a ref on an object |
762 | */ | 729 | */ |
763 | static int fscache_object_slow_work_get_ref(struct slow_work *work) | 730 | static int fscache_get_object(struct fscache_object *object) |
764 | { | 731 | { |
765 | struct fscache_object *object = | ||
766 | container_of(work, struct fscache_object, work); | ||
767 | int ret; | 732 | int ret; |
768 | 733 | ||
769 | fscache_stat(&fscache_n_cop_grab_object); | 734 | fscache_stat(&fscache_n_cop_grab_object); |
@@ -773,13 +738,10 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work) | |||
773 | } | 738 | } |
774 | 739 | ||
775 | /* | 740 | /* |
776 | * allow the slow work item processor to discard a ref on a work item | 741 | * discard a ref on a work item |
777 | */ | 742 | */ |
778 | static void fscache_object_slow_work_put_ref(struct slow_work *work) | 743 | static void fscache_put_object(struct fscache_object *object) |
779 | { | 744 | { |
780 | struct fscache_object *object = | ||
781 | container_of(work, struct fscache_object, work); | ||
782 | |||
783 | fscache_stat(&fscache_n_cop_put_object); | 745 | fscache_stat(&fscache_n_cop_put_object); |
784 | object->cache->ops->put_object(object); | 746 | object->cache->ops->put_object(object); |
785 | fscache_stat_d(&fscache_n_cop_put_object); | 747 | fscache_stat_d(&fscache_n_cop_put_object); |
@@ -792,8 +754,48 @@ void fscache_enqueue_object(struct fscache_object *object) | |||
792 | { | 754 | { |
793 | _enter("{OBJ%x}", object->debug_id); | 755 | _enter("{OBJ%x}", object->debug_id); |
794 | 756 | ||
795 | slow_work_enqueue(&object->work); | 757 | if (fscache_get_object(object) >= 0) { |
758 | wait_queue_head_t *cong_wq = | ||
759 | &get_cpu_var(fscache_object_cong_wait); | ||
760 | |||
761 | if (queue_work(fscache_object_wq, &object->work)) { | ||
762 | if (fscache_object_congested()) | ||
763 | wake_up(cong_wq); | ||
764 | } else | ||
765 | fscache_put_object(object); | ||
766 | |||
767 | put_cpu_var(fscache_object_cong_wait); | ||
768 | } | ||
769 | } | ||
770 | |||
771 | /** | ||
772 | * fscache_object_sleep_till_congested - Sleep until object wq is congested | ||
773 | * @timoutp: Scheduler sleep timeout | ||
774 | * | ||
775 | * Allow an object handler to sleep until the object workqueue is congested. | ||
776 | * | ||
777 | * The caller must set up a wake up event before calling this and must have set | ||
778 | * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own | ||
779 | * condition before calling this function as no test is made here. | ||
780 | * | ||
781 | * %true is returned if the object wq is congested, %false otherwise. | ||
782 | */ | ||
783 | bool fscache_object_sleep_till_congested(signed long *timeoutp) | ||
784 | { | ||
785 | wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait); | ||
786 | DEFINE_WAIT(wait); | ||
787 | |||
788 | if (fscache_object_congested()) | ||
789 | return true; | ||
790 | |||
791 | add_wait_queue_exclusive(cong_wq, &wait); | ||
792 | if (!fscache_object_congested()) | ||
793 | *timeoutp = schedule_timeout(*timeoutp); | ||
794 | finish_wait(cong_wq, &wait); | ||
795 | |||
796 | return fscache_object_congested(); | ||
796 | } | 797 | } |
798 | EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested); | ||
797 | 799 | ||
798 | /* | 800 | /* |
799 | * enqueue the dependents of an object for metadata-type processing | 801 | * enqueue the dependents of an object for metadata-type processing |
@@ -819,9 +821,7 @@ static void fscache_enqueue_dependents(struct fscache_object *object) | |||
819 | 821 | ||
820 | /* sort onto appropriate lists */ | 822 | /* sort onto appropriate lists */ |
821 | fscache_enqueue_object(dep); | 823 | fscache_enqueue_object(dep); |
822 | fscache_stat(&fscache_n_cop_put_object); | 824 | fscache_put_object(dep); |
823 | dep->cache->ops->put_object(dep); | ||
824 | fscache_stat_d(&fscache_n_cop_put_object); | ||
825 | 825 | ||
826 | if (!list_empty(&object->dependents)) | 826 | if (!list_empty(&object->dependents)) |
827 | cond_resched_lock(&object->lock); | 827 | cond_resched_lock(&object->lock); |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index f17cecafae44..b9f34eaede09 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
@@ -42,16 +42,12 @@ void fscache_enqueue_operation(struct fscache_operation *op) | |||
42 | 42 | ||
43 | fscache_stat(&fscache_n_op_enqueue); | 43 | fscache_stat(&fscache_n_op_enqueue); |
44 | switch (op->flags & FSCACHE_OP_TYPE) { | 44 | switch (op->flags & FSCACHE_OP_TYPE) { |
45 | case FSCACHE_OP_FAST: | 45 | case FSCACHE_OP_ASYNC: |
46 | _debug("queue fast"); | 46 | _debug("queue async"); |
47 | atomic_inc(&op->usage); | 47 | atomic_inc(&op->usage); |
48 | if (!schedule_work(&op->fast_work)) | 48 | if (!queue_work(fscache_op_wq, &op->work)) |
49 | fscache_put_operation(op); | 49 | fscache_put_operation(op); |
50 | break; | 50 | break; |
51 | case FSCACHE_OP_SLOW: | ||
52 | _debug("queue slow"); | ||
53 | slow_work_enqueue(&op->slow_work); | ||
54 | break; | ||
55 | case FSCACHE_OP_MYTHREAD: | 51 | case FSCACHE_OP_MYTHREAD: |
56 | _debug("queue for caller's attention"); | 52 | _debug("queue for caller's attention"); |
57 | break; | 53 | break; |
@@ -455,36 +451,13 @@ void fscache_operation_gc(struct work_struct *work) | |||
455 | } | 451 | } |
456 | 452 | ||
457 | /* | 453 | /* |
458 | * allow the slow work item processor to get a ref on an operation | 454 | * execute an operation using fs_op_wq to provide processing context - |
459 | */ | 455 | * the caller holds a ref to this object, so we don't need to hold one |
460 | static int fscache_op_get_ref(struct slow_work *work) | ||
461 | { | ||
462 | struct fscache_operation *op = | ||
463 | container_of(work, struct fscache_operation, slow_work); | ||
464 | |||
465 | atomic_inc(&op->usage); | ||
466 | return 0; | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * allow the slow work item processor to discard a ref on an operation | ||
471 | */ | ||
472 | static void fscache_op_put_ref(struct slow_work *work) | ||
473 | { | ||
474 | struct fscache_operation *op = | ||
475 | container_of(work, struct fscache_operation, slow_work); | ||
476 | |||
477 | fscache_put_operation(op); | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * execute an operation using the slow thread pool to provide processing context | ||
482 | * - the caller holds a ref to this object, so we don't need to hold one | ||
483 | */ | 456 | */ |
484 | static void fscache_op_execute(struct slow_work *work) | 457 | void fscache_op_work_func(struct work_struct *work) |
485 | { | 458 | { |
486 | struct fscache_operation *op = | 459 | struct fscache_operation *op = |
487 | container_of(work, struct fscache_operation, slow_work); | 460 | container_of(work, struct fscache_operation, work); |
488 | unsigned long start; | 461 | unsigned long start; |
489 | 462 | ||
490 | _enter("{OBJ%x OP%x,%d}", | 463 | _enter("{OBJ%x OP%x,%d}", |
@@ -494,31 +467,7 @@ static void fscache_op_execute(struct slow_work *work) | |||
494 | start = jiffies; | 467 | start = jiffies; |
495 | op->processor(op); | 468 | op->processor(op); |
496 | fscache_hist(fscache_ops_histogram, start); | 469 | fscache_hist(fscache_ops_histogram, start); |
470 | fscache_put_operation(op); | ||
497 | 471 | ||
498 | _leave(""); | 472 | _leave(""); |
499 | } | 473 | } |
500 | |||
501 | /* | ||
502 | * describe an operation for slow-work debugging | ||
503 | */ | ||
504 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
505 | static void fscache_op_desc(struct slow_work *work, struct seq_file *m) | ||
506 | { | ||
507 | struct fscache_operation *op = | ||
508 | container_of(work, struct fscache_operation, slow_work); | ||
509 | |||
510 | seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx", | ||
511 | op->object->debug_id, op->debug_id, | ||
512 | op->name, op->state, op->flags); | ||
513 | } | ||
514 | #endif | ||
515 | |||
516 | const struct slow_work_ops fscache_op_slow_work_ops = { | ||
517 | .owner = THIS_MODULE, | ||
518 | .get_ref = fscache_op_get_ref, | ||
519 | .put_ref = fscache_op_put_ref, | ||
520 | .execute = fscache_op_execute, | ||
521 | #ifdef CONFIG_SLOW_WORK_DEBUG | ||
522 | .desc = fscache_op_desc, | ||
523 | #endif | ||
524 | }; | ||
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 723b889fd219..41c441c2058d 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
@@ -105,7 +105,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, | |||
105 | 105 | ||
106 | page_busy: | 106 | page_busy: |
107 | /* we might want to wait here, but that could deadlock the allocator as | 107 | /* we might want to wait here, but that could deadlock the allocator as |
108 | * the slow-work threads writing to the cache may all end up sleeping | 108 | * the work threads writing to the cache may all end up sleeping |
109 | * on memory allocation */ | 109 | * on memory allocation */ |
110 | fscache_stat(&fscache_n_store_vmscan_busy); | 110 | fscache_stat(&fscache_n_store_vmscan_busy); |
111 | return false; | 111 | return false; |
@@ -188,9 +188,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) | |||
188 | return -ENOMEM; | 188 | return -ENOMEM; |
189 | } | 189 | } |
190 | 190 | ||
191 | fscache_operation_init(op, NULL); | 191 | fscache_operation_init(op, fscache_attr_changed_op, NULL); |
192 | fscache_operation_init_slow(op, fscache_attr_changed_op); | 192 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); |
193 | op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); | ||
194 | fscache_set_op_name(op, "Attr"); | 193 | fscache_set_op_name(op, "Attr"); |
195 | 194 | ||
196 | spin_lock(&cookie->lock); | 195 | spin_lock(&cookie->lock); |
@@ -218,24 +217,6 @@ nobufs: | |||
218 | EXPORT_SYMBOL(__fscache_attr_changed); | 217 | EXPORT_SYMBOL(__fscache_attr_changed); |
219 | 218 | ||
220 | /* | 219 | /* |
221 | * handle secondary execution given to a retrieval op on behalf of the | ||
222 | * cache | ||
223 | */ | ||
224 | static void fscache_retrieval_work(struct work_struct *work) | ||
225 | { | ||
226 | struct fscache_retrieval *op = | ||
227 | container_of(work, struct fscache_retrieval, op.fast_work); | ||
228 | unsigned long start; | ||
229 | |||
230 | _enter("{OP%x}", op->op.debug_id); | ||
231 | |||
232 | start = jiffies; | ||
233 | op->op.processor(&op->op); | ||
234 | fscache_hist(fscache_ops_histogram, start); | ||
235 | fscache_put_operation(&op->op); | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * release a retrieval op reference | 220 | * release a retrieval op reference |
240 | */ | 221 | */ |
241 | static void fscache_release_retrieval_op(struct fscache_operation *_op) | 222 | static void fscache_release_retrieval_op(struct fscache_operation *_op) |
@@ -269,13 +250,12 @@ static struct fscache_retrieval *fscache_alloc_retrieval( | |||
269 | return NULL; | 250 | return NULL; |
270 | } | 251 | } |
271 | 252 | ||
272 | fscache_operation_init(&op->op, fscache_release_retrieval_op); | 253 | fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); |
273 | op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); | 254 | op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); |
274 | op->mapping = mapping; | 255 | op->mapping = mapping; |
275 | op->end_io_func = end_io_func; | 256 | op->end_io_func = end_io_func; |
276 | op->context = context; | 257 | op->context = context; |
277 | op->start_time = jiffies; | 258 | op->start_time = jiffies; |
278 | INIT_WORK(&op->op.fast_work, fscache_retrieval_work); | ||
279 | INIT_LIST_HEAD(&op->to_do); | 259 | INIT_LIST_HEAD(&op->to_do); |
280 | fscache_set_op_name(&op->op, "Retr"); | 260 | fscache_set_op_name(&op->op, "Retr"); |
281 | return op; | 261 | return op; |
@@ -795,9 +775,9 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
795 | if (!op) | 775 | if (!op) |
796 | goto nomem; | 776 | goto nomem; |
797 | 777 | ||
798 | fscache_operation_init(&op->op, fscache_release_write_op); | 778 | fscache_operation_init(&op->op, fscache_write_op, |
799 | fscache_operation_init_slow(&op->op, fscache_write_op); | 779 | fscache_release_write_op); |
800 | op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); | 780 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); |
801 | fscache_set_op_name(&op->op, "Write1"); | 781 | fscache_set_op_name(&op->op, "Write1"); |
802 | 782 | ||
803 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 783 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); |
@@ -852,7 +832,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
852 | fscache_stat(&fscache_n_store_ops); | 832 | fscache_stat(&fscache_n_store_ops); |
853 | fscache_stat(&fscache_n_stores_ok); | 833 | fscache_stat(&fscache_n_stores_ok); |
854 | 834 | ||
855 | /* the slow work queue now carries its own ref on the object */ | 835 | /* the work queue now carries its own ref on the object */ |
856 | fscache_put_operation(&op->op); | 836 | fscache_put_operation(&op->op); |
857 | _leave(" = 0"); | 837 | _leave(" = 0"); |
858 | return 0; | 838 | return 0; |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9424796d6634..69ad053ffd78 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -239,7 +239,6 @@ static u64 fuse_get_unique(struct fuse_conn *fc) | |||
239 | 239 | ||
240 | static void queue_request(struct fuse_conn *fc, struct fuse_req *req) | 240 | static void queue_request(struct fuse_conn *fc, struct fuse_req *req) |
241 | { | 241 | { |
242 | req->in.h.unique = fuse_get_unique(fc); | ||
243 | req->in.h.len = sizeof(struct fuse_in_header) + | 242 | req->in.h.len = sizeof(struct fuse_in_header) + |
244 | len_args(req->in.numargs, (struct fuse_arg *) req->in.args); | 243 | len_args(req->in.numargs, (struct fuse_arg *) req->in.args); |
245 | list_add_tail(&req->list, &fc->pending); | 244 | list_add_tail(&req->list, &fc->pending); |
@@ -261,6 +260,7 @@ static void flush_bg_queue(struct fuse_conn *fc) | |||
261 | req = list_entry(fc->bg_queue.next, struct fuse_req, list); | 260 | req = list_entry(fc->bg_queue.next, struct fuse_req, list); |
262 | list_del(&req->list); | 261 | list_del(&req->list); |
263 | fc->active_background++; | 262 | fc->active_background++; |
263 | req->in.h.unique = fuse_get_unique(fc); | ||
264 | queue_request(fc, req); | 264 | queue_request(fc, req); |
265 | } | 265 | } |
266 | } | 266 | } |
@@ -398,6 +398,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) | |||
398 | else if (fc->conn_error) | 398 | else if (fc->conn_error) |
399 | req->out.h.error = -ECONNREFUSED; | 399 | req->out.h.error = -ECONNREFUSED; |
400 | else { | 400 | else { |
401 | req->in.h.unique = fuse_get_unique(fc); | ||
401 | queue_request(fc, req); | 402 | queue_request(fc, req); |
402 | /* acquire extra reference, since request is still needed | 403 | /* acquire extra reference, since request is still needed |
403 | after request_end() */ | 404 | after request_end() */ |
@@ -450,6 +451,23 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) | |||
450 | } | 451 | } |
451 | EXPORT_SYMBOL_GPL(fuse_request_send_background); | 452 | EXPORT_SYMBOL_GPL(fuse_request_send_background); |
452 | 453 | ||
454 | static int fuse_request_send_notify_reply(struct fuse_conn *fc, | ||
455 | struct fuse_req *req, u64 unique) | ||
456 | { | ||
457 | int err = -ENODEV; | ||
458 | |||
459 | req->isreply = 0; | ||
460 | req->in.h.unique = unique; | ||
461 | spin_lock(&fc->lock); | ||
462 | if (fc->connected) { | ||
463 | queue_request(fc, req); | ||
464 | err = 0; | ||
465 | } | ||
466 | spin_unlock(&fc->lock); | ||
467 | |||
468 | return err; | ||
469 | } | ||
470 | |||
453 | /* | 471 | /* |
454 | * Called under fc->lock | 472 | * Called under fc->lock |
455 | * | 473 | * |
@@ -535,13 +553,13 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) | |||
535 | if (!cs->write) { | 553 | if (!cs->write) { |
536 | buf->ops->unmap(cs->pipe, buf, cs->mapaddr); | 554 | buf->ops->unmap(cs->pipe, buf, cs->mapaddr); |
537 | } else { | 555 | } else { |
538 | kunmap_atomic(cs->mapaddr, KM_USER0); | 556 | kunmap(buf->page); |
539 | buf->len = PAGE_SIZE - cs->len; | 557 | buf->len = PAGE_SIZE - cs->len; |
540 | } | 558 | } |
541 | cs->currbuf = NULL; | 559 | cs->currbuf = NULL; |
542 | cs->mapaddr = NULL; | 560 | cs->mapaddr = NULL; |
543 | } else if (cs->mapaddr) { | 561 | } else if (cs->mapaddr) { |
544 | kunmap_atomic(cs->mapaddr, KM_USER0); | 562 | kunmap(cs->pg); |
545 | if (cs->write) { | 563 | if (cs->write) { |
546 | flush_dcache_page(cs->pg); | 564 | flush_dcache_page(cs->pg); |
547 | set_page_dirty_lock(cs->pg); | 565 | set_page_dirty_lock(cs->pg); |
@@ -572,7 +590,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) | |||
572 | 590 | ||
573 | BUG_ON(!cs->nr_segs); | 591 | BUG_ON(!cs->nr_segs); |
574 | cs->currbuf = buf; | 592 | cs->currbuf = buf; |
575 | cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); | 593 | cs->mapaddr = buf->ops->map(cs->pipe, buf, 0); |
576 | cs->len = buf->len; | 594 | cs->len = buf->len; |
577 | cs->buf = cs->mapaddr + buf->offset; | 595 | cs->buf = cs->mapaddr + buf->offset; |
578 | cs->pipebufs++; | 596 | cs->pipebufs++; |
@@ -592,7 +610,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) | |||
592 | buf->len = 0; | 610 | buf->len = 0; |
593 | 611 | ||
594 | cs->currbuf = buf; | 612 | cs->currbuf = buf; |
595 | cs->mapaddr = kmap_atomic(page, KM_USER0); | 613 | cs->mapaddr = kmap(page); |
596 | cs->buf = cs->mapaddr; | 614 | cs->buf = cs->mapaddr; |
597 | cs->len = PAGE_SIZE; | 615 | cs->len = PAGE_SIZE; |
598 | cs->pipebufs++; | 616 | cs->pipebufs++; |
@@ -611,7 +629,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) | |||
611 | return err; | 629 | return err; |
612 | BUG_ON(err != 1); | 630 | BUG_ON(err != 1); |
613 | offset = cs->addr % PAGE_SIZE; | 631 | offset = cs->addr % PAGE_SIZE; |
614 | cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); | 632 | cs->mapaddr = kmap(cs->pg); |
615 | cs->buf = cs->mapaddr + offset; | 633 | cs->buf = cs->mapaddr + offset; |
616 | cs->len = min(PAGE_SIZE - offset, cs->seglen); | 634 | cs->len = min(PAGE_SIZE - offset, cs->seglen); |
617 | cs->seglen -= cs->len; | 635 | cs->seglen -= cs->len; |
@@ -1231,6 +1249,199 @@ err: | |||
1231 | return err; | 1249 | return err; |
1232 | } | 1250 | } |
1233 | 1251 | ||
1252 | static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, | ||
1253 | struct fuse_copy_state *cs) | ||
1254 | { | ||
1255 | struct fuse_notify_store_out outarg; | ||
1256 | struct inode *inode; | ||
1257 | struct address_space *mapping; | ||
1258 | u64 nodeid; | ||
1259 | int err; | ||
1260 | pgoff_t index; | ||
1261 | unsigned int offset; | ||
1262 | unsigned int num; | ||
1263 | loff_t file_size; | ||
1264 | loff_t end; | ||
1265 | |||
1266 | err = -EINVAL; | ||
1267 | if (size < sizeof(outarg)) | ||
1268 | goto out_finish; | ||
1269 | |||
1270 | err = fuse_copy_one(cs, &outarg, sizeof(outarg)); | ||
1271 | if (err) | ||
1272 | goto out_finish; | ||
1273 | |||
1274 | err = -EINVAL; | ||
1275 | if (size - sizeof(outarg) != outarg.size) | ||
1276 | goto out_finish; | ||
1277 | |||
1278 | nodeid = outarg.nodeid; | ||
1279 | |||
1280 | down_read(&fc->killsb); | ||
1281 | |||
1282 | err = -ENOENT; | ||
1283 | if (!fc->sb) | ||
1284 | goto out_up_killsb; | ||
1285 | |||
1286 | inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); | ||
1287 | if (!inode) | ||
1288 | goto out_up_killsb; | ||
1289 | |||
1290 | mapping = inode->i_mapping; | ||
1291 | index = outarg.offset >> PAGE_CACHE_SHIFT; | ||
1292 | offset = outarg.offset & ~PAGE_CACHE_MASK; | ||
1293 | file_size = i_size_read(inode); | ||
1294 | end = outarg.offset + outarg.size; | ||
1295 | if (end > file_size) { | ||
1296 | file_size = end; | ||
1297 | fuse_write_update_size(inode, file_size); | ||
1298 | } | ||
1299 | |||
1300 | num = outarg.size; | ||
1301 | while (num) { | ||
1302 | struct page *page; | ||
1303 | unsigned int this_num; | ||
1304 | |||
1305 | err = -ENOMEM; | ||
1306 | page = find_or_create_page(mapping, index, | ||
1307 | mapping_gfp_mask(mapping)); | ||
1308 | if (!page) | ||
1309 | goto out_iput; | ||
1310 | |||
1311 | this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); | ||
1312 | err = fuse_copy_page(cs, &page, offset, this_num, 0); | ||
1313 | if (!err && offset == 0 && (num != 0 || file_size == end)) | ||
1314 | SetPageUptodate(page); | ||
1315 | unlock_page(page); | ||
1316 | page_cache_release(page); | ||
1317 | |||
1318 | if (err) | ||
1319 | goto out_iput; | ||
1320 | |||
1321 | num -= this_num; | ||
1322 | offset = 0; | ||
1323 | index++; | ||
1324 | } | ||
1325 | |||
1326 | err = 0; | ||
1327 | |||
1328 | out_iput: | ||
1329 | iput(inode); | ||
1330 | out_up_killsb: | ||
1331 | up_read(&fc->killsb); | ||
1332 | out_finish: | ||
1333 | fuse_copy_finish(cs); | ||
1334 | return err; | ||
1335 | } | ||
1336 | |||
1337 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) | ||
1338 | { | ||
1339 | int i; | ||
1340 | |||
1341 | for (i = 0; i < req->num_pages; i++) { | ||
1342 | struct page *page = req->pages[i]; | ||
1343 | page_cache_release(page); | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | ||
1348 | struct fuse_notify_retrieve_out *outarg) | ||
1349 | { | ||
1350 | int err; | ||
1351 | struct address_space *mapping = inode->i_mapping; | ||
1352 | struct fuse_req *req; | ||
1353 | pgoff_t index; | ||
1354 | loff_t file_size; | ||
1355 | unsigned int num; | ||
1356 | unsigned int offset; | ||
1357 | size_t total_len; | ||
1358 | |||
1359 | req = fuse_get_req(fc); | ||
1360 | if (IS_ERR(req)) | ||
1361 | return PTR_ERR(req); | ||
1362 | |||
1363 | offset = outarg->offset & ~PAGE_CACHE_MASK; | ||
1364 | |||
1365 | req->in.h.opcode = FUSE_NOTIFY_REPLY; | ||
1366 | req->in.h.nodeid = outarg->nodeid; | ||
1367 | req->in.numargs = 2; | ||
1368 | req->in.argpages = 1; | ||
1369 | req->page_offset = offset; | ||
1370 | req->end = fuse_retrieve_end; | ||
1371 | |||
1372 | index = outarg->offset >> PAGE_CACHE_SHIFT; | ||
1373 | file_size = i_size_read(inode); | ||
1374 | num = outarg->size; | ||
1375 | if (outarg->offset > file_size) | ||
1376 | num = 0; | ||
1377 | else if (outarg->offset + num > file_size) | ||
1378 | num = file_size - outarg->offset; | ||
1379 | |||
1380 | while (num) { | ||
1381 | struct page *page; | ||
1382 | unsigned int this_num; | ||
1383 | |||
1384 | page = find_get_page(mapping, index); | ||
1385 | if (!page) | ||
1386 | break; | ||
1387 | |||
1388 | this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset); | ||
1389 | req->pages[req->num_pages] = page; | ||
1390 | req->num_pages++; | ||
1391 | |||
1392 | num -= this_num; | ||
1393 | total_len += this_num; | ||
1394 | } | ||
1395 | req->misc.retrieve_in.offset = outarg->offset; | ||
1396 | req->misc.retrieve_in.size = total_len; | ||
1397 | req->in.args[0].size = sizeof(req->misc.retrieve_in); | ||
1398 | req->in.args[0].value = &req->misc.retrieve_in; | ||
1399 | req->in.args[1].size = total_len; | ||
1400 | |||
1401 | err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); | ||
1402 | if (err) | ||
1403 | fuse_retrieve_end(fc, req); | ||
1404 | |||
1405 | return err; | ||
1406 | } | ||
1407 | |||
1408 | static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, | ||
1409 | struct fuse_copy_state *cs) | ||
1410 | { | ||
1411 | struct fuse_notify_retrieve_out outarg; | ||
1412 | struct inode *inode; | ||
1413 | int err; | ||
1414 | |||
1415 | err = -EINVAL; | ||
1416 | if (size != sizeof(outarg)) | ||
1417 | goto copy_finish; | ||
1418 | |||
1419 | err = fuse_copy_one(cs, &outarg, sizeof(outarg)); | ||
1420 | if (err) | ||
1421 | goto copy_finish; | ||
1422 | |||
1423 | fuse_copy_finish(cs); | ||
1424 | |||
1425 | down_read(&fc->killsb); | ||
1426 | err = -ENOENT; | ||
1427 | if (fc->sb) { | ||
1428 | u64 nodeid = outarg.nodeid; | ||
1429 | |||
1430 | inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid); | ||
1431 | if (inode) { | ||
1432 | err = fuse_retrieve(fc, inode, &outarg); | ||
1433 | iput(inode); | ||
1434 | } | ||
1435 | } | ||
1436 | up_read(&fc->killsb); | ||
1437 | |||
1438 | return err; | ||
1439 | |||
1440 | copy_finish: | ||
1441 | fuse_copy_finish(cs); | ||
1442 | return err; | ||
1443 | } | ||
1444 | |||
1234 | static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, | 1445 | static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, |
1235 | unsigned int size, struct fuse_copy_state *cs) | 1446 | unsigned int size, struct fuse_copy_state *cs) |
1236 | { | 1447 | { |
@@ -1244,6 +1455,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, | |||
1244 | case FUSE_NOTIFY_INVAL_ENTRY: | 1455 | case FUSE_NOTIFY_INVAL_ENTRY: |
1245 | return fuse_notify_inval_entry(fc, size, cs); | 1456 | return fuse_notify_inval_entry(fc, size, cs); |
1246 | 1457 | ||
1458 | case FUSE_NOTIFY_STORE: | ||
1459 | return fuse_notify_store(fc, size, cs); | ||
1460 | |||
1461 | case FUSE_NOTIFY_RETRIEVE: | ||
1462 | return fuse_notify_retrieve(fc, size, cs); | ||
1463 | |||
1247 | default: | 1464 | default: |
1248 | fuse_copy_finish(cs); | 1465 | fuse_copy_finish(cs); |
1249 | return -EINVAL; | 1466 | return -EINVAL; |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 3cdc5f78a406..c9627c95482d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1016,7 +1016,7 @@ static int fuse_permission(struct inode *inode, int mask) | |||
1016 | exist. So if permissions are revoked this won't be | 1016 | exist. So if permissions are revoked this won't be |
1017 | noticed immediately, only after the attribute | 1017 | noticed immediately, only after the attribute |
1018 | timeout has expired */ | 1018 | timeout has expired */ |
1019 | } else if (mask & MAY_ACCESS) { | 1019 | } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { |
1020 | err = fuse_access(inode, mask); | 1020 | err = fuse_access(inode, mask); |
1021 | } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { | 1021 | } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { |
1022 | if (!(inode->i_mode & S_IXUGO)) { | 1022 | if (!(inode->i_mode & S_IXUGO)) { |
@@ -1270,21 +1270,18 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, | |||
1270 | if (!fuse_allow_task(fc, current)) | 1270 | if (!fuse_allow_task(fc, current)) |
1271 | return -EACCES; | 1271 | return -EACCES; |
1272 | 1272 | ||
1273 | if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { | 1273 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) |
1274 | err = inode_change_ok(inode, attr); | 1274 | attr->ia_valid |= ATTR_FORCE; |
1275 | if (err) | 1275 | |
1276 | return err; | 1276 | err = inode_change_ok(inode, attr); |
1277 | } | 1277 | if (err) |
1278 | return err; | ||
1278 | 1279 | ||
1279 | if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) | 1280 | if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) |
1280 | return 0; | 1281 | return 0; |
1281 | 1282 | ||
1282 | if (attr->ia_valid & ATTR_SIZE) { | 1283 | if (attr->ia_valid & ATTR_SIZE) |
1283 | err = inode_newsize_ok(inode, attr->ia_size); | ||
1284 | if (err) | ||
1285 | return err; | ||
1286 | is_truncate = true; | 1284 | is_truncate = true; |
1287 | } | ||
1288 | 1285 | ||
1289 | req = fuse_get_req(fc); | 1286 | req = fuse_get_req(fc); |
1290 | if (IS_ERR(req)) | 1287 | if (IS_ERR(req)) |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ada0adeb3bb5..147c1f71bdb9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -706,7 +706,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, | |||
706 | return 0; | 706 | return 0; |
707 | } | 707 | } |
708 | 708 | ||
709 | static void fuse_write_update_size(struct inode *inode, loff_t pos) | 709 | void fuse_write_update_size(struct inode *inode, loff_t pos) |
710 | { | 710 | { |
711 | struct fuse_conn *fc = get_fuse_conn(inode); | 711 | struct fuse_conn *fc = get_fuse_conn(inode); |
712 | struct fuse_inode *fi = get_fuse_inode(inode); | 712 | struct fuse_inode *fi = get_fuse_inode(inode); |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 8f309f04064e..57d4a3a0f102 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -272,6 +272,7 @@ struct fuse_req { | |||
272 | struct fuse_write_in in; | 272 | struct fuse_write_in in; |
273 | struct fuse_write_out out; | 273 | struct fuse_write_out out; |
274 | } write; | 274 | } write; |
275 | struct fuse_notify_retrieve_in retrieve_in; | ||
275 | struct fuse_lk_in lk_in; | 276 | struct fuse_lk_in lk_in; |
276 | } misc; | 277 | } misc; |
277 | 278 | ||
@@ -748,4 +749,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
748 | unsigned fuse_file_poll(struct file *file, poll_table *wait); | 749 | unsigned fuse_file_poll(struct file *file, poll_table *wait); |
749 | int fuse_dev_release(struct inode *inode, struct file *file); | 750 | int fuse_dev_release(struct inode *inode, struct file *file); |
750 | 751 | ||
752 | void fuse_write_update_size(struct inode *inode, loff_t pos); | ||
753 | |||
751 | #endif /* _FS_FUSE_I_H */ | 754 | #endif /* _FS_FUSE_I_H */ |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ec14d19ce501..da9e6e11374c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -122,8 +122,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, | |||
122 | fuse_request_send_noreply(fc, req); | 122 | fuse_request_send_noreply(fc, req); |
123 | } | 123 | } |
124 | 124 | ||
125 | static void fuse_clear_inode(struct inode *inode) | 125 | static void fuse_evict_inode(struct inode *inode) |
126 | { | 126 | { |
127 | truncate_inode_pages(&inode->i_data, 0); | ||
128 | end_writeback(inode); | ||
127 | if (inode->i_sb->s_flags & MS_ACTIVE) { | 129 | if (inode->i_sb->s_flags & MS_ACTIVE) { |
128 | struct fuse_conn *fc = get_fuse_conn(inode); | 130 | struct fuse_conn *fc = get_fuse_conn(inode); |
129 | struct fuse_inode *fi = get_fuse_inode(inode); | 131 | struct fuse_inode *fi = get_fuse_inode(inode); |
@@ -736,7 +738,7 @@ static const struct export_operations fuse_export_operations = { | |||
736 | static const struct super_operations fuse_super_operations = { | 738 | static const struct super_operations fuse_super_operations = { |
737 | .alloc_inode = fuse_alloc_inode, | 739 | .alloc_inode = fuse_alloc_inode, |
738 | .destroy_inode = fuse_destroy_inode, | 740 | .destroy_inode = fuse_destroy_inode, |
739 | .clear_inode = fuse_clear_inode, | 741 | .evict_inode = fuse_evict_inode, |
740 | .drop_inode = generic_delete_inode, | 742 | .drop_inode = generic_delete_inode, |
741 | .remount_fs = fuse_remount_fs, | 743 | .remount_fs = fuse_remount_fs, |
742 | .put_super = fuse_put_super, | 744 | .put_super = fuse_put_super, |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index a47b43107112..cc9665522148 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -7,7 +7,6 @@ config GFS2_FS | |||
7 | select IP_SCTP if DLM_SCTP | 7 | select IP_SCTP if DLM_SCTP |
8 | select FS_POSIX_ACL | 8 | select FS_POSIX_ACL |
9 | select CRC32 | 9 | select CRC32 |
10 | select SLOW_WORK | ||
11 | select QUOTACTL | 10 | select QUOTACTL |
12 | help | 11 | help |
13 | A cluster filesystem. | 12 | A cluster filesystem. |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 5e96cbd8a454..194fe16d8418 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -697,12 +697,12 @@ out: | |||
697 | page_cache_release(page); | 697 | page_cache_release(page); |
698 | 698 | ||
699 | /* | 699 | /* |
700 | * XXX(hch): the call below should probably be replaced with | 700 | * XXX(truncate): the call below should probably be replaced with |
701 | * a call to the gfs2-specific truncate blocks helper to actually | 701 | * a call to the gfs2-specific truncate blocks helper to actually |
702 | * release disk blocks.. | 702 | * release disk blocks.. |
703 | */ | 703 | */ |
704 | if (pos + len > ip->i_inode.i_size) | 704 | if (pos + len > ip->i_inode.i_size) |
705 | simple_setsize(&ip->i_inode, ip->i_inode.i_size); | 705 | truncate_setsize(&ip->i_inode, ip->i_inode.i_size); |
706 | out_endtrans: | 706 | out_endtrans: |
707 | gfs2_trans_end(sdp); | 707 | gfs2_trans_end(sdp); |
708 | out_trans_fail: | 708 | out_trans_fail: |
@@ -1042,9 +1042,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
1042 | if (rv != 1) | 1042 | if (rv != 1) |
1043 | goto out; /* dio not valid, fall back to buffered i/o */ | 1043 | goto out; /* dio not valid, fall back to buffered i/o */ |
1044 | 1044 | ||
1045 | rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, | 1045 | rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
1046 | iov, offset, nr_segs, | 1046 | offset, nr_segs, gfs2_get_block_direct, |
1047 | gfs2_get_block_direct, NULL); | 1047 | NULL, NULL, 0); |
1048 | out: | 1048 | out: |
1049 | gfs2_glock_dq_m(1, &gh); | 1049 | gfs2_glock_dq_m(1, &gh); |
1050 | gfs2_holder_uninit(&gh); | 1050 | gfs2_holder_uninit(&gh); |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8fcbce48a128..fdbf4b366fa5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/workqueue.h> | 14 | #include <linux/workqueue.h> |
15 | #include <linux/slow-work.h> | ||
16 | #include <linux/dlm.h> | 15 | #include <linux/dlm.h> |
17 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
18 | 17 | ||
@@ -383,7 +382,7 @@ struct gfs2_journal_extent { | |||
383 | struct gfs2_jdesc { | 382 | struct gfs2_jdesc { |
384 | struct list_head jd_list; | 383 | struct list_head jd_list; |
385 | struct list_head extent_list; | 384 | struct list_head extent_list; |
386 | struct slow_work jd_work; | 385 | struct work_struct jd_work; |
387 | struct inode *jd_inode; | 386 | struct inode *jd_inode; |
388 | unsigned long jd_flags; | 387 | unsigned long jd_flags; |
389 | #define JDF_RECOVERY 1 | 388 | #define JDF_RECOVERY 1 |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index f03afd9c44bc..08140f185a37 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -84,7 +84,7 @@ static int iget_skip_test(struct inode *inode, void *opaque) | |||
84 | struct gfs2_skip_data *data = opaque; | 84 | struct gfs2_skip_data *data = opaque; |
85 | 85 | ||
86 | if (ip->i_no_addr == data->no_addr) { | 86 | if (ip->i_no_addr == data->no_addr) { |
87 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ | 87 | if (inode->i_state & (I_FREEING|I_WILL_FREE)){ |
88 | data->skipped = 1; | 88 | data->skipped = 1; |
89 | return 0; | 89 | return 0; |
90 | } | 90 | } |
@@ -991,18 +991,29 @@ fail: | |||
991 | 991 | ||
992 | static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 992 | static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
993 | { | 993 | { |
994 | struct inode *inode = &ip->i_inode; | ||
994 | struct buffer_head *dibh; | 995 | struct buffer_head *dibh; |
995 | int error; | 996 | int error; |
996 | 997 | ||
997 | error = gfs2_meta_inode_buffer(ip, &dibh); | 998 | error = gfs2_meta_inode_buffer(ip, &dibh); |
998 | if (!error) { | 999 | if (error) |
999 | error = inode_setattr(&ip->i_inode, attr); | 1000 | return error; |
1000 | gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); | 1001 | |
1001 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1002 | if ((attr->ia_valid & ATTR_SIZE) && |
1002 | gfs2_dinode_out(ip, dibh->b_data); | 1003 | attr->ia_size != i_size_read(inode)) { |
1003 | brelse(dibh); | 1004 | error = vmtruncate(inode, attr->ia_size); |
1005 | if (error) | ||
1006 | return error; | ||
1004 | } | 1007 | } |
1005 | return error; | 1008 | |
1009 | setattr_copy(inode, attr); | ||
1010 | mark_inode_dirty(inode); | ||
1011 | |||
1012 | gfs2_assert_warn(GFS2_SB(inode), !error); | ||
1013 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1014 | gfs2_dinode_out(ip, dibh->b_data); | ||
1015 | brelse(dibh); | ||
1016 | return 0; | ||
1006 | } | 1017 | } |
1007 | 1018 | ||
1008 | /** | 1019 | /** |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6a857e24f947..cde1248a6225 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
595 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | 595 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
596 | goto skip_barrier; | 596 | goto skip_barrier; |
597 | get_bh(bh); | 597 | get_bh(bh); |
598 | submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh); | 598 | submit_bh(WRITE_BARRIER | REQ_META, bh); |
599 | wait_on_buffer(bh); | 599 | wait_on_buffer(bh); |
600 | if (buffer_eopnotsupp(bh)) { | 600 | if (buffer_eopnotsupp(bh)) { |
601 | clear_buffer_eopnotsupp(bh); | 601 | clear_buffer_eopnotsupp(bh); |
@@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
605 | lock_buffer(bh); | 605 | lock_buffer(bh); |
606 | skip_barrier: | 606 | skip_barrier: |
607 | get_bh(bh); | 607 | get_bh(bh); |
608 | submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); | 608 | submit_bh(WRITE_SYNC | REQ_META, bh); |
609 | wait_on_buffer(bh); | 609 | wait_on_buffer(bh); |
610 | } | 610 | } |
611 | if (!buffer_uptodate(bh)) | 611 | if (!buffer_uptodate(bh)) |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index fb2a5f93b7c3..b1e9630eb46a 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
17 | #include <asm/atomic.h> | 17 | #include <asm/atomic.h> |
18 | #include <linux/slow-work.h> | ||
19 | 18 | ||
20 | #include "gfs2.h" | 19 | #include "gfs2.h" |
21 | #include "incore.h" | 20 | #include "incore.h" |
@@ -24,6 +23,7 @@ | |||
24 | #include "util.h" | 23 | #include "util.h" |
25 | #include "glock.h" | 24 | #include "glock.h" |
26 | #include "quota.h" | 25 | #include "quota.h" |
26 | #include "recovery.h" | ||
27 | 27 | ||
28 | static struct shrinker qd_shrinker = { | 28 | static struct shrinker qd_shrinker = { |
29 | .shrink = gfs2_shrink_qd_memory, | 29 | .shrink = gfs2_shrink_qd_memory, |
@@ -138,9 +138,11 @@ static int __init init_gfs2_fs(void) | |||
138 | if (error) | 138 | if (error) |
139 | goto fail_unregister; | 139 | goto fail_unregister; |
140 | 140 | ||
141 | error = slow_work_register_user(THIS_MODULE); | 141 | error = -ENOMEM; |
142 | if (error) | 142 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", |
143 | goto fail_slow; | 143 | WQ_NON_REENTRANT | WQ_RESCUER, 0); |
144 | if (!gfs_recovery_wq) | ||
145 | goto fail_wq; | ||
144 | 146 | ||
145 | gfs2_register_debugfs(); | 147 | gfs2_register_debugfs(); |
146 | 148 | ||
@@ -148,7 +150,7 @@ static int __init init_gfs2_fs(void) | |||
148 | 150 | ||
149 | return 0; | 151 | return 0; |
150 | 152 | ||
151 | fail_slow: | 153 | fail_wq: |
152 | unregister_filesystem(&gfs2meta_fs_type); | 154 | unregister_filesystem(&gfs2meta_fs_type); |
153 | fail_unregister: | 155 | fail_unregister: |
154 | unregister_filesystem(&gfs2_fs_type); | 156 | unregister_filesystem(&gfs2_fs_type); |
@@ -190,7 +192,7 @@ static void __exit exit_gfs2_fs(void) | |||
190 | gfs2_unregister_debugfs(); | 192 | gfs2_unregister_debugfs(); |
191 | unregister_filesystem(&gfs2_fs_type); | 193 | unregister_filesystem(&gfs2_fs_type); |
192 | unregister_filesystem(&gfs2meta_fs_type); | 194 | unregister_filesystem(&gfs2meta_fs_type); |
193 | slow_work_unregister_user(THIS_MODULE); | 195 | destroy_workqueue(gfs_recovery_wq); |
194 | 196 | ||
195 | kmem_cache_destroy(gfs2_quotad_cachep); | 197 | kmem_cache_destroy(gfs2_quotad_cachep); |
196 | kmem_cache_destroy(gfs2_rgrpd_cachep); | 198 | kmem_cache_destroy(gfs2_rgrpd_cachep); |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 18176d0b75d7..f3b071f921aa 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb | |||
36 | { | 36 | { |
37 | struct buffer_head *bh, *head; | 37 | struct buffer_head *bh, *head; |
38 | int nr_underway = 0; | 38 | int nr_underway = 0; |
39 | int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? | 39 | int write_op = REQ_META | |
40 | WRITE_SYNC_PLUG : WRITE)); | 40 | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE); |
41 | 41 | ||
42 | BUG_ON(!PageLocked(page)); | 42 | BUG_ON(!PageLocked(page)); |
43 | BUG_ON(!page_has_buffers(page)); | 43 | BUG_ON(!page_has_buffers(page)); |
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, | |||
225 | } | 225 | } |
226 | bh->b_end_io = end_buffer_read_sync; | 226 | bh->b_end_io = end_buffer_read_sync; |
227 | get_bh(bh); | 227 | get_bh(bh); |
228 | submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); | 228 | submit_bh(READ_SYNC | REQ_META, bh); |
229 | if (!(flags & DIO_WAIT)) | 229 | if (!(flags & DIO_WAIT)) |
230 | return 0; | 230 | return 0; |
231 | 231 | ||
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) | |||
432 | if (buffer_uptodate(first_bh)) | 432 | if (buffer_uptodate(first_bh)) |
433 | goto out; | 433 | goto out; |
434 | if (!buffer_locked(first_bh)) | 434 | if (!buffer_locked(first_bh)) |
435 | ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); | 435 | ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); |
436 | 436 | ||
437 | dblock++; | 437 | dblock++; |
438 | extlen--; | 438 | extlen--; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 45a4a36195d8..4d4b1e8ac64c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/mount.h> | 18 | #include <linux/mount.h> |
19 | #include <linux/gfs2_ondisk.h> | 19 | #include <linux/gfs2_ondisk.h> |
20 | #include <linux/slow-work.h> | ||
21 | #include <linux/quotaops.h> | 20 | #include <linux/quotaops.h> |
22 | 21 | ||
23 | #include "gfs2.h" | 22 | #include "gfs2.h" |
@@ -275,7 +274,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
275 | 274 | ||
276 | bio->bi_end_io = end_bio_io_page; | 275 | bio->bi_end_io = end_bio_io_page; |
277 | bio->bi_private = page; | 276 | bio->bi_private = page; |
278 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | 277 | submit_bio(READ_SYNC | REQ_META, bio); |
279 | wait_on_page_locked(page); | 278 | wait_on_page_locked(page); |
280 | bio_put(bio); | 279 | bio_put(bio); |
281 | if (!PageUptodate(page)) { | 280 | if (!PageUptodate(page)) { |
@@ -673,7 +672,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
673 | break; | 672 | break; |
674 | 673 | ||
675 | INIT_LIST_HEAD(&jd->extent_list); | 674 | INIT_LIST_HEAD(&jd->extent_list); |
676 | slow_work_init(&jd->jd_work, &gfs2_recover_ops); | 675 | INIT_WORK(&jd->jd_work, gfs2_recover_func); |
677 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); | 676 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); |
678 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { | 677 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { |
679 | if (!jd->jd_inode) | 678 | if (!jd->jd_inode) |
@@ -782,7 +781,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
782 | if (sdp->sd_lockstruct.ls_first) { | 781 | if (sdp->sd_lockstruct.ls_first) { |
783 | unsigned int x; | 782 | unsigned int x; |
784 | for (x = 0; x < sdp->sd_journals; x++) { | 783 | for (x = 0; x < sdp->sd_journals; x++) { |
785 | error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x)); | 784 | error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), |
785 | true); | ||
786 | if (error) { | 786 | if (error) { |
787 | fs_err(sdp, "error recovering journal %u: %d\n", | 787 | fs_err(sdp, "error recovering journal %u: %d\n", |
788 | x, error); | 788 | x, error); |
@@ -792,7 +792,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
792 | 792 | ||
793 | gfs2_others_may_mount(sdp); | 793 | gfs2_others_may_mount(sdp); |
794 | } else if (!sdp->sd_args.ar_spectator) { | 794 | } else if (!sdp->sd_args.ar_spectator) { |
795 | error = gfs2_recover_journal(sdp->sd_jdesc); | 795 | error = gfs2_recover_journal(sdp->sd_jdesc, true); |
796 | if (error) { | 796 | if (error) { |
797 | fs_err(sdp, "error recovering my journal: %d\n", error); | 797 | fs_err(sdp, "error recovering my journal: %d\n", error); |
798 | goto fail_jinode_gh; | 798 | goto fail_jinode_gh; |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 98cdd05f3316..1009be2c9737 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -1072,7 +1072,7 @@ int gfs2_permission(struct inode *inode, int mask) | |||
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | /* | 1074 | /* |
1075 | * XXX: should be changed to have proper ordering by opencoding simple_setsize | 1075 | * XXX(truncate): the truncate_setsize calls should be moved to the end. |
1076 | */ | 1076 | */ |
1077 | static int setattr_size(struct inode *inode, struct iattr *attr) | 1077 | static int setattr_size(struct inode *inode, struct iattr *attr) |
1078 | { | 1078 | { |
@@ -1084,10 +1084,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr) | |||
1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | 1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); |
1085 | if (error) | 1085 | if (error) |
1086 | return error; | 1086 | return error; |
1087 | error = simple_setsize(inode, attr->ia_size); | 1087 | truncate_setsize(inode, attr->ia_size); |
1088 | gfs2_trans_end(sdp); | 1088 | gfs2_trans_end(sdp); |
1089 | if (error) | ||
1090 | return error; | ||
1091 | } | 1089 | } |
1092 | 1090 | ||
1093 | error = gfs2_truncatei(ip, attr->ia_size); | 1091 | error = gfs2_truncatei(ip, attr->ia_size); |
@@ -1136,8 +1134,16 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
1136 | if (error) | 1134 | if (error) |
1137 | goto out_end_trans; | 1135 | goto out_end_trans; |
1138 | 1136 | ||
1139 | error = inode_setattr(inode, attr); | 1137 | if ((attr->ia_valid & ATTR_SIZE) && |
1140 | gfs2_assert_warn(sdp, !error); | 1138 | attr->ia_size != i_size_read(inode)) { |
1139 | int error; | ||
1140 | |||
1141 | error = vmtruncate(inode, attr->ia_size); | ||
1142 | gfs2_assert_warn(sdp, !error); | ||
1143 | } | ||
1144 | |||
1145 | setattr_copy(inode, attr); | ||
1146 | mark_inode_dirty(inode); | ||
1141 | 1147 | ||
1142 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1148 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
1143 | gfs2_dinode_out(ip, dibh->b_data); | 1149 | gfs2_dinode_out(ip, dibh->b_data); |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8bb643cb2658..1bc6b5695e6d 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -1449,10 +1449,10 @@ static int gfs2_quota_get_xstate(struct super_block *sb, | |||
1449 | 1449 | ||
1450 | switch (sdp->sd_args.ar_quota) { | 1450 | switch (sdp->sd_args.ar_quota) { |
1451 | case GFS2_QUOTA_ON: | 1451 | case GFS2_QUOTA_ON: |
1452 | fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); | 1452 | fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD); |
1453 | /*FALLTHRU*/ | 1453 | /*FALLTHRU*/ |
1454 | case GFS2_QUOTA_ACCOUNT: | 1454 | case GFS2_QUOTA_ACCOUNT: |
1455 | fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); | 1455 | fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT); |
1456 | break; | 1456 | break; |
1457 | case GFS2_QUOTA_OFF: | 1457 | case GFS2_QUOTA_OFF: |
1458 | break; | 1458 | break; |
@@ -1498,7 +1498,7 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, | |||
1498 | 1498 | ||
1499 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; | 1499 | qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; |
1500 | fdq->d_version = FS_DQUOT_VERSION; | 1500 | fdq->d_version = FS_DQUOT_VERSION; |
1501 | fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA; | 1501 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; |
1502 | fdq->d_id = id; | 1502 | fdq->d_id = id; |
1503 | fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); | 1503 | fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); |
1504 | fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); | 1504 | fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); |
@@ -1533,12 +1533,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1533 | switch(type) { | 1533 | switch(type) { |
1534 | case USRQUOTA: | 1534 | case USRQUOTA: |
1535 | type = QUOTA_USER; | 1535 | type = QUOTA_USER; |
1536 | if (fdq->d_flags != XFS_USER_QUOTA) | 1536 | if (fdq->d_flags != FS_USER_QUOTA) |
1537 | return -EINVAL; | 1537 | return -EINVAL; |
1538 | break; | 1538 | break; |
1539 | case GRPQUOTA: | 1539 | case GRPQUOTA: |
1540 | type = QUOTA_GROUP; | 1540 | type = QUOTA_GROUP; |
1541 | if (fdq->d_flags != XFS_GROUP_QUOTA) | 1541 | if (fdq->d_flags != FS_GROUP_QUOTA) |
1542 | return -EINVAL; | 1542 | return -EINVAL; |
1543 | break; | 1543 | break; |
1544 | default: | 1544 | default: |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 4b9bece3d437..f7f89a94a5a4 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
15 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
16 | #include <linux/crc32.h> | 16 | #include <linux/crc32.h> |
17 | #include <linux/slow-work.h> | ||
18 | 17 | ||
19 | #include "gfs2.h" | 18 | #include "gfs2.h" |
20 | #include "incore.h" | 19 | #include "incore.h" |
@@ -28,6 +27,8 @@ | |||
28 | #include "util.h" | 27 | #include "util.h" |
29 | #include "dir.h" | 28 | #include "dir.h" |
30 | 29 | ||
30 | struct workqueue_struct *gfs_recovery_wq; | ||
31 | |||
31 | int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | 32 | int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, |
32 | struct buffer_head **bh) | 33 | struct buffer_head **bh) |
33 | { | 34 | { |
@@ -443,23 +444,7 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | |||
443 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); | 444 | kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); |
444 | } | 445 | } |
445 | 446 | ||
446 | static int gfs2_recover_get_ref(struct slow_work *work) | 447 | void gfs2_recover_func(struct work_struct *work) |
447 | { | ||
448 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | ||
449 | if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) | ||
450 | return -EBUSY; | ||
451 | return 0; | ||
452 | } | ||
453 | |||
454 | static void gfs2_recover_put_ref(struct slow_work *work) | ||
455 | { | ||
456 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | ||
457 | clear_bit(JDF_RECOVERY, &jd->jd_flags); | ||
458 | smp_mb__after_clear_bit(); | ||
459 | wake_up_bit(&jd->jd_flags, JDF_RECOVERY); | ||
460 | } | ||
461 | |||
462 | static void gfs2_recover_work(struct slow_work *work) | ||
463 | { | 448 | { |
464 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); | 449 | struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); |
465 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 450 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
@@ -578,7 +563,7 @@ static void gfs2_recover_work(struct slow_work *work) | |||
578 | gfs2_glock_dq_uninit(&j_gh); | 563 | gfs2_glock_dq_uninit(&j_gh); |
579 | 564 | ||
580 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 565 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
581 | return; | 566 | goto done; |
582 | 567 | ||
583 | fail_gunlock_tr: | 568 | fail_gunlock_tr: |
584 | gfs2_glock_dq_uninit(&t_gh); | 569 | gfs2_glock_dq_uninit(&t_gh); |
@@ -590,32 +575,35 @@ fail_gunlock_j: | |||
590 | } | 575 | } |
591 | 576 | ||
592 | fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); | 577 | fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done"); |
593 | |||
594 | fail: | 578 | fail: |
595 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); | 579 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); |
580 | done: | ||
581 | clear_bit(JDF_RECOVERY, &jd->jd_flags); | ||
582 | smp_mb__after_clear_bit(); | ||
583 | wake_up_bit(&jd->jd_flags, JDF_RECOVERY); | ||
596 | } | 584 | } |
597 | 585 | ||
598 | struct slow_work_ops gfs2_recover_ops = { | ||
599 | .owner = THIS_MODULE, | ||
600 | .get_ref = gfs2_recover_get_ref, | ||
601 | .put_ref = gfs2_recover_put_ref, | ||
602 | .execute = gfs2_recover_work, | ||
603 | }; | ||
604 | |||
605 | |||
606 | static int gfs2_recovery_wait(void *word) | 586 | static int gfs2_recovery_wait(void *word) |
607 | { | 587 | { |
608 | schedule(); | 588 | schedule(); |
609 | return 0; | 589 | return 0; |
610 | } | 590 | } |
611 | 591 | ||
612 | int gfs2_recover_journal(struct gfs2_jdesc *jd) | 592 | int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait) |
613 | { | 593 | { |
614 | int rv; | 594 | int rv; |
615 | rv = slow_work_enqueue(&jd->jd_work); | 595 | |
616 | if (rv) | 596 | if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) |
617 | return rv; | 597 | return -EBUSY; |
618 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); | 598 | |
599 | /* we have JDF_RECOVERY, queue should always succeed */ | ||
600 | rv = queue_work(gfs_recovery_wq, &jd->jd_work); | ||
601 | BUG_ON(!rv); | ||
602 | |||
603 | if (wait) | ||
604 | wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, | ||
605 | TASK_UNINTERRUPTIBLE); | ||
606 | |||
619 | return 0; | 607 | return 0; |
620 | } | 608 | } |
621 | 609 | ||
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 1616ac22569a..2226136c7647 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h | |||
@@ -12,6 +12,8 @@ | |||
12 | 12 | ||
13 | #include "incore.h" | 13 | #include "incore.h" |
14 | 14 | ||
15 | extern struct workqueue_struct *gfs_recovery_wq; | ||
16 | |||
15 | static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) | 17 | static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) |
16 | { | 18 | { |
17 | if (++*blk == sdp->sd_jdesc->jd_blocks) | 19 | if (++*blk == sdp->sd_jdesc->jd_blocks) |
@@ -27,8 +29,8 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); | |||
27 | 29 | ||
28 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, | 30 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, |
29 | struct gfs2_log_header_host *head); | 31 | struct gfs2_log_header_host *head); |
30 | extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); | 32 | extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd, bool wait); |
31 | extern struct slow_work_ops gfs2_recover_ops; | 33 | extern void gfs2_recover_func(struct work_struct *work); |
32 | 34 | ||
33 | #endif /* __RECOVERY_DOT_H__ */ | 35 | #endif /* __RECOVERY_DOT_H__ */ |
34 | 36 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 4140811a921c..77cb9f830ee4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -1188,7 +1188,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1188 | * node for later deallocation. | 1188 | * node for later deallocation. |
1189 | */ | 1189 | */ |
1190 | 1190 | ||
1191 | static void gfs2_drop_inode(struct inode *inode) | 1191 | static int gfs2_drop_inode(struct inode *inode) |
1192 | { | 1192 | { |
1193 | struct gfs2_inode *ip = GFS2_I(inode); | 1193 | struct gfs2_inode *ip = GFS2_I(inode); |
1194 | 1194 | ||
@@ -1197,26 +1197,7 @@ static void gfs2_drop_inode(struct inode *inode) | |||
1197 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) | 1197 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) |
1198 | clear_nlink(inode); | 1198 | clear_nlink(inode); |
1199 | } | 1199 | } |
1200 | generic_drop_inode(inode); | 1200 | return generic_drop_inode(inode); |
1201 | } | ||
1202 | |||
1203 | /** | ||
1204 | * gfs2_clear_inode - Deallocate an inode when VFS is done with it | ||
1205 | * @inode: The VFS inode | ||
1206 | * | ||
1207 | */ | ||
1208 | |||
1209 | static void gfs2_clear_inode(struct inode *inode) | ||
1210 | { | ||
1211 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1212 | |||
1213 | ip->i_gl->gl_object = NULL; | ||
1214 | gfs2_glock_put(ip->i_gl); | ||
1215 | ip->i_gl = NULL; | ||
1216 | if (ip->i_iopen_gh.gh_gl) { | ||
1217 | ip->i_iopen_gh.gh_gl->gl_object = NULL; | ||
1218 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
1219 | } | ||
1220 | } | 1201 | } |
1221 | 1202 | ||
1222 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | 1203 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) |
@@ -1344,13 +1325,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1344 | * is safe, just less efficient. | 1325 | * is safe, just less efficient. |
1345 | */ | 1326 | */ |
1346 | 1327 | ||
1347 | static void gfs2_delete_inode(struct inode *inode) | 1328 | static void gfs2_evict_inode(struct inode *inode) |
1348 | { | 1329 | { |
1349 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 1330 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
1350 | struct gfs2_inode *ip = GFS2_I(inode); | 1331 | struct gfs2_inode *ip = GFS2_I(inode); |
1351 | struct gfs2_holder gh; | 1332 | struct gfs2_holder gh; |
1352 | int error; | 1333 | int error; |
1353 | 1334 | ||
1335 | if (inode->i_nlink) | ||
1336 | goto out; | ||
1337 | |||
1354 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | 1338 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
1355 | if (unlikely(error)) { | 1339 | if (unlikely(error)) { |
1356 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | 1340 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); |
@@ -1404,10 +1388,18 @@ out_unlock: | |||
1404 | gfs2_holder_uninit(&ip->i_iopen_gh); | 1388 | gfs2_holder_uninit(&ip->i_iopen_gh); |
1405 | gfs2_glock_dq_uninit(&gh); | 1389 | gfs2_glock_dq_uninit(&gh); |
1406 | if (error && error != GLR_TRYFAILED && error != -EROFS) | 1390 | if (error && error != GLR_TRYFAILED && error != -EROFS) |
1407 | fs_warn(sdp, "gfs2_delete_inode: %d\n", error); | 1391 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); |
1408 | out: | 1392 | out: |
1409 | truncate_inode_pages(&inode->i_data, 0); | 1393 | truncate_inode_pages(&inode->i_data, 0); |
1410 | clear_inode(inode); | 1394 | end_writeback(inode); |
1395 | |||
1396 | ip->i_gl->gl_object = NULL; | ||
1397 | gfs2_glock_put(ip->i_gl); | ||
1398 | ip->i_gl = NULL; | ||
1399 | if (ip->i_iopen_gh.gh_gl) { | ||
1400 | ip->i_iopen_gh.gh_gl->gl_object = NULL; | ||
1401 | gfs2_glock_dq_uninit(&ip->i_iopen_gh); | ||
1402 | } | ||
1411 | } | 1403 | } |
1412 | 1404 | ||
1413 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 1405 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
@@ -1431,14 +1423,13 @@ const struct super_operations gfs2_super_ops = { | |||
1431 | .alloc_inode = gfs2_alloc_inode, | 1423 | .alloc_inode = gfs2_alloc_inode, |
1432 | .destroy_inode = gfs2_destroy_inode, | 1424 | .destroy_inode = gfs2_destroy_inode, |
1433 | .write_inode = gfs2_write_inode, | 1425 | .write_inode = gfs2_write_inode, |
1434 | .delete_inode = gfs2_delete_inode, | 1426 | .evict_inode = gfs2_evict_inode, |
1435 | .put_super = gfs2_put_super, | 1427 | .put_super = gfs2_put_super, |
1436 | .sync_fs = gfs2_sync_fs, | 1428 | .sync_fs = gfs2_sync_fs, |
1437 | .freeze_fs = gfs2_freeze, | 1429 | .freeze_fs = gfs2_freeze, |
1438 | .unfreeze_fs = gfs2_unfreeze, | 1430 | .unfreeze_fs = gfs2_unfreeze, |
1439 | .statfs = gfs2_statfs, | 1431 | .statfs = gfs2_statfs, |
1440 | .remount_fs = gfs2_remount_fs, | 1432 | .remount_fs = gfs2_remount_fs, |
1441 | .clear_inode = gfs2_clear_inode, | ||
1442 | .drop_inode = gfs2_drop_inode, | 1433 | .drop_inode = gfs2_drop_inode, |
1443 | .show_options = gfs2_show_options, | 1434 | .show_options = gfs2_show_options, |
1444 | }; | 1435 | }; |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d019d0d55e00..ccacffd2faaa 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "quota.h" | 25 | #include "quota.h" |
26 | #include "util.h" | 26 | #include "util.h" |
27 | #include "glops.h" | 27 | #include "glops.h" |
28 | #include "recovery.h" | ||
28 | 29 | ||
29 | struct gfs2_attr { | 30 | struct gfs2_attr { |
30 | struct attribute attr; | 31 | struct attribute attr; |
@@ -376,7 +377,7 @@ static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
376 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { | 377 | list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { |
377 | if (jd->jd_jid != jid) | 378 | if (jd->jd_jid != jid) |
378 | continue; | 379 | continue; |
379 | rv = slow_work_enqueue(&jd->jd_work); | 380 | rv = gfs2_recover_journal(jd, false); |
380 | break; | 381 | break; |
381 | } | 382 | } |
382 | out: | 383 | out: |
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 82f93da00d1b..776af6eb4bcb 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -1296,6 +1296,7 @@ fail: | |||
1296 | 1296 | ||
1297 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | 1297 | int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) |
1298 | { | 1298 | { |
1299 | struct inode *inode = &ip->i_inode; | ||
1299 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1300 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1300 | struct gfs2_ea_location el; | 1301 | struct gfs2_ea_location el; |
1301 | struct buffer_head *dibh; | 1302 | struct buffer_head *dibh; |
@@ -1321,14 +1322,25 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) | |||
1321 | return error; | 1322 | return error; |
1322 | 1323 | ||
1323 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1324 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1324 | if (!error) { | 1325 | if (error) |
1325 | error = inode_setattr(&ip->i_inode, attr); | 1326 | goto out_trans_end; |
1326 | gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); | 1327 | |
1327 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1328 | if ((attr->ia_valid & ATTR_SIZE) && |
1328 | gfs2_dinode_out(ip, dibh->b_data); | 1329 | attr->ia_size != i_size_read(inode)) { |
1329 | brelse(dibh); | 1330 | int error; |
1331 | |||
1332 | error = vmtruncate(inode, attr->ia_size); | ||
1333 | gfs2_assert_warn(GFS2_SB(inode), !error); | ||
1330 | } | 1334 | } |
1331 | 1335 | ||
1336 | setattr_copy(inode, attr); | ||
1337 | mark_inode_dirty(inode); | ||
1338 | |||
1339 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1340 | gfs2_dinode_out(ip, dibh->b_data); | ||
1341 | brelse(dibh); | ||
1342 | |||
1343 | out_trans_end: | ||
1332 | gfs2_trans_end(sdp); | 1344 | gfs2_trans_end(sdp); |
1333 | return error; | 1345 | return error; |
1334 | } | 1346 | } |
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index fe35e3b626c4..4f55651aaa51 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h | |||
@@ -193,7 +193,7 @@ extern int hfs_inode_setattr(struct dentry *, struct iattr *); | |||
193 | extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, | 193 | extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, |
194 | __be32 log_size, __be32 phys_size, u32 clump_size); | 194 | __be32 log_size, __be32 phys_size, u32 clump_size); |
195 | extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *); | 195 | extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *); |
196 | extern void hfs_clear_inode(struct inode *); | 196 | extern void hfs_evict_inode(struct inode *); |
197 | extern void hfs_delete_inode(struct inode *); | 197 | extern void hfs_delete_inode(struct inode *); |
198 | 198 | ||
199 | /* attr.c */ | 199 | /* attr.c */ |
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 14f5cb1b9fdc..397b7adc7ce6 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping, | |||
39 | loff_t pos, unsigned len, unsigned flags, | 39 | loff_t pos, unsigned len, unsigned flags, |
40 | struct page **pagep, void **fsdata) | 40 | struct page **pagep, void **fsdata) |
41 | { | 41 | { |
42 | int ret; | ||
43 | |||
42 | *pagep = NULL; | 44 | *pagep = NULL; |
43 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 45 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
44 | hfs_get_block, | 46 | hfs_get_block, |
45 | &HFS_I(mapping->host)->phys_size); | 47 | &HFS_I(mapping->host)->phys_size); |
48 | if (unlikely(ret)) { | ||
49 | loff_t isize = mapping->host->i_size; | ||
50 | if (pos + len > isize) | ||
51 | vmtruncate(mapping->host, isize); | ||
52 | } | ||
53 | |||
54 | return ret; | ||
46 | } | 55 | } |
47 | 56 | ||
48 | static sector_t hfs_bmap(struct address_space *mapping, sector_t block) | 57 | static sector_t hfs_bmap(struct address_space *mapping, sector_t block) |
@@ -112,9 +121,24 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, | |||
112 | { | 121 | { |
113 | struct file *file = iocb->ki_filp; | 122 | struct file *file = iocb->ki_filp; |
114 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; | 123 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; |
124 | ssize_t ret; | ||
115 | 125 | ||
116 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 126 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
117 | offset, nr_segs, hfs_get_block, NULL); | 127 | offset, nr_segs, hfs_get_block, NULL); |
128 | |||
129 | /* | ||
130 | * In case of error extending write may have instantiated a few | ||
131 | * blocks outside i_size. Trim these off again. | ||
132 | */ | ||
133 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
134 | loff_t isize = i_size_read(inode); | ||
135 | loff_t end = offset + iov_length(iov, nr_segs); | ||
136 | |||
137 | if (end > isize) | ||
138 | vmtruncate(inode, isize); | ||
139 | } | ||
140 | |||
141 | return ret; | ||
118 | } | 142 | } |
119 | 143 | ||
120 | static int hfs_writepages(struct address_space *mapping, | 144 | static int hfs_writepages(struct address_space *mapping, |
@@ -507,8 +531,10 @@ out: | |||
507 | return NULL; | 531 | return NULL; |
508 | } | 532 | } |
509 | 533 | ||
510 | void hfs_clear_inode(struct inode *inode) | 534 | void hfs_evict_inode(struct inode *inode) |
511 | { | 535 | { |
536 | truncate_inode_pages(&inode->i_data, 0); | ||
537 | end_writeback(inode); | ||
512 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { | 538 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { |
513 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; | 539 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
514 | iput(HFS_I(inode)->rsrc_inode); | 540 | iput(HFS_I(inode)->rsrc_inode); |
@@ -588,13 +614,43 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) | |||
588 | attr->ia_mode = inode->i_mode & ~S_IWUGO; | 614 | attr->ia_mode = inode->i_mode & ~S_IWUGO; |
589 | attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; | 615 | attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; |
590 | } | 616 | } |
591 | error = inode_setattr(inode, attr); | ||
592 | if (error) | ||
593 | return error; | ||
594 | 617 | ||
618 | if ((attr->ia_valid & ATTR_SIZE) && | ||
619 | attr->ia_size != i_size_read(inode)) { | ||
620 | error = vmtruncate(inode, attr->ia_size); | ||
621 | if (error) | ||
622 | return error; | ||
623 | } | ||
624 | |||
625 | setattr_copy(inode, attr); | ||
626 | mark_inode_dirty(inode); | ||
595 | return 0; | 627 | return 0; |
596 | } | 628 | } |
597 | 629 | ||
630 | static int hfs_file_fsync(struct file *filp, int datasync) | ||
631 | { | ||
632 | struct inode *inode = filp->f_mapping->host; | ||
633 | struct super_block * sb; | ||
634 | int ret, err; | ||
635 | |||
636 | /* sync the inode to buffers */ | ||
637 | ret = write_inode_now(inode, 0); | ||
638 | |||
639 | /* sync the superblock to buffers */ | ||
640 | sb = inode->i_sb; | ||
641 | if (sb->s_dirt) { | ||
642 | lock_super(sb); | ||
643 | sb->s_dirt = 0; | ||
644 | if (!(sb->s_flags & MS_RDONLY)) | ||
645 | hfs_mdb_commit(sb); | ||
646 | unlock_super(sb); | ||
647 | } | ||
648 | /* .. finally sync the buffers to disk */ | ||
649 | err = sync_blockdev(sb->s_bdev); | ||
650 | if (!ret) | ||
651 | ret = err; | ||
652 | return ret; | ||
653 | } | ||
598 | 654 | ||
599 | static const struct file_operations hfs_file_operations = { | 655 | static const struct file_operations hfs_file_operations = { |
600 | .llseek = generic_file_llseek, | 656 | .llseek = generic_file_llseek, |
@@ -604,7 +660,7 @@ static const struct file_operations hfs_file_operations = { | |||
604 | .aio_write = generic_file_aio_write, | 660 | .aio_write = generic_file_aio_write, |
605 | .mmap = generic_file_mmap, | 661 | .mmap = generic_file_mmap, |
606 | .splice_read = generic_file_splice_read, | 662 | .splice_read = generic_file_splice_read, |
607 | .fsync = file_fsync, | 663 | .fsync = hfs_file_fsync, |
608 | .open = hfs_file_open, | 664 | .open = hfs_file_open, |
609 | .release = hfs_file_release, | 665 | .release = hfs_file_release, |
610 | }; | 666 | }; |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 0a81eb7111f3..34235d4bf08b 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -181,7 +181,7 @@ static const struct super_operations hfs_super_operations = { | |||
181 | .alloc_inode = hfs_alloc_inode, | 181 | .alloc_inode = hfs_alloc_inode, |
182 | .destroy_inode = hfs_destroy_inode, | 182 | .destroy_inode = hfs_destroy_inode, |
183 | .write_inode = hfs_write_inode, | 183 | .write_inode = hfs_write_inode, |
184 | .clear_inode = hfs_clear_inode, | 184 | .evict_inode = hfs_evict_inode, |
185 | .put_super = hfs_put_super, | 185 | .put_super = hfs_put_super, |
186 | .write_super = hfs_write_super, | 186 | .write_super = hfs_write_super, |
187 | .sync_fs = hfs_sync_fs, | 187 | .sync_fs = hfs_sync_fs, |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 6505c30ad965..dc856be3c2b0 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *); | |||
351 | 351 | ||
352 | /* super.c */ | 352 | /* super.c */ |
353 | struct inode *hfsplus_iget(struct super_block *, unsigned long); | 353 | struct inode *hfsplus_iget(struct super_block *, unsigned long); |
354 | int hfsplus_sync_fs(struct super_block *sb, int wait); | ||
354 | 355 | ||
355 | /* tables.c */ | 356 | /* tables.c */ |
356 | extern u16 hfsplus_case_fold_table[]; | 357 | extern u16 hfsplus_case_fold_table[]; |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 9bbb82924a22..c5a979d62c65 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, | |||
31 | loff_t pos, unsigned len, unsigned flags, | 31 | loff_t pos, unsigned len, unsigned flags, |
32 | struct page **pagep, void **fsdata) | 32 | struct page **pagep, void **fsdata) |
33 | { | 33 | { |
34 | int ret; | ||
35 | |||
34 | *pagep = NULL; | 36 | *pagep = NULL; |
35 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
36 | hfsplus_get_block, | 38 | hfsplus_get_block, |
37 | &HFSPLUS_I(mapping->host).phys_size); | 39 | &HFSPLUS_I(mapping->host).phys_size); |
40 | if (unlikely(ret)) { | ||
41 | loff_t isize = mapping->host->i_size; | ||
42 | if (pos + len > isize) | ||
43 | vmtruncate(mapping->host, isize); | ||
44 | } | ||
45 | |||
46 | return ret; | ||
38 | } | 47 | } |
39 | 48 | ||
40 | static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) | 49 | static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) |
@@ -105,9 +114,24 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, | |||
105 | { | 114 | { |
106 | struct file *file = iocb->ki_filp; | 115 | struct file *file = iocb->ki_filp; |
107 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; | 116 | struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; |
117 | ssize_t ret; | ||
108 | 118 | ||
109 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 119 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
110 | offset, nr_segs, hfsplus_get_block, NULL); | 120 | offset, nr_segs, hfsplus_get_block, NULL); |
121 | |||
122 | /* | ||
123 | * In case of error extending write may have instantiated a few | ||
124 | * blocks outside i_size. Trim these off again. | ||
125 | */ | ||
126 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
127 | loff_t isize = i_size_read(inode); | ||
128 | loff_t end = offset + iov_length(iov, nr_segs); | ||
129 | |||
130 | if (end > isize) | ||
131 | vmtruncate(inode, isize); | ||
132 | } | ||
133 | |||
134 | return ret; | ||
111 | } | 135 | } |
112 | 136 | ||
113 | static int hfsplus_writepages(struct address_space *mapping, | 137 | static int hfsplus_writepages(struct address_space *mapping, |
@@ -266,9 +290,56 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) | |||
266 | return 0; | 290 | return 0; |
267 | } | 291 | } |
268 | 292 | ||
293 | static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) | ||
294 | { | ||
295 | struct inode *inode = dentry->d_inode; | ||
296 | int error; | ||
297 | |||
298 | error = inode_change_ok(inode, attr); | ||
299 | if (error) | ||
300 | return error; | ||
301 | |||
302 | if ((attr->ia_valid & ATTR_SIZE) && | ||
303 | attr->ia_size != i_size_read(inode)) { | ||
304 | error = vmtruncate(inode, attr->ia_size); | ||
305 | if (error) | ||
306 | return error; | ||
307 | } | ||
308 | |||
309 | setattr_copy(inode, attr); | ||
310 | mark_inode_dirty(inode); | ||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | static int hfsplus_file_fsync(struct file *filp, int datasync) | ||
315 | { | ||
316 | struct inode *inode = filp->f_mapping->host; | ||
317 | struct super_block * sb; | ||
318 | int ret, err; | ||
319 | |||
320 | /* sync the inode to buffers */ | ||
321 | ret = write_inode_now(inode, 0); | ||
322 | |||
323 | /* sync the superblock to buffers */ | ||
324 | sb = inode->i_sb; | ||
325 | if (sb->s_dirt) { | ||
326 | if (!(sb->s_flags & MS_RDONLY)) | ||
327 | hfsplus_sync_fs(sb, 1); | ||
328 | else | ||
329 | sb->s_dirt = 0; | ||
330 | } | ||
331 | |||
332 | /* .. finally sync the buffers to disk */ | ||
333 | err = sync_blockdev(sb->s_bdev); | ||
334 | if (!ret) | ||
335 | ret = err; | ||
336 | return ret; | ||
337 | } | ||
338 | |||
269 | static const struct inode_operations hfsplus_file_inode_operations = { | 339 | static const struct inode_operations hfsplus_file_inode_operations = { |
270 | .lookup = hfsplus_file_lookup, | 340 | .lookup = hfsplus_file_lookup, |
271 | .truncate = hfsplus_file_truncate, | 341 | .truncate = hfsplus_file_truncate, |
342 | .setattr = hfsplus_setattr, | ||
272 | .setxattr = hfsplus_setxattr, | 343 | .setxattr = hfsplus_setxattr, |
273 | .getxattr = hfsplus_getxattr, | 344 | .getxattr = hfsplus_getxattr, |
274 | .listxattr = hfsplus_listxattr, | 345 | .listxattr = hfsplus_listxattr, |
@@ -282,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = { | |||
282 | .aio_write = generic_file_aio_write, | 353 | .aio_write = generic_file_aio_write, |
283 | .mmap = generic_file_mmap, | 354 | .mmap = generic_file_mmap, |
284 | .splice_read = generic_file_splice_read, | 355 | .splice_read = generic_file_splice_read, |
285 | .fsync = file_fsync, | 356 | .fsync = hfsplus_file_fsync, |
286 | .open = hfsplus_file_open, | 357 | .open = hfsplus_file_open, |
287 | .release = hfsplus_file_release, | 358 | .release = hfsplus_file_release, |
288 | .unlocked_ioctl = hfsplus_ioctl, | 359 | .unlocked_ioctl = hfsplus_ioctl, |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 74b473a8ef92..3b55c050c742 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -145,16 +145,18 @@ static int hfsplus_write_inode(struct inode *inode, | |||
145 | return ret; | 145 | return ret; |
146 | } | 146 | } |
147 | 147 | ||
148 | static void hfsplus_clear_inode(struct inode *inode) | 148 | static void hfsplus_evict_inode(struct inode *inode) |
149 | { | 149 | { |
150 | dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino); | 150 | dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); |
151 | truncate_inode_pages(&inode->i_data, 0); | ||
152 | end_writeback(inode); | ||
151 | if (HFSPLUS_IS_RSRC(inode)) { | 153 | if (HFSPLUS_IS_RSRC(inode)) { |
152 | HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; | 154 | HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; |
153 | iput(HFSPLUS_I(inode).rsrc_inode); | 155 | iput(HFSPLUS_I(inode).rsrc_inode); |
154 | } | 156 | } |
155 | } | 157 | } |
156 | 158 | ||
157 | static int hfsplus_sync_fs(struct super_block *sb, int wait) | 159 | int hfsplus_sync_fs(struct super_block *sb, int wait) |
158 | { | 160 | { |
159 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 161 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; |
160 | 162 | ||
@@ -293,7 +295,7 @@ static const struct super_operations hfsplus_sops = { | |||
293 | .alloc_inode = hfsplus_alloc_inode, | 295 | .alloc_inode = hfsplus_alloc_inode, |
294 | .destroy_inode = hfsplus_destroy_inode, | 296 | .destroy_inode = hfsplus_destroy_inode, |
295 | .write_inode = hfsplus_write_inode, | 297 | .write_inode = hfsplus_write_inode, |
296 | .clear_inode = hfsplus_clear_inode, | 298 | .evict_inode = hfsplus_evict_inode, |
297 | .put_super = hfsplus_put_super, | 299 | .put_super = hfsplus_put_super, |
298 | .write_super = hfsplus_write_super, | 300 | .write_super = hfsplus_write_super, |
299 | .sync_fs = hfsplus_sync_fs, | 301 | .sync_fs = hfsplus_sync_fs, |
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 2f34f8f2134b..6bbd75c5589b 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h | |||
@@ -53,18 +53,28 @@ struct hostfs_iattr { | |||
53 | struct timespec ia_ctime; | 53 | struct timespec ia_ctime; |
54 | }; | 54 | }; |
55 | 55 | ||
56 | extern int stat_file(const char *path, unsigned long long *inode_out, | 56 | struct hostfs_stat { |
57 | int *mode_out, int *nlink_out, int *uid_out, int *gid_out, | 57 | unsigned long long ino; |
58 | unsigned long long *size_out, struct timespec *atime_out, | 58 | unsigned int mode; |
59 | struct timespec *mtime_out, struct timespec *ctime_out, | 59 | unsigned int nlink; |
60 | int *blksize_out, unsigned long long *blocks_out, int fd); | 60 | unsigned int uid; |
61 | unsigned int gid; | ||
62 | unsigned long long size; | ||
63 | struct timespec atime, mtime, ctime; | ||
64 | unsigned int blksize; | ||
65 | unsigned long long blocks; | ||
66 | unsigned int maj; | ||
67 | unsigned int min; | ||
68 | }; | ||
69 | |||
70 | extern int stat_file(const char *path, struct hostfs_stat *p, int fd); | ||
61 | extern int access_file(char *path, int r, int w, int x); | 71 | extern int access_file(char *path, int r, int w, int x); |
62 | extern int open_file(char *path, int r, int w, int append); | 72 | extern int open_file(char *path, int r, int w, int append); |
63 | extern int file_type(const char *path, int *maj, int *min); | ||
64 | extern void *open_dir(char *path, int *err_out); | 73 | extern void *open_dir(char *path, int *err_out); |
65 | extern char *read_dir(void *stream, unsigned long long *pos, | 74 | extern char *read_dir(void *stream, unsigned long long *pos, |
66 | unsigned long long *ino_out, int *len_out); | 75 | unsigned long long *ino_out, int *len_out); |
67 | extern void close_file(void *stream); | 76 | extern void close_file(void *stream); |
77 | extern int replace_file(int oldfd, int fd); | ||
68 | extern void close_dir(void *stream); | 78 | extern void close_dir(void *stream); |
69 | extern int read_file(int fd, unsigned long long *offset, char *buf, int len); | 79 | extern int read_file(int fd, unsigned long long *offset, char *buf, int len); |
70 | extern int write_file(int fd, unsigned long long *offset, const char *buf, | 80 | extern int write_file(int fd, unsigned long long *offset, const char *buf, |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 87ac1891a185..dd1e55535a4e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -14,12 +14,12 @@ | |||
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/seq_file.h> | 15 | #include <linux/seq_file.h> |
16 | #include <linux/mount.h> | 16 | #include <linux/mount.h> |
17 | #include <linux/namei.h> | ||
17 | #include "hostfs.h" | 18 | #include "hostfs.h" |
18 | #include "init.h" | 19 | #include "init.h" |
19 | #include "kern.h" | 20 | #include "kern.h" |
20 | 21 | ||
21 | struct hostfs_inode_info { | 22 | struct hostfs_inode_info { |
22 | char *host_filename; | ||
23 | int fd; | 23 | int fd; |
24 | fmode_t mode; | 24 | fmode_t mode; |
25 | struct inode vfs_inode; | 25 | struct inode vfs_inode; |
@@ -49,7 +49,7 @@ static int append = 0; | |||
49 | 49 | ||
50 | static const struct inode_operations hostfs_iops; | 50 | static const struct inode_operations hostfs_iops; |
51 | static const struct inode_operations hostfs_dir_iops; | 51 | static const struct inode_operations hostfs_dir_iops; |
52 | static const struct address_space_operations hostfs_link_aops; | 52 | static const struct inode_operations hostfs_link_iops; |
53 | 53 | ||
54 | #ifndef MODULE | 54 | #ifndef MODULE |
55 | static int __init hostfs_args(char *options, int *add) | 55 | static int __init hostfs_args(char *options, int *add) |
@@ -90,71 +90,58 @@ __uml_setup("hostfs=", hostfs_args, | |||
90 | ); | 90 | ); |
91 | #endif | 91 | #endif |
92 | 92 | ||
93 | static char *dentry_name(struct dentry *dentry, int extra) | 93 | static char *__dentry_name(struct dentry *dentry, char *name) |
94 | { | 94 | { |
95 | struct dentry *parent; | 95 | char *p = __dentry_path(dentry, name, PATH_MAX); |
96 | char *root, *name; | 96 | char *root; |
97 | int len; | 97 | size_t len; |
98 | |||
99 | len = 0; | ||
100 | parent = dentry; | ||
101 | while (parent->d_parent != parent) { | ||
102 | len += parent->d_name.len + 1; | ||
103 | parent = parent->d_parent; | ||
104 | } | ||
105 | 98 | ||
106 | root = HOSTFS_I(parent->d_inode)->host_filename; | 99 | spin_unlock(&dcache_lock); |
107 | len += strlen(root); | ||
108 | name = kmalloc(len + extra + 1, GFP_KERNEL); | ||
109 | if (name == NULL) | ||
110 | return NULL; | ||
111 | 100 | ||
112 | name[len] = '\0'; | 101 | root = dentry->d_sb->s_fs_info; |
113 | parent = dentry; | 102 | len = strlen(root); |
114 | while (parent->d_parent != parent) { | 103 | if (IS_ERR(p)) { |
115 | len -= parent->d_name.len + 1; | 104 | __putname(name); |
116 | name[len] = '/'; | 105 | return NULL; |
117 | strncpy(&name[len + 1], parent->d_name.name, | 106 | } |
118 | parent->d_name.len); | 107 | strncpy(name, root, PATH_MAX); |
119 | parent = parent->d_parent; | 108 | if (len > p - name) { |
109 | __putname(name); | ||
110 | return NULL; | ||
111 | } | ||
112 | if (p > name + len) { | ||
113 | char *s = name + len; | ||
114 | while ((*s++ = *p++) != '\0') | ||
115 | ; | ||
120 | } | 116 | } |
121 | strncpy(name, root, strlen(root)); | ||
122 | return name; | 117 | return name; |
123 | } | 118 | } |
124 | 119 | ||
125 | static char *inode_name(struct inode *ino, int extra) | 120 | static char *dentry_name(struct dentry *dentry) |
126 | { | 121 | { |
127 | struct dentry *dentry; | 122 | char *name = __getname(); |
123 | if (!name) | ||
124 | return NULL; | ||
128 | 125 | ||
129 | dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); | 126 | spin_lock(&dcache_lock); |
130 | return dentry_name(dentry, extra); | 127 | return __dentry_name(dentry, name); /* will unlock */ |
131 | } | 128 | } |
132 | 129 | ||
133 | static int read_name(struct inode *ino, char *name) | 130 | static char *inode_name(struct inode *ino) |
134 | { | 131 | { |
135 | /* | 132 | struct dentry *dentry; |
136 | * The non-int inode fields are copied into ints by stat_file and | 133 | char *name = __getname(); |
137 | * then copied into the inode because passing the actual pointers | 134 | if (!name) |
138 | * in and having them treated as int * breaks on big-endian machines | 135 | return NULL; |
139 | */ | ||
140 | int err; | ||
141 | int i_mode, i_nlink, i_blksize; | ||
142 | unsigned long long i_size; | ||
143 | unsigned long long i_ino; | ||
144 | unsigned long long i_blocks; | ||
145 | |||
146 | err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, | ||
147 | &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, | ||
148 | &ino->i_ctime, &i_blksize, &i_blocks, -1); | ||
149 | if (err) | ||
150 | return err; | ||
151 | 136 | ||
152 | ino->i_ino = i_ino; | 137 | spin_lock(&dcache_lock); |
153 | ino->i_mode = i_mode; | 138 | if (list_empty(&ino->i_dentry)) { |
154 | ino->i_nlink = i_nlink; | 139 | spin_unlock(&dcache_lock); |
155 | ino->i_size = i_size; | 140 | __putname(name); |
156 | ino->i_blocks = i_blocks; | 141 | return NULL; |
157 | return 0; | 142 | } |
143 | dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); | ||
144 | return __dentry_name(dentry, name); /* will unlock */ | ||
158 | } | 145 | } |
159 | 146 | ||
160 | static char *follow_link(char *link) | 147 | static char *follow_link(char *link) |
@@ -205,53 +192,11 @@ static char *follow_link(char *link) | |||
205 | return ERR_PTR(n); | 192 | return ERR_PTR(n); |
206 | } | 193 | } |
207 | 194 | ||
208 | static int hostfs_read_inode(struct inode *ino) | ||
209 | { | ||
210 | char *name; | ||
211 | int err = 0; | ||
212 | |||
213 | /* | ||
214 | * Unfortunately, we are called from iget() when we don't have a dentry | ||
215 | * allocated yet. | ||
216 | */ | ||
217 | if (list_empty(&ino->i_dentry)) | ||
218 | goto out; | ||
219 | |||
220 | err = -ENOMEM; | ||
221 | name = inode_name(ino, 0); | ||
222 | if (name == NULL) | ||
223 | goto out; | ||
224 | |||
225 | if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) { | ||
226 | name = follow_link(name); | ||
227 | if (IS_ERR(name)) { | ||
228 | err = PTR_ERR(name); | ||
229 | goto out; | ||
230 | } | ||
231 | } | ||
232 | |||
233 | err = read_name(ino, name); | ||
234 | kfree(name); | ||
235 | out: | ||
236 | return err; | ||
237 | } | ||
238 | |||
239 | static struct inode *hostfs_iget(struct super_block *sb) | 195 | static struct inode *hostfs_iget(struct super_block *sb) |
240 | { | 196 | { |
241 | struct inode *inode; | 197 | struct inode *inode = new_inode(sb); |
242 | long ret; | ||
243 | |||
244 | inode = iget_locked(sb, 0); | ||
245 | if (!inode) | 198 | if (!inode) |
246 | return ERR_PTR(-ENOMEM); | 199 | return ERR_PTR(-ENOMEM); |
247 | if (inode->i_state & I_NEW) { | ||
248 | ret = hostfs_read_inode(inode); | ||
249 | if (ret < 0) { | ||
250 | iget_failed(inode); | ||
251 | return ERR_PTR(ret); | ||
252 | } | ||
253 | unlock_new_inode(inode); | ||
254 | } | ||
255 | return inode; | 200 | return inode; |
256 | } | 201 | } |
257 | 202 | ||
@@ -269,7 +214,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) | |||
269 | long long f_files; | 214 | long long f_files; |
270 | long long f_ffree; | 215 | long long f_ffree; |
271 | 216 | ||
272 | err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename, | 217 | err = do_statfs(dentry->d_sb->s_fs_info, |
273 | &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, | 218 | &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, |
274 | &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), | 219 | &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), |
275 | &sf->f_namelen, sf->f_spare); | 220 | &sf->f_namelen, sf->f_spare); |
@@ -288,47 +233,32 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) | |||
288 | { | 233 | { |
289 | struct hostfs_inode_info *hi; | 234 | struct hostfs_inode_info *hi; |
290 | 235 | ||
291 | hi = kmalloc(sizeof(*hi), GFP_KERNEL); | 236 | hi = kzalloc(sizeof(*hi), GFP_KERNEL); |
292 | if (hi == NULL) | 237 | if (hi == NULL) |
293 | return NULL; | 238 | return NULL; |
294 | 239 | hi->fd = -1; | |
295 | *hi = ((struct hostfs_inode_info) { .host_filename = NULL, | ||
296 | .fd = -1, | ||
297 | .mode = 0 }); | ||
298 | inode_init_once(&hi->vfs_inode); | 240 | inode_init_once(&hi->vfs_inode); |
299 | return &hi->vfs_inode; | 241 | return &hi->vfs_inode; |
300 | } | 242 | } |
301 | 243 | ||
302 | static void hostfs_delete_inode(struct inode *inode) | 244 | static void hostfs_evict_inode(struct inode *inode) |
303 | { | 245 | { |
304 | truncate_inode_pages(&inode->i_data, 0); | 246 | truncate_inode_pages(&inode->i_data, 0); |
247 | end_writeback(inode); | ||
305 | if (HOSTFS_I(inode)->fd != -1) { | 248 | if (HOSTFS_I(inode)->fd != -1) { |
306 | close_file(&HOSTFS_I(inode)->fd); | 249 | close_file(&HOSTFS_I(inode)->fd); |
307 | HOSTFS_I(inode)->fd = -1; | 250 | HOSTFS_I(inode)->fd = -1; |
308 | } | 251 | } |
309 | clear_inode(inode); | ||
310 | } | 252 | } |
311 | 253 | ||
312 | static void hostfs_destroy_inode(struct inode *inode) | 254 | static void hostfs_destroy_inode(struct inode *inode) |
313 | { | 255 | { |
314 | kfree(HOSTFS_I(inode)->host_filename); | ||
315 | |||
316 | /* | ||
317 | * XXX: This should not happen, probably. The check is here for | ||
318 | * additional safety. | ||
319 | */ | ||
320 | if (HOSTFS_I(inode)->fd != -1) { | ||
321 | close_file(&HOSTFS_I(inode)->fd); | ||
322 | printk(KERN_DEBUG "Closing host fd in .destroy_inode\n"); | ||
323 | } | ||
324 | |||
325 | kfree(HOSTFS_I(inode)); | 256 | kfree(HOSTFS_I(inode)); |
326 | } | 257 | } |
327 | 258 | ||
328 | static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | 259 | static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) |
329 | { | 260 | { |
330 | struct inode *root = vfs->mnt_sb->s_root->d_inode; | 261 | const char *root_path = vfs->mnt_sb->s_fs_info; |
331 | const char *root_path = HOSTFS_I(root)->host_filename; | ||
332 | size_t offset = strlen(root_ino) + 1; | 262 | size_t offset = strlen(root_ino) + 1; |
333 | 263 | ||
334 | if (strlen(root_path) > offset) | 264 | if (strlen(root_path) > offset) |
@@ -339,9 +269,8 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
339 | 269 | ||
340 | static const struct super_operations hostfs_sbops = { | 270 | static const struct super_operations hostfs_sbops = { |
341 | .alloc_inode = hostfs_alloc_inode, | 271 | .alloc_inode = hostfs_alloc_inode, |
342 | .drop_inode = generic_delete_inode, | ||
343 | .delete_inode = hostfs_delete_inode, | ||
344 | .destroy_inode = hostfs_destroy_inode, | 272 | .destroy_inode = hostfs_destroy_inode, |
273 | .evict_inode = hostfs_evict_inode, | ||
345 | .statfs = hostfs_statfs, | 274 | .statfs = hostfs_statfs, |
346 | .show_options = hostfs_show_options, | 275 | .show_options = hostfs_show_options, |
347 | }; | 276 | }; |
@@ -353,11 +282,11 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
353 | unsigned long long next, ino; | 282 | unsigned long long next, ino; |
354 | int error, len; | 283 | int error, len; |
355 | 284 | ||
356 | name = dentry_name(file->f_path.dentry, 0); | 285 | name = dentry_name(file->f_path.dentry); |
357 | if (name == NULL) | 286 | if (name == NULL) |
358 | return -ENOMEM; | 287 | return -ENOMEM; |
359 | dir = open_dir(name, &error); | 288 | dir = open_dir(name, &error); |
360 | kfree(name); | 289 | __putname(name); |
361 | if (dir == NULL) | 290 | if (dir == NULL) |
362 | return -error; | 291 | return -error; |
363 | next = file->f_pos; | 292 | next = file->f_pos; |
@@ -373,40 +302,59 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
373 | 302 | ||
374 | int hostfs_file_open(struct inode *ino, struct file *file) | 303 | int hostfs_file_open(struct inode *ino, struct file *file) |
375 | { | 304 | { |
305 | static DEFINE_MUTEX(open_mutex); | ||
376 | char *name; | 306 | char *name; |
377 | fmode_t mode = 0; | 307 | fmode_t mode = 0; |
308 | int err; | ||
378 | int r = 0, w = 0, fd; | 309 | int r = 0, w = 0, fd; |
379 | 310 | ||
380 | mode = file->f_mode & (FMODE_READ | FMODE_WRITE); | 311 | mode = file->f_mode & (FMODE_READ | FMODE_WRITE); |
381 | if ((mode & HOSTFS_I(ino)->mode) == mode) | 312 | if ((mode & HOSTFS_I(ino)->mode) == mode) |
382 | return 0; | 313 | return 0; |
383 | 314 | ||
384 | /* | 315 | mode |= HOSTFS_I(ino)->mode; |
385 | * The file may already have been opened, but with the wrong access, | ||
386 | * so this resets things and reopens the file with the new access. | ||
387 | */ | ||
388 | if (HOSTFS_I(ino)->fd != -1) { | ||
389 | close_file(&HOSTFS_I(ino)->fd); | ||
390 | HOSTFS_I(ino)->fd = -1; | ||
391 | } | ||
392 | 316 | ||
393 | HOSTFS_I(ino)->mode |= mode; | 317 | retry: |
394 | if (HOSTFS_I(ino)->mode & FMODE_READ) | 318 | if (mode & FMODE_READ) |
395 | r = 1; | 319 | r = 1; |
396 | if (HOSTFS_I(ino)->mode & FMODE_WRITE) | 320 | if (mode & FMODE_WRITE) |
397 | w = 1; | 321 | w = 1; |
398 | if (w) | 322 | if (w) |
399 | r = 1; | 323 | r = 1; |
400 | 324 | ||
401 | name = dentry_name(file->f_path.dentry, 0); | 325 | name = dentry_name(file->f_path.dentry); |
402 | if (name == NULL) | 326 | if (name == NULL) |
403 | return -ENOMEM; | 327 | return -ENOMEM; |
404 | 328 | ||
405 | fd = open_file(name, r, w, append); | 329 | fd = open_file(name, r, w, append); |
406 | kfree(name); | 330 | __putname(name); |
407 | if (fd < 0) | 331 | if (fd < 0) |
408 | return fd; | 332 | return fd; |
409 | FILE_HOSTFS_I(file)->fd = fd; | 333 | |
334 | mutex_lock(&open_mutex); | ||
335 | /* somebody else had handled it first? */ | ||
336 | if ((mode & HOSTFS_I(ino)->mode) == mode) { | ||
337 | mutex_unlock(&open_mutex); | ||
338 | return 0; | ||
339 | } | ||
340 | if ((mode | HOSTFS_I(ino)->mode) != mode) { | ||
341 | mode |= HOSTFS_I(ino)->mode; | ||
342 | mutex_unlock(&open_mutex); | ||
343 | close_file(&fd); | ||
344 | goto retry; | ||
345 | } | ||
346 | if (HOSTFS_I(ino)->fd == -1) { | ||
347 | HOSTFS_I(ino)->fd = fd; | ||
348 | } else { | ||
349 | err = replace_file(fd, HOSTFS_I(ino)->fd); | ||
350 | close_file(&fd); | ||
351 | if (err < 0) { | ||
352 | mutex_unlock(&open_mutex); | ||
353 | return err; | ||
354 | } | ||
355 | } | ||
356 | HOSTFS_I(ino)->mode = mode; | ||
357 | mutex_unlock(&open_mutex); | ||
410 | 358 | ||
411 | return 0; | 359 | return 0; |
412 | } | 360 | } |
@@ -544,54 +492,50 @@ static const struct address_space_operations hostfs_aops = { | |||
544 | .write_end = hostfs_write_end, | 492 | .write_end = hostfs_write_end, |
545 | }; | 493 | }; |
546 | 494 | ||
547 | static int init_inode(struct inode *inode, struct dentry *dentry) | 495 | static int read_name(struct inode *ino, char *name) |
548 | { | 496 | { |
549 | char *name; | 497 | dev_t rdev; |
550 | int type, err = -ENOMEM; | 498 | struct hostfs_stat st; |
551 | int maj, min; | 499 | int err = stat_file(name, &st, -1); |
552 | dev_t rdev = 0; | 500 | if (err) |
501 | return err; | ||
553 | 502 | ||
554 | if (dentry) { | 503 | /* Reencode maj and min with the kernel encoding.*/ |
555 | name = dentry_name(dentry, 0); | 504 | rdev = MKDEV(st.maj, st.min); |
556 | if (name == NULL) | ||
557 | goto out; | ||
558 | type = file_type(name, &maj, &min); | ||
559 | /* Reencode maj and min with the kernel encoding.*/ | ||
560 | rdev = MKDEV(maj, min); | ||
561 | kfree(name); | ||
562 | } | ||
563 | else type = OS_TYPE_DIR; | ||
564 | 505 | ||
565 | err = 0; | 506 | switch (st.mode & S_IFMT) { |
566 | if (type == OS_TYPE_SYMLINK) | 507 | case S_IFLNK: |
567 | inode->i_op = &page_symlink_inode_operations; | 508 | ino->i_op = &hostfs_link_iops; |
568 | else if (type == OS_TYPE_DIR) | ||
569 | inode->i_op = &hostfs_dir_iops; | ||
570 | else inode->i_op = &hostfs_iops; | ||
571 | |||
572 | if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; | ||
573 | else inode->i_fop = &hostfs_file_fops; | ||
574 | |||
575 | if (type == OS_TYPE_SYMLINK) | ||
576 | inode->i_mapping->a_ops = &hostfs_link_aops; | ||
577 | else inode->i_mapping->a_ops = &hostfs_aops; | ||
578 | |||
579 | switch (type) { | ||
580 | case OS_TYPE_CHARDEV: | ||
581 | init_special_inode(inode, S_IFCHR, rdev); | ||
582 | break; | 509 | break; |
583 | case OS_TYPE_BLOCKDEV: | 510 | case S_IFDIR: |
584 | init_special_inode(inode, S_IFBLK, rdev); | 511 | ino->i_op = &hostfs_dir_iops; |
512 | ino->i_fop = &hostfs_dir_fops; | ||
585 | break; | 513 | break; |
586 | case OS_TYPE_FIFO: | 514 | case S_IFCHR: |
587 | init_special_inode(inode, S_IFIFO, 0); | 515 | case S_IFBLK: |
516 | case S_IFIFO: | ||
517 | case S_IFSOCK: | ||
518 | init_special_inode(ino, st.mode & S_IFMT, rdev); | ||
519 | ino->i_op = &hostfs_iops; | ||
588 | break; | 520 | break; |
589 | case OS_TYPE_SOCK: | 521 | |
590 | init_special_inode(inode, S_IFSOCK, 0); | 522 | default: |
591 | break; | 523 | ino->i_op = &hostfs_iops; |
592 | } | 524 | ino->i_fop = &hostfs_file_fops; |
593 | out: | 525 | ino->i_mapping->a_ops = &hostfs_aops; |
594 | return err; | 526 | } |
527 | |||
528 | ino->i_ino = st.ino; | ||
529 | ino->i_mode = st.mode; | ||
530 | ino->i_nlink = st.nlink; | ||
531 | ino->i_uid = st.uid; | ||
532 | ino->i_gid = st.gid; | ||
533 | ino->i_atime = st.atime; | ||
534 | ino->i_mtime = st.mtime; | ||
535 | ino->i_ctime = st.ctime; | ||
536 | ino->i_size = st.size; | ||
537 | ino->i_blocks = st.blocks; | ||
538 | return 0; | ||
595 | } | 539 | } |
596 | 540 | ||
597 | int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, | 541 | int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, |
@@ -607,12 +551,8 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
607 | goto out; | 551 | goto out; |
608 | } | 552 | } |
609 | 553 | ||
610 | error = init_inode(inode, dentry); | ||
611 | if (error) | ||
612 | goto out_put; | ||
613 | |||
614 | error = -ENOMEM; | 554 | error = -ENOMEM; |
615 | name = dentry_name(dentry, 0); | 555 | name = dentry_name(dentry); |
616 | if (name == NULL) | 556 | if (name == NULL) |
617 | goto out_put; | 557 | goto out_put; |
618 | 558 | ||
@@ -622,9 +562,10 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
622 | mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); | 562 | mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); |
623 | if (fd < 0) | 563 | if (fd < 0) |
624 | error = fd; | 564 | error = fd; |
625 | else error = read_name(inode, name); | 565 | else |
566 | error = read_name(inode, name); | ||
626 | 567 | ||
627 | kfree(name); | 568 | __putname(name); |
628 | if (error) | 569 | if (error) |
629 | goto out_put; | 570 | goto out_put; |
630 | 571 | ||
@@ -652,17 +593,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, | |||
652 | goto out; | 593 | goto out; |
653 | } | 594 | } |
654 | 595 | ||
655 | err = init_inode(inode, dentry); | ||
656 | if (err) | ||
657 | goto out_put; | ||
658 | |||
659 | err = -ENOMEM; | 596 | err = -ENOMEM; |
660 | name = dentry_name(dentry, 0); | 597 | name = dentry_name(dentry); |
661 | if (name == NULL) | 598 | if (name == NULL) |
662 | goto out_put; | 599 | goto out_put; |
663 | 600 | ||
664 | err = read_name(inode, name); | 601 | err = read_name(inode, name); |
665 | kfree(name); | 602 | |
603 | __putname(name); | ||
666 | if (err == -ENOENT) { | 604 | if (err == -ENOENT) { |
667 | iput(inode); | 605 | iput(inode); |
668 | inode = NULL; | 606 | inode = NULL; |
@@ -680,36 +618,21 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, | |||
680 | return ERR_PTR(err); | 618 | return ERR_PTR(err); |
681 | } | 619 | } |
682 | 620 | ||
683 | static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) | ||
684 | { | ||
685 | char *file; | ||
686 | int len; | ||
687 | |||
688 | file = inode_name(ino, dentry->d_name.len + 1); | ||
689 | if (file == NULL) | ||
690 | return NULL; | ||
691 | strcat(file, "/"); | ||
692 | len = strlen(file); | ||
693 | strncat(file, dentry->d_name.name, dentry->d_name.len); | ||
694 | file[len + dentry->d_name.len] = '\0'; | ||
695 | return file; | ||
696 | } | ||
697 | |||
698 | int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) | 621 | int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) |
699 | { | 622 | { |
700 | char *from_name, *to_name; | 623 | char *from_name, *to_name; |
701 | int err; | 624 | int err; |
702 | 625 | ||
703 | if ((from_name = inode_dentry_name(ino, from)) == NULL) | 626 | if ((from_name = dentry_name(from)) == NULL) |
704 | return -ENOMEM; | 627 | return -ENOMEM; |
705 | to_name = dentry_name(to, 0); | 628 | to_name = dentry_name(to); |
706 | if (to_name == NULL) { | 629 | if (to_name == NULL) { |
707 | kfree(from_name); | 630 | __putname(from_name); |
708 | return -ENOMEM; | 631 | return -ENOMEM; |
709 | } | 632 | } |
710 | err = link_file(to_name, from_name); | 633 | err = link_file(to_name, from_name); |
711 | kfree(from_name); | 634 | __putname(from_name); |
712 | kfree(to_name); | 635 | __putname(to_name); |
713 | return err; | 636 | return err; |
714 | } | 637 | } |
715 | 638 | ||
@@ -718,13 +641,14 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry) | |||
718 | char *file; | 641 | char *file; |
719 | int err; | 642 | int err; |
720 | 643 | ||
721 | if ((file = inode_dentry_name(ino, dentry)) == NULL) | ||
722 | return -ENOMEM; | ||
723 | if (append) | 644 | if (append) |
724 | return -EPERM; | 645 | return -EPERM; |
725 | 646 | ||
647 | if ((file = dentry_name(dentry)) == NULL) | ||
648 | return -ENOMEM; | ||
649 | |||
726 | err = unlink_file(file); | 650 | err = unlink_file(file); |
727 | kfree(file); | 651 | __putname(file); |
728 | return err; | 652 | return err; |
729 | } | 653 | } |
730 | 654 | ||
@@ -733,10 +657,10 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) | |||
733 | char *file; | 657 | char *file; |
734 | int err; | 658 | int err; |
735 | 659 | ||
736 | if ((file = inode_dentry_name(ino, dentry)) == NULL) | 660 | if ((file = dentry_name(dentry)) == NULL) |
737 | return -ENOMEM; | 661 | return -ENOMEM; |
738 | err = make_symlink(file, to); | 662 | err = make_symlink(file, to); |
739 | kfree(file); | 663 | __putname(file); |
740 | return err; | 664 | return err; |
741 | } | 665 | } |
742 | 666 | ||
@@ -745,10 +669,10 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) | |||
745 | char *file; | 669 | char *file; |
746 | int err; | 670 | int err; |
747 | 671 | ||
748 | if ((file = inode_dentry_name(ino, dentry)) == NULL) | 672 | if ((file = dentry_name(dentry)) == NULL) |
749 | return -ENOMEM; | 673 | return -ENOMEM; |
750 | err = do_mkdir(file, mode); | 674 | err = do_mkdir(file, mode); |
751 | kfree(file); | 675 | __putname(file); |
752 | return err; | 676 | return err; |
753 | } | 677 | } |
754 | 678 | ||
@@ -757,10 +681,10 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) | |||
757 | char *file; | 681 | char *file; |
758 | int err; | 682 | int err; |
759 | 683 | ||
760 | if ((file = inode_dentry_name(ino, dentry)) == NULL) | 684 | if ((file = dentry_name(dentry)) == NULL) |
761 | return -ENOMEM; | 685 | return -ENOMEM; |
762 | err = do_rmdir(file); | 686 | err = do_rmdir(file); |
763 | kfree(file); | 687 | __putname(file); |
764 | return err; | 688 | return err; |
765 | } | 689 | } |
766 | 690 | ||
@@ -776,22 +700,20 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
776 | goto out; | 700 | goto out; |
777 | } | 701 | } |
778 | 702 | ||
779 | err = init_inode(inode, dentry); | ||
780 | if (err) | ||
781 | goto out_put; | ||
782 | |||
783 | err = -ENOMEM; | 703 | err = -ENOMEM; |
784 | name = dentry_name(dentry, 0); | 704 | name = dentry_name(dentry); |
785 | if (name == NULL) | 705 | if (name == NULL) |
786 | goto out_put; | 706 | goto out_put; |
787 | 707 | ||
788 | init_special_inode(inode, mode, dev); | 708 | init_special_inode(inode, mode, dev); |
789 | err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); | 709 | err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); |
790 | if (err) | 710 | if (!err) |
791 | goto out_free; | 711 | goto out_free; |
792 | 712 | ||
793 | err = read_name(inode, name); | 713 | err = read_name(inode, name); |
794 | kfree(name); | 714 | __putname(name); |
715 | if (err) | ||
716 | goto out_put; | ||
795 | if (err) | 717 | if (err) |
796 | goto out_put; | 718 | goto out_put; |
797 | 719 | ||
@@ -799,7 +721,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
799 | return 0; | 721 | return 0; |
800 | 722 | ||
801 | out_free: | 723 | out_free: |
802 | kfree(name); | 724 | __putname(name); |
803 | out_put: | 725 | out_put: |
804 | iput(inode); | 726 | iput(inode); |
805 | out: | 727 | out: |
@@ -812,15 +734,15 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, | |||
812 | char *from_name, *to_name; | 734 | char *from_name, *to_name; |
813 | int err; | 735 | int err; |
814 | 736 | ||
815 | if ((from_name = inode_dentry_name(from_ino, from)) == NULL) | 737 | if ((from_name = dentry_name(from)) == NULL) |
816 | return -ENOMEM; | 738 | return -ENOMEM; |
817 | if ((to_name = inode_dentry_name(to_ino, to)) == NULL) { | 739 | if ((to_name = dentry_name(to)) == NULL) { |
818 | kfree(from_name); | 740 | __putname(from_name); |
819 | return -ENOMEM; | 741 | return -ENOMEM; |
820 | } | 742 | } |
821 | err = rename_file(from_name, to_name); | 743 | err = rename_file(from_name, to_name); |
822 | kfree(from_name); | 744 | __putname(from_name); |
823 | kfree(to_name); | 745 | __putname(to_name); |
824 | return err; | 746 | return err; |
825 | } | 747 | } |
826 | 748 | ||
@@ -832,7 +754,7 @@ int hostfs_permission(struct inode *ino, int desired) | |||
832 | if (desired & MAY_READ) r = 1; | 754 | if (desired & MAY_READ) r = 1; |
833 | if (desired & MAY_WRITE) w = 1; | 755 | if (desired & MAY_WRITE) w = 1; |
834 | if (desired & MAY_EXEC) x = 1; | 756 | if (desired & MAY_EXEC) x = 1; |
835 | name = inode_name(ino, 0); | 757 | name = inode_name(ino); |
836 | if (name == NULL) | 758 | if (name == NULL) |
837 | return -ENOMEM; | 759 | return -ENOMEM; |
838 | 760 | ||
@@ -841,7 +763,7 @@ int hostfs_permission(struct inode *ino, int desired) | |||
841 | err = 0; | 763 | err = 0; |
842 | else | 764 | else |
843 | err = access_file(name, r, w, x); | 765 | err = access_file(name, r, w, x); |
844 | kfree(name); | 766 | __putname(name); |
845 | if (!err) | 767 | if (!err) |
846 | err = generic_permission(ino, desired, NULL); | 768 | err = generic_permission(ino, desired, NULL); |
847 | return err; | 769 | return err; |
@@ -849,13 +771,14 @@ int hostfs_permission(struct inode *ino, int desired) | |||
849 | 771 | ||
850 | int hostfs_setattr(struct dentry *dentry, struct iattr *attr) | 772 | int hostfs_setattr(struct dentry *dentry, struct iattr *attr) |
851 | { | 773 | { |
774 | struct inode *inode = dentry->d_inode; | ||
852 | struct hostfs_iattr attrs; | 775 | struct hostfs_iattr attrs; |
853 | char *name; | 776 | char *name; |
854 | int err; | 777 | int err; |
855 | 778 | ||
856 | int fd = HOSTFS_I(dentry->d_inode)->fd; | 779 | int fd = HOSTFS_I(inode)->fd; |
857 | 780 | ||
858 | err = inode_change_ok(dentry->d_inode, attr); | 781 | err = inode_change_ok(inode, attr); |
859 | if (err) | 782 | if (err) |
860 | return err; | 783 | return err; |
861 | 784 | ||
@@ -897,15 +820,26 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
897 | if (attr->ia_valid & ATTR_MTIME_SET) { | 820 | if (attr->ia_valid & ATTR_MTIME_SET) { |
898 | attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; | 821 | attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; |
899 | } | 822 | } |
900 | name = dentry_name(dentry, 0); | 823 | name = dentry_name(dentry); |
901 | if (name == NULL) | 824 | if (name == NULL) |
902 | return -ENOMEM; | 825 | return -ENOMEM; |
903 | err = set_attr(name, &attrs, fd); | 826 | err = set_attr(name, &attrs, fd); |
904 | kfree(name); | 827 | __putname(name); |
905 | if (err) | 828 | if (err) |
906 | return err; | 829 | return err; |
907 | 830 | ||
908 | return inode_setattr(dentry->d_inode, attr); | 831 | if ((attr->ia_valid & ATTR_SIZE) && |
832 | attr->ia_size != i_size_read(inode)) { | ||
833 | int error; | ||
834 | |||
835 | error = vmtruncate(inode, attr->ia_size); | ||
836 | if (err) | ||
837 | return err; | ||
838 | } | ||
839 | |||
840 | setattr_copy(inode, attr); | ||
841 | mark_inode_dirty(inode); | ||
842 | return 0; | ||
909 | } | 843 | } |
910 | 844 | ||
911 | static const struct inode_operations hostfs_iops = { | 845 | static const struct inode_operations hostfs_iops = { |
@@ -935,32 +869,41 @@ static const struct inode_operations hostfs_dir_iops = { | |||
935 | .setattr = hostfs_setattr, | 869 | .setattr = hostfs_setattr, |
936 | }; | 870 | }; |
937 | 871 | ||
938 | int hostfs_link_readpage(struct file *file, struct page *page) | 872 | static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) |
939 | { | 873 | { |
940 | char *buffer, *name; | 874 | char *link = __getname(); |
941 | int err; | 875 | if (link) { |
942 | 876 | char *path = dentry_name(dentry); | |
943 | buffer = kmap(page); | 877 | int err = -ENOMEM; |
944 | name = inode_name(page->mapping->host, 0); | 878 | if (path) { |
945 | if (name == NULL) | 879 | int err = hostfs_do_readlink(path, link, PATH_MAX); |
946 | return -ENOMEM; | 880 | if (err == PATH_MAX) |
947 | err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE); | 881 | err = -E2BIG; |
948 | kfree(name); | 882 | __putname(path); |
949 | if (err == PAGE_CACHE_SIZE) | 883 | } |
950 | err = -E2BIG; | 884 | if (err < 0) { |
951 | else if (err > 0) { | 885 | __putname(link); |
952 | flush_dcache_page(page); | 886 | link = ERR_PTR(err); |
953 | SetPageUptodate(page); | 887 | } |
954 | if (PageError(page)) ClearPageError(page); | 888 | } else { |
955 | err = 0; | 889 | link = ERR_PTR(-ENOMEM); |
956 | } | 890 | } |
957 | kunmap(page); | 891 | |
958 | unlock_page(page); | 892 | nd_set_link(nd, link); |
959 | return err; | 893 | return NULL; |
960 | } | 894 | } |
961 | 895 | ||
962 | static const struct address_space_operations hostfs_link_aops = { | 896 | static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) |
963 | .readpage = hostfs_link_readpage, | 897 | { |
898 | char *s = nd_get_link(nd); | ||
899 | if (!IS_ERR(s)) | ||
900 | __putname(s); | ||
901 | } | ||
902 | |||
903 | static const struct inode_operations hostfs_link_iops = { | ||
904 | .readlink = generic_readlink, | ||
905 | .follow_link = hostfs_follow_link, | ||
906 | .put_link = hostfs_put_link, | ||
964 | }; | 907 | }; |
965 | 908 | ||
966 | static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) | 909 | static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) |
@@ -980,49 +923,41 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) | |||
980 | req_root = ""; | 923 | req_root = ""; |
981 | 924 | ||
982 | err = -ENOMEM; | 925 | err = -ENOMEM; |
983 | host_root_path = kmalloc(strlen(root_ino) + 1 | 926 | sb->s_fs_info = host_root_path = |
984 | + strlen(req_root) + 1, GFP_KERNEL); | 927 | kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL); |
985 | if (host_root_path == NULL) | 928 | if (host_root_path == NULL) |
986 | goto out; | 929 | goto out; |
987 | 930 | ||
988 | sprintf(host_root_path, "%s/%s", root_ino, req_root); | 931 | sprintf(host_root_path, "%s/%s", root_ino, req_root); |
989 | 932 | ||
990 | root_inode = hostfs_iget(sb); | 933 | root_inode = new_inode(sb); |
991 | if (IS_ERR(root_inode)) { | 934 | if (!root_inode) |
992 | err = PTR_ERR(root_inode); | 935 | goto out; |
993 | goto out_free; | ||
994 | } | ||
995 | 936 | ||
996 | err = init_inode(root_inode, NULL); | 937 | err = read_name(root_inode, host_root_path); |
997 | if (err) | 938 | if (err) |
998 | goto out_put; | 939 | goto out_put; |
999 | 940 | ||
1000 | HOSTFS_I(root_inode)->host_filename = host_root_path; | 941 | if (S_ISLNK(root_inode->i_mode)) { |
1001 | /* | 942 | char *name = follow_link(host_root_path); |
1002 | * Avoid that in the error path, iput(root_inode) frees again | 943 | if (IS_ERR(name)) |
1003 | * host_root_path through hostfs_destroy_inode! | 944 | err = PTR_ERR(name); |
1004 | */ | 945 | else |
1005 | host_root_path = NULL; | 946 | err = read_name(root_inode, name); |
947 | kfree(name); | ||
948 | if (err) | ||
949 | goto out_put; | ||
950 | } | ||
1006 | 951 | ||
1007 | err = -ENOMEM; | 952 | err = -ENOMEM; |
1008 | sb->s_root = d_alloc_root(root_inode); | 953 | sb->s_root = d_alloc_root(root_inode); |
1009 | if (sb->s_root == NULL) | 954 | if (sb->s_root == NULL) |
1010 | goto out_put; | 955 | goto out_put; |
1011 | 956 | ||
1012 | err = hostfs_read_inode(root_inode); | ||
1013 | if (err) { | ||
1014 | /* No iput in this case because the dput does that for us */ | ||
1015 | dput(sb->s_root); | ||
1016 | sb->s_root = NULL; | ||
1017 | goto out; | ||
1018 | } | ||
1019 | |||
1020 | return 0; | 957 | return 0; |
1021 | 958 | ||
1022 | out_put: | 959 | out_put: |
1023 | iput(root_inode); | 960 | iput(root_inode); |
1024 | out_free: | ||
1025 | kfree(host_root_path); | ||
1026 | out: | 961 | out: |
1027 | return err; | 962 | return err; |
1028 | } | 963 | } |
@@ -1034,11 +969,17 @@ static int hostfs_read_sb(struct file_system_type *type, | |||
1034 | return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); | 969 | return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); |
1035 | } | 970 | } |
1036 | 971 | ||
972 | static void hostfs_kill_sb(struct super_block *s) | ||
973 | { | ||
974 | kill_anon_super(s); | ||
975 | kfree(s->s_fs_info); | ||
976 | } | ||
977 | |||
1037 | static struct file_system_type hostfs_type = { | 978 | static struct file_system_type hostfs_type = { |
1038 | .owner = THIS_MODULE, | 979 | .owner = THIS_MODULE, |
1039 | .name = "hostfs", | 980 | .name = "hostfs", |
1040 | .get_sb = hostfs_read_sb, | 981 | .get_sb = hostfs_read_sb, |
1041 | .kill_sb = kill_anon_super, | 982 | .kill_sb = hostfs_kill_sb, |
1042 | .fs_flags = 0, | 983 | .fs_flags = 0, |
1043 | }; | 984 | }; |
1044 | 985 | ||
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index b79424f93282..6777aa06ce2c 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c | |||
@@ -19,11 +19,27 @@ | |||
19 | #include "user.h" | 19 | #include "user.h" |
20 | #include <utime.h> | 20 | #include <utime.h> |
21 | 21 | ||
22 | int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, | 22 | static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) |
23 | int *nlink_out, int *uid_out, int *gid_out, | 23 | { |
24 | unsigned long long *size_out, struct timespec *atime_out, | 24 | p->ino = buf->st_ino; |
25 | struct timespec *mtime_out, struct timespec *ctime_out, | 25 | p->mode = buf->st_mode; |
26 | int *blksize_out, unsigned long long *blocks_out, int fd) | 26 | p->nlink = buf->st_nlink; |
27 | p->uid = buf->st_uid; | ||
28 | p->gid = buf->st_gid; | ||
29 | p->size = buf->st_size; | ||
30 | p->atime.tv_sec = buf->st_atime; | ||
31 | p->atime.tv_nsec = 0; | ||
32 | p->ctime.tv_sec = buf->st_ctime; | ||
33 | p->ctime.tv_nsec = 0; | ||
34 | p->mtime.tv_sec = buf->st_mtime; | ||
35 | p->mtime.tv_nsec = 0; | ||
36 | p->blksize = buf->st_blksize; | ||
37 | p->blocks = buf->st_blocks; | ||
38 | p->maj = os_major(buf->st_rdev); | ||
39 | p->min = os_minor(buf->st_rdev); | ||
40 | } | ||
41 | |||
42 | int stat_file(const char *path, struct hostfs_stat *p, int fd) | ||
27 | { | 43 | { |
28 | struct stat64 buf; | 44 | struct stat64 buf; |
29 | 45 | ||
@@ -33,68 +49,10 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, | |||
33 | } else if (lstat64(path, &buf) < 0) { | 49 | } else if (lstat64(path, &buf) < 0) { |
34 | return -errno; | 50 | return -errno; |
35 | } | 51 | } |
36 | 52 | stat64_to_hostfs(&buf, p); | |
37 | if (inode_out != NULL) | ||
38 | *inode_out = buf.st_ino; | ||
39 | if (mode_out != NULL) | ||
40 | *mode_out = buf.st_mode; | ||
41 | if (nlink_out != NULL) | ||
42 | *nlink_out = buf.st_nlink; | ||
43 | if (uid_out != NULL) | ||
44 | *uid_out = buf.st_uid; | ||
45 | if (gid_out != NULL) | ||
46 | *gid_out = buf.st_gid; | ||
47 | if (size_out != NULL) | ||
48 | *size_out = buf.st_size; | ||
49 | if (atime_out != NULL) { | ||
50 | atime_out->tv_sec = buf.st_atime; | ||
51 | atime_out->tv_nsec = 0; | ||
52 | } | ||
53 | if (mtime_out != NULL) { | ||
54 | mtime_out->tv_sec = buf.st_mtime; | ||
55 | mtime_out->tv_nsec = 0; | ||
56 | } | ||
57 | if (ctime_out != NULL) { | ||
58 | ctime_out->tv_sec = buf.st_ctime; | ||
59 | ctime_out->tv_nsec = 0; | ||
60 | } | ||
61 | if (blksize_out != NULL) | ||
62 | *blksize_out = buf.st_blksize; | ||
63 | if (blocks_out != NULL) | ||
64 | *blocks_out = buf.st_blocks; | ||
65 | return 0; | 53 | return 0; |
66 | } | 54 | } |
67 | 55 | ||
68 | int file_type(const char *path, int *maj, int *min) | ||
69 | { | ||
70 | struct stat64 buf; | ||
71 | |||
72 | if (lstat64(path, &buf) < 0) | ||
73 | return -errno; | ||
74 | /* | ||
75 | * We cannot pass rdev as is because glibc and the kernel disagree | ||
76 | * about its definition. | ||
77 | */ | ||
78 | if (maj != NULL) | ||
79 | *maj = major(buf.st_rdev); | ||
80 | if (min != NULL) | ||
81 | *min = minor(buf.st_rdev); | ||
82 | |||
83 | if (S_ISDIR(buf.st_mode)) | ||
84 | return OS_TYPE_DIR; | ||
85 | else if (S_ISLNK(buf.st_mode)) | ||
86 | return OS_TYPE_SYMLINK; | ||
87 | else if (S_ISCHR(buf.st_mode)) | ||
88 | return OS_TYPE_CHARDEV; | ||
89 | else if (S_ISBLK(buf.st_mode)) | ||
90 | return OS_TYPE_BLOCKDEV; | ||
91 | else if (S_ISFIFO(buf.st_mode)) | ||
92 | return OS_TYPE_FIFO; | ||
93 | else if (S_ISSOCK(buf.st_mode)) | ||
94 | return OS_TYPE_SOCK; | ||
95 | else return OS_TYPE_FILE; | ||
96 | } | ||
97 | |||
98 | int access_file(char *path, int r, int w, int x) | 56 | int access_file(char *path, int r, int w, int x) |
99 | { | 57 | { |
100 | int mode = 0; | 58 | int mode = 0; |
@@ -202,6 +160,11 @@ int fsync_file(int fd, int datasync) | |||
202 | return 0; | 160 | return 0; |
203 | } | 161 | } |
204 | 162 | ||
163 | int replace_file(int oldfd, int fd) | ||
164 | { | ||
165 | return dup2(oldfd, fd); | ||
166 | } | ||
167 | |||
205 | void close_file(void *stream) | 168 | void close_file(void *stream) |
206 | { | 169 | { |
207 | close(*((int *) stream)); | 170 | close(*((int *) stream)); |
@@ -235,8 +198,8 @@ int file_create(char *name, int ur, int uw, int ux, int gr, | |||
235 | 198 | ||
236 | int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) | 199 | int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) |
237 | { | 200 | { |
201 | struct hostfs_stat st; | ||
238 | struct timeval times[2]; | 202 | struct timeval times[2]; |
239 | struct timespec atime_ts, mtime_ts; | ||
240 | int err, ma; | 203 | int err, ma; |
241 | 204 | ||
242 | if (attrs->ia_valid & HOSTFS_ATTR_MODE) { | 205 | if (attrs->ia_valid & HOSTFS_ATTR_MODE) { |
@@ -279,15 +242,14 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) | |||
279 | */ | 242 | */ |
280 | ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); | 243 | ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); |
281 | if (attrs->ia_valid & ma) { | 244 | if (attrs->ia_valid & ma) { |
282 | err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, | 245 | err = stat_file(file, &st, fd); |
283 | &atime_ts, &mtime_ts, NULL, NULL, NULL, fd); | ||
284 | if (err != 0) | 246 | if (err != 0) |
285 | return err; | 247 | return err; |
286 | 248 | ||
287 | times[0].tv_sec = atime_ts.tv_sec; | 249 | times[0].tv_sec = st.atime.tv_sec; |
288 | times[0].tv_usec = atime_ts.tv_nsec / 1000; | 250 | times[0].tv_usec = st.atime.tv_nsec / 1000; |
289 | times[1].tv_sec = mtime_ts.tv_sec; | 251 | times[1].tv_sec = st.mtime.tv_sec; |
290 | times[1].tv_usec = mtime_ts.tv_nsec / 1000; | 252 | times[1].tv_usec = st.mtime.tv_nsec / 1000; |
291 | 253 | ||
292 | if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { | 254 | if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { |
293 | times[0].tv_sec = attrs->ia_atime.tv_sec; | 255 | times[0].tv_sec = attrs->ia_atime.tv_sec; |
@@ -308,9 +270,9 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) | |||
308 | 270 | ||
309 | /* Note: ctime is not handled */ | 271 | /* Note: ctime is not handled */ |
310 | if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) { | 272 | if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) { |
311 | err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, | 273 | err = stat_file(file, &st, fd); |
312 | &attrs->ia_atime, &attrs->ia_mtime, NULL, | 274 | attrs->ia_atime = st.atime; |
313 | NULL, NULL, fd); | 275 | attrs->ia_mtime = st.mtime; |
314 | if (err != 0) | 276 | if (err != 0) |
315 | return err; | 277 | return err; |
316 | } | 278 | } |
@@ -361,7 +323,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor) | |||
361 | { | 323 | { |
362 | int err; | 324 | int err; |
363 | 325 | ||
364 | err = mknod(file, mode, makedev(major, minor)); | 326 | err = mknod(file, mode, os_makedev(major, minor)); |
365 | if (err) | 327 | if (err) |
366 | return -errno; | 328 | return -errno; |
367 | return 0; | 329 | return 0; |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index a9ae9bfa752f..c0340887c7ea 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, | |||
97 | loff_t pos, unsigned len, unsigned flags, | 97 | loff_t pos, unsigned len, unsigned flags, |
98 | struct page **pagep, void **fsdata) | 98 | struct page **pagep, void **fsdata) |
99 | { | 99 | { |
100 | int ret; | ||
101 | |||
100 | *pagep = NULL; | 102 | *pagep = NULL; |
101 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 103 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
102 | hpfs_get_block, | 104 | hpfs_get_block, |
103 | &hpfs_i(mapping->host)->mmu_private); | 105 | &hpfs_i(mapping->host)->mmu_private); |
106 | if (unlikely(ret)) { | ||
107 | loff_t isize = mapping->host->i_size; | ||
108 | if (pos + len > isize) | ||
109 | vmtruncate(mapping->host, isize); | ||
110 | } | ||
111 | |||
112 | return ret; | ||
104 | } | 113 | } |
105 | 114 | ||
106 | static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) | 115 | static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) |
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 75f9d4324851..b59eac0232a0 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -281,7 +281,7 @@ void hpfs_write_inode(struct inode *); | |||
281 | void hpfs_write_inode_nolock(struct inode *); | 281 | void hpfs_write_inode_nolock(struct inode *); |
282 | int hpfs_setattr(struct dentry *, struct iattr *); | 282 | int hpfs_setattr(struct dentry *, struct iattr *); |
283 | void hpfs_write_if_changed(struct inode *); | 283 | void hpfs_write_if_changed(struct inode *); |
284 | void hpfs_delete_inode(struct inode *); | 284 | void hpfs_evict_inode(struct inode *); |
285 | 285 | ||
286 | /* map.c */ | 286 | /* map.c */ |
287 | 287 | ||
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 1042a9bc97f3..56f0da1cfd10 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -277,9 +277,15 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
277 | if (error) | 277 | if (error) |
278 | goto out_unlock; | 278 | goto out_unlock; |
279 | 279 | ||
280 | error = inode_setattr(inode, attr); | 280 | if ((attr->ia_valid & ATTR_SIZE) && |
281 | if (error) | 281 | attr->ia_size != i_size_read(inode)) { |
282 | goto out_unlock; | 282 | error = vmtruncate(inode, attr->ia_size); |
283 | if (error) | ||
284 | return error; | ||
285 | } | ||
286 | |||
287 | setattr_copy(inode, attr); | ||
288 | mark_inode_dirty(inode); | ||
283 | 289 | ||
284 | hpfs_write_inode(inode); | 290 | hpfs_write_inode(inode); |
285 | 291 | ||
@@ -296,11 +302,13 @@ void hpfs_write_if_changed(struct inode *inode) | |||
296 | hpfs_write_inode(inode); | 302 | hpfs_write_inode(inode); |
297 | } | 303 | } |
298 | 304 | ||
299 | void hpfs_delete_inode(struct inode *inode) | 305 | void hpfs_evict_inode(struct inode *inode) |
300 | { | 306 | { |
301 | truncate_inode_pages(&inode->i_data, 0); | 307 | truncate_inode_pages(&inode->i_data, 0); |
302 | lock_kernel(); | 308 | end_writeback(inode); |
303 | hpfs_remove_fnode(inode->i_sb, inode->i_ino); | 309 | if (!inode->i_nlink) { |
304 | unlock_kernel(); | 310 | lock_kernel(); |
305 | clear_inode(inode); | 311 | hpfs_remove_fnode(inode->i_sb, inode->i_ino); |
312 | unlock_kernel(); | ||
313 | } | ||
306 | } | 314 | } |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index aa53842c599c..2607010be2fe 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -450,7 +450,7 @@ static const struct super_operations hpfs_sops = | |||
450 | { | 450 | { |
451 | .alloc_inode = hpfs_alloc_inode, | 451 | .alloc_inode = hpfs_alloc_inode, |
452 | .destroy_inode = hpfs_destroy_inode, | 452 | .destroy_inode = hpfs_destroy_inode, |
453 | .delete_inode = hpfs_delete_inode, | 453 | .evict_inode = hpfs_evict_inode, |
454 | .put_super = hpfs_put_super, | 454 | .put_super = hpfs_put_super, |
455 | .statfs = hpfs_statfs, | 455 | .statfs = hpfs_statfs, |
456 | .remount_fs = hpfs_remount_fs, | 456 | .remount_fs = hpfs_remount_fs, |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 826c3f9d29ac..7b027720d820 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/statfs.h> | 16 | #include <linux/statfs.h> |
17 | #include <linux/types.h> | 17 | #include <linux/types.h> |
18 | #include <linux/pid_namespace.h> | ||
18 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
19 | #include "os.h" | 20 | #include "os.h" |
20 | 21 | ||
@@ -623,12 +624,11 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb) | |||
623 | return &hi->vfs_inode; | 624 | return &hi->vfs_inode; |
624 | } | 625 | } |
625 | 626 | ||
626 | void hppfs_delete_inode(struct inode *ino) | 627 | void hppfs_evict_inode(struct inode *ino) |
627 | { | 628 | { |
629 | end_writeback(ino); | ||
628 | dput(HPPFS_I(ino)->proc_dentry); | 630 | dput(HPPFS_I(ino)->proc_dentry); |
629 | mntput(ino->i_sb->s_fs_info); | 631 | mntput(ino->i_sb->s_fs_info); |
630 | |||
631 | clear_inode(ino); | ||
632 | } | 632 | } |
633 | 633 | ||
634 | static void hppfs_destroy_inode(struct inode *inode) | 634 | static void hppfs_destroy_inode(struct inode *inode) |
@@ -639,7 +639,7 @@ static void hppfs_destroy_inode(struct inode *inode) | |||
639 | static const struct super_operations hppfs_sbops = { | 639 | static const struct super_operations hppfs_sbops = { |
640 | .alloc_inode = hppfs_alloc_inode, | 640 | .alloc_inode = hppfs_alloc_inode, |
641 | .destroy_inode = hppfs_destroy_inode, | 641 | .destroy_inode = hppfs_destroy_inode, |
642 | .delete_inode = hppfs_delete_inode, | 642 | .evict_inode = hppfs_evict_inode, |
643 | .statfs = hppfs_statfs, | 643 | .statfs = hppfs_statfs, |
644 | }; | 644 | }; |
645 | 645 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a4e9a7ec3691..6e5bd42f3860 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -371,27 +371,10 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) | |||
371 | hugetlb_unreserve_pages(inode, start, freed); | 371 | hugetlb_unreserve_pages(inode, start, freed); |
372 | } | 372 | } |
373 | 373 | ||
374 | static void hugetlbfs_delete_inode(struct inode *inode) | 374 | static void hugetlbfs_evict_inode(struct inode *inode) |
375 | { | 375 | { |
376 | truncate_hugepages(inode, 0); | 376 | truncate_hugepages(inode, 0); |
377 | clear_inode(inode); | 377 | end_writeback(inode); |
378 | } | ||
379 | |||
380 | static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) | ||
381 | { | ||
382 | if (generic_detach_inode(inode)) { | ||
383 | truncate_hugepages(inode, 0); | ||
384 | clear_inode(inode); | ||
385 | destroy_inode(inode); | ||
386 | } | ||
387 | } | ||
388 | |||
389 | static void hugetlbfs_drop_inode(struct inode *inode) | ||
390 | { | ||
391 | if (!inode->i_nlink) | ||
392 | generic_delete_inode(inode); | ||
393 | else | ||
394 | hugetlbfs_forget_inode(inode); | ||
395 | } | 378 | } |
396 | 379 | ||
397 | static inline void | 380 | static inline void |
@@ -448,19 +431,20 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
448 | 431 | ||
449 | error = inode_change_ok(inode, attr); | 432 | error = inode_change_ok(inode, attr); |
450 | if (error) | 433 | if (error) |
451 | goto out; | 434 | return error; |
452 | 435 | ||
453 | if (ia_valid & ATTR_SIZE) { | 436 | if (ia_valid & ATTR_SIZE) { |
454 | error = -EINVAL; | 437 | error = -EINVAL; |
455 | if (!(attr->ia_size & ~huge_page_mask(h))) | 438 | if (attr->ia_size & ~huge_page_mask(h)) |
456 | error = hugetlb_vmtruncate(inode, attr->ia_size); | 439 | return -EINVAL; |
440 | error = hugetlb_vmtruncate(inode, attr->ia_size); | ||
457 | if (error) | 441 | if (error) |
458 | goto out; | 442 | return error; |
459 | attr->ia_valid &= ~ATTR_SIZE; | ||
460 | } | 443 | } |
461 | error = inode_setattr(inode, attr); | 444 | |
462 | out: | 445 | setattr_copy(inode, attr); |
463 | return error; | 446 | mark_inode_dirty(inode); |
447 | return 0; | ||
464 | } | 448 | } |
465 | 449 | ||
466 | static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, | 450 | static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, |
@@ -712,9 +696,8 @@ static const struct inode_operations hugetlbfs_inode_operations = { | |||
712 | static const struct super_operations hugetlbfs_ops = { | 696 | static const struct super_operations hugetlbfs_ops = { |
713 | .alloc_inode = hugetlbfs_alloc_inode, | 697 | .alloc_inode = hugetlbfs_alloc_inode, |
714 | .destroy_inode = hugetlbfs_destroy_inode, | 698 | .destroy_inode = hugetlbfs_destroy_inode, |
699 | .evict_inode = hugetlbfs_evict_inode, | ||
715 | .statfs = hugetlbfs_statfs, | 700 | .statfs = hugetlbfs_statfs, |
716 | .delete_inode = hugetlbfs_delete_inode, | ||
717 | .drop_inode = hugetlbfs_drop_inode, | ||
718 | .put_super = hugetlbfs_put_super, | 701 | .put_super = hugetlbfs_put_super, |
719 | .show_options = generic_show_options, | 702 | .show_options = generic_show_options, |
720 | }; | 703 | }; |
diff --git a/fs/inode.c b/fs/inode.c index 722860b323a9..86464332e590 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
21 | #include <linux/cdev.h> | 21 | #include <linux/cdev.h> |
22 | #include <linux/bootmem.h> | 22 | #include <linux/bootmem.h> |
23 | #include <linux/inotify.h> | ||
24 | #include <linux/fsnotify.h> | 23 | #include <linux/fsnotify.h> |
25 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
26 | #include <linux/async.h> | 25 | #include <linux/async.h> |
@@ -264,12 +263,8 @@ void inode_init_once(struct inode *inode) | |||
264 | INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); | 263 | INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); |
265 | INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); | 264 | INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); |
266 | i_size_ordered_init(inode); | 265 | i_size_ordered_init(inode); |
267 | #ifdef CONFIG_INOTIFY | ||
268 | INIT_LIST_HEAD(&inode->inotify_watches); | ||
269 | mutex_init(&inode->inotify_mutex); | ||
270 | #endif | ||
271 | #ifdef CONFIG_FSNOTIFY | 266 | #ifdef CONFIG_FSNOTIFY |
272 | INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); | 267 | INIT_HLIST_HEAD(&inode->i_fsnotify_marks); |
273 | #endif | 268 | #endif |
274 | } | 269 | } |
275 | EXPORT_SYMBOL(inode_init_once); | 270 | EXPORT_SYMBOL(inode_init_once); |
@@ -294,32 +289,34 @@ void __iget(struct inode *inode) | |||
294 | inodes_stat.nr_unused--; | 289 | inodes_stat.nr_unused--; |
295 | } | 290 | } |
296 | 291 | ||
297 | /** | 292 | void end_writeback(struct inode *inode) |
298 | * clear_inode - clear an inode | ||
299 | * @inode: inode to clear | ||
300 | * | ||
301 | * This is called by the filesystem to tell us | ||
302 | * that the inode is no longer useful. We just | ||
303 | * terminate it with extreme prejudice. | ||
304 | */ | ||
305 | void clear_inode(struct inode *inode) | ||
306 | { | 293 | { |
307 | might_sleep(); | 294 | might_sleep(); |
308 | invalidate_inode_buffers(inode); | ||
309 | |||
310 | BUG_ON(inode->i_data.nrpages); | 295 | BUG_ON(inode->i_data.nrpages); |
296 | BUG_ON(!list_empty(&inode->i_data.private_list)); | ||
311 | BUG_ON(!(inode->i_state & I_FREEING)); | 297 | BUG_ON(!(inode->i_state & I_FREEING)); |
312 | BUG_ON(inode->i_state & I_CLEAR); | 298 | BUG_ON(inode->i_state & I_CLEAR); |
313 | inode_sync_wait(inode); | 299 | inode_sync_wait(inode); |
314 | if (inode->i_sb->s_op->clear_inode) | 300 | inode->i_state = I_FREEING | I_CLEAR; |
315 | inode->i_sb->s_op->clear_inode(inode); | 301 | } |
302 | EXPORT_SYMBOL(end_writeback); | ||
303 | |||
304 | static void evict(struct inode *inode) | ||
305 | { | ||
306 | const struct super_operations *op = inode->i_sb->s_op; | ||
307 | |||
308 | if (op->evict_inode) { | ||
309 | op->evict_inode(inode); | ||
310 | } else { | ||
311 | if (inode->i_data.nrpages) | ||
312 | truncate_inode_pages(&inode->i_data, 0); | ||
313 | end_writeback(inode); | ||
314 | } | ||
316 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) | 315 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) |
317 | bd_forget(inode); | 316 | bd_forget(inode); |
318 | if (S_ISCHR(inode->i_mode) && inode->i_cdev) | 317 | if (S_ISCHR(inode->i_mode) && inode->i_cdev) |
319 | cd_forget(inode); | 318 | cd_forget(inode); |
320 | inode->i_state = I_CLEAR; | ||
321 | } | 319 | } |
322 | EXPORT_SYMBOL(clear_inode); | ||
323 | 320 | ||
324 | /* | 321 | /* |
325 | * dispose_list - dispose of the contents of a local list | 322 | * dispose_list - dispose of the contents of a local list |
@@ -338,9 +335,7 @@ static void dispose_list(struct list_head *head) | |||
338 | inode = list_first_entry(head, struct inode, i_list); | 335 | inode = list_first_entry(head, struct inode, i_list); |
339 | list_del(&inode->i_list); | 336 | list_del(&inode->i_list); |
340 | 337 | ||
341 | if (inode->i_data.nrpages) | 338 | evict(inode); |
342 | truncate_inode_pages(&inode->i_data, 0); | ||
343 | clear_inode(inode); | ||
344 | 339 | ||
345 | spin_lock(&inode_lock); | 340 | spin_lock(&inode_lock); |
346 | hlist_del_init(&inode->i_hash); | 341 | hlist_del_init(&inode->i_hash); |
@@ -413,7 +408,6 @@ int invalidate_inodes(struct super_block *sb) | |||
413 | 408 | ||
414 | down_write(&iprune_sem); | 409 | down_write(&iprune_sem); |
415 | spin_lock(&inode_lock); | 410 | spin_lock(&inode_lock); |
416 | inotify_unmount_inodes(&sb->s_inodes); | ||
417 | fsnotify_unmount_inodes(&sb->s_inodes); | 411 | fsnotify_unmount_inodes(&sb->s_inodes); |
418 | busy = invalidate_list(&sb->s_inodes, &throw_away); | 412 | busy = invalidate_list(&sb->s_inodes, &throw_away); |
419 | spin_unlock(&inode_lock); | 413 | spin_unlock(&inode_lock); |
@@ -553,7 +547,7 @@ repeat: | |||
553 | continue; | 547 | continue; |
554 | if (!test(inode, data)) | 548 | if (!test(inode, data)) |
555 | continue; | 549 | continue; |
556 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { | 550 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { |
557 | __wait_on_freeing_inode(inode); | 551 | __wait_on_freeing_inode(inode); |
558 | goto repeat; | 552 | goto repeat; |
559 | } | 553 | } |
@@ -578,7 +572,7 @@ repeat: | |||
578 | continue; | 572 | continue; |
579 | if (inode->i_sb != sb) | 573 | if (inode->i_sb != sb) |
580 | continue; | 574 | continue; |
581 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { | 575 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { |
582 | __wait_on_freeing_inode(inode); | 576 | __wait_on_freeing_inode(inode); |
583 | goto repeat; | 577 | goto repeat; |
584 | } | 578 | } |
@@ -840,7 +834,7 @@ EXPORT_SYMBOL(iunique); | |||
840 | struct inode *igrab(struct inode *inode) | 834 | struct inode *igrab(struct inode *inode) |
841 | { | 835 | { |
842 | spin_lock(&inode_lock); | 836 | spin_lock(&inode_lock); |
843 | if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))) | 837 | if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) |
844 | __iget(inode); | 838 | __iget(inode); |
845 | else | 839 | else |
846 | /* | 840 | /* |
@@ -1089,7 +1083,7 @@ int insert_inode_locked(struct inode *inode) | |||
1089 | continue; | 1083 | continue; |
1090 | if (old->i_sb != sb) | 1084 | if (old->i_sb != sb) |
1091 | continue; | 1085 | continue; |
1092 | if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) | 1086 | if (old->i_state & (I_FREEING|I_WILL_FREE)) |
1093 | continue; | 1087 | continue; |
1094 | break; | 1088 | break; |
1095 | } | 1089 | } |
@@ -1128,7 +1122,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, | |||
1128 | continue; | 1122 | continue; |
1129 | if (!test(old, data)) | 1123 | if (!test(old, data)) |
1130 | continue; | 1124 | continue; |
1131 | if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) | 1125 | if (old->i_state & (I_FREEING|I_WILL_FREE)) |
1132 | continue; | 1126 | continue; |
1133 | break; | 1127 | break; |
1134 | } | 1128 | } |
@@ -1180,69 +1174,51 @@ void remove_inode_hash(struct inode *inode) | |||
1180 | } | 1174 | } |
1181 | EXPORT_SYMBOL(remove_inode_hash); | 1175 | EXPORT_SYMBOL(remove_inode_hash); |
1182 | 1176 | ||
1177 | int generic_delete_inode(struct inode *inode) | ||
1178 | { | ||
1179 | return 1; | ||
1180 | } | ||
1181 | EXPORT_SYMBOL(generic_delete_inode); | ||
1182 | |||
1183 | /* | 1183 | /* |
1184 | * Tell the filesystem that this inode is no longer of any interest and should | 1184 | * Normal UNIX filesystem behaviour: delete the |
1185 | * be completely destroyed. | 1185 | * inode when the usage count drops to zero, and |
1186 | * | 1186 | * i_nlink is zero. |
1187 | * We leave the inode in the inode hash table until *after* the filesystem's | ||
1188 | * ->delete_inode completes. This ensures that an iget (such as nfsd might | ||
1189 | * instigate) will always find up-to-date information either in the hash or on | ||
1190 | * disk. | ||
1191 | * | ||
1192 | * I_FREEING is set so that no-one will take a new reference to the inode while | ||
1193 | * it is being deleted. | ||
1194 | */ | 1187 | */ |
1195 | void generic_delete_inode(struct inode *inode) | 1188 | int generic_drop_inode(struct inode *inode) |
1196 | { | 1189 | { |
1197 | const struct super_operations *op = inode->i_sb->s_op; | 1190 | return !inode->i_nlink || hlist_unhashed(&inode->i_hash); |
1198 | |||
1199 | list_del_init(&inode->i_list); | ||
1200 | list_del_init(&inode->i_sb_list); | ||
1201 | WARN_ON(inode->i_state & I_NEW); | ||
1202 | inode->i_state |= I_FREEING; | ||
1203 | inodes_stat.nr_inodes--; | ||
1204 | spin_unlock(&inode_lock); | ||
1205 | |||
1206 | if (op->delete_inode) { | ||
1207 | void (*delete)(struct inode *) = op->delete_inode; | ||
1208 | /* Filesystems implementing their own | ||
1209 | * s_op->delete_inode are required to call | ||
1210 | * truncate_inode_pages and clear_inode() | ||
1211 | * internally */ | ||
1212 | delete(inode); | ||
1213 | } else { | ||
1214 | truncate_inode_pages(&inode->i_data, 0); | ||
1215 | clear_inode(inode); | ||
1216 | } | ||
1217 | spin_lock(&inode_lock); | ||
1218 | hlist_del_init(&inode->i_hash); | ||
1219 | spin_unlock(&inode_lock); | ||
1220 | wake_up_inode(inode); | ||
1221 | BUG_ON(inode->i_state != I_CLEAR); | ||
1222 | destroy_inode(inode); | ||
1223 | } | 1191 | } |
1224 | EXPORT_SYMBOL(generic_delete_inode); | 1192 | EXPORT_SYMBOL_GPL(generic_drop_inode); |
1225 | 1193 | ||
1226 | /** | 1194 | /* |
1227 | * generic_detach_inode - remove inode from inode lists | 1195 | * Called when we're dropping the last reference |
1228 | * @inode: inode to remove | 1196 | * to an inode. |
1229 | * | ||
1230 | * Remove inode from inode lists, write it if it's dirty. This is just an | ||
1231 | * internal VFS helper exported for hugetlbfs. Do not use! | ||
1232 | * | 1197 | * |
1233 | * Returns 1 if inode should be completely destroyed. | 1198 | * Call the FS "drop_inode()" function, defaulting to |
1199 | * the legacy UNIX filesystem behaviour. If it tells | ||
1200 | * us to evict inode, do so. Otherwise, retain inode | ||
1201 | * in cache if fs is alive, sync and evict if fs is | ||
1202 | * shutting down. | ||
1234 | */ | 1203 | */ |
1235 | int generic_detach_inode(struct inode *inode) | 1204 | static void iput_final(struct inode *inode) |
1236 | { | 1205 | { |
1237 | struct super_block *sb = inode->i_sb; | 1206 | struct super_block *sb = inode->i_sb; |
1207 | const struct super_operations *op = inode->i_sb->s_op; | ||
1208 | int drop; | ||
1238 | 1209 | ||
1239 | if (!hlist_unhashed(&inode->i_hash)) { | 1210 | if (op && op->drop_inode) |
1211 | drop = op->drop_inode(inode); | ||
1212 | else | ||
1213 | drop = generic_drop_inode(inode); | ||
1214 | |||
1215 | if (!drop) { | ||
1240 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 1216 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) |
1241 | list_move(&inode->i_list, &inode_unused); | 1217 | list_move(&inode->i_list, &inode_unused); |
1242 | inodes_stat.nr_unused++; | 1218 | inodes_stat.nr_unused++; |
1243 | if (sb->s_flags & MS_ACTIVE) { | 1219 | if (sb->s_flags & MS_ACTIVE) { |
1244 | spin_unlock(&inode_lock); | 1220 | spin_unlock(&inode_lock); |
1245 | return 0; | 1221 | return; |
1246 | } | 1222 | } |
1247 | WARN_ON(inode->i_state & I_NEW); | 1223 | WARN_ON(inode->i_state & I_NEW); |
1248 | inode->i_state |= I_WILL_FREE; | 1224 | inode->i_state |= I_WILL_FREE; |
@@ -1260,56 +1236,15 @@ int generic_detach_inode(struct inode *inode) | |||
1260 | inode->i_state |= I_FREEING; | 1236 | inode->i_state |= I_FREEING; |
1261 | inodes_stat.nr_inodes--; | 1237 | inodes_stat.nr_inodes--; |
1262 | spin_unlock(&inode_lock); | 1238 | spin_unlock(&inode_lock); |
1263 | return 1; | 1239 | evict(inode); |
1264 | } | 1240 | spin_lock(&inode_lock); |
1265 | EXPORT_SYMBOL_GPL(generic_detach_inode); | 1241 | hlist_del_init(&inode->i_hash); |
1266 | 1242 | spin_unlock(&inode_lock); | |
1267 | static void generic_forget_inode(struct inode *inode) | ||
1268 | { | ||
1269 | if (!generic_detach_inode(inode)) | ||
1270 | return; | ||
1271 | if (inode->i_data.nrpages) | ||
1272 | truncate_inode_pages(&inode->i_data, 0); | ||
1273 | clear_inode(inode); | ||
1274 | wake_up_inode(inode); | 1243 | wake_up_inode(inode); |
1244 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); | ||
1275 | destroy_inode(inode); | 1245 | destroy_inode(inode); |
1276 | } | 1246 | } |
1277 | 1247 | ||
1278 | /* | ||
1279 | * Normal UNIX filesystem behaviour: delete the | ||
1280 | * inode when the usage count drops to zero, and | ||
1281 | * i_nlink is zero. | ||
1282 | */ | ||
1283 | void generic_drop_inode(struct inode *inode) | ||
1284 | { | ||
1285 | if (!inode->i_nlink) | ||
1286 | generic_delete_inode(inode); | ||
1287 | else | ||
1288 | generic_forget_inode(inode); | ||
1289 | } | ||
1290 | EXPORT_SYMBOL_GPL(generic_drop_inode); | ||
1291 | |||
1292 | /* | ||
1293 | * Called when we're dropping the last reference | ||
1294 | * to an inode. | ||
1295 | * | ||
1296 | * Call the FS "drop()" function, defaulting to | ||
1297 | * the legacy UNIX filesystem behaviour.. | ||
1298 | * | ||
1299 | * NOTE! NOTE! NOTE! We're called with the inode lock | ||
1300 | * held, and the drop function is supposed to release | ||
1301 | * the lock! | ||
1302 | */ | ||
1303 | static inline void iput_final(struct inode *inode) | ||
1304 | { | ||
1305 | const struct super_operations *op = inode->i_sb->s_op; | ||
1306 | void (*drop)(struct inode *) = generic_drop_inode; | ||
1307 | |||
1308 | if (op && op->drop_inode) | ||
1309 | drop = op->drop_inode; | ||
1310 | drop(inode); | ||
1311 | } | ||
1312 | |||
1313 | /** | 1248 | /** |
1314 | * iput - put an inode | 1249 | * iput - put an inode |
1315 | * @inode: inode to put | 1250 | * @inode: inode to put |
@@ -1322,7 +1257,7 @@ static inline void iput_final(struct inode *inode) | |||
1322 | void iput(struct inode *inode) | 1257 | void iput(struct inode *inode) |
1323 | { | 1258 | { |
1324 | if (inode) { | 1259 | if (inode) { |
1325 | BUG_ON(inode->i_state == I_CLEAR); | 1260 | BUG_ON(inode->i_state & I_CLEAR); |
1326 | 1261 | ||
1327 | if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) | 1262 | if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) |
1328 | iput_final(inode); | 1263 | iput_final(inode); |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 93d1e47647bd..f19ce94693d8 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1281,13 +1281,9 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, | |||
1281 | int journal_check_available_features (journal_t *journal, unsigned long compat, | 1281 | int journal_check_available_features (journal_t *journal, unsigned long compat, |
1282 | unsigned long ro, unsigned long incompat) | 1282 | unsigned long ro, unsigned long incompat) |
1283 | { | 1283 | { |
1284 | journal_superblock_t *sb; | ||
1285 | |||
1286 | if (!compat && !ro && !incompat) | 1284 | if (!compat && !ro && !incompat) |
1287 | return 1; | 1285 | return 1; |
1288 | 1286 | ||
1289 | sb = journal->j_superblock; | ||
1290 | |||
1291 | /* We can support any known requested features iff the | 1287 | /* We can support any known requested features iff the |
1292 | * superblock is in version 2. Otherwise we fail to support any | 1288 | * superblock is in version 2. Otherwise we fail to support any |
1293 | * extended sb features. */ | 1289 | * extended sb features. */ |
@@ -1481,7 +1477,6 @@ int journal_flush(journal_t *journal) | |||
1481 | 1477 | ||
1482 | int journal_wipe(journal_t *journal, int write) | 1478 | int journal_wipe(journal_t *journal, int write) |
1483 | { | 1479 | { |
1484 | journal_superblock_t *sb; | ||
1485 | int err = 0; | 1480 | int err = 0; |
1486 | 1481 | ||
1487 | J_ASSERT (!(journal->j_flags & JFS_LOADED)); | 1482 | J_ASSERT (!(journal->j_flags & JFS_LOADED)); |
@@ -1490,8 +1485,6 @@ int journal_wipe(journal_t *journal, int write) | |||
1490 | if (err) | 1485 | if (err) |
1491 | return err; | 1486 | return err; |
1492 | 1487 | ||
1493 | sb = journal->j_superblock; | ||
1494 | |||
1495 | if (!journal->j_tail) | 1488 | if (!journal->j_tail) |
1496 | goto no_recovery; | 1489 | goto no_recovery; |
1497 | 1490 | ||
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 54c9bc9e1b17..81051dafebf5 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c | |||
@@ -283,12 +283,9 @@ int journal_recover(journal_t *journal) | |||
283 | int journal_skip_recovery(journal_t *journal) | 283 | int journal_skip_recovery(journal_t *journal) |
284 | { | 284 | { |
285 | int err; | 285 | int err; |
286 | journal_superblock_t * sb; | ||
287 | |||
288 | struct recovery_info info; | 286 | struct recovery_info info; |
289 | 287 | ||
290 | memset (&info, 0, sizeof(info)); | 288 | memset (&info, 0, sizeof(info)); |
291 | sb = journal->j_superblock; | ||
292 | 289 | ||
293 | err = do_one_pass(journal, &info, PASS_SCAN); | 290 | err = do_one_pass(journal, &info, PASS_SCAN); |
294 | 291 | ||
@@ -297,7 +294,8 @@ int journal_skip_recovery(journal_t *journal) | |||
297 | ++journal->j_transaction_sequence; | 294 | ++journal->j_transaction_sequence; |
298 | } else { | 295 | } else { |
299 | #ifdef CONFIG_JBD_DEBUG | 296 | #ifdef CONFIG_JBD_DEBUG |
300 | int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); | 297 | int dropped = info.end_transaction - |
298 | be32_to_cpu(journal->j_superblock->s_sequence); | ||
301 | #endif | 299 | #endif |
302 | jbd_debug(1, | 300 | jbd_debug(1, |
303 | "JBD: ignoring %d transaction%s from the journal.\n", | 301 | "JBD: ignoring %d transaction%s from the journal.\n", |
@@ -321,11 +319,6 @@ static int do_one_pass(journal_t *journal, | |||
321 | unsigned int sequence; | 319 | unsigned int sequence; |
322 | int blocktype; | 320 | int blocktype; |
323 | 321 | ||
324 | /* Precompute the maximum metadata descriptors in a descriptor block */ | ||
325 | int MAX_BLOCKS_PER_DESC; | ||
326 | MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) | ||
327 | / sizeof(journal_block_tag_t)); | ||
328 | |||
329 | /* | 322 | /* |
330 | * First thing is to establish what we expect to find in the log | 323 | * First thing is to establish what we expect to find in the log |
331 | * (in terms of transaction IDs), and where (in terms of log | 324 | * (in terms of transaction IDs), and where (in terms of log |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 076d1cc44f95..1c23a0f4e8a3 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
118 | void __jbd2_log_wait_for_space(journal_t *journal) | 118 | void __jbd2_log_wait_for_space(journal_t *journal) |
119 | { | 119 | { |
120 | int nblocks, space_left; | 120 | int nblocks, space_left; |
121 | assert_spin_locked(&journal->j_state_lock); | 121 | /* assert_spin_locked(&journal->j_state_lock); */ |
122 | 122 | ||
123 | nblocks = jbd_space_needed(journal); | 123 | nblocks = jbd_space_needed(journal); |
124 | while (__jbd2_log_space_left(journal) < nblocks) { | 124 | while (__jbd2_log_space_left(journal) < nblocks) { |
125 | if (journal->j_flags & JBD2_ABORT) | 125 | if (journal->j_flags & JBD2_ABORT) |
126 | return; | 126 | return; |
127 | spin_unlock(&journal->j_state_lock); | 127 | write_unlock(&journal->j_state_lock); |
128 | mutex_lock(&journal->j_checkpoint_mutex); | 128 | mutex_lock(&journal->j_checkpoint_mutex); |
129 | 129 | ||
130 | /* | 130 | /* |
@@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
138 | * filesystem, so abort the journal and leave a stack | 138 | * filesystem, so abort the journal and leave a stack |
139 | * trace for forensic evidence. | 139 | * trace for forensic evidence. |
140 | */ | 140 | */ |
141 | spin_lock(&journal->j_state_lock); | 141 | write_lock(&journal->j_state_lock); |
142 | spin_lock(&journal->j_list_lock); | 142 | spin_lock(&journal->j_list_lock); |
143 | nblocks = jbd_space_needed(journal); | 143 | nblocks = jbd_space_needed(journal); |
144 | space_left = __jbd2_log_space_left(journal); | 144 | space_left = __jbd2_log_space_left(journal); |
@@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
149 | if (journal->j_committing_transaction) | 149 | if (journal->j_committing_transaction) |
150 | tid = journal->j_committing_transaction->t_tid; | 150 | tid = journal->j_committing_transaction->t_tid; |
151 | spin_unlock(&journal->j_list_lock); | 151 | spin_unlock(&journal->j_list_lock); |
152 | spin_unlock(&journal->j_state_lock); | 152 | write_unlock(&journal->j_state_lock); |
153 | if (chkpt) { | 153 | if (chkpt) { |
154 | jbd2_log_do_checkpoint(journal); | 154 | jbd2_log_do_checkpoint(journal); |
155 | } else if (jbd2_cleanup_journal_tail(journal) == 0) { | 155 | } else if (jbd2_cleanup_journal_tail(journal) == 0) { |
@@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
167 | WARN_ON(1); | 167 | WARN_ON(1); |
168 | jbd2_journal_abort(journal, 0); | 168 | jbd2_journal_abort(journal, 0); |
169 | } | 169 | } |
170 | spin_lock(&journal->j_state_lock); | 170 | write_lock(&journal->j_state_lock); |
171 | } else { | 171 | } else { |
172 | spin_unlock(&journal->j_list_lock); | 172 | spin_unlock(&journal->j_list_lock); |
173 | } | 173 | } |
@@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
474 | * next transaction ID we will write, and where it will | 474 | * next transaction ID we will write, and where it will |
475 | * start. */ | 475 | * start. */ |
476 | 476 | ||
477 | spin_lock(&journal->j_state_lock); | 477 | write_lock(&journal->j_state_lock); |
478 | spin_lock(&journal->j_list_lock); | 478 | spin_lock(&journal->j_list_lock); |
479 | transaction = journal->j_checkpoint_transactions; | 479 | transaction = journal->j_checkpoint_transactions; |
480 | if (transaction) { | 480 | if (transaction) { |
@@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
496 | /* If the oldest pinned transaction is at the tail of the log | 496 | /* If the oldest pinned transaction is at the tail of the log |
497 | already then there's not much we can do right now. */ | 497 | already then there's not much we can do right now. */ |
498 | if (journal->j_tail_sequence == first_tid) { | 498 | if (journal->j_tail_sequence == first_tid) { |
499 | spin_unlock(&journal->j_state_lock); | 499 | write_unlock(&journal->j_state_lock); |
500 | return 1; | 500 | return 1; |
501 | } | 501 | } |
502 | 502 | ||
@@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
516 | journal->j_free += freed; | 516 | journal->j_free += freed; |
517 | journal->j_tail_sequence = first_tid; | 517 | journal->j_tail_sequence = first_tid; |
518 | journal->j_tail = blocknr; | 518 | journal->j_tail = blocknr; |
519 | spin_unlock(&journal->j_state_lock); | 519 | write_unlock(&journal->j_state_lock); |
520 | 520 | ||
521 | /* | 521 | /* |
522 | * If there is an external journal, we need to make sure that | 522 | * If there is an external journal, we need to make sure that |
@@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
775 | J_ASSERT(transaction->t_log_list == NULL); | 775 | J_ASSERT(transaction->t_log_list == NULL); |
776 | J_ASSERT(transaction->t_checkpoint_list == NULL); | 776 | J_ASSERT(transaction->t_checkpoint_list == NULL); |
777 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); | 777 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); |
778 | J_ASSERT(transaction->t_updates == 0); | 778 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); |
779 | J_ASSERT(journal->j_committing_transaction != transaction); | 779 | J_ASSERT(journal->j_committing_transaction != transaction); |
780 | J_ASSERT(journal->j_running_transaction != transaction); | 780 | J_ASSERT(journal->j_running_transaction != transaction); |
781 | 781 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 75716d3d2be0..f52e5e8049f1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -150,11 +150,11 @@ static int journal_submit_commit_record(journal_t *journal, | |||
150 | */ | 150 | */ |
151 | if (ret == -EOPNOTSUPP && barrier_done) { | 151 | if (ret == -EOPNOTSUPP && barrier_done) { |
152 | printk(KERN_WARNING | 152 | printk(KERN_WARNING |
153 | "JBD: barrier-based sync failed on %s - " | 153 | "JBD2: Disabling barriers on %s, " |
154 | "disabling barriers\n", journal->j_devname); | 154 | "not supported by device\n", journal->j_devname); |
155 | spin_lock(&journal->j_state_lock); | 155 | write_lock(&journal->j_state_lock); |
156 | journal->j_flags &= ~JBD2_BARRIER; | 156 | journal->j_flags &= ~JBD2_BARRIER; |
157 | spin_unlock(&journal->j_state_lock); | 157 | write_unlock(&journal->j_state_lock); |
158 | 158 | ||
159 | /* And try again, without the barrier */ | 159 | /* And try again, without the barrier */ |
160 | lock_buffer(bh); | 160 | lock_buffer(bh); |
@@ -180,11 +180,11 @@ retry: | |||
180 | wait_on_buffer(bh); | 180 | wait_on_buffer(bh); |
181 | if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { | 181 | if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { |
182 | printk(KERN_WARNING | 182 | printk(KERN_WARNING |
183 | "JBD2: wait_on_commit_record: sync failed on %s - " | 183 | "JBD2: %s: disabling barries on %s - not supported " |
184 | "disabling barriers\n", journal->j_devname); | 184 | "by device\n", __func__, journal->j_devname); |
185 | spin_lock(&journal->j_state_lock); | 185 | write_lock(&journal->j_state_lock); |
186 | journal->j_flags &= ~JBD2_BARRIER; | 186 | journal->j_flags &= ~JBD2_BARRIER; |
187 | spin_unlock(&journal->j_state_lock); | 187 | write_unlock(&journal->j_state_lock); |
188 | 188 | ||
189 | lock_buffer(bh); | 189 | lock_buffer(bh); |
190 | clear_buffer_dirty(bh); | 190 | clear_buffer_dirty(bh); |
@@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
400 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 400 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
401 | commit_transaction->t_tid); | 401 | commit_transaction->t_tid); |
402 | 402 | ||
403 | spin_lock(&journal->j_state_lock); | 403 | write_lock(&journal->j_state_lock); |
404 | commit_transaction->t_state = T_LOCKED; | 404 | commit_transaction->t_state = T_LOCKED; |
405 | 405 | ||
406 | /* | 406 | /* |
@@ -417,23 +417,23 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
417 | stats.run.rs_locked); | 417 | stats.run.rs_locked); |
418 | 418 | ||
419 | spin_lock(&commit_transaction->t_handle_lock); | 419 | spin_lock(&commit_transaction->t_handle_lock); |
420 | while (commit_transaction->t_updates) { | 420 | while (atomic_read(&commit_transaction->t_updates)) { |
421 | DEFINE_WAIT(wait); | 421 | DEFINE_WAIT(wait); |
422 | 422 | ||
423 | prepare_to_wait(&journal->j_wait_updates, &wait, | 423 | prepare_to_wait(&journal->j_wait_updates, &wait, |
424 | TASK_UNINTERRUPTIBLE); | 424 | TASK_UNINTERRUPTIBLE); |
425 | if (commit_transaction->t_updates) { | 425 | if (atomic_read(&commit_transaction->t_updates)) { |
426 | spin_unlock(&commit_transaction->t_handle_lock); | 426 | spin_unlock(&commit_transaction->t_handle_lock); |
427 | spin_unlock(&journal->j_state_lock); | 427 | write_unlock(&journal->j_state_lock); |
428 | schedule(); | 428 | schedule(); |
429 | spin_lock(&journal->j_state_lock); | 429 | write_lock(&journal->j_state_lock); |
430 | spin_lock(&commit_transaction->t_handle_lock); | 430 | spin_lock(&commit_transaction->t_handle_lock); |
431 | } | 431 | } |
432 | finish_wait(&journal->j_wait_updates, &wait); | 432 | finish_wait(&journal->j_wait_updates, &wait); |
433 | } | 433 | } |
434 | spin_unlock(&commit_transaction->t_handle_lock); | 434 | spin_unlock(&commit_transaction->t_handle_lock); |
435 | 435 | ||
436 | J_ASSERT (commit_transaction->t_outstanding_credits <= | 436 | J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <= |
437 | journal->j_max_transaction_buffers); | 437 | journal->j_max_transaction_buffers); |
438 | 438 | ||
439 | /* | 439 | /* |
@@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
497 | start_time = ktime_get(); | 497 | start_time = ktime_get(); |
498 | commit_transaction->t_log_start = journal->j_head; | 498 | commit_transaction->t_log_start = journal->j_head; |
499 | wake_up(&journal->j_wait_transaction_locked); | 499 | wake_up(&journal->j_wait_transaction_locked); |
500 | spin_unlock(&journal->j_state_lock); | 500 | write_unlock(&journal->j_state_lock); |
501 | 501 | ||
502 | jbd_debug (3, "JBD: commit phase 2\n"); | 502 | jbd_debug (3, "JBD: commit phase 2\n"); |
503 | 503 | ||
@@ -519,19 +519,20 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
519 | * transaction! Now comes the tricky part: we need to write out | 519 | * transaction! Now comes the tricky part: we need to write out |
520 | * metadata. Loop over the transaction's entire buffer list: | 520 | * metadata. Loop over the transaction's entire buffer list: |
521 | */ | 521 | */ |
522 | spin_lock(&journal->j_state_lock); | 522 | write_lock(&journal->j_state_lock); |
523 | commit_transaction->t_state = T_COMMIT; | 523 | commit_transaction->t_state = T_COMMIT; |
524 | spin_unlock(&journal->j_state_lock); | 524 | write_unlock(&journal->j_state_lock); |
525 | 525 | ||
526 | trace_jbd2_commit_logging(journal, commit_transaction); | 526 | trace_jbd2_commit_logging(journal, commit_transaction); |
527 | stats.run.rs_logging = jiffies; | 527 | stats.run.rs_logging = jiffies; |
528 | stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, | 528 | stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, |
529 | stats.run.rs_logging); | 529 | stats.run.rs_logging); |
530 | stats.run.rs_blocks = commit_transaction->t_outstanding_credits; | 530 | stats.run.rs_blocks = |
531 | atomic_read(&commit_transaction->t_outstanding_credits); | ||
531 | stats.run.rs_blocks_logged = 0; | 532 | stats.run.rs_blocks_logged = 0; |
532 | 533 | ||
533 | J_ASSERT(commit_transaction->t_nr_buffers <= | 534 | J_ASSERT(commit_transaction->t_nr_buffers <= |
534 | commit_transaction->t_outstanding_credits); | 535 | atomic_read(&commit_transaction->t_outstanding_credits)); |
535 | 536 | ||
536 | err = 0; | 537 | err = 0; |
537 | descriptor = NULL; | 538 | descriptor = NULL; |
@@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
616 | * the free space in the log, but this counter is changed | 617 | * the free space in the log, but this counter is changed |
617 | * by jbd2_journal_next_log_block() also. | 618 | * by jbd2_journal_next_log_block() also. |
618 | */ | 619 | */ |
619 | commit_transaction->t_outstanding_credits--; | 620 | atomic_dec(&commit_transaction->t_outstanding_credits); |
620 | 621 | ||
621 | /* Bump b_count to prevent truncate from stumbling over | 622 | /* Bump b_count to prevent truncate from stumbling over |
622 | the shadowed buffer! @@@ This can go if we ever get | 623 | the shadowed buffer! @@@ This can go if we ever get |
@@ -977,7 +978,7 @@ restart_loop: | |||
977 | * __jbd2_journal_drop_transaction(). Otherwise we could race with | 978 | * __jbd2_journal_drop_transaction(). Otherwise we could race with |
978 | * other checkpointing code processing the transaction... | 979 | * other checkpointing code processing the transaction... |
979 | */ | 980 | */ |
980 | spin_lock(&journal->j_state_lock); | 981 | write_lock(&journal->j_state_lock); |
981 | spin_lock(&journal->j_list_lock); | 982 | spin_lock(&journal->j_list_lock); |
982 | /* | 983 | /* |
983 | * Now recheck if some buffers did not get attached to the transaction | 984 | * Now recheck if some buffers did not get attached to the transaction |
@@ -985,7 +986,7 @@ restart_loop: | |||
985 | */ | 986 | */ |
986 | if (commit_transaction->t_forget) { | 987 | if (commit_transaction->t_forget) { |
987 | spin_unlock(&journal->j_list_lock); | 988 | spin_unlock(&journal->j_list_lock); |
988 | spin_unlock(&journal->j_state_lock); | 989 | write_unlock(&journal->j_state_lock); |
989 | goto restart_loop; | 990 | goto restart_loop; |
990 | } | 991 | } |
991 | 992 | ||
@@ -1003,7 +1004,8 @@ restart_loop: | |||
1003 | * File the transaction statistics | 1004 | * File the transaction statistics |
1004 | */ | 1005 | */ |
1005 | stats.ts_tid = commit_transaction->t_tid; | 1006 | stats.ts_tid = commit_transaction->t_tid; |
1006 | stats.run.rs_handle_count = commit_transaction->t_handle_count; | 1007 | stats.run.rs_handle_count = |
1008 | atomic_read(&commit_transaction->t_handle_count); | ||
1007 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, | 1009 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, |
1008 | commit_transaction->t_tid, &stats.run); | 1010 | commit_transaction->t_tid, &stats.run); |
1009 | 1011 | ||
@@ -1037,7 +1039,7 @@ restart_loop: | |||
1037 | journal->j_average_commit_time*3) / 4; | 1039 | journal->j_average_commit_time*3) / 4; |
1038 | else | 1040 | else |
1039 | journal->j_average_commit_time = commit_time; | 1041 | journal->j_average_commit_time = commit_time; |
1040 | spin_unlock(&journal->j_state_lock); | 1042 | write_unlock(&journal->j_state_lock); |
1041 | 1043 | ||
1042 | if (commit_transaction->t_checkpoint_list == NULL && | 1044 | if (commit_transaction->t_checkpoint_list == NULL && |
1043 | commit_transaction->t_checkpoint_io_list == NULL) { | 1045 | commit_transaction->t_checkpoint_io_list == NULL) { |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 036880895bfc..ad5866aaf0f9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/hash.h> | 41 | #include <linux/hash.h> |
42 | #include <linux/log2.h> | 42 | #include <linux/log2.h> |
43 | #include <linux/vmalloc.h> | 43 | #include <linux/vmalloc.h> |
44 | #include <linux/backing-dev.h> | ||
44 | 45 | ||
45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
46 | #include <trace/events/jbd2.h> | 47 | #include <trace/events/jbd2.h> |
@@ -48,8 +49,6 @@ | |||
48 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
49 | #include <asm/page.h> | 50 | #include <asm/page.h> |
50 | 51 | ||
51 | EXPORT_SYMBOL(jbd2_journal_start); | ||
52 | EXPORT_SYMBOL(jbd2_journal_restart); | ||
53 | EXPORT_SYMBOL(jbd2_journal_extend); | 52 | EXPORT_SYMBOL(jbd2_journal_extend); |
54 | EXPORT_SYMBOL(jbd2_journal_stop); | 53 | EXPORT_SYMBOL(jbd2_journal_stop); |
55 | EXPORT_SYMBOL(jbd2_journal_lock_updates); | 54 | EXPORT_SYMBOL(jbd2_journal_lock_updates); |
@@ -143,7 +142,7 @@ static int kjournald2(void *arg) | |||
143 | /* | 142 | /* |
144 | * And now, wait forever for commit wakeup events. | 143 | * And now, wait forever for commit wakeup events. |
145 | */ | 144 | */ |
146 | spin_lock(&journal->j_state_lock); | 145 | write_lock(&journal->j_state_lock); |
147 | 146 | ||
148 | loop: | 147 | loop: |
149 | if (journal->j_flags & JBD2_UNMOUNT) | 148 | if (journal->j_flags & JBD2_UNMOUNT) |
@@ -154,10 +153,10 @@ loop: | |||
154 | 153 | ||
155 | if (journal->j_commit_sequence != journal->j_commit_request) { | 154 | if (journal->j_commit_sequence != journal->j_commit_request) { |
156 | jbd_debug(1, "OK, requests differ\n"); | 155 | jbd_debug(1, "OK, requests differ\n"); |
157 | spin_unlock(&journal->j_state_lock); | 156 | write_unlock(&journal->j_state_lock); |
158 | del_timer_sync(&journal->j_commit_timer); | 157 | del_timer_sync(&journal->j_commit_timer); |
159 | jbd2_journal_commit_transaction(journal); | 158 | jbd2_journal_commit_transaction(journal); |
160 | spin_lock(&journal->j_state_lock); | 159 | write_lock(&journal->j_state_lock); |
161 | goto loop; | 160 | goto loop; |
162 | } | 161 | } |
163 | 162 | ||
@@ -169,9 +168,9 @@ loop: | |||
169 | * be already stopped. | 168 | * be already stopped. |
170 | */ | 169 | */ |
171 | jbd_debug(1, "Now suspending kjournald2\n"); | 170 | jbd_debug(1, "Now suspending kjournald2\n"); |
172 | spin_unlock(&journal->j_state_lock); | 171 | write_unlock(&journal->j_state_lock); |
173 | refrigerator(); | 172 | refrigerator(); |
174 | spin_lock(&journal->j_state_lock); | 173 | write_lock(&journal->j_state_lock); |
175 | } else { | 174 | } else { |
176 | /* | 175 | /* |
177 | * We assume on resume that commits are already there, | 176 | * We assume on resume that commits are already there, |
@@ -191,9 +190,9 @@ loop: | |||
191 | if (journal->j_flags & JBD2_UNMOUNT) | 190 | if (journal->j_flags & JBD2_UNMOUNT) |
192 | should_sleep = 0; | 191 | should_sleep = 0; |
193 | if (should_sleep) { | 192 | if (should_sleep) { |
194 | spin_unlock(&journal->j_state_lock); | 193 | write_unlock(&journal->j_state_lock); |
195 | schedule(); | 194 | schedule(); |
196 | spin_lock(&journal->j_state_lock); | 195 | write_lock(&journal->j_state_lock); |
197 | } | 196 | } |
198 | finish_wait(&journal->j_wait_commit, &wait); | 197 | finish_wait(&journal->j_wait_commit, &wait); |
199 | } | 198 | } |
@@ -211,7 +210,7 @@ loop: | |||
211 | goto loop; | 210 | goto loop; |
212 | 211 | ||
213 | end_loop: | 212 | end_loop: |
214 | spin_unlock(&journal->j_state_lock); | 213 | write_unlock(&journal->j_state_lock); |
215 | del_timer_sync(&journal->j_commit_timer); | 214 | del_timer_sync(&journal->j_commit_timer); |
216 | journal->j_task = NULL; | 215 | journal->j_task = NULL; |
217 | wake_up(&journal->j_wait_done_commit); | 216 | wake_up(&journal->j_wait_done_commit); |
@@ -234,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal) | |||
234 | 233 | ||
235 | static void journal_kill_thread(journal_t *journal) | 234 | static void journal_kill_thread(journal_t *journal) |
236 | { | 235 | { |
237 | spin_lock(&journal->j_state_lock); | 236 | write_lock(&journal->j_state_lock); |
238 | journal->j_flags |= JBD2_UNMOUNT; | 237 | journal->j_flags |= JBD2_UNMOUNT; |
239 | 238 | ||
240 | while (journal->j_task) { | 239 | while (journal->j_task) { |
241 | wake_up(&journal->j_wait_commit); | 240 | wake_up(&journal->j_wait_commit); |
242 | spin_unlock(&journal->j_state_lock); | 241 | write_unlock(&journal->j_state_lock); |
243 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); | 242 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); |
244 | spin_lock(&journal->j_state_lock); | 243 | write_lock(&journal->j_state_lock); |
245 | } | 244 | } |
246 | spin_unlock(&journal->j_state_lock); | 245 | write_unlock(&journal->j_state_lock); |
247 | } | 246 | } |
248 | 247 | ||
249 | /* | 248 | /* |
@@ -310,7 +309,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, | |||
310 | */ | 309 | */ |
311 | J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); | 310 | J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); |
312 | 311 | ||
313 | new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); | 312 | retry_alloc: |
313 | new_bh = alloc_buffer_head(GFP_NOFS); | ||
314 | if (!new_bh) { | ||
315 | /* | ||
316 | * Failure is not an option, but __GFP_NOFAIL is going | ||
317 | * away; so we retry ourselves here. | ||
318 | */ | ||
319 | congestion_wait(BLK_RW_ASYNC, HZ/50); | ||
320 | goto retry_alloc; | ||
321 | } | ||
322 | |||
314 | /* keep subsequent assertions sane */ | 323 | /* keep subsequent assertions sane */ |
315 | new_bh->b_state = 0; | 324 | new_bh->b_state = 0; |
316 | init_buffer(new_bh, NULL, NULL); | 325 | init_buffer(new_bh, NULL, NULL); |
@@ -442,7 +451,7 @@ int __jbd2_log_space_left(journal_t *journal) | |||
442 | { | 451 | { |
443 | int left = journal->j_free; | 452 | int left = journal->j_free; |
444 | 453 | ||
445 | assert_spin_locked(&journal->j_state_lock); | 454 | /* assert_spin_locked(&journal->j_state_lock); */ |
446 | 455 | ||
447 | /* | 456 | /* |
448 | * Be pessimistic here about the number of those free blocks which | 457 | * Be pessimistic here about the number of those free blocks which |
@@ -487,9 +496,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) | |||
487 | { | 496 | { |
488 | int ret; | 497 | int ret; |
489 | 498 | ||
490 | spin_lock(&journal->j_state_lock); | 499 | write_lock(&journal->j_state_lock); |
491 | ret = __jbd2_log_start_commit(journal, tid); | 500 | ret = __jbd2_log_start_commit(journal, tid); |
492 | spin_unlock(&journal->j_state_lock); | 501 | write_unlock(&journal->j_state_lock); |
493 | return ret; | 502 | return ret; |
494 | } | 503 | } |
495 | 504 | ||
@@ -508,7 +517,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
508 | transaction_t *transaction = NULL; | 517 | transaction_t *transaction = NULL; |
509 | tid_t tid; | 518 | tid_t tid; |
510 | 519 | ||
511 | spin_lock(&journal->j_state_lock); | 520 | read_lock(&journal->j_state_lock); |
512 | if (journal->j_running_transaction && !current->journal_info) { | 521 | if (journal->j_running_transaction && !current->journal_info) { |
513 | transaction = journal->j_running_transaction; | 522 | transaction = journal->j_running_transaction; |
514 | __jbd2_log_start_commit(journal, transaction->t_tid); | 523 | __jbd2_log_start_commit(journal, transaction->t_tid); |
@@ -516,12 +525,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
516 | transaction = journal->j_committing_transaction; | 525 | transaction = journal->j_committing_transaction; |
517 | 526 | ||
518 | if (!transaction) { | 527 | if (!transaction) { |
519 | spin_unlock(&journal->j_state_lock); | 528 | read_unlock(&journal->j_state_lock); |
520 | return 0; /* Nothing to retry */ | 529 | return 0; /* Nothing to retry */ |
521 | } | 530 | } |
522 | 531 | ||
523 | tid = transaction->t_tid; | 532 | tid = transaction->t_tid; |
524 | spin_unlock(&journal->j_state_lock); | 533 | read_unlock(&journal->j_state_lock); |
525 | jbd2_log_wait_commit(journal, tid); | 534 | jbd2_log_wait_commit(journal, tid); |
526 | return 1; | 535 | return 1; |
527 | } | 536 | } |
@@ -535,7 +544,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | |||
535 | { | 544 | { |
536 | int ret = 0; | 545 | int ret = 0; |
537 | 546 | ||
538 | spin_lock(&journal->j_state_lock); | 547 | write_lock(&journal->j_state_lock); |
539 | if (journal->j_running_transaction) { | 548 | if (journal->j_running_transaction) { |
540 | tid_t tid = journal->j_running_transaction->t_tid; | 549 | tid_t tid = journal->j_running_transaction->t_tid; |
541 | 550 | ||
@@ -554,7 +563,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | |||
554 | *ptid = journal->j_committing_transaction->t_tid; | 563 | *ptid = journal->j_committing_transaction->t_tid; |
555 | ret = 1; | 564 | ret = 1; |
556 | } | 565 | } |
557 | spin_unlock(&journal->j_state_lock); | 566 | write_unlock(&journal->j_state_lock); |
558 | return ret; | 567 | return ret; |
559 | } | 568 | } |
560 | 569 | ||
@@ -566,26 +575,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
566 | { | 575 | { |
567 | int err = 0; | 576 | int err = 0; |
568 | 577 | ||
578 | read_lock(&journal->j_state_lock); | ||
569 | #ifdef CONFIG_JBD2_DEBUG | 579 | #ifdef CONFIG_JBD2_DEBUG |
570 | spin_lock(&journal->j_state_lock); | ||
571 | if (!tid_geq(journal->j_commit_request, tid)) { | 580 | if (!tid_geq(journal->j_commit_request, tid)) { |
572 | printk(KERN_EMERG | 581 | printk(KERN_EMERG |
573 | "%s: error: j_commit_request=%d, tid=%d\n", | 582 | "%s: error: j_commit_request=%d, tid=%d\n", |
574 | __func__, journal->j_commit_request, tid); | 583 | __func__, journal->j_commit_request, tid); |
575 | } | 584 | } |
576 | spin_unlock(&journal->j_state_lock); | ||
577 | #endif | 585 | #endif |
578 | spin_lock(&journal->j_state_lock); | ||
579 | while (tid_gt(tid, journal->j_commit_sequence)) { | 586 | while (tid_gt(tid, journal->j_commit_sequence)) { |
580 | jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", | 587 | jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", |
581 | tid, journal->j_commit_sequence); | 588 | tid, journal->j_commit_sequence); |
582 | wake_up(&journal->j_wait_commit); | 589 | wake_up(&journal->j_wait_commit); |
583 | spin_unlock(&journal->j_state_lock); | 590 | read_unlock(&journal->j_state_lock); |
584 | wait_event(journal->j_wait_done_commit, | 591 | wait_event(journal->j_wait_done_commit, |
585 | !tid_gt(tid, journal->j_commit_sequence)); | 592 | !tid_gt(tid, journal->j_commit_sequence)); |
586 | spin_lock(&journal->j_state_lock); | 593 | read_lock(&journal->j_state_lock); |
587 | } | 594 | } |
588 | spin_unlock(&journal->j_state_lock); | 595 | read_unlock(&journal->j_state_lock); |
589 | 596 | ||
590 | if (unlikely(is_journal_aborted(journal))) { | 597 | if (unlikely(is_journal_aborted(journal))) { |
591 | printk(KERN_EMERG "journal commit I/O error\n"); | 598 | printk(KERN_EMERG "journal commit I/O error\n"); |
@@ -602,7 +609,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) | |||
602 | { | 609 | { |
603 | unsigned long blocknr; | 610 | unsigned long blocknr; |
604 | 611 | ||
605 | spin_lock(&journal->j_state_lock); | 612 | write_lock(&journal->j_state_lock); |
606 | J_ASSERT(journal->j_free > 1); | 613 | J_ASSERT(journal->j_free > 1); |
607 | 614 | ||
608 | blocknr = journal->j_head; | 615 | blocknr = journal->j_head; |
@@ -610,7 +617,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) | |||
610 | journal->j_free--; | 617 | journal->j_free--; |
611 | if (journal->j_head == journal->j_last) | 618 | if (journal->j_head == journal->j_last) |
612 | journal->j_head = journal->j_first; | 619 | journal->j_head = journal->j_first; |
613 | spin_unlock(&journal->j_state_lock); | 620 | write_unlock(&journal->j_state_lock); |
614 | return jbd2_journal_bmap(journal, blocknr, retp); | 621 | return jbd2_journal_bmap(journal, blocknr, retp); |
615 | } | 622 | } |
616 | 623 | ||
@@ -830,7 +837,7 @@ static journal_t * journal_init_common (void) | |||
830 | mutex_init(&journal->j_checkpoint_mutex); | 837 | mutex_init(&journal->j_checkpoint_mutex); |
831 | spin_lock_init(&journal->j_revoke_lock); | 838 | spin_lock_init(&journal->j_revoke_lock); |
832 | spin_lock_init(&journal->j_list_lock); | 839 | spin_lock_init(&journal->j_list_lock); |
833 | spin_lock_init(&journal->j_state_lock); | 840 | rwlock_init(&journal->j_state_lock); |
834 | 841 | ||
835 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); | 842 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); |
836 | journal->j_min_batch_time = 0; | 843 | journal->j_min_batch_time = 0; |
@@ -1096,14 +1103,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
1096 | set_buffer_uptodate(bh); | 1103 | set_buffer_uptodate(bh); |
1097 | } | 1104 | } |
1098 | 1105 | ||
1099 | spin_lock(&journal->j_state_lock); | 1106 | read_lock(&journal->j_state_lock); |
1100 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1107 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", |
1101 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1108 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
1102 | 1109 | ||
1103 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); | 1110 | sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); |
1104 | sb->s_start = cpu_to_be32(journal->j_tail); | 1111 | sb->s_start = cpu_to_be32(journal->j_tail); |
1105 | sb->s_errno = cpu_to_be32(journal->j_errno); | 1112 | sb->s_errno = cpu_to_be32(journal->j_errno); |
1106 | spin_unlock(&journal->j_state_lock); | 1113 | read_unlock(&journal->j_state_lock); |
1107 | 1114 | ||
1108 | BUFFER_TRACE(bh, "marking dirty"); | 1115 | BUFFER_TRACE(bh, "marking dirty"); |
1109 | mark_buffer_dirty(bh); | 1116 | mark_buffer_dirty(bh); |
@@ -1124,12 +1131,12 @@ out: | |||
1124 | * any future commit will have to be careful to update the | 1131 | * any future commit will have to be careful to update the |
1125 | * superblock again to re-record the true start of the log. */ | 1132 | * superblock again to re-record the true start of the log. */ |
1126 | 1133 | ||
1127 | spin_lock(&journal->j_state_lock); | 1134 | write_lock(&journal->j_state_lock); |
1128 | if (sb->s_start) | 1135 | if (sb->s_start) |
1129 | journal->j_flags &= ~JBD2_FLUSHED; | 1136 | journal->j_flags &= ~JBD2_FLUSHED; |
1130 | else | 1137 | else |
1131 | journal->j_flags |= JBD2_FLUSHED; | 1138 | journal->j_flags |= JBD2_FLUSHED; |
1132 | spin_unlock(&journal->j_state_lock); | 1139 | write_unlock(&journal->j_state_lock); |
1133 | } | 1140 | } |
1134 | 1141 | ||
1135 | /* | 1142 | /* |
@@ -1391,13 +1398,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, | |||
1391 | int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, | 1398 | int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, |
1392 | unsigned long ro, unsigned long incompat) | 1399 | unsigned long ro, unsigned long incompat) |
1393 | { | 1400 | { |
1394 | journal_superblock_t *sb; | ||
1395 | |||
1396 | if (!compat && !ro && !incompat) | 1401 | if (!compat && !ro && !incompat) |
1397 | return 1; | 1402 | return 1; |
1398 | 1403 | ||
1399 | sb = journal->j_superblock; | ||
1400 | |||
1401 | /* We can support any known requested features iff the | 1404 | /* We can support any known requested features iff the |
1402 | * superblock is in version 2. Otherwise we fail to support any | 1405 | * superblock is in version 2. Otherwise we fail to support any |
1403 | * extended sb features. */ | 1406 | * extended sb features. */ |
@@ -1545,7 +1548,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1545 | transaction_t *transaction = NULL; | 1548 | transaction_t *transaction = NULL; |
1546 | unsigned long old_tail; | 1549 | unsigned long old_tail; |
1547 | 1550 | ||
1548 | spin_lock(&journal->j_state_lock); | 1551 | write_lock(&journal->j_state_lock); |
1549 | 1552 | ||
1550 | /* Force everything buffered to the log... */ | 1553 | /* Force everything buffered to the log... */ |
1551 | if (journal->j_running_transaction) { | 1554 | if (journal->j_running_transaction) { |
@@ -1558,10 +1561,10 @@ int jbd2_journal_flush(journal_t *journal) | |||
1558 | if (transaction) { | 1561 | if (transaction) { |
1559 | tid_t tid = transaction->t_tid; | 1562 | tid_t tid = transaction->t_tid; |
1560 | 1563 | ||
1561 | spin_unlock(&journal->j_state_lock); | 1564 | write_unlock(&journal->j_state_lock); |
1562 | jbd2_log_wait_commit(journal, tid); | 1565 | jbd2_log_wait_commit(journal, tid); |
1563 | } else { | 1566 | } else { |
1564 | spin_unlock(&journal->j_state_lock); | 1567 | write_unlock(&journal->j_state_lock); |
1565 | } | 1568 | } |
1566 | 1569 | ||
1567 | /* ...and flush everything in the log out to disk. */ | 1570 | /* ...and flush everything in the log out to disk. */ |
@@ -1585,12 +1588,12 @@ int jbd2_journal_flush(journal_t *journal) | |||
1585 | * the magic code for a fully-recovered superblock. Any future | 1588 | * the magic code for a fully-recovered superblock. Any future |
1586 | * commits of data to the journal will restore the current | 1589 | * commits of data to the journal will restore the current |
1587 | * s_start value. */ | 1590 | * s_start value. */ |
1588 | spin_lock(&journal->j_state_lock); | 1591 | write_lock(&journal->j_state_lock); |
1589 | old_tail = journal->j_tail; | 1592 | old_tail = journal->j_tail; |
1590 | journal->j_tail = 0; | 1593 | journal->j_tail = 0; |
1591 | spin_unlock(&journal->j_state_lock); | 1594 | write_unlock(&journal->j_state_lock); |
1592 | jbd2_journal_update_superblock(journal, 1); | 1595 | jbd2_journal_update_superblock(journal, 1); |
1593 | spin_lock(&journal->j_state_lock); | 1596 | write_lock(&journal->j_state_lock); |
1594 | journal->j_tail = old_tail; | 1597 | journal->j_tail = old_tail; |
1595 | 1598 | ||
1596 | J_ASSERT(!journal->j_running_transaction); | 1599 | J_ASSERT(!journal->j_running_transaction); |
@@ -1598,7 +1601,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
1598 | J_ASSERT(!journal->j_checkpoint_transactions); | 1601 | J_ASSERT(!journal->j_checkpoint_transactions); |
1599 | J_ASSERT(journal->j_head == journal->j_tail); | 1602 | J_ASSERT(journal->j_head == journal->j_tail); |
1600 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); | 1603 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); |
1601 | spin_unlock(&journal->j_state_lock); | 1604 | write_unlock(&journal->j_state_lock); |
1602 | return 0; | 1605 | return 0; |
1603 | } | 1606 | } |
1604 | 1607 | ||
@@ -1617,7 +1620,6 @@ int jbd2_journal_flush(journal_t *journal) | |||
1617 | 1620 | ||
1618 | int jbd2_journal_wipe(journal_t *journal, int write) | 1621 | int jbd2_journal_wipe(journal_t *journal, int write) |
1619 | { | 1622 | { |
1620 | journal_superblock_t *sb; | ||
1621 | int err = 0; | 1623 | int err = 0; |
1622 | 1624 | ||
1623 | J_ASSERT (!(journal->j_flags & JBD2_LOADED)); | 1625 | J_ASSERT (!(journal->j_flags & JBD2_LOADED)); |
@@ -1626,8 +1628,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
1626 | if (err) | 1628 | if (err) |
1627 | return err; | 1629 | return err; |
1628 | 1630 | ||
1629 | sb = journal->j_superblock; | ||
1630 | |||
1631 | if (!journal->j_tail) | 1631 | if (!journal->j_tail) |
1632 | goto no_recovery; | 1632 | goto no_recovery; |
1633 | 1633 | ||
@@ -1665,12 +1665,12 @@ void __jbd2_journal_abort_hard(journal_t *journal) | |||
1665 | printk(KERN_ERR "Aborting journal on device %s.\n", | 1665 | printk(KERN_ERR "Aborting journal on device %s.\n", |
1666 | journal->j_devname); | 1666 | journal->j_devname); |
1667 | 1667 | ||
1668 | spin_lock(&journal->j_state_lock); | 1668 | write_lock(&journal->j_state_lock); |
1669 | journal->j_flags |= JBD2_ABORT; | 1669 | journal->j_flags |= JBD2_ABORT; |
1670 | transaction = journal->j_running_transaction; | 1670 | transaction = journal->j_running_transaction; |
1671 | if (transaction) | 1671 | if (transaction) |
1672 | __jbd2_log_start_commit(journal, transaction->t_tid); | 1672 | __jbd2_log_start_commit(journal, transaction->t_tid); |
1673 | spin_unlock(&journal->j_state_lock); | 1673 | write_unlock(&journal->j_state_lock); |
1674 | } | 1674 | } |
1675 | 1675 | ||
1676 | /* Soft abort: record the abort error status in the journal superblock, | 1676 | /* Soft abort: record the abort error status in the journal superblock, |
@@ -1755,12 +1755,12 @@ int jbd2_journal_errno(journal_t *journal) | |||
1755 | { | 1755 | { |
1756 | int err; | 1756 | int err; |
1757 | 1757 | ||
1758 | spin_lock(&journal->j_state_lock); | 1758 | read_lock(&journal->j_state_lock); |
1759 | if (journal->j_flags & JBD2_ABORT) | 1759 | if (journal->j_flags & JBD2_ABORT) |
1760 | err = -EROFS; | 1760 | err = -EROFS; |
1761 | else | 1761 | else |
1762 | err = journal->j_errno; | 1762 | err = journal->j_errno; |
1763 | spin_unlock(&journal->j_state_lock); | 1763 | read_unlock(&journal->j_state_lock); |
1764 | return err; | 1764 | return err; |
1765 | } | 1765 | } |
1766 | 1766 | ||
@@ -1775,12 +1775,12 @@ int jbd2_journal_clear_err(journal_t *journal) | |||
1775 | { | 1775 | { |
1776 | int err = 0; | 1776 | int err = 0; |
1777 | 1777 | ||
1778 | spin_lock(&journal->j_state_lock); | 1778 | write_lock(&journal->j_state_lock); |
1779 | if (journal->j_flags & JBD2_ABORT) | 1779 | if (journal->j_flags & JBD2_ABORT) |
1780 | err = -EROFS; | 1780 | err = -EROFS; |
1781 | else | 1781 | else |
1782 | journal->j_errno = 0; | 1782 | journal->j_errno = 0; |
1783 | spin_unlock(&journal->j_state_lock); | 1783 | write_unlock(&journal->j_state_lock); |
1784 | return err; | 1784 | return err; |
1785 | } | 1785 | } |
1786 | 1786 | ||
@@ -1793,10 +1793,10 @@ int jbd2_journal_clear_err(journal_t *journal) | |||
1793 | */ | 1793 | */ |
1794 | void jbd2_journal_ack_err(journal_t *journal) | 1794 | void jbd2_journal_ack_err(journal_t *journal) |
1795 | { | 1795 | { |
1796 | spin_lock(&journal->j_state_lock); | 1796 | write_lock(&journal->j_state_lock); |
1797 | if (journal->j_errno) | 1797 | if (journal->j_errno) |
1798 | journal->j_flags |= JBD2_ACK_ERR; | 1798 | journal->j_flags |= JBD2_ACK_ERR; |
1799 | spin_unlock(&journal->j_state_lock); | 1799 | write_unlock(&journal->j_state_lock); |
1800 | } | 1800 | } |
1801 | 1801 | ||
1802 | int jbd2_journal_blocks_per_page(struct inode *inode) | 1802 | int jbd2_journal_blocks_per_page(struct inode *inode) |
@@ -2201,8 +2201,6 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) | |||
2201 | void jbd2_journal_release_jbd_inode(journal_t *journal, | 2201 | void jbd2_journal_release_jbd_inode(journal_t *journal, |
2202 | struct jbd2_inode *jinode) | 2202 | struct jbd2_inode *jinode) |
2203 | { | 2203 | { |
2204 | int writeout = 0; | ||
2205 | |||
2206 | if (!journal) | 2204 | if (!journal) |
2207 | return; | 2205 | return; |
2208 | restart: | 2206 | restart: |
@@ -2219,9 +2217,6 @@ restart: | |||
2219 | goto restart; | 2217 | goto restart; |
2220 | } | 2218 | } |
2221 | 2219 | ||
2222 | /* Do we need to wait for data writeback? */ | ||
2223 | if (journal->j_committing_transaction == jinode->i_transaction) | ||
2224 | writeout = 1; | ||
2225 | if (jinode->i_transaction) { | 2220 | if (jinode->i_transaction) { |
2226 | list_del(&jinode->i_list); | 2221 | list_del(&jinode->i_list); |
2227 | jinode->i_transaction = NULL; | 2222 | jinode->i_transaction = NULL; |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 049281b7cb89..2bc4d5f116f1 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -285,12 +285,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
285 | int jbd2_journal_skip_recovery(journal_t *journal) | 285 | int jbd2_journal_skip_recovery(journal_t *journal) |
286 | { | 286 | { |
287 | int err; | 287 | int err; |
288 | journal_superblock_t * sb; | ||
289 | 288 | ||
290 | struct recovery_info info; | 289 | struct recovery_info info; |
291 | 290 | ||
292 | memset (&info, 0, sizeof(info)); | 291 | memset (&info, 0, sizeof(info)); |
293 | sb = journal->j_superblock; | ||
294 | 292 | ||
295 | err = do_one_pass(journal, &info, PASS_SCAN); | 293 | err = do_one_pass(journal, &info, PASS_SCAN); |
296 | 294 | ||
@@ -299,7 +297,8 @@ int jbd2_journal_skip_recovery(journal_t *journal) | |||
299 | ++journal->j_transaction_sequence; | 297 | ++journal->j_transaction_sequence; |
300 | } else { | 298 | } else { |
301 | #ifdef CONFIG_JBD2_DEBUG | 299 | #ifdef CONFIG_JBD2_DEBUG |
302 | int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); | 300 | int dropped = info.end_transaction - |
301 | be32_to_cpu(journal->j_superblock->s_sequence); | ||
303 | #endif | 302 | #endif |
304 | jbd_debug(1, | 303 | jbd_debug(1, |
305 | "JBD: ignoring %d transaction%s from the journal.\n", | 304 | "JBD: ignoring %d transaction%s from the journal.\n", |
@@ -365,11 +364,6 @@ static int do_one_pass(journal_t *journal, | |||
365 | int tag_bytes = journal_tag_bytes(journal); | 364 | int tag_bytes = journal_tag_bytes(journal); |
366 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | 365 | __u32 crc32_sum = ~0; /* Transactional Checksums */ |
367 | 366 | ||
368 | /* Precompute the maximum metadata descriptors in a descriptor block */ | ||
369 | int MAX_BLOCKS_PER_DESC; | ||
370 | MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) | ||
371 | / tag_bytes); | ||
372 | |||
373 | /* | 367 | /* |
374 | * First thing is to establish what we expect to find in the log | 368 | * First thing is to establish what we expect to find in the log |
375 | * (in terms of transaction IDs), and where (in terms of log | 369 | * (in terms of transaction IDs), and where (in terms of log |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b8e0806681bb..d95cc9d0401d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | 28 | #include <linux/hrtimer.h> |
29 | #include <linux/backing-dev.h> | ||
30 | #include <linux/module.h> | ||
29 | 31 | ||
30 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
31 | 33 | ||
@@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
53 | transaction->t_tid = journal->j_transaction_sequence++; | 55 | transaction->t_tid = journal->j_transaction_sequence++; |
54 | transaction->t_expires = jiffies + journal->j_commit_interval; | 56 | transaction->t_expires = jiffies + journal->j_commit_interval; |
55 | spin_lock_init(&transaction->t_handle_lock); | 57 | spin_lock_init(&transaction->t_handle_lock); |
58 | atomic_set(&transaction->t_updates, 0); | ||
59 | atomic_set(&transaction->t_outstanding_credits, 0); | ||
60 | atomic_set(&transaction->t_handle_count, 0); | ||
56 | INIT_LIST_HEAD(&transaction->t_inode_list); | 61 | INIT_LIST_HEAD(&transaction->t_inode_list); |
57 | INIT_LIST_HEAD(&transaction->t_private_list); | 62 | INIT_LIST_HEAD(&transaction->t_private_list); |
58 | 63 | ||
@@ -83,65 +88,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
83 | * transaction's buffer credits. | 88 | * transaction's buffer credits. |
84 | */ | 89 | */ |
85 | 90 | ||
86 | static int start_this_handle(journal_t *journal, handle_t *handle) | 91 | static int start_this_handle(journal_t *journal, handle_t *handle, |
92 | int gfp_mask) | ||
87 | { | 93 | { |
88 | transaction_t *transaction; | 94 | transaction_t *transaction; |
89 | int needed; | 95 | int needed; |
90 | int nblocks = handle->h_buffer_credits; | 96 | int nblocks = handle->h_buffer_credits; |
91 | transaction_t *new_transaction = NULL; | 97 | transaction_t *new_transaction = NULL; |
92 | int ret = 0; | ||
93 | unsigned long ts = jiffies; | 98 | unsigned long ts = jiffies; |
94 | 99 | ||
95 | if (nblocks > journal->j_max_transaction_buffers) { | 100 | if (nblocks > journal->j_max_transaction_buffers) { |
96 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 101 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", |
97 | current->comm, nblocks, | 102 | current->comm, nblocks, |
98 | journal->j_max_transaction_buffers); | 103 | journal->j_max_transaction_buffers); |
99 | ret = -ENOSPC; | 104 | return -ENOSPC; |
100 | goto out; | ||
101 | } | 105 | } |
102 | 106 | ||
103 | alloc_transaction: | 107 | alloc_transaction: |
104 | if (!journal->j_running_transaction) { | 108 | if (!journal->j_running_transaction) { |
105 | new_transaction = kzalloc(sizeof(*new_transaction), | 109 | new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask); |
106 | GFP_NOFS|__GFP_NOFAIL); | ||
107 | if (!new_transaction) { | 110 | if (!new_transaction) { |
108 | ret = -ENOMEM; | 111 | /* |
109 | goto out; | 112 | * If __GFP_FS is not present, then we may be |
113 | * being called from inside the fs writeback | ||
114 | * layer, so we MUST NOT fail. Since | ||
115 | * __GFP_NOFAIL is going away, we will arrange | ||
116 | * to retry the allocation ourselves. | ||
117 | */ | ||
118 | if ((gfp_mask & __GFP_FS) == 0) { | ||
119 | congestion_wait(BLK_RW_ASYNC, HZ/50); | ||
120 | goto alloc_transaction; | ||
121 | } | ||
122 | return -ENOMEM; | ||
110 | } | 123 | } |
111 | } | 124 | } |
112 | 125 | ||
113 | jbd_debug(3, "New handle %p going live.\n", handle); | 126 | jbd_debug(3, "New handle %p going live.\n", handle); |
114 | 127 | ||
115 | repeat: | ||
116 | |||
117 | /* | 128 | /* |
118 | * We need to hold j_state_lock until t_updates has been incremented, | 129 | * We need to hold j_state_lock until t_updates has been incremented, |
119 | * for proper journal barrier handling | 130 | * for proper journal barrier handling |
120 | */ | 131 | */ |
121 | spin_lock(&journal->j_state_lock); | 132 | repeat: |
122 | repeat_locked: | 133 | read_lock(&journal->j_state_lock); |
123 | if (is_journal_aborted(journal) || | 134 | if (is_journal_aborted(journal) || |
124 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { | 135 | (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { |
125 | spin_unlock(&journal->j_state_lock); | 136 | read_unlock(&journal->j_state_lock); |
126 | ret = -EROFS; | 137 | kfree(new_transaction); |
127 | goto out; | 138 | return -EROFS; |
128 | } | 139 | } |
129 | 140 | ||
130 | /* Wait on the journal's transaction barrier if necessary */ | 141 | /* Wait on the journal's transaction barrier if necessary */ |
131 | if (journal->j_barrier_count) { | 142 | if (journal->j_barrier_count) { |
132 | spin_unlock(&journal->j_state_lock); | 143 | read_unlock(&journal->j_state_lock); |
133 | wait_event(journal->j_wait_transaction_locked, | 144 | wait_event(journal->j_wait_transaction_locked, |
134 | journal->j_barrier_count == 0); | 145 | journal->j_barrier_count == 0); |
135 | goto repeat; | 146 | goto repeat; |
136 | } | 147 | } |
137 | 148 | ||
138 | if (!journal->j_running_transaction) { | 149 | if (!journal->j_running_transaction) { |
139 | if (!new_transaction) { | 150 | read_unlock(&journal->j_state_lock); |
140 | spin_unlock(&journal->j_state_lock); | 151 | if (!new_transaction) |
141 | goto alloc_transaction; | 152 | goto alloc_transaction; |
153 | write_lock(&journal->j_state_lock); | ||
154 | if (!journal->j_running_transaction) { | ||
155 | jbd2_get_transaction(journal, new_transaction); | ||
156 | new_transaction = NULL; | ||
142 | } | 157 | } |
143 | jbd2_get_transaction(journal, new_transaction); | 158 | write_unlock(&journal->j_state_lock); |
144 | new_transaction = NULL; | 159 | goto repeat; |
145 | } | 160 | } |
146 | 161 | ||
147 | transaction = journal->j_running_transaction; | 162 | transaction = journal->j_running_transaction; |
@@ -155,7 +170,7 @@ repeat_locked: | |||
155 | 170 | ||
156 | prepare_to_wait(&journal->j_wait_transaction_locked, | 171 | prepare_to_wait(&journal->j_wait_transaction_locked, |
157 | &wait, TASK_UNINTERRUPTIBLE); | 172 | &wait, TASK_UNINTERRUPTIBLE); |
158 | spin_unlock(&journal->j_state_lock); | 173 | read_unlock(&journal->j_state_lock); |
159 | schedule(); | 174 | schedule(); |
160 | finish_wait(&journal->j_wait_transaction_locked, &wait); | 175 | finish_wait(&journal->j_wait_transaction_locked, &wait); |
161 | goto repeat; | 176 | goto repeat; |
@@ -166,8 +181,8 @@ repeat_locked: | |||
166 | * buffers requested by this operation, we need to stall pending a log | 181 | * buffers requested by this operation, we need to stall pending a log |
167 | * checkpoint to free some more log space. | 182 | * checkpoint to free some more log space. |
168 | */ | 183 | */ |
169 | spin_lock(&transaction->t_handle_lock); | 184 | needed = atomic_add_return(nblocks, |
170 | needed = transaction->t_outstanding_credits + nblocks; | 185 | &transaction->t_outstanding_credits); |
171 | 186 | ||
172 | if (needed > journal->j_max_transaction_buffers) { | 187 | if (needed > journal->j_max_transaction_buffers) { |
173 | /* | 188 | /* |
@@ -178,11 +193,11 @@ repeat_locked: | |||
178 | DEFINE_WAIT(wait); | 193 | DEFINE_WAIT(wait); |
179 | 194 | ||
180 | jbd_debug(2, "Handle %p starting new commit...\n", handle); | 195 | jbd_debug(2, "Handle %p starting new commit...\n", handle); |
181 | spin_unlock(&transaction->t_handle_lock); | 196 | atomic_sub(nblocks, &transaction->t_outstanding_credits); |
182 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | 197 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, |
183 | TASK_UNINTERRUPTIBLE); | 198 | TASK_UNINTERRUPTIBLE); |
184 | __jbd2_log_start_commit(journal, transaction->t_tid); | 199 | __jbd2_log_start_commit(journal, transaction->t_tid); |
185 | spin_unlock(&journal->j_state_lock); | 200 | read_unlock(&journal->j_state_lock); |
186 | schedule(); | 201 | schedule(); |
187 | finish_wait(&journal->j_wait_transaction_locked, &wait); | 202 | finish_wait(&journal->j_wait_transaction_locked, &wait); |
188 | goto repeat; | 203 | goto repeat; |
@@ -215,35 +230,48 @@ repeat_locked: | |||
215 | */ | 230 | */ |
216 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { | 231 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { |
217 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); | 232 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); |
218 | spin_unlock(&transaction->t_handle_lock); | 233 | atomic_sub(nblocks, &transaction->t_outstanding_credits); |
219 | __jbd2_log_wait_for_space(journal); | 234 | read_unlock(&journal->j_state_lock); |
220 | goto repeat_locked; | 235 | write_lock(&journal->j_state_lock); |
236 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) | ||
237 | __jbd2_log_wait_for_space(journal); | ||
238 | write_unlock(&journal->j_state_lock); | ||
239 | goto repeat; | ||
221 | } | 240 | } |
222 | 241 | ||
223 | /* OK, account for the buffers that this operation expects to | 242 | /* OK, account for the buffers that this operation expects to |
224 | * use and add the handle to the running transaction. */ | 243 | * use and add the handle to the running transaction. |
225 | 244 | * | |
226 | if (time_after(transaction->t_start, ts)) { | 245 | * In order for t_max_wait to be reliable, it must be |
246 | * protected by a lock. But doing so will mean that | ||
247 | * start_this_handle() can not be run in parallel on SMP | ||
248 | * systems, which limits our scalability. So we only enable | ||
249 | * it when debugging is enabled. We may want to use a | ||
250 | * separate flag, eventually, so we can enable this | ||
251 | * independently of debugging. | ||
252 | */ | ||
253 | #ifdef CONFIG_JBD2_DEBUG | ||
254 | if (jbd2_journal_enable_debug && | ||
255 | time_after(transaction->t_start, ts)) { | ||
227 | ts = jbd2_time_diff(ts, transaction->t_start); | 256 | ts = jbd2_time_diff(ts, transaction->t_start); |
257 | spin_lock(&transaction->t_handle_lock); | ||
228 | if (ts > transaction->t_max_wait) | 258 | if (ts > transaction->t_max_wait) |
229 | transaction->t_max_wait = ts; | 259 | transaction->t_max_wait = ts; |
260 | spin_unlock(&transaction->t_handle_lock); | ||
230 | } | 261 | } |
231 | 262 | #endif | |
232 | handle->h_transaction = transaction; | 263 | handle->h_transaction = transaction; |
233 | transaction->t_outstanding_credits += nblocks; | 264 | atomic_inc(&transaction->t_updates); |
234 | transaction->t_updates++; | 265 | atomic_inc(&transaction->t_handle_count); |
235 | transaction->t_handle_count++; | ||
236 | jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", | 266 | jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", |
237 | handle, nblocks, transaction->t_outstanding_credits, | 267 | handle, nblocks, |
268 | atomic_read(&transaction->t_outstanding_credits), | ||
238 | __jbd2_log_space_left(journal)); | 269 | __jbd2_log_space_left(journal)); |
239 | spin_unlock(&transaction->t_handle_lock); | 270 | read_unlock(&journal->j_state_lock); |
240 | spin_unlock(&journal->j_state_lock); | ||
241 | 271 | ||
242 | lock_map_acquire(&handle->h_lockdep_map); | 272 | lock_map_acquire(&handle->h_lockdep_map); |
243 | out: | 273 | kfree(new_transaction); |
244 | if (unlikely(new_transaction)) /* It's usually NULL */ | 274 | return 0; |
245 | kfree(new_transaction); | ||
246 | return ret; | ||
247 | } | 275 | } |
248 | 276 | ||
249 | static struct lock_class_key jbd2_handle_key; | 277 | static struct lock_class_key jbd2_handle_key; |
@@ -278,7 +306,7 @@ static handle_t *new_handle(int nblocks) | |||
278 | * | 306 | * |
279 | * Return a pointer to a newly allocated handle, or NULL on failure | 307 | * Return a pointer to a newly allocated handle, or NULL on failure |
280 | */ | 308 | */ |
281 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | 309 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) |
282 | { | 310 | { |
283 | handle_t *handle = journal_current_handle(); | 311 | handle_t *handle = journal_current_handle(); |
284 | int err; | 312 | int err; |
@@ -298,7 +326,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
298 | 326 | ||
299 | current->journal_info = handle; | 327 | current->journal_info = handle; |
300 | 328 | ||
301 | err = start_this_handle(journal, handle); | 329 | err = start_this_handle(journal, handle, gfp_mask); |
302 | if (err < 0) { | 330 | if (err < 0) { |
303 | jbd2_free_handle(handle); | 331 | jbd2_free_handle(handle); |
304 | current->journal_info = NULL; | 332 | current->journal_info = NULL; |
@@ -308,6 +336,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | |||
308 | out: | 336 | out: |
309 | return handle; | 337 | return handle; |
310 | } | 338 | } |
339 | EXPORT_SYMBOL(jbd2__journal_start); | ||
340 | |||
341 | |||
342 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | ||
343 | { | ||
344 | return jbd2__journal_start(journal, nblocks, GFP_NOFS); | ||
345 | } | ||
346 | EXPORT_SYMBOL(jbd2_journal_start); | ||
347 | |||
311 | 348 | ||
312 | /** | 349 | /** |
313 | * int jbd2_journal_extend() - extend buffer credits. | 350 | * int jbd2_journal_extend() - extend buffer credits. |
@@ -342,7 +379,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) | |||
342 | 379 | ||
343 | result = 1; | 380 | result = 1; |
344 | 381 | ||
345 | spin_lock(&journal->j_state_lock); | 382 | read_lock(&journal->j_state_lock); |
346 | 383 | ||
347 | /* Don't extend a locked-down transaction! */ | 384 | /* Don't extend a locked-down transaction! */ |
348 | if (handle->h_transaction->t_state != T_RUNNING) { | 385 | if (handle->h_transaction->t_state != T_RUNNING) { |
@@ -352,7 +389,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) | |||
352 | } | 389 | } |
353 | 390 | ||
354 | spin_lock(&transaction->t_handle_lock); | 391 | spin_lock(&transaction->t_handle_lock); |
355 | wanted = transaction->t_outstanding_credits + nblocks; | 392 | wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks; |
356 | 393 | ||
357 | if (wanted > journal->j_max_transaction_buffers) { | 394 | if (wanted > journal->j_max_transaction_buffers) { |
358 | jbd_debug(3, "denied handle %p %d blocks: " | 395 | jbd_debug(3, "denied handle %p %d blocks: " |
@@ -367,14 +404,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) | |||
367 | } | 404 | } |
368 | 405 | ||
369 | handle->h_buffer_credits += nblocks; | 406 | handle->h_buffer_credits += nblocks; |
370 | transaction->t_outstanding_credits += nblocks; | 407 | atomic_add(nblocks, &transaction->t_outstanding_credits); |
371 | result = 0; | 408 | result = 0; |
372 | 409 | ||
373 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); | 410 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); |
374 | unlock: | 411 | unlock: |
375 | spin_unlock(&transaction->t_handle_lock); | 412 | spin_unlock(&transaction->t_handle_lock); |
376 | error_out: | 413 | error_out: |
377 | spin_unlock(&journal->j_state_lock); | 414 | read_unlock(&journal->j_state_lock); |
378 | out: | 415 | out: |
379 | return result; | 416 | return result; |
380 | } | 417 | } |
@@ -394,8 +431,7 @@ out: | |||
394 | * transaction capabable of guaranteeing the requested number of | 431 | * transaction capabable of guaranteeing the requested number of |
395 | * credits. | 432 | * credits. |
396 | */ | 433 | */ |
397 | 434 | int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) | |
398 | int jbd2_journal_restart(handle_t *handle, int nblocks) | ||
399 | { | 435 | { |
400 | transaction_t *transaction = handle->h_transaction; | 436 | transaction_t *transaction = handle->h_transaction; |
401 | journal_t *journal = transaction->t_journal; | 437 | journal_t *journal = transaction->t_journal; |
@@ -410,29 +446,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) | |||
410 | * First unlink the handle from its current transaction, and start the | 446 | * First unlink the handle from its current transaction, and start the |
411 | * commit on that. | 447 | * commit on that. |
412 | */ | 448 | */ |
413 | J_ASSERT(transaction->t_updates > 0); | 449 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); |
414 | J_ASSERT(journal_current_handle() == handle); | 450 | J_ASSERT(journal_current_handle() == handle); |
415 | 451 | ||
416 | spin_lock(&journal->j_state_lock); | 452 | read_lock(&journal->j_state_lock); |
417 | spin_lock(&transaction->t_handle_lock); | 453 | spin_lock(&transaction->t_handle_lock); |
418 | transaction->t_outstanding_credits -= handle->h_buffer_credits; | 454 | atomic_sub(handle->h_buffer_credits, |
419 | transaction->t_updates--; | 455 | &transaction->t_outstanding_credits); |
420 | 456 | if (atomic_dec_and_test(&transaction->t_updates)) | |
421 | if (!transaction->t_updates) | ||
422 | wake_up(&journal->j_wait_updates); | 457 | wake_up(&journal->j_wait_updates); |
423 | spin_unlock(&transaction->t_handle_lock); | 458 | spin_unlock(&transaction->t_handle_lock); |
424 | 459 | ||
425 | jbd_debug(2, "restarting handle %p\n", handle); | 460 | jbd_debug(2, "restarting handle %p\n", handle); |
426 | __jbd2_log_start_commit(journal, transaction->t_tid); | 461 | __jbd2_log_start_commit(journal, transaction->t_tid); |
427 | spin_unlock(&journal->j_state_lock); | 462 | read_unlock(&journal->j_state_lock); |
428 | 463 | ||
429 | lock_map_release(&handle->h_lockdep_map); | 464 | lock_map_release(&handle->h_lockdep_map); |
430 | handle->h_buffer_credits = nblocks; | 465 | handle->h_buffer_credits = nblocks; |
431 | ret = start_this_handle(journal, handle); | 466 | ret = start_this_handle(journal, handle, gfp_mask); |
432 | return ret; | 467 | return ret; |
433 | } | 468 | } |
469 | EXPORT_SYMBOL(jbd2__journal_restart); | ||
434 | 470 | ||
435 | 471 | ||
472 | int jbd2_journal_restart(handle_t *handle, int nblocks) | ||
473 | { | ||
474 | return jbd2__journal_restart(handle, nblocks, GFP_NOFS); | ||
475 | } | ||
476 | EXPORT_SYMBOL(jbd2_journal_restart); | ||
477 | |||
436 | /** | 478 | /** |
437 | * void jbd2_journal_lock_updates () - establish a transaction barrier. | 479 | * void jbd2_journal_lock_updates () - establish a transaction barrier. |
438 | * @journal: Journal to establish a barrier on. | 480 | * @journal: Journal to establish a barrier on. |
@@ -447,7 +489,7 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
447 | { | 489 | { |
448 | DEFINE_WAIT(wait); | 490 | DEFINE_WAIT(wait); |
449 | 491 | ||
450 | spin_lock(&journal->j_state_lock); | 492 | write_lock(&journal->j_state_lock); |
451 | ++journal->j_barrier_count; | 493 | ++journal->j_barrier_count; |
452 | 494 | ||
453 | /* Wait until there are no running updates */ | 495 | /* Wait until there are no running updates */ |
@@ -458,19 +500,19 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
458 | break; | 500 | break; |
459 | 501 | ||
460 | spin_lock(&transaction->t_handle_lock); | 502 | spin_lock(&transaction->t_handle_lock); |
461 | if (!transaction->t_updates) { | 503 | if (!atomic_read(&transaction->t_updates)) { |
462 | spin_unlock(&transaction->t_handle_lock); | 504 | spin_unlock(&transaction->t_handle_lock); |
463 | break; | 505 | break; |
464 | } | 506 | } |
465 | prepare_to_wait(&journal->j_wait_updates, &wait, | 507 | prepare_to_wait(&journal->j_wait_updates, &wait, |
466 | TASK_UNINTERRUPTIBLE); | 508 | TASK_UNINTERRUPTIBLE); |
467 | spin_unlock(&transaction->t_handle_lock); | 509 | spin_unlock(&transaction->t_handle_lock); |
468 | spin_unlock(&journal->j_state_lock); | 510 | write_unlock(&journal->j_state_lock); |
469 | schedule(); | 511 | schedule(); |
470 | finish_wait(&journal->j_wait_updates, &wait); | 512 | finish_wait(&journal->j_wait_updates, &wait); |
471 | spin_lock(&journal->j_state_lock); | 513 | write_lock(&journal->j_state_lock); |
472 | } | 514 | } |
473 | spin_unlock(&journal->j_state_lock); | 515 | write_unlock(&journal->j_state_lock); |
474 | 516 | ||
475 | /* | 517 | /* |
476 | * We have now established a barrier against other normal updates, but | 518 | * We have now established a barrier against other normal updates, but |
@@ -494,9 +536,9 @@ void jbd2_journal_unlock_updates (journal_t *journal) | |||
494 | J_ASSERT(journal->j_barrier_count != 0); | 536 | J_ASSERT(journal->j_barrier_count != 0); |
495 | 537 | ||
496 | mutex_unlock(&journal->j_barrier); | 538 | mutex_unlock(&journal->j_barrier); |
497 | spin_lock(&journal->j_state_lock); | 539 | write_lock(&journal->j_state_lock); |
498 | --journal->j_barrier_count; | 540 | --journal->j_barrier_count; |
499 | spin_unlock(&journal->j_state_lock); | 541 | write_unlock(&journal->j_state_lock); |
500 | wake_up(&journal->j_wait_transaction_locked); | 542 | wake_up(&journal->j_wait_transaction_locked); |
501 | } | 543 | } |
502 | 544 | ||
@@ -1238,7 +1280,8 @@ int jbd2_journal_stop(handle_t *handle) | |||
1238 | { | 1280 | { |
1239 | transaction_t *transaction = handle->h_transaction; | 1281 | transaction_t *transaction = handle->h_transaction; |
1240 | journal_t *journal = transaction->t_journal; | 1282 | journal_t *journal = transaction->t_journal; |
1241 | int err; | 1283 | int err, wait_for_commit = 0; |
1284 | tid_t tid; | ||
1242 | pid_t pid; | 1285 | pid_t pid; |
1243 | 1286 | ||
1244 | J_ASSERT(journal_current_handle() == handle); | 1287 | J_ASSERT(journal_current_handle() == handle); |
@@ -1246,7 +1289,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1246 | if (is_handle_aborted(handle)) | 1289 | if (is_handle_aborted(handle)) |
1247 | err = -EIO; | 1290 | err = -EIO; |
1248 | else { | 1291 | else { |
1249 | J_ASSERT(transaction->t_updates > 0); | 1292 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); |
1250 | err = 0; | 1293 | err = 0; |
1251 | } | 1294 | } |
1252 | 1295 | ||
@@ -1291,9 +1334,9 @@ int jbd2_journal_stop(handle_t *handle) | |||
1291 | 1334 | ||
1292 | journal->j_last_sync_writer = pid; | 1335 | journal->j_last_sync_writer = pid; |
1293 | 1336 | ||
1294 | spin_lock(&journal->j_state_lock); | 1337 | read_lock(&journal->j_state_lock); |
1295 | commit_time = journal->j_average_commit_time; | 1338 | commit_time = journal->j_average_commit_time; |
1296 | spin_unlock(&journal->j_state_lock); | 1339 | read_unlock(&journal->j_state_lock); |
1297 | 1340 | ||
1298 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | 1341 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), |
1299 | transaction->t_start_time)); | 1342 | transaction->t_start_time)); |
@@ -1314,14 +1357,8 @@ int jbd2_journal_stop(handle_t *handle) | |||
1314 | if (handle->h_sync) | 1357 | if (handle->h_sync) |
1315 | transaction->t_synchronous_commit = 1; | 1358 | transaction->t_synchronous_commit = 1; |
1316 | current->journal_info = NULL; | 1359 | current->journal_info = NULL; |
1317 | spin_lock(&transaction->t_handle_lock); | 1360 | atomic_sub(handle->h_buffer_credits, |
1318 | transaction->t_outstanding_credits -= handle->h_buffer_credits; | 1361 | &transaction->t_outstanding_credits); |
1319 | transaction->t_updates--; | ||
1320 | if (!transaction->t_updates) { | ||
1321 | wake_up(&journal->j_wait_updates); | ||
1322 | if (journal->j_barrier_count) | ||
1323 | wake_up(&journal->j_wait_transaction_locked); | ||
1324 | } | ||
1325 | 1362 | ||
1326 | /* | 1363 | /* |
1327 | * If the handle is marked SYNC, we need to set another commit | 1364 | * If the handle is marked SYNC, we need to set another commit |
@@ -1330,15 +1367,13 @@ int jbd2_journal_stop(handle_t *handle) | |||
1330 | * transaction is too old now. | 1367 | * transaction is too old now. |
1331 | */ | 1368 | */ |
1332 | if (handle->h_sync || | 1369 | if (handle->h_sync || |
1333 | transaction->t_outstanding_credits > | 1370 | (atomic_read(&transaction->t_outstanding_credits) > |
1334 | journal->j_max_transaction_buffers || | 1371 | journal->j_max_transaction_buffers) || |
1335 | time_after_eq(jiffies, transaction->t_expires)) { | 1372 | time_after_eq(jiffies, transaction->t_expires)) { |
1336 | /* Do this even for aborted journals: an abort still | 1373 | /* Do this even for aborted journals: an abort still |
1337 | * completes the commit thread, it just doesn't write | 1374 | * completes the commit thread, it just doesn't write |
1338 | * anything to disk. */ | 1375 | * anything to disk. */ |
1339 | tid_t tid = transaction->t_tid; | ||
1340 | 1376 | ||
1341 | spin_unlock(&transaction->t_handle_lock); | ||
1342 | jbd_debug(2, "transaction too old, requesting commit for " | 1377 | jbd_debug(2, "transaction too old, requesting commit for " |
1343 | "handle %p\n", handle); | 1378 | "handle %p\n", handle); |
1344 | /* This is non-blocking */ | 1379 | /* This is non-blocking */ |
@@ -1349,11 +1384,25 @@ int jbd2_journal_stop(handle_t *handle) | |||
1349 | * to wait for the commit to complete. | 1384 | * to wait for the commit to complete. |
1350 | */ | 1385 | */ |
1351 | if (handle->h_sync && !(current->flags & PF_MEMALLOC)) | 1386 | if (handle->h_sync && !(current->flags & PF_MEMALLOC)) |
1352 | err = jbd2_log_wait_commit(journal, tid); | 1387 | wait_for_commit = 1; |
1353 | } else { | ||
1354 | spin_unlock(&transaction->t_handle_lock); | ||
1355 | } | 1388 | } |
1356 | 1389 | ||
1390 | /* | ||
1391 | * Once we drop t_updates, if it goes to zero the transaction | ||
1392 | * could start commiting on us and eventually disappear. So | ||
1393 | * once we do this, we must not dereference transaction | ||
1394 | * pointer again. | ||
1395 | */ | ||
1396 | tid = transaction->t_tid; | ||
1397 | if (atomic_dec_and_test(&transaction->t_updates)) { | ||
1398 | wake_up(&journal->j_wait_updates); | ||
1399 | if (journal->j_barrier_count) | ||
1400 | wake_up(&journal->j_wait_transaction_locked); | ||
1401 | } | ||
1402 | |||
1403 | if (wait_for_commit) | ||
1404 | err = jbd2_log_wait_commit(journal, tid); | ||
1405 | |||
1357 | lock_map_release(&handle->h_lockdep_map); | 1406 | lock_map_release(&handle->h_lockdep_map); |
1358 | 1407 | ||
1359 | jbd2_free_handle(handle); | 1408 | jbd2_free_handle(handle); |
@@ -1719,7 +1768,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1719 | goto zap_buffer_unlocked; | 1768 | goto zap_buffer_unlocked; |
1720 | 1769 | ||
1721 | /* OK, we have data buffer in journaled mode */ | 1770 | /* OK, we have data buffer in journaled mode */ |
1722 | spin_lock(&journal->j_state_lock); | 1771 | write_lock(&journal->j_state_lock); |
1723 | jbd_lock_bh_state(bh); | 1772 | jbd_lock_bh_state(bh); |
1724 | spin_lock(&journal->j_list_lock); | 1773 | spin_lock(&journal->j_list_lock); |
1725 | 1774 | ||
@@ -1772,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1772 | jbd2_journal_put_journal_head(jh); | 1821 | jbd2_journal_put_journal_head(jh); |
1773 | spin_unlock(&journal->j_list_lock); | 1822 | spin_unlock(&journal->j_list_lock); |
1774 | jbd_unlock_bh_state(bh); | 1823 | jbd_unlock_bh_state(bh); |
1775 | spin_unlock(&journal->j_state_lock); | 1824 | write_unlock(&journal->j_state_lock); |
1776 | return ret; | 1825 | return ret; |
1777 | } else { | 1826 | } else { |
1778 | /* There is no currently-running transaction. So the | 1827 | /* There is no currently-running transaction. So the |
@@ -1786,7 +1835,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1786 | jbd2_journal_put_journal_head(jh); | 1835 | jbd2_journal_put_journal_head(jh); |
1787 | spin_unlock(&journal->j_list_lock); | 1836 | spin_unlock(&journal->j_list_lock); |
1788 | jbd_unlock_bh_state(bh); | 1837 | jbd_unlock_bh_state(bh); |
1789 | spin_unlock(&journal->j_state_lock); | 1838 | write_unlock(&journal->j_state_lock); |
1790 | return ret; | 1839 | return ret; |
1791 | } else { | 1840 | } else { |
1792 | /* The orphan record's transaction has | 1841 | /* The orphan record's transaction has |
@@ -1810,7 +1859,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1810 | jbd2_journal_put_journal_head(jh); | 1859 | jbd2_journal_put_journal_head(jh); |
1811 | spin_unlock(&journal->j_list_lock); | 1860 | spin_unlock(&journal->j_list_lock); |
1812 | jbd_unlock_bh_state(bh); | 1861 | jbd_unlock_bh_state(bh); |
1813 | spin_unlock(&journal->j_state_lock); | 1862 | write_unlock(&journal->j_state_lock); |
1814 | return 0; | 1863 | return 0; |
1815 | } else { | 1864 | } else { |
1816 | /* Good, the buffer belongs to the running transaction. | 1865 | /* Good, the buffer belongs to the running transaction. |
@@ -1829,7 +1878,7 @@ zap_buffer: | |||
1829 | zap_buffer_no_jh: | 1878 | zap_buffer_no_jh: |
1830 | spin_unlock(&journal->j_list_lock); | 1879 | spin_unlock(&journal->j_list_lock); |
1831 | jbd_unlock_bh_state(bh); | 1880 | jbd_unlock_bh_state(bh); |
1832 | spin_unlock(&journal->j_state_lock); | 1881 | write_unlock(&journal->j_state_lock); |
1833 | zap_buffer_unlocked: | 1882 | zap_buffer_unlocked: |
1834 | clear_buffer_dirty(bh); | 1883 | clear_buffer_dirty(bh); |
1835 | J_ASSERT_BH(bh, !buffer_jbddirty(bh)); | 1884 | J_ASSERT_BH(bh, !buffer_jbddirty(bh)); |
@@ -2136,9 +2185,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal, | |||
2136 | /* Locks are here just to force reading of recent values, it is | 2185 | /* Locks are here just to force reading of recent values, it is |
2137 | * enough that the transaction was not committing before we started | 2186 | * enough that the transaction was not committing before we started |
2138 | * a transaction adding the inode to orphan list */ | 2187 | * a transaction adding the inode to orphan list */ |
2139 | spin_lock(&journal->j_state_lock); | 2188 | read_lock(&journal->j_state_lock); |
2140 | commit_trans = journal->j_committing_transaction; | 2189 | commit_trans = journal->j_committing_transaction; |
2141 | spin_unlock(&journal->j_state_lock); | 2190 | read_unlock(&journal->j_state_lock); |
2142 | spin_lock(&journal->j_list_lock); | 2191 | spin_lock(&journal->j_list_lock); |
2143 | inode_trans = jinode->i_transaction; | 2192 | inode_trans = jinode->i_transaction; |
2144 | spin_unlock(&journal->j_list_lock); | 2193 | spin_unlock(&journal->j_list_lock); |
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index 55f1dde2fa8b..404111b016c9 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index c5e1450d79f9..a906f538d11c 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index f0294410868d..617a1e5694c1 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c | |||
@@ -2,11 +2,12 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Created by Arjan van de Ven <arjanv@redhat.com> | 5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> |
6 | * | ||
7 | * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, | 6 | * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, |
8 | * University of Szeged, Hungary | 7 | * University of Szeged, Hungary |
9 | * | 8 | * |
9 | * Created by Arjan van de Ven <arjan@infradead.org> | ||
10 | * | ||
10 | * For licensing information, see the file 'LICENCE' in this directory. | 11 | * For licensing information, see the file 'LICENCE' in this directory. |
11 | * | 12 | * |
12 | */ | 13 | */ |
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h index 7d1d72faa774..e471a9106fd9 100644 --- a/fs/jffs2/compr.h +++ b/fs/jffs2/compr.h | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, | 4 | * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, |
5 | * University of Szeged, Hungary | 5 | * University of Szeged, Hungary |
6 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
6 | * | 7 | * |
7 | * For licensing information, see the file 'LICENCE' in this directory. | 8 | * For licensing information, see the file 'LICENCE' in this directory. |
8 | * | 9 | * |
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c index cd02acafde8a..ed25ae7c98eb 100644 --- a/fs/jffs2/compr_lzo.c +++ b/fs/jffs2/compr_lzo.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2007 Nokia Corporation. All rights reserved. | 4 | * Copyright © 2007 Nokia Corporation. All rights reserved. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by Richard Purdie <rpurdie@openedhand.com> | 7 | * Created by Richard Purdie <rpurdie@openedhand.com> |
7 | * | 8 | * |
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c index 546d1538d076..9696ad9ef5f7 100644 --- a/fs/jffs2/compr_rtime.c +++ b/fs/jffs2/compr_rtime.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by Arjan van de Ven <arjanv@redhat.com> | 7 | * Created by Arjan van de Ven <arjanv@redhat.com> |
7 | * | 8 | * |
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c index 170d289ac785..a12b4f763373 100644 --- a/fs/jffs2/compr_rubin.c +++ b/fs/jffs2/compr_rubin.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by Arjan van de Ven <arjanv@redhat.com> | 7 | * Created by Arjan van de Ven <arjanv@redhat.com> |
7 | * | 8 | * |
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index b46661a42758..97fc45de6f81 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c index ec3538413926..e0b76c87a91a 100644 --- a/fs/jffs2/debug.c +++ b/fs/jffs2/debug.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index a113ecc3bafe..c4f8eef5ca68 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 166062a68230..ed78a3cf3cb0 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
@@ -232,9 +233,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, | |||
232 | return 0; | 233 | return 0; |
233 | 234 | ||
234 | fail: | 235 | fail: |
235 | make_bad_inode(inode); | 236 | iget_failed(inode); |
236 | unlock_new_inode(inode); | ||
237 | iput(inode); | ||
238 | jffs2_free_raw_inode(ri); | 237 | jffs2_free_raw_inode(ri); |
239 | return ret; | 238 | return ret; |
240 | } | 239 | } |
@@ -454,9 +453,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char | |||
454 | return 0; | 453 | return 0; |
455 | 454 | ||
456 | fail: | 455 | fail: |
457 | make_bad_inode(inode); | 456 | iget_failed(inode); |
458 | unlock_new_inode(inode); | ||
459 | iput(inode); | ||
460 | return ret; | 457 | return ret; |
461 | } | 458 | } |
462 | 459 | ||
@@ -601,9 +598,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) | |||
601 | return 0; | 598 | return 0; |
602 | 599 | ||
603 | fail: | 600 | fail: |
604 | make_bad_inode(inode); | 601 | iget_failed(inode); |
605 | unlock_new_inode(inode); | ||
606 | iput(inode); | ||
607 | return ret; | 602 | return ret; |
608 | } | 603 | } |
609 | 604 | ||
@@ -778,9 +773,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de | |||
778 | return 0; | 773 | return 0; |
779 | 774 | ||
780 | fail: | 775 | fail: |
781 | make_bad_inode(inode); | 776 | iget_failed(inode); |
782 | unlock_new_inode(inode); | ||
783 | iput(inode); | ||
784 | return ret; | 777 | return ret; |
785 | } | 778 | } |
786 | 779 | ||
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index 6286ad9b00f7..abac961f617b 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 813497024437..1c0a08d711aa 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 459d39d1ea0b..6b2964a19850 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
@@ -169,13 +170,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr) | |||
169 | mutex_unlock(&f->sem); | 170 | mutex_unlock(&f->sem); |
170 | jffs2_complete_reservation(c); | 171 | jffs2_complete_reservation(c); |
171 | 172 | ||
172 | /* We have to do the simple_setsize() without f->sem held, since | 173 | /* We have to do the truncate_setsize() without f->sem held, since |
173 | some pages may be locked and waiting for it in readpage(). | 174 | some pages may be locked and waiting for it in readpage(). |
174 | We are protected from a simultaneous write() extending i_size | 175 | We are protected from a simultaneous write() extending i_size |
175 | back past iattr->ia_size, because do_truncate() holds the | 176 | back past iattr->ia_size, because do_truncate() holds the |
176 | generic inode semaphore. */ | 177 | generic inode semaphore. */ |
177 | if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { | 178 | if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { |
178 | simple_setsize(inode, iattr->ia_size); | 179 | truncate_setsize(inode, iattr->ia_size); |
179 | inode->i_blocks = (inode->i_size + 511) >> 9; | 180 | inode->i_blocks = (inode->i_size + 511) >> 9; |
180 | } | 181 | } |
181 | 182 | ||
@@ -225,7 +226,7 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
225 | } | 226 | } |
226 | 227 | ||
227 | 228 | ||
228 | void jffs2_clear_inode (struct inode *inode) | 229 | void jffs2_evict_inode (struct inode *inode) |
229 | { | 230 | { |
230 | /* We can forget about this inode for now - drop all | 231 | /* We can forget about this inode for now - drop all |
231 | * the nodelists associated with it, etc. | 232 | * the nodelists associated with it, etc. |
@@ -233,7 +234,9 @@ void jffs2_clear_inode (struct inode *inode) | |||
233 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | 234 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); |
234 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); | 235 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
235 | 236 | ||
236 | D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); | 237 | D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); |
238 | truncate_inode_pages(&inode->i_data, 0); | ||
239 | end_writeback(inode); | ||
237 | jffs2_do_clear_inode(c, f); | 240 | jffs2_do_clear_inode(c, f); |
238 | } | 241 | } |
239 | 242 | ||
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index f5e96bd656e8..846a79452497 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c index 9d41f43e47bb..859a598af020 100644 --- a/fs/jffs2/ioctl.c +++ b/fs/jffs2/ioctl.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index c6923da98263..2e4a86763c07 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 85ef6dbb1be7..6784bc89add1 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h | |||
@@ -2,6 +2,7 @@ | |||
2 | * JFFS2 -- Journalling Flash File System, Version 2. | 2 | * JFFS2 -- Journalling Flash File System, Version 2. |
3 | * | 3 | * |
4 | * Copyright © 2001-2007 Red Hat, Inc. | 4 | * Copyright © 2001-2007 Red Hat, Inc. |
5 | * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org> | ||
5 | * | 6 | * |
6 | * Created by David Woodhouse <dwmw2@infradead.org> | 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
7 | * | 8 | * |
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index a881a42f19e3..523a91691052 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h | |||
@@ -24,7 +24,6 @@ | |||
24 | #ifdef __ECOS | 24 | #ifdef __ECOS |
25 | #include "os-ecos.h" | 25 | #include "os-ecos.h" |
26 | #else | 26 | #else |
27 | #include <linux/mtd/compatmac.h> /* For compatibility with older kernels */ | ||
28 | #include "os-linux.h" | 27 | #include "os-linux.h" |
29 | #endif | 28 | #endif |
30 | 29 | ||
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 4791aacf3084..00bae7cc2e48 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -171,7 +171,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations; | |||
171 | int jffs2_setattr (struct dentry *, struct iattr *); | 171 | int jffs2_setattr (struct dentry *, struct iattr *); |
172 | int jffs2_do_setattr (struct inode *, struct iattr *); | 172 | int jffs2_do_setattr (struct inode *, struct iattr *); |
173 | struct inode *jffs2_iget(struct super_block *, unsigned long); | 173 | struct inode *jffs2_iget(struct super_block *, unsigned long); |
174 | void jffs2_clear_inode (struct inode *); | 174 | void jffs2_evict_inode (struct inode *); |
175 | void jffs2_dirty_inode(struct inode *inode); | 175 | void jffs2_dirty_inode(struct inode *inode); |
176 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, | 176 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, |
177 | struct jffs2_raw_inode *ri); | 177 | struct jffs2_raw_inode *ri); |
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 511e2d609d12..662bba099501 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -135,7 +135,7 @@ static const struct super_operations jffs2_super_operations = | |||
135 | .write_super = jffs2_write_super, | 135 | .write_super = jffs2_write_super, |
136 | .statfs = jffs2_statfs, | 136 | .statfs = jffs2_statfs, |
137 | .remount_fs = jffs2_remount_fs, | 137 | .remount_fs = jffs2_remount_fs, |
138 | .clear_inode = jffs2_clear_inode, | 138 | .evict_inode = jffs2_evict_inode, |
139 | .dirty_inode = jffs2_dirty_inode, | 139 | .dirty_inode = jffs2_dirty_inode, |
140 | .sync_fs = jffs2_sync_fs, | 140 | .sync_fs = jffs2_sync_fs, |
141 | }; | 141 | }; |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index d258e261bdc7..9b572ca40a49 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -588,7 +588,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re | |||
588 | 588 | ||
589 | void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) | 589 | void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) |
590 | { | 590 | { |
591 | /* It's called from jffs2_clear_inode() on inode removing. | 591 | /* It's called from jffs2_evict_inode() on inode removing. |
592 | When an inode with XATTR is removed, those XATTRs must be removed. */ | 592 | When an inode with XATTR is removed, those XATTRs must be removed. */ |
593 | struct jffs2_xattr_ref *ref, *_ref; | 593 | struct jffs2_xattr_ref *ref, *_ref; |
594 | 594 | ||
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 127263cc8657..c5ce6c1d1ff4 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -17,6 +17,7 @@ | |||
17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/mm.h> | ||
20 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
21 | #include <linux/quotaops.h> | 22 | #include <linux/quotaops.h> |
22 | #include "jfs_incore.h" | 23 | #include "jfs_incore.h" |
@@ -107,11 +108,18 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
107 | return rc; | 108 | return rc; |
108 | } | 109 | } |
109 | 110 | ||
110 | rc = inode_setattr(inode, iattr); | 111 | if ((iattr->ia_valid & ATTR_SIZE) && |
112 | iattr->ia_size != i_size_read(inode)) { | ||
113 | rc = vmtruncate(inode, iattr->ia_size); | ||
114 | if (rc) | ||
115 | return rc; | ||
116 | } | ||
111 | 117 | ||
112 | if (!rc && (iattr->ia_valid & ATTR_MODE)) | 118 | setattr_copy(inode, iattr); |
113 | rc = jfs_acl_chmod(inode); | 119 | mark_inode_dirty(inode); |
114 | 120 | ||
121 | if (iattr->ia_valid & ATTR_MODE) | ||
122 | rc = jfs_acl_chmod(inode); | ||
115 | return rc; | 123 | return rc; |
116 | } | 124 | } |
117 | 125 | ||
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index ed9ba6fe04f5..9978803ceedc 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -145,31 +145,32 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
145 | return 0; | 145 | return 0; |
146 | } | 146 | } |
147 | 147 | ||
148 | void jfs_delete_inode(struct inode *inode) | 148 | void jfs_evict_inode(struct inode *inode) |
149 | { | 149 | { |
150 | jfs_info("In jfs_delete_inode, inode = 0x%p", inode); | 150 | jfs_info("In jfs_evict_inode, inode = 0x%p", inode); |
151 | 151 | ||
152 | if (!is_bad_inode(inode)) | 152 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
153 | dquot_initialize(inode); | 153 | dquot_initialize(inode); |
154 | 154 | ||
155 | if (!is_bad_inode(inode) && | 155 | if (JFS_IP(inode)->fileset == FILESYSTEM_I) { |
156 | (JFS_IP(inode)->fileset == FILESYSTEM_I)) { | 156 | truncate_inode_pages(&inode->i_data, 0); |
157 | truncate_inode_pages(&inode->i_data, 0); | ||
158 | 157 | ||
159 | if (test_cflag(COMMIT_Freewmap, inode)) | 158 | if (test_cflag(COMMIT_Freewmap, inode)) |
160 | jfs_free_zero_link(inode); | 159 | jfs_free_zero_link(inode); |
161 | 160 | ||
162 | diFree(inode); | 161 | diFree(inode); |
163 | 162 | ||
164 | /* | 163 | /* |
165 | * Free the inode from the quota allocation. | 164 | * Free the inode from the quota allocation. |
166 | */ | 165 | */ |
167 | dquot_initialize(inode); | 166 | dquot_initialize(inode); |
168 | dquot_free_inode(inode); | 167 | dquot_free_inode(inode); |
169 | dquot_drop(inode); | 168 | } |
169 | } else { | ||
170 | truncate_inode_pages(&inode->i_data, 0); | ||
170 | } | 171 | } |
171 | 172 | end_writeback(inode); | |
172 | clear_inode(inode); | 173 | dquot_drop(inode); |
173 | } | 174 | } |
174 | 175 | ||
175 | void jfs_dirty_inode(struct inode *inode) | 176 | void jfs_dirty_inode(struct inode *inode) |
@@ -303,8 +304,17 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping, | |||
303 | loff_t pos, unsigned len, unsigned flags, | 304 | loff_t pos, unsigned len, unsigned flags, |
304 | struct page **pagep, void **fsdata) | 305 | struct page **pagep, void **fsdata) |
305 | { | 306 | { |
306 | return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 307 | int ret; |
308 | |||
309 | ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, | ||
307 | jfs_get_block); | 310 | jfs_get_block); |
311 | if (unlikely(ret)) { | ||
312 | loff_t isize = mapping->host->i_size; | ||
313 | if (pos + len > isize) | ||
314 | vmtruncate(mapping->host, isize); | ||
315 | } | ||
316 | |||
317 | return ret; | ||
308 | } | 318 | } |
309 | 319 | ||
310 | static sector_t jfs_bmap(struct address_space *mapping, sector_t block) | 320 | static sector_t jfs_bmap(struct address_space *mapping, sector_t block) |
@@ -317,9 +327,24 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, | |||
317 | { | 327 | { |
318 | struct file *file = iocb->ki_filp; | 328 | struct file *file = iocb->ki_filp; |
319 | struct inode *inode = file->f_mapping->host; | 329 | struct inode *inode = file->f_mapping->host; |
330 | ssize_t ret; | ||
320 | 331 | ||
321 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 332 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
322 | offset, nr_segs, jfs_get_block, NULL); | 333 | offset, nr_segs, jfs_get_block, NULL); |
334 | |||
335 | /* | ||
336 | * In case of error extending write may have instantiated a few | ||
337 | * blocks outside i_size. Trim these off again. | ||
338 | */ | ||
339 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
340 | loff_t isize = i_size_read(inode); | ||
341 | loff_t end = offset + iov_length(iov, nr_segs); | ||
342 | |||
343 | if (end > isize) | ||
344 | vmtruncate(inode, isize); | ||
345 | } | ||
346 | |||
347 | return ret; | ||
323 | } | 348 | } |
324 | 349 | ||
325 | const struct address_space_operations jfs_aops = { | 350 | const struct address_space_operations jfs_aops = { |
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 11042b1f44b5..155e91eff07d 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -27,7 +27,7 @@ extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); | |||
27 | extern struct inode *jfs_iget(struct super_block *, unsigned long); | 27 | extern struct inode *jfs_iget(struct super_block *, unsigned long); |
28 | extern int jfs_commit_inode(struct inode *, int); | 28 | extern int jfs_commit_inode(struct inode *, int); |
29 | extern int jfs_write_inode(struct inode *, struct writeback_control *); | 29 | extern int jfs_write_inode(struct inode *, struct writeback_control *); |
30 | extern void jfs_delete_inode(struct inode *); | 30 | extern void jfs_evict_inode(struct inode *); |
31 | extern void jfs_dirty_inode(struct inode *); | 31 | extern void jfs_dirty_inode(struct inode *); |
32 | extern void jfs_truncate(struct inode *); | 32 | extern void jfs_truncate(struct inode *); |
33 | extern void jfs_truncate_nolock(struct inode *, loff_t); | 33 | extern void jfs_truncate_nolock(struct inode *, loff_t); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b38f96bef829..ec8c3e4baca3 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -132,11 +132,6 @@ static void jfs_destroy_inode(struct inode *inode) | |||
132 | kmem_cache_free(jfs_inode_cachep, ji); | 132 | kmem_cache_free(jfs_inode_cachep, ji); |
133 | } | 133 | } |
134 | 134 | ||
135 | static void jfs_clear_inode(struct inode *inode) | ||
136 | { | ||
137 | dquot_drop(inode); | ||
138 | } | ||
139 | |||
140 | static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 135 | static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
141 | { | 136 | { |
142 | struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); | 137 | struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); |
@@ -765,8 +760,7 @@ static const struct super_operations jfs_super_operations = { | |||
765 | .destroy_inode = jfs_destroy_inode, | 760 | .destroy_inode = jfs_destroy_inode, |
766 | .dirty_inode = jfs_dirty_inode, | 761 | .dirty_inode = jfs_dirty_inode, |
767 | .write_inode = jfs_write_inode, | 762 | .write_inode = jfs_write_inode, |
768 | .delete_inode = jfs_delete_inode, | 763 | .evict_inode = jfs_evict_inode, |
769 | .clear_inode = jfs_clear_inode, | ||
770 | .put_super = jfs_put_super, | 764 | .put_super = jfs_put_super, |
771 | .sync_fs = jfs_sync_fs, | 765 | .sync_fs = jfs_sync_fs, |
772 | .freeze_fs = jfs_freeze, | 766 | .freeze_fs = jfs_freeze, |
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index fa96bbb26343..2d7f165d0f1d 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c | |||
@@ -86,46 +86,25 @@ struct ea_buffer { | |||
86 | #define EA_MALLOC 0x0008 | 86 | #define EA_MALLOC 0x0008 |
87 | 87 | ||
88 | 88 | ||
89 | static int is_known_namespace(const char *name) | ||
90 | { | ||
91 | if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && | ||
92 | strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && | ||
93 | strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && | ||
94 | strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) | ||
95 | return false; | ||
96 | |||
97 | return true; | ||
98 | } | ||
99 | |||
89 | /* | 100 | /* |
90 | * These three routines are used to recognize on-disk extended attributes | 101 | * These three routines are used to recognize on-disk extended attributes |
91 | * that are in a recognized namespace. If the attribute is not recognized, | 102 | * that are in a recognized namespace. If the attribute is not recognized, |
92 | * "os2." is prepended to the name | 103 | * "os2." is prepended to the name |
93 | */ | 104 | */ |
94 | static inline int is_os2_xattr(struct jfs_ea *ea) | 105 | static int is_os2_xattr(struct jfs_ea *ea) |
95 | { | 106 | { |
96 | /* | 107 | return !is_known_namespace(ea->name); |
97 | * Check for "system." | ||
98 | */ | ||
99 | if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) && | ||
100 | !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | ||
101 | return false; | ||
102 | /* | ||
103 | * Check for "user." | ||
104 | */ | ||
105 | if ((ea->namelen >= XATTR_USER_PREFIX_LEN) && | ||
106 | !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) | ||
107 | return false; | ||
108 | /* | ||
109 | * Check for "security." | ||
110 | */ | ||
111 | if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) && | ||
112 | !strncmp(ea->name, XATTR_SECURITY_PREFIX, | ||
113 | XATTR_SECURITY_PREFIX_LEN)) | ||
114 | return false; | ||
115 | /* | ||
116 | * Check for "trusted." | ||
117 | */ | ||
118 | if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) && | ||
119 | !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) | ||
120 | return false; | ||
121 | /* | ||
122 | * Add any other valid namespace prefixes here | ||
123 | */ | ||
124 | |||
125 | /* | ||
126 | * We assume it's OS/2's flat namespace | ||
127 | */ | ||
128 | return true; | ||
129 | } | 108 | } |
130 | 109 | ||
131 | static inline int name_size(struct jfs_ea *ea) | 110 | static inline int name_size(struct jfs_ea *ea) |
@@ -764,13 +743,23 @@ static int can_set_xattr(struct inode *inode, const char *name, | |||
764 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) | 743 | if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) |
765 | return can_set_system_xattr(inode, name, value, value_len); | 744 | return can_set_system_xattr(inode, name, value, value_len); |
766 | 745 | ||
746 | if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) { | ||
747 | /* | ||
748 | * This makes sure that we aren't trying to set an | ||
749 | * attribute in a different namespace by prefixing it | ||
750 | * with "os2." | ||
751 | */ | ||
752 | if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN)) | ||
753 | return -EOPNOTSUPP; | ||
754 | return 0; | ||
755 | } | ||
756 | |||
767 | /* | 757 | /* |
768 | * Don't allow setting an attribute in an unknown namespace. | 758 | * Don't allow setting an attribute in an unknown namespace. |
769 | */ | 759 | */ |
770 | if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && | 760 | if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && |
771 | strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && | 761 | strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && |
772 | strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && | 762 | strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) |
773 | strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) | ||
774 | return -EOPNOTSUPP; | 763 | return -EOPNOTSUPP; |
775 | 764 | ||
776 | return 0; | 765 | return 0; |
@@ -952,19 +941,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, | |||
952 | int xattr_size; | 941 | int xattr_size; |
953 | ssize_t size; | 942 | ssize_t size; |
954 | int namelen = strlen(name); | 943 | int namelen = strlen(name); |
955 | char *os2name = NULL; | ||
956 | char *value; | 944 | char *value; |
957 | 945 | ||
958 | if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { | ||
959 | os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1, | ||
960 | GFP_KERNEL); | ||
961 | if (!os2name) | ||
962 | return -ENOMEM; | ||
963 | strcpy(os2name, name + XATTR_OS2_PREFIX_LEN); | ||
964 | name = os2name; | ||
965 | namelen -= XATTR_OS2_PREFIX_LEN; | ||
966 | } | ||
967 | |||
968 | down_read(&JFS_IP(inode)->xattr_sem); | 946 | down_read(&JFS_IP(inode)->xattr_sem); |
969 | 947 | ||
970 | xattr_size = ea_get(inode, &ea_buf, 0); | 948 | xattr_size = ea_get(inode, &ea_buf, 0); |
@@ -1002,8 +980,6 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, | |||
1002 | out: | 980 | out: |
1003 | up_read(&JFS_IP(inode)->xattr_sem); | 981 | up_read(&JFS_IP(inode)->xattr_sem); |
1004 | 982 | ||
1005 | kfree(os2name); | ||
1006 | |||
1007 | return size; | 983 | return size; |
1008 | } | 984 | } |
1009 | 985 | ||
@@ -1012,6 +988,19 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, | |||
1012 | { | 988 | { |
1013 | int err; | 989 | int err; |
1014 | 990 | ||
991 | if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { | ||
992 | /* | ||
993 | * skip past "os2." prefix | ||
994 | */ | ||
995 | name += XATTR_OS2_PREFIX_LEN; | ||
996 | /* | ||
997 | * Don't allow retrieving properly prefixed attributes | ||
998 | * by prepending them with "os2." | ||
999 | */ | ||
1000 | if (is_known_namespace(name)) | ||
1001 | return -EOPNOTSUPP; | ||
1002 | } | ||
1003 | |||
1015 | err = __jfs_getxattr(dentry->d_inode, name, data, buf_size); | 1004 | err = __jfs_getxattr(dentry->d_inode, name, data, buf_size); |
1016 | 1005 | ||
1017 | return err; | 1006 | return err; |
diff --git a/fs/libfs.c b/fs/libfs.c index dcaf972cbf1b..0a9da95317f7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -327,77 +327,35 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
327 | } | 327 | } |
328 | 328 | ||
329 | /** | 329 | /** |
330 | * simple_setsize - handle core mm and vfs requirements for file size change | 330 | * simple_setattr - setattr for simple filesystem |
331 | * @inode: inode | ||
332 | * @newsize: new file size | ||
333 | * | ||
334 | * Returns 0 on success, -error on failure. | ||
335 | * | ||
336 | * simple_setsize must be called with inode_mutex held. | ||
337 | * | ||
338 | * simple_setsize will check that the requested new size is OK (see | ||
339 | * inode_newsize_ok), and then will perform the necessary i_size update | ||
340 | * and pagecache truncation (if necessary). It will be typically be called | ||
341 | * from the filesystem's setattr function when ATTR_SIZE is passed in. | ||
342 | * | ||
343 | * The inode itself must have correct permissions and attributes to allow | ||
344 | * i_size to be changed, this function then just checks that the new size | ||
345 | * requested is valid. | ||
346 | * | ||
347 | * In the case of simple in-memory filesystems with inodes stored solely | ||
348 | * in the inode cache, and file data in the pagecache, nothing more needs | ||
349 | * to be done to satisfy a truncate request. Filesystems with on-disk | ||
350 | * blocks for example will need to free them in the case of truncate, in | ||
351 | * that case it may be easier not to use simple_setsize (but each of its | ||
352 | * components will likely be required at some point to update pagecache | ||
353 | * and inode etc). | ||
354 | */ | ||
355 | int simple_setsize(struct inode *inode, loff_t newsize) | ||
356 | { | ||
357 | loff_t oldsize; | ||
358 | int error; | ||
359 | |||
360 | error = inode_newsize_ok(inode, newsize); | ||
361 | if (error) | ||
362 | return error; | ||
363 | |||
364 | oldsize = inode->i_size; | ||
365 | i_size_write(inode, newsize); | ||
366 | truncate_pagecache(inode, oldsize, newsize); | ||
367 | |||
368 | return error; | ||
369 | } | ||
370 | EXPORT_SYMBOL(simple_setsize); | ||
371 | |||
372 | /** | ||
373 | * simple_setattr - setattr for simple in-memory filesystem | ||
374 | * @dentry: dentry | 331 | * @dentry: dentry |
375 | * @iattr: iattr structure | 332 | * @iattr: iattr structure |
376 | * | 333 | * |
377 | * Returns 0 on success, -error on failure. | 334 | * Returns 0 on success, -error on failure. |
378 | * | 335 | * |
379 | * simple_setattr implements setattr for an in-memory filesystem which | 336 | * simple_setattr is a simple ->setattr implementation without a proper |
380 | * does not store its own file data or metadata (eg. uses the page cache | 337 | * implementation of size changes. |
381 | * and inode cache as its data store). | 338 | * |
339 | * It can either be used for in-memory filesystems or special files | ||
340 | * on simple regular filesystems. Anything that needs to change on-disk | ||
341 | * or wire state on size changes needs its own setattr method. | ||
382 | */ | 342 | */ |
383 | int simple_setattr(struct dentry *dentry, struct iattr *iattr) | 343 | int simple_setattr(struct dentry *dentry, struct iattr *iattr) |
384 | { | 344 | { |
385 | struct inode *inode = dentry->d_inode; | 345 | struct inode *inode = dentry->d_inode; |
386 | int error; | 346 | int error; |
387 | 347 | ||
348 | WARN_ON_ONCE(inode->i_op->truncate); | ||
349 | |||
388 | error = inode_change_ok(inode, iattr); | 350 | error = inode_change_ok(inode, iattr); |
389 | if (error) | 351 | if (error) |
390 | return error; | 352 | return error; |
391 | 353 | ||
392 | if (iattr->ia_valid & ATTR_SIZE) { | 354 | if (iattr->ia_valid & ATTR_SIZE) |
393 | error = simple_setsize(inode, iattr->ia_size); | 355 | truncate_setsize(inode, iattr->ia_size); |
394 | if (error) | 356 | setattr_copy(inode, iattr); |
395 | return error; | 357 | mark_inode_dirty(inode); |
396 | } | 358 | return 0; |
397 | |||
398 | generic_setattr(inode, iattr); | ||
399 | |||
400 | return error; | ||
401 | } | 359 | } |
402 | EXPORT_SYMBOL(simple_setattr); | 360 | EXPORT_SYMBOL(simple_setattr); |
403 | 361 | ||
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 72d1893ddd36..675cc49197fe 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -434,8 +434,11 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry, | |||
434 | int ret; | 434 | int ret; |
435 | 435 | ||
436 | ta = kzalloc(sizeof(*ta), GFP_KERNEL); | 436 | ta = kzalloc(sizeof(*ta), GFP_KERNEL); |
437 | if (!ta) | 437 | if (!ta) { |
438 | inode->i_nlink--; | ||
439 | iput(inode); | ||
438 | return -ENOMEM; | 440 | return -ENOMEM; |
441 | } | ||
439 | 442 | ||
440 | ta->state = CREATE_1; | 443 | ta->state = CREATE_1; |
441 | ta->ino = inode->i_ino; | 444 | ta->ino = inode->i_ino; |
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index abe1cafbd4c2..4dd0f7c06e39 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
@@ -232,15 +232,19 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
232 | struct inode *inode = dentry->d_inode; | 232 | struct inode *inode = dentry->d_inode; |
233 | int err = 0; | 233 | int err = 0; |
234 | 234 | ||
235 | if (attr->ia_valid & ATTR_SIZE) | 235 | err = inode_change_ok(inode, attr); |
236 | if (err) | ||
237 | return err; | ||
238 | |||
239 | if (attr->ia_valid & ATTR_SIZE) { | ||
236 | err = logfs_truncate(inode, attr->ia_size); | 240 | err = logfs_truncate(inode, attr->ia_size); |
237 | attr->ia_valid &= ~ATTR_SIZE; | 241 | if (err) |
242 | return err; | ||
243 | } | ||
238 | 244 | ||
239 | if (!err) | 245 | setattr_copy(inode, attr); |
240 | err = inode_change_ok(inode, attr); | 246 | mark_inode_dirty(inode); |
241 | if (!err) | 247 | return 0; |
242 | err = inode_setattr(inode, attr); | ||
243 | return err; | ||
244 | } | 248 | } |
245 | 249 | ||
246 | const struct inode_operations logfs_reg_iops = { | 250 | const struct inode_operations logfs_reg_iops = { |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index f602e230e162..d8c71ece098f 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -235,33 +235,21 @@ static struct inode *logfs_alloc_inode(struct super_block *sb) | |||
235 | * purpose is to create a new inode that will not trigger the warning if such | 235 | * purpose is to create a new inode that will not trigger the warning if such |
236 | * an inode is still in use. An ugly hack, no doubt. Suggections for | 236 | * an inode is still in use. An ugly hack, no doubt. Suggections for |
237 | * improvement are welcome. | 237 | * improvement are welcome. |
238 | * | ||
239 | * AV: that's what ->put_super() is for... | ||
238 | */ | 240 | */ |
239 | struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) | 241 | struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) |
240 | { | 242 | { |
241 | struct inode *inode; | 243 | struct inode *inode; |
242 | 244 | ||
243 | inode = logfs_alloc_inode(sb); | 245 | inode = new_inode(sb); |
244 | if (!inode) | 246 | if (!inode) |
245 | return ERR_PTR(-ENOMEM); | 247 | return ERR_PTR(-ENOMEM); |
246 | 248 | ||
247 | inode->i_mode = S_IFREG; | 249 | inode->i_mode = S_IFREG; |
248 | inode->i_ino = ino; | 250 | inode->i_ino = ino; |
249 | inode->i_sb = sb; | 251 | inode->i_data.a_ops = &logfs_reg_aops; |
250 | 252 | mapping_set_gfp_mask(&inode->i_data, GFP_NOFS); | |
251 | /* This is a blatant copy of alloc_inode code. We'd need alloc_inode | ||
252 | * to be nonstatic, alas. */ | ||
253 | { | ||
254 | struct address_space * const mapping = &inode->i_data; | ||
255 | |||
256 | mapping->a_ops = &logfs_reg_aops; | ||
257 | mapping->host = inode; | ||
258 | mapping->flags = 0; | ||
259 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
260 | mapping->assoc_mapping = NULL; | ||
261 | mapping->backing_dev_info = &default_backing_dev_info; | ||
262 | inode->i_mapping = mapping; | ||
263 | inode->i_nlink = 1; | ||
264 | } | ||
265 | 253 | ||
266 | return inode; | 254 | return inode; |
267 | } | 255 | } |
@@ -277,7 +265,7 @@ struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino) | |||
277 | 265 | ||
278 | err = logfs_read_inode(inode); | 266 | err = logfs_read_inode(inode); |
279 | if (err) { | 267 | if (err) { |
280 | destroy_meta_inode(inode); | 268 | iput(inode); |
281 | return ERR_PTR(err); | 269 | return ERR_PTR(err); |
282 | } | 270 | } |
283 | logfs_inode_setops(inode); | 271 | logfs_inode_setops(inode); |
@@ -298,18 +286,8 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
298 | return ret; | 286 | return ret; |
299 | } | 287 | } |
300 | 288 | ||
301 | void destroy_meta_inode(struct inode *inode) | ||
302 | { | ||
303 | if (inode) { | ||
304 | if (inode->i_data.nrpages) | ||
305 | truncate_inode_pages(&inode->i_data, 0); | ||
306 | logfs_clear_inode(inode); | ||
307 | kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); | ||
308 | } | ||
309 | } | ||
310 | |||
311 | /* called with inode_lock held */ | 289 | /* called with inode_lock held */ |
312 | static void logfs_drop_inode(struct inode *inode) | 290 | static int logfs_drop_inode(struct inode *inode) |
313 | { | 291 | { |
314 | struct logfs_super *super = logfs_super(inode->i_sb); | 292 | struct logfs_super *super = logfs_super(inode->i_sb); |
315 | struct logfs_inode *li = logfs_inode(inode); | 293 | struct logfs_inode *li = logfs_inode(inode); |
@@ -317,7 +295,7 @@ static void logfs_drop_inode(struct inode *inode) | |||
317 | spin_lock(&logfs_inode_lock); | 295 | spin_lock(&logfs_inode_lock); |
318 | list_move(&li->li_freeing_list, &super->s_freeing_list); | 296 | list_move(&li->li_freeing_list, &super->s_freeing_list); |
319 | spin_unlock(&logfs_inode_lock); | 297 | spin_unlock(&logfs_inode_lock); |
320 | generic_drop_inode(inode); | 298 | return generic_drop_inode(inode); |
321 | } | 299 | } |
322 | 300 | ||
323 | static void logfs_set_ino_generation(struct super_block *sb, | 301 | static void logfs_set_ino_generation(struct super_block *sb, |
@@ -384,12 +362,21 @@ static int logfs_sync_fs(struct super_block *sb, int wait) | |||
384 | return 0; | 362 | return 0; |
385 | } | 363 | } |
386 | 364 | ||
365 | static void logfs_put_super(struct super_block *sb) | ||
366 | { | ||
367 | struct logfs_super *super = logfs_super(sb); | ||
368 | /* kill the meta-inodes */ | ||
369 | iput(super->s_master_inode); | ||
370 | iput(super->s_segfile_inode); | ||
371 | iput(super->s_mapping_inode); | ||
372 | } | ||
373 | |||
387 | const struct super_operations logfs_super_operations = { | 374 | const struct super_operations logfs_super_operations = { |
388 | .alloc_inode = logfs_alloc_inode, | 375 | .alloc_inode = logfs_alloc_inode, |
389 | .clear_inode = logfs_clear_inode, | ||
390 | .delete_inode = logfs_delete_inode, | ||
391 | .destroy_inode = logfs_destroy_inode, | 376 | .destroy_inode = logfs_destroy_inode, |
377 | .evict_inode = logfs_evict_inode, | ||
392 | .drop_inode = logfs_drop_inode, | 378 | .drop_inode = logfs_drop_inode, |
379 | .put_super = logfs_put_super, | ||
393 | .write_inode = logfs_write_inode, | 380 | .write_inode = logfs_write_inode, |
394 | .statfs = logfs_statfs, | 381 | .statfs = logfs_statfs, |
395 | .sync_fs = logfs_sync_fs, | 382 | .sync_fs = logfs_sync_fs, |
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 4b0e0616b357..f46ee8b0e135 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c | |||
@@ -889,8 +889,6 @@ void logfs_cleanup_journal(struct super_block *sb) | |||
889 | struct logfs_super *super = logfs_super(sb); | 889 | struct logfs_super *super = logfs_super(sb); |
890 | 890 | ||
891 | btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); | 891 | btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); |
892 | destroy_meta_inode(super->s_master_inode); | ||
893 | super->s_master_inode = NULL; | ||
894 | 892 | ||
895 | kfree(super->s_compressed_je); | 893 | kfree(super->s_compressed_je); |
896 | kfree(super->s_je); | 894 | kfree(super->s_je); |
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index c838c4d72111..5e3b72077951 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -525,13 +525,11 @@ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino); | |||
525 | struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); | 525 | struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); |
526 | int logfs_init_inode_cache(void); | 526 | int logfs_init_inode_cache(void); |
527 | void logfs_destroy_inode_cache(void); | 527 | void logfs_destroy_inode_cache(void); |
528 | void destroy_meta_inode(struct inode *inode); | ||
529 | void logfs_set_blocks(struct inode *inode, u64 no); | 528 | void logfs_set_blocks(struct inode *inode, u64 no); |
530 | /* these logically belong into inode.c but actually reside in readwrite.c */ | 529 | /* these logically belong into inode.c but actually reside in readwrite.c */ |
531 | int logfs_read_inode(struct inode *inode); | 530 | int logfs_read_inode(struct inode *inode); |
532 | int __logfs_write_inode(struct inode *inode, long flags); | 531 | int __logfs_write_inode(struct inode *inode, long flags); |
533 | void logfs_delete_inode(struct inode *inode); | 532 | void logfs_evict_inode(struct inode *inode); |
534 | void logfs_clear_inode(struct inode *inode); | ||
535 | 533 | ||
536 | /* journal.c */ | 534 | /* journal.c */ |
537 | void logfs_write_anchor(struct super_block *sb); | 535 | void logfs_write_anchor(struct super_block *sb); |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 0718d112a1a5..6127baf0e188 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -1972,31 +1972,6 @@ static struct page *inode_to_page(struct inode *inode) | |||
1972 | return page; | 1972 | return page; |
1973 | } | 1973 | } |
1974 | 1974 | ||
1975 | /* Cheaper version of write_inode. All changes are concealed in | ||
1976 | * aliases, which are moved back. No write to the medium happens. | ||
1977 | */ | ||
1978 | void logfs_clear_inode(struct inode *inode) | ||
1979 | { | ||
1980 | struct super_block *sb = inode->i_sb; | ||
1981 | struct logfs_inode *li = logfs_inode(inode); | ||
1982 | struct logfs_block *block = li->li_block; | ||
1983 | struct page *page; | ||
1984 | |||
1985 | /* Only deleted files may be dirty at this point */ | ||
1986 | BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink); | ||
1987 | if (!block) | ||
1988 | return; | ||
1989 | if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) { | ||
1990 | block->ops->free_block(inode->i_sb, block); | ||
1991 | return; | ||
1992 | } | ||
1993 | |||
1994 | BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); | ||
1995 | page = inode_to_page(inode); | ||
1996 | BUG_ON(!page); /* FIXME: Use emergency page */ | ||
1997 | logfs_put_write_page(page); | ||
1998 | } | ||
1999 | |||
2000 | static int do_write_inode(struct inode *inode) | 1975 | static int do_write_inode(struct inode *inode) |
2001 | { | 1976 | { |
2002 | struct super_block *sb = inode->i_sb; | 1977 | struct super_block *sb = inode->i_sb; |
@@ -2164,18 +2139,40 @@ static int do_delete_inode(struct inode *inode) | |||
2164 | * ZOMBIE inodes have already been deleted before and should remain dead, | 2139 | * ZOMBIE inodes have already been deleted before and should remain dead, |
2165 | * if it weren't for valid checking. No need to kill them again here. | 2140 | * if it weren't for valid checking. No need to kill them again here. |
2166 | */ | 2141 | */ |
2167 | void logfs_delete_inode(struct inode *inode) | 2142 | void logfs_evict_inode(struct inode *inode) |
2168 | { | 2143 | { |
2144 | struct super_block *sb = inode->i_sb; | ||
2169 | struct logfs_inode *li = logfs_inode(inode); | 2145 | struct logfs_inode *li = logfs_inode(inode); |
2146 | struct logfs_block *block = li->li_block; | ||
2147 | struct page *page; | ||
2170 | 2148 | ||
2171 | if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { | 2149 | if (!inode->i_nlink) { |
2172 | li->li_flags |= LOGFS_IF_ZOMBIE; | 2150 | if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { |
2173 | if (i_size_read(inode) > 0) | 2151 | li->li_flags |= LOGFS_IF_ZOMBIE; |
2174 | logfs_truncate(inode, 0); | 2152 | if (i_size_read(inode) > 0) |
2175 | do_delete_inode(inode); | 2153 | logfs_truncate(inode, 0); |
2154 | do_delete_inode(inode); | ||
2155 | } | ||
2176 | } | 2156 | } |
2177 | truncate_inode_pages(&inode->i_data, 0); | 2157 | truncate_inode_pages(&inode->i_data, 0); |
2178 | clear_inode(inode); | 2158 | end_writeback(inode); |
2159 | |||
2160 | /* Cheaper version of write_inode. All changes are concealed in | ||
2161 | * aliases, which are moved back. No write to the medium happens. | ||
2162 | */ | ||
2163 | /* Only deleted files may be dirty at this point */ | ||
2164 | BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink); | ||
2165 | if (!block) | ||
2166 | return; | ||
2167 | if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) { | ||
2168 | block->ops->free_block(inode->i_sb, block); | ||
2169 | return; | ||
2170 | } | ||
2171 | |||
2172 | BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS); | ||
2173 | page = inode_to_page(inode); | ||
2174 | BUG_ON(!page); /* FIXME: Use emergency page */ | ||
2175 | logfs_put_write_page(page); | ||
2179 | } | 2176 | } |
2180 | 2177 | ||
2181 | void btree_write_block(struct logfs_block *block) | 2178 | void btree_write_block(struct logfs_block *block) |
@@ -2272,7 +2269,6 @@ void logfs_cleanup_rw(struct super_block *sb) | |||
2272 | { | 2269 | { |
2273 | struct logfs_super *super = logfs_super(sb); | 2270 | struct logfs_super *super = logfs_super(sb); |
2274 | 2271 | ||
2275 | destroy_meta_inode(super->s_segfile_inode); | ||
2276 | logfs_mempool_destroy(super->s_block_pool); | 2272 | logfs_mempool_destroy(super->s_block_pool); |
2277 | logfs_mempool_destroy(super->s_shadow_pool); | 2273 | logfs_mempool_destroy(super->s_shadow_pool); |
2278 | } | 2274 | } |
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index a9657afb70ad..9d5187353255 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
@@ -929,5 +929,4 @@ void logfs_cleanup_areas(struct super_block *sb) | |||
929 | for_each_area(i) | 929 | for_each_area(i) |
930 | free_area(super->s_area[i]); | 930 | free_area(super->s_area[i]); |
931 | free_area(super->s_journal_area); | 931 | free_area(super->s_journal_area); |
932 | destroy_meta_inode(super->s_mapping_inode); | ||
933 | } | 932 | } |
diff --git a/fs/logfs/super.c b/fs/logfs/super.c index d651e10a1e9c..5336155c5d81 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c | |||
@@ -342,24 +342,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) | |||
342 | goto fail; | 342 | goto fail; |
343 | } | 343 | } |
344 | 344 | ||
345 | /* at that point we know that ->put_super() will be called */ | ||
345 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); | 346 | super->s_erase_page = alloc_pages(GFP_KERNEL, 0); |
346 | if (!super->s_erase_page) | 347 | if (!super->s_erase_page) |
347 | goto fail; | 348 | return -ENOMEM; |
348 | memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); | 349 | memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); |
349 | 350 | ||
350 | /* FIXME: check for read-only mounts */ | 351 | /* FIXME: check for read-only mounts */ |
351 | err = logfs_make_writeable(sb); | 352 | err = logfs_make_writeable(sb); |
352 | if (err) | 353 | if (err) { |
353 | goto fail1; | 354 | __free_page(super->s_erase_page); |
355 | return err; | ||
356 | } | ||
354 | 357 | ||
355 | log_super("LogFS: Finished mounting\n"); | 358 | log_super("LogFS: Finished mounting\n"); |
356 | simple_set_mnt(mnt, sb); | 359 | simple_set_mnt(mnt, sb); |
357 | return 0; | 360 | return 0; |
358 | 361 | ||
359 | fail1: | ||
360 | __free_page(super->s_erase_page); | ||
361 | fail: | 362 | fail: |
362 | iput(logfs_super(sb)->s_master_inode); | 363 | iput(super->s_master_inode); |
364 | iput(super->s_segfile_inode); | ||
365 | iput(super->s_mapping_inode); | ||
363 | return -EIO; | 366 | return -EIO; |
364 | } | 367 | } |
365 | 368 | ||
@@ -580,10 +583,14 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, | |||
580 | sb->s_flags |= MS_ACTIVE; | 583 | sb->s_flags |= MS_ACTIVE; |
581 | err = logfs_get_sb_final(sb, mnt); | 584 | err = logfs_get_sb_final(sb, mnt); |
582 | if (err) | 585 | if (err) |
583 | goto err1; | 586 | deactivate_locked_super(sb); |
584 | return 0; | 587 | return err; |
585 | 588 | ||
586 | err1: | 589 | err1: |
590 | /* no ->s_root, no ->put_super() */ | ||
591 | iput(super->s_master_inode); | ||
592 | iput(super->s_segfile_inode); | ||
593 | iput(super->s_mapping_inode); | ||
587 | deactivate_locked_super(sb); | 594 | deactivate_locked_super(sb); |
588 | return err; | 595 | return err; |
589 | err0: | 596 | err0: |
diff --git a/fs/mbcache.c b/fs/mbcache.c index e28f21b95344..cf4e6cdfd15b 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -79,15 +79,11 @@ EXPORT_SYMBOL(mb_cache_entry_find_next); | |||
79 | struct mb_cache { | 79 | struct mb_cache { |
80 | struct list_head c_cache_list; | 80 | struct list_head c_cache_list; |
81 | const char *c_name; | 81 | const char *c_name; |
82 | struct mb_cache_op c_op; | ||
83 | atomic_t c_entry_count; | 82 | atomic_t c_entry_count; |
84 | int c_bucket_bits; | 83 | int c_bucket_bits; |
85 | #ifndef MB_CACHE_INDEXES_COUNT | 84 | struct kmem_cache *c_entry_cache; |
86 | int c_indexes_count; | ||
87 | #endif | ||
88 | struct kmem_cache *c_entry_cache; | ||
89 | struct list_head *c_block_hash; | 85 | struct list_head *c_block_hash; |
90 | struct list_head *c_indexes_hash[0]; | 86 | struct list_head *c_index_hash; |
91 | }; | 87 | }; |
92 | 88 | ||
93 | 89 | ||
@@ -101,16 +97,6 @@ static LIST_HEAD(mb_cache_list); | |||
101 | static LIST_HEAD(mb_cache_lru_list); | 97 | static LIST_HEAD(mb_cache_lru_list); |
102 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 98 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
103 | 99 | ||
104 | static inline int | ||
105 | mb_cache_indexes(struct mb_cache *cache) | ||
106 | { | ||
107 | #ifdef MB_CACHE_INDEXES_COUNT | ||
108 | return MB_CACHE_INDEXES_COUNT; | ||
109 | #else | ||
110 | return cache->c_indexes_count; | ||
111 | #endif | ||
112 | } | ||
113 | |||
114 | /* | 100 | /* |
115 | * What the mbcache registers as to get shrunk dynamically. | 101 | * What the mbcache registers as to get shrunk dynamically. |
116 | */ | 102 | */ |
@@ -132,12 +118,9 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | |||
132 | static void | 118 | static void |
133 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) | 119 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) |
134 | { | 120 | { |
135 | int n; | ||
136 | |||
137 | if (__mb_cache_entry_is_hashed(ce)) { | 121 | if (__mb_cache_entry_is_hashed(ce)) { |
138 | list_del_init(&ce->e_block_list); | 122 | list_del_init(&ce->e_block_list); |
139 | for (n=0; n<mb_cache_indexes(ce->e_cache); n++) | 123 | list_del(&ce->e_index.o_list); |
140 | list_del(&ce->e_indexes[n].o_list); | ||
141 | } | 124 | } |
142 | } | 125 | } |
143 | 126 | ||
@@ -148,16 +131,8 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) | |||
148 | struct mb_cache *cache = ce->e_cache; | 131 | struct mb_cache *cache = ce->e_cache; |
149 | 132 | ||
150 | mb_assert(!(ce->e_used || ce->e_queued)); | 133 | mb_assert(!(ce->e_used || ce->e_queued)); |
151 | if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { | 134 | kmem_cache_free(cache->c_entry_cache, ce); |
152 | /* free failed -- put back on the lru list | 135 | atomic_dec(&cache->c_entry_count); |
153 | for freeing later. */ | ||
154 | spin_lock(&mb_cache_spinlock); | ||
155 | list_add(&ce->e_lru_list, &mb_cache_lru_list); | ||
156 | spin_unlock(&mb_cache_spinlock); | ||
157 | } else { | ||
158 | kmem_cache_free(cache->c_entry_cache, ce); | ||
159 | atomic_dec(&cache->c_entry_count); | ||
160 | } | ||
161 | } | 136 | } |
162 | 137 | ||
163 | 138 | ||
@@ -201,22 +176,12 @@ static int | |||
201 | mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 176 | mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
202 | { | 177 | { |
203 | LIST_HEAD(free_list); | 178 | LIST_HEAD(free_list); |
204 | struct list_head *l, *ltmp; | 179 | struct mb_cache *cache; |
180 | struct mb_cache_entry *entry, *tmp; | ||
205 | int count = 0; | 181 | int count = 0; |
206 | 182 | ||
207 | spin_lock(&mb_cache_spinlock); | ||
208 | list_for_each(l, &mb_cache_list) { | ||
209 | struct mb_cache *cache = | ||
210 | list_entry(l, struct mb_cache, c_cache_list); | ||
211 | mb_debug("cache %s (%d)", cache->c_name, | ||
212 | atomic_read(&cache->c_entry_count)); | ||
213 | count += atomic_read(&cache->c_entry_count); | ||
214 | } | ||
215 | mb_debug("trying to free %d entries", nr_to_scan); | 183 | mb_debug("trying to free %d entries", nr_to_scan); |
216 | if (nr_to_scan == 0) { | 184 | spin_lock(&mb_cache_spinlock); |
217 | spin_unlock(&mb_cache_spinlock); | ||
218 | goto out; | ||
219 | } | ||
220 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { | 185 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { |
221 | struct mb_cache_entry *ce = | 186 | struct mb_cache_entry *ce = |
222 | list_entry(mb_cache_lru_list.next, | 187 | list_entry(mb_cache_lru_list.next, |
@@ -224,12 +189,15 @@ mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
224 | list_move_tail(&ce->e_lru_list, &free_list); | 189 | list_move_tail(&ce->e_lru_list, &free_list); |
225 | __mb_cache_entry_unhash(ce); | 190 | __mb_cache_entry_unhash(ce); |
226 | } | 191 | } |
192 | list_for_each_entry(cache, &mb_cache_list, c_cache_list) { | ||
193 | mb_debug("cache %s (%d)", cache->c_name, | ||
194 | atomic_read(&cache->c_entry_count)); | ||
195 | count += atomic_read(&cache->c_entry_count); | ||
196 | } | ||
227 | spin_unlock(&mb_cache_spinlock); | 197 | spin_unlock(&mb_cache_spinlock); |
228 | list_for_each_safe(l, ltmp, &free_list) { | 198 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { |
229 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 199 | __mb_cache_entry_forget(entry, gfp_mask); |
230 | e_lru_list), gfp_mask); | ||
231 | } | 200 | } |
232 | out: | ||
233 | return (count / 100) * sysctl_vfs_cache_pressure; | 201 | return (count / 100) * sysctl_vfs_cache_pressure; |
234 | } | 202 | } |
235 | 203 | ||
@@ -243,72 +211,49 @@ out: | |||
243 | * memory was available. | 211 | * memory was available. |
244 | * | 212 | * |
245 | * @name: name of the cache (informal) | 213 | * @name: name of the cache (informal) |
246 | * @cache_op: contains the callback called when freeing a cache entry | ||
247 | * @entry_size: The size of a cache entry, including | ||
248 | * struct mb_cache_entry | ||
249 | * @indexes_count: number of additional indexes in the cache. Must equal | ||
250 | * MB_CACHE_INDEXES_COUNT if the number of indexes is | ||
251 | * hardwired. | ||
252 | * @bucket_bits: log2(number of hash buckets) | 214 | * @bucket_bits: log2(number of hash buckets) |
253 | */ | 215 | */ |
254 | struct mb_cache * | 216 | struct mb_cache * |
255 | mb_cache_create(const char *name, struct mb_cache_op *cache_op, | 217 | mb_cache_create(const char *name, int bucket_bits) |
256 | size_t entry_size, int indexes_count, int bucket_bits) | ||
257 | { | 218 | { |
258 | int m=0, n, bucket_count = 1 << bucket_bits; | 219 | int n, bucket_count = 1 << bucket_bits; |
259 | struct mb_cache *cache = NULL; | 220 | struct mb_cache *cache = NULL; |
260 | 221 | ||
261 | if(entry_size < sizeof(struct mb_cache_entry) + | 222 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); |
262 | indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0])) | ||
263 | return NULL; | ||
264 | |||
265 | cache = kmalloc(sizeof(struct mb_cache) + | ||
266 | indexes_count * sizeof(struct list_head), GFP_KERNEL); | ||
267 | if (!cache) | 223 | if (!cache) |
268 | goto fail; | 224 | return NULL; |
269 | cache->c_name = name; | 225 | cache->c_name = name; |
270 | cache->c_op.free = NULL; | ||
271 | if (cache_op) | ||
272 | cache->c_op.free = cache_op->free; | ||
273 | atomic_set(&cache->c_entry_count, 0); | 226 | atomic_set(&cache->c_entry_count, 0); |
274 | cache->c_bucket_bits = bucket_bits; | 227 | cache->c_bucket_bits = bucket_bits; |
275 | #ifdef MB_CACHE_INDEXES_COUNT | ||
276 | mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); | ||
277 | #else | ||
278 | cache->c_indexes_count = indexes_count; | ||
279 | #endif | ||
280 | cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), | 228 | cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), |
281 | GFP_KERNEL); | 229 | GFP_KERNEL); |
282 | if (!cache->c_block_hash) | 230 | if (!cache->c_block_hash) |
283 | goto fail; | 231 | goto fail; |
284 | for (n=0; n<bucket_count; n++) | 232 | for (n=0; n<bucket_count; n++) |
285 | INIT_LIST_HEAD(&cache->c_block_hash[n]); | 233 | INIT_LIST_HEAD(&cache->c_block_hash[n]); |
286 | for (m=0; m<indexes_count; m++) { | 234 | cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), |
287 | cache->c_indexes_hash[m] = kmalloc(bucket_count * | 235 | GFP_KERNEL); |
288 | sizeof(struct list_head), | 236 | if (!cache->c_index_hash) |
289 | GFP_KERNEL); | 237 | goto fail; |
290 | if (!cache->c_indexes_hash[m]) | 238 | for (n=0; n<bucket_count; n++) |
291 | goto fail; | 239 | INIT_LIST_HEAD(&cache->c_index_hash[n]); |
292 | for (n=0; n<bucket_count; n++) | 240 | cache->c_entry_cache = kmem_cache_create(name, |
293 | INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); | 241 | sizeof(struct mb_cache_entry), 0, |
294 | } | ||
295 | cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, | ||
296 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); | 242 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); |
297 | if (!cache->c_entry_cache) | 243 | if (!cache->c_entry_cache) |
298 | goto fail; | 244 | goto fail2; |
299 | 245 | ||
300 | spin_lock(&mb_cache_spinlock); | 246 | spin_lock(&mb_cache_spinlock); |
301 | list_add(&cache->c_cache_list, &mb_cache_list); | 247 | list_add(&cache->c_cache_list, &mb_cache_list); |
302 | spin_unlock(&mb_cache_spinlock); | 248 | spin_unlock(&mb_cache_spinlock); |
303 | return cache; | 249 | return cache; |
304 | 250 | ||
251 | fail2: | ||
252 | kfree(cache->c_index_hash); | ||
253 | |||
305 | fail: | 254 | fail: |
306 | if (cache) { | 255 | kfree(cache->c_block_hash); |
307 | while (--m >= 0) | 256 | kfree(cache); |
308 | kfree(cache->c_indexes_hash[m]); | ||
309 | kfree(cache->c_block_hash); | ||
310 | kfree(cache); | ||
311 | } | ||
312 | return NULL; | 257 | return NULL; |
313 | } | 258 | } |
314 | 259 | ||
@@ -357,7 +302,6 @@ mb_cache_destroy(struct mb_cache *cache) | |||
357 | { | 302 | { |
358 | LIST_HEAD(free_list); | 303 | LIST_HEAD(free_list); |
359 | struct list_head *l, *ltmp; | 304 | struct list_head *l, *ltmp; |
360 | int n; | ||
361 | 305 | ||
362 | spin_lock(&mb_cache_spinlock); | 306 | spin_lock(&mb_cache_spinlock); |
363 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 307 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { |
@@ -384,8 +328,7 @@ mb_cache_destroy(struct mb_cache *cache) | |||
384 | 328 | ||
385 | kmem_cache_destroy(cache->c_entry_cache); | 329 | kmem_cache_destroy(cache->c_entry_cache); |
386 | 330 | ||
387 | for (n=0; n < mb_cache_indexes(cache); n++) | 331 | kfree(cache->c_index_hash); |
388 | kfree(cache->c_indexes_hash[n]); | ||
389 | kfree(cache->c_block_hash); | 332 | kfree(cache->c_block_hash); |
390 | kfree(cache); | 333 | kfree(cache); |
391 | } | 334 | } |
@@ -429,17 +372,16 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | |||
429 | * | 372 | * |
430 | * @bdev: device the cache entry belongs to | 373 | * @bdev: device the cache entry belongs to |
431 | * @block: block number | 374 | * @block: block number |
432 | * @keys: array of additional keys. There must be indexes_count entries | 375 | * @key: lookup key |
433 | * in the array (as specified when creating the cache). | ||
434 | */ | 376 | */ |
435 | int | 377 | int |
436 | mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | 378 | mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, |
437 | sector_t block, unsigned int keys[]) | 379 | sector_t block, unsigned int key) |
438 | { | 380 | { |
439 | struct mb_cache *cache = ce->e_cache; | 381 | struct mb_cache *cache = ce->e_cache; |
440 | unsigned int bucket; | 382 | unsigned int bucket; |
441 | struct list_head *l; | 383 | struct list_head *l; |
442 | int error = -EBUSY, n; | 384 | int error = -EBUSY; |
443 | 385 | ||
444 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 386 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
445 | cache->c_bucket_bits); | 387 | cache->c_bucket_bits); |
@@ -454,12 +396,9 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
454 | ce->e_bdev = bdev; | 396 | ce->e_bdev = bdev; |
455 | ce->e_block = block; | 397 | ce->e_block = block; |
456 | list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); | 398 | list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); |
457 | for (n=0; n<mb_cache_indexes(cache); n++) { | 399 | ce->e_index.o_key = key; |
458 | ce->e_indexes[n].o_key = keys[n]; | 400 | bucket = hash_long(key, cache->c_bucket_bits); |
459 | bucket = hash_long(keys[n], cache->c_bucket_bits); | 401 | list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); |
460 | list_add(&ce->e_indexes[n].o_list, | ||
461 | &cache->c_indexes_hash[n][bucket]); | ||
462 | } | ||
463 | error = 0; | 402 | error = 0; |
464 | out: | 403 | out: |
465 | spin_unlock(&mb_cache_spinlock); | 404 | spin_unlock(&mb_cache_spinlock); |
@@ -555,13 +494,12 @@ cleanup: | |||
555 | 494 | ||
556 | static struct mb_cache_entry * | 495 | static struct mb_cache_entry * |
557 | __mb_cache_entry_find(struct list_head *l, struct list_head *head, | 496 | __mb_cache_entry_find(struct list_head *l, struct list_head *head, |
558 | int index, struct block_device *bdev, unsigned int key) | 497 | struct block_device *bdev, unsigned int key) |
559 | { | 498 | { |
560 | while (l != head) { | 499 | while (l != head) { |
561 | struct mb_cache_entry *ce = | 500 | struct mb_cache_entry *ce = |
562 | list_entry(l, struct mb_cache_entry, | 501 | list_entry(l, struct mb_cache_entry, e_index.o_list); |
563 | e_indexes[index].o_list); | 502 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { |
564 | if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) { | ||
565 | DEFINE_WAIT(wait); | 503 | DEFINE_WAIT(wait); |
566 | 504 | ||
567 | if (!list_empty(&ce->e_lru_list)) | 505 | if (!list_empty(&ce->e_lru_list)) |
@@ -603,23 +541,20 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head, | |||
603 | * returned cache entry is locked for shared access ("multiple readers"). | 541 | * returned cache entry is locked for shared access ("multiple readers"). |
604 | * | 542 | * |
605 | * @cache: the cache to search | 543 | * @cache: the cache to search |
606 | * @index: the number of the additonal index to search (0<=index<indexes_count) | ||
607 | * @bdev: the device the cache entry should belong to | 544 | * @bdev: the device the cache entry should belong to |
608 | * @key: the key in the index | 545 | * @key: the key in the index |
609 | */ | 546 | */ |
610 | struct mb_cache_entry * | 547 | struct mb_cache_entry * |
611 | mb_cache_entry_find_first(struct mb_cache *cache, int index, | 548 | mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, |
612 | struct block_device *bdev, unsigned int key) | 549 | unsigned int key) |
613 | { | 550 | { |
614 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 551 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
615 | struct list_head *l; | 552 | struct list_head *l; |
616 | struct mb_cache_entry *ce; | 553 | struct mb_cache_entry *ce; |
617 | 554 | ||
618 | mb_assert(index < mb_cache_indexes(cache)); | ||
619 | spin_lock(&mb_cache_spinlock); | 555 | spin_lock(&mb_cache_spinlock); |
620 | l = cache->c_indexes_hash[index][bucket].next; | 556 | l = cache->c_index_hash[bucket].next; |
621 | ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], | 557 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); |
622 | index, bdev, key); | ||
623 | spin_unlock(&mb_cache_spinlock); | 558 | spin_unlock(&mb_cache_spinlock); |
624 | return ce; | 559 | return ce; |
625 | } | 560 | } |
@@ -640,12 +575,11 @@ mb_cache_entry_find_first(struct mb_cache *cache, int index, | |||
640 | * } | 575 | * } |
641 | * | 576 | * |
642 | * @prev: The previous match | 577 | * @prev: The previous match |
643 | * @index: the number of the additonal index to search (0<=index<indexes_count) | ||
644 | * @bdev: the device the cache entry should belong to | 578 | * @bdev: the device the cache entry should belong to |
645 | * @key: the key in the index | 579 | * @key: the key in the index |
646 | */ | 580 | */ |
647 | struct mb_cache_entry * | 581 | struct mb_cache_entry * |
648 | mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, | 582 | mb_cache_entry_find_next(struct mb_cache_entry *prev, |
649 | struct block_device *bdev, unsigned int key) | 583 | struct block_device *bdev, unsigned int key) |
650 | { | 584 | { |
651 | struct mb_cache *cache = prev->e_cache; | 585 | struct mb_cache *cache = prev->e_cache; |
@@ -653,11 +587,9 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, | |||
653 | struct list_head *l; | 587 | struct list_head *l; |
654 | struct mb_cache_entry *ce; | 588 | struct mb_cache_entry *ce; |
655 | 589 | ||
656 | mb_assert(index < mb_cache_indexes(cache)); | ||
657 | spin_lock(&mb_cache_spinlock); | 590 | spin_lock(&mb_cache_spinlock); |
658 | l = prev->e_indexes[index].o_list.next; | 591 | l = prev->e_index.o_list.next; |
659 | ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], | 592 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); |
660 | index, bdev, key); | ||
661 | __mb_cache_entry_release_unlock(prev); | 593 | __mb_cache_entry_release_unlock(prev); |
662 | return ce; | 594 | return ce; |
663 | } | 595 | } |
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 482779fe4e7c..3f32bcb0d9bd 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c | |||
@@ -200,13 +200,13 @@ void minix_free_inode(struct inode * inode) | |||
200 | ino = inode->i_ino; | 200 | ino = inode->i_ino; |
201 | if (ino < 1 || ino > sbi->s_ninodes) { | 201 | if (ino < 1 || ino > sbi->s_ninodes) { |
202 | printk("minix_free_inode: inode 0 or nonexistent inode\n"); | 202 | printk("minix_free_inode: inode 0 or nonexistent inode\n"); |
203 | goto out; | 203 | return; |
204 | } | 204 | } |
205 | bit = ino & ((1<<k) - 1); | 205 | bit = ino & ((1<<k) - 1); |
206 | ino >>= k; | 206 | ino >>= k; |
207 | if (ino >= sbi->s_imap_blocks) { | 207 | if (ino >= sbi->s_imap_blocks) { |
208 | printk("minix_free_inode: nonexistent imap in superblock\n"); | 208 | printk("minix_free_inode: nonexistent imap in superblock\n"); |
209 | goto out; | 209 | return; |
210 | } | 210 | } |
211 | 211 | ||
212 | minix_clear_inode(inode); /* clear on-disk copy */ | 212 | minix_clear_inode(inode); /* clear on-disk copy */ |
@@ -217,8 +217,6 @@ void minix_free_inode(struct inode * inode) | |||
217 | printk("minix_free_inode: bit %lu already cleared\n", bit); | 217 | printk("minix_free_inode: bit %lu already cleared\n", bit); |
218 | spin_unlock(&bitmap_lock); | 218 | spin_unlock(&bitmap_lock); |
219 | mark_buffer_dirty(bh); | 219 | mark_buffer_dirty(bh); |
220 | out: | ||
221 | clear_inode(inode); /* clear in-memory copy */ | ||
222 | } | 220 | } |
223 | 221 | ||
224 | struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) | 222 | struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) |
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 1dbf921ca44b..085a9262c692 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
@@ -271,8 +271,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode) | |||
271 | 271 | ||
272 | got_it: | 272 | got_it: |
273 | pos = page_offset(page) + p - (char *)page_address(page); | 273 | pos = page_offset(page) + p - (char *)page_address(page); |
274 | err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, | 274 | err = minix_prepare_chunk(page, pos, sbi->s_dirsize); |
275 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
276 | if (err) | 275 | if (err) |
277 | goto out_unlock; | 276 | goto out_unlock; |
278 | memcpy (namx, name, namelen); | 277 | memcpy (namx, name, namelen); |
@@ -297,8 +296,7 @@ out_unlock: | |||
297 | 296 | ||
298 | int minix_delete_entry(struct minix_dir_entry *de, struct page *page) | 297 | int minix_delete_entry(struct minix_dir_entry *de, struct page *page) |
299 | { | 298 | { |
300 | struct address_space *mapping = page->mapping; | 299 | struct inode *inode = page->mapping->host; |
301 | struct inode *inode = (struct inode*)mapping->host; | ||
302 | char *kaddr = page_address(page); | 300 | char *kaddr = page_address(page); |
303 | loff_t pos = page_offset(page) + (char*)de - kaddr; | 301 | loff_t pos = page_offset(page) + (char*)de - kaddr; |
304 | struct minix_sb_info *sbi = minix_sb(inode->i_sb); | 302 | struct minix_sb_info *sbi = minix_sb(inode->i_sb); |
@@ -306,8 +304,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) | |||
306 | int err; | 304 | int err; |
307 | 305 | ||
308 | lock_page(page); | 306 | lock_page(page); |
309 | err = __minix_write_begin(NULL, mapping, pos, len, | 307 | err = minix_prepare_chunk(page, pos, len); |
310 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
311 | if (err == 0) { | 308 | if (err == 0) { |
312 | if (sbi->s_version == MINIX_V3) | 309 | if (sbi->s_version == MINIX_V3) |
313 | ((minix3_dirent *) de)->inode = 0; | 310 | ((minix3_dirent *) de)->inode = 0; |
@@ -325,16 +322,14 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page) | |||
325 | 322 | ||
326 | int minix_make_empty(struct inode *inode, struct inode *dir) | 323 | int minix_make_empty(struct inode *inode, struct inode *dir) |
327 | { | 324 | { |
328 | struct address_space *mapping = inode->i_mapping; | 325 | struct page *page = grab_cache_page(inode->i_mapping, 0); |
329 | struct page *page = grab_cache_page(mapping, 0); | ||
330 | struct minix_sb_info *sbi = minix_sb(inode->i_sb); | 326 | struct minix_sb_info *sbi = minix_sb(inode->i_sb); |
331 | char *kaddr; | 327 | char *kaddr; |
332 | int err; | 328 | int err; |
333 | 329 | ||
334 | if (!page) | 330 | if (!page) |
335 | return -ENOMEM; | 331 | return -ENOMEM; |
336 | err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize, | 332 | err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize); |
337 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
338 | if (err) { | 333 | if (err) { |
339 | unlock_page(page); | 334 | unlock_page(page); |
340 | goto fail; | 335 | goto fail; |
@@ -425,8 +420,7 @@ not_empty: | |||
425 | void minix_set_link(struct minix_dir_entry *de, struct page *page, | 420 | void minix_set_link(struct minix_dir_entry *de, struct page *page, |
426 | struct inode *inode) | 421 | struct inode *inode) |
427 | { | 422 | { |
428 | struct address_space *mapping = page->mapping; | 423 | struct inode *dir = page->mapping->host; |
429 | struct inode *dir = mapping->host; | ||
430 | struct minix_sb_info *sbi = minix_sb(dir->i_sb); | 424 | struct minix_sb_info *sbi = minix_sb(dir->i_sb); |
431 | loff_t pos = page_offset(page) + | 425 | loff_t pos = page_offset(page) + |
432 | (char *)de-(char*)page_address(page); | 426 | (char *)de-(char*)page_address(page); |
@@ -434,8 +428,7 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page, | |||
434 | 428 | ||
435 | lock_page(page); | 429 | lock_page(page); |
436 | 430 | ||
437 | err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, | 431 | err = minix_prepare_chunk(page, pos, sbi->s_dirsize); |
438 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
439 | if (err == 0) { | 432 | if (err == 0) { |
440 | if (sbi->s_version == MINIX_V3) | 433 | if (sbi->s_version == MINIX_V3) |
441 | ((minix3_dirent *) de)->inode = inode->i_ino; | 434 | ((minix3_dirent *) de)->inode = inode->i_ino; |
diff --git a/fs/minix/file.c b/fs/minix/file.c index d5320ff23faf..4493ce695ab8 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c | |||
@@ -23,7 +23,29 @@ const struct file_operations minix_file_operations = { | |||
23 | .splice_read = generic_file_splice_read, | 23 | .splice_read = generic_file_splice_read, |
24 | }; | 24 | }; |
25 | 25 | ||
26 | static int minix_setattr(struct dentry *dentry, struct iattr *attr) | ||
27 | { | ||
28 | struct inode *inode = dentry->d_inode; | ||
29 | int error; | ||
30 | |||
31 | error = inode_change_ok(inode, attr); | ||
32 | if (error) | ||
33 | return error; | ||
34 | |||
35 | if ((attr->ia_valid & ATTR_SIZE) && | ||
36 | attr->ia_size != i_size_read(inode)) { | ||
37 | error = vmtruncate(inode, attr->ia_size); | ||
38 | if (error) | ||
39 | return error; | ||
40 | } | ||
41 | |||
42 | setattr_copy(inode, attr); | ||
43 | mark_inode_dirty(inode); | ||
44 | return 0; | ||
45 | } | ||
46 | |||
26 | const struct inode_operations minix_file_inode_operations = { | 47 | const struct inode_operations minix_file_inode_operations = { |
27 | .truncate = minix_truncate, | 48 | .truncate = minix_truncate, |
49 | .setattr = minix_setattr, | ||
28 | .getattr = minix_getattr, | 50 | .getattr = minix_getattr, |
29 | }; | 51 | }; |
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 756f8c93780c..e39d6bf2e8fb 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -24,12 +24,17 @@ static int minix_write_inode(struct inode *inode, | |||
24 | static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); | 24 | static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); |
25 | static int minix_remount (struct super_block * sb, int * flags, char * data); | 25 | static int minix_remount (struct super_block * sb, int * flags, char * data); |
26 | 26 | ||
27 | static void minix_delete_inode(struct inode *inode) | 27 | static void minix_evict_inode(struct inode *inode) |
28 | { | 28 | { |
29 | truncate_inode_pages(&inode->i_data, 0); | 29 | truncate_inode_pages(&inode->i_data, 0); |
30 | inode->i_size = 0; | 30 | if (!inode->i_nlink) { |
31 | minix_truncate(inode); | 31 | inode->i_size = 0; |
32 | minix_free_inode(inode); | 32 | minix_truncate(inode); |
33 | } | ||
34 | invalidate_inode_buffers(inode); | ||
35 | end_writeback(inode); | ||
36 | if (!inode->i_nlink) | ||
37 | minix_free_inode(inode); | ||
33 | } | 38 | } |
34 | 39 | ||
35 | static void minix_put_super(struct super_block *sb) | 40 | static void minix_put_super(struct super_block *sb) |
@@ -96,7 +101,7 @@ static const struct super_operations minix_sops = { | |||
96 | .alloc_inode = minix_alloc_inode, | 101 | .alloc_inode = minix_alloc_inode, |
97 | .destroy_inode = minix_destroy_inode, | 102 | .destroy_inode = minix_destroy_inode, |
98 | .write_inode = minix_write_inode, | 103 | .write_inode = minix_write_inode, |
99 | .delete_inode = minix_delete_inode, | 104 | .evict_inode = minix_evict_inode, |
100 | .put_super = minix_put_super, | 105 | .put_super = minix_put_super, |
101 | .statfs = minix_statfs, | 106 | .statfs = minix_statfs, |
102 | .remount_fs = minix_remount, | 107 | .remount_fs = minix_remount, |
@@ -357,20 +362,26 @@ static int minix_readpage(struct file *file, struct page *page) | |||
357 | return block_read_full_page(page,minix_get_block); | 362 | return block_read_full_page(page,minix_get_block); |
358 | } | 363 | } |
359 | 364 | ||
360 | int __minix_write_begin(struct file *file, struct address_space *mapping, | 365 | int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len) |
361 | loff_t pos, unsigned len, unsigned flags, | ||
362 | struct page **pagep, void **fsdata) | ||
363 | { | 366 | { |
364 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 367 | return __block_write_begin(page, pos, len, minix_get_block); |
365 | minix_get_block); | ||
366 | } | 368 | } |
367 | 369 | ||
368 | static int minix_write_begin(struct file *file, struct address_space *mapping, | 370 | static int minix_write_begin(struct file *file, struct address_space *mapping, |
369 | loff_t pos, unsigned len, unsigned flags, | 371 | loff_t pos, unsigned len, unsigned flags, |
370 | struct page **pagep, void **fsdata) | 372 | struct page **pagep, void **fsdata) |
371 | { | 373 | { |
372 | *pagep = NULL; | 374 | int ret; |
373 | return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata); | 375 | |
376 | ret = block_write_begin(mapping, pos, len, flags, pagep, | ||
377 | minix_get_block); | ||
378 | if (unlikely(ret)) { | ||
379 | loff_t isize = mapping->host->i_size; | ||
380 | if (pos + len > isize) | ||
381 | vmtruncate(mapping->host, isize); | ||
382 | } | ||
383 | |||
384 | return ret; | ||
374 | } | 385 | } |
375 | 386 | ||
376 | static sector_t minix_bmap(struct address_space *mapping, sector_t block) | 387 | static sector_t minix_bmap(struct address_space *mapping, sector_t block) |
diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 111f34ee9e3b..407b1c84911e 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h | |||
@@ -53,9 +53,7 @@ extern int minix_new_block(struct inode * inode); | |||
53 | extern void minix_free_block(struct inode *inode, unsigned long block); | 53 | extern void minix_free_block(struct inode *inode, unsigned long block); |
54 | extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); | 54 | extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); |
55 | extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 55 | extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
56 | extern int __minix_write_begin(struct file *file, struct address_space *mapping, | 56 | extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); |
57 | loff_t pos, unsigned len, unsigned flags, | ||
58 | struct page **pagep, void **fsdata); | ||
59 | 57 | ||
60 | extern void V1_minix_truncate(struct inode *); | 58 | extern void V1_minix_truncate(struct inode *); |
61 | extern void V2_minix_truncate(struct inode *); | 59 | extern void V2_minix_truncate(struct inode *); |
diff --git a/fs/namei.c b/fs/namei.c index 868d0cb9d473..13ff4abdbdca 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -282,8 +282,7 @@ int inode_permission(struct inode *inode, int mask) | |||
282 | if (retval) | 282 | if (retval) |
283 | return retval; | 283 | return retval; |
284 | 284 | ||
285 | return security_inode_permission(inode, | 285 | return security_inode_permission(inode, mask); |
286 | mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); | ||
287 | } | 286 | } |
288 | 287 | ||
289 | /** | 288 | /** |
@@ -1484,8 +1483,7 @@ static int handle_truncate(struct path *path) | |||
1484 | */ | 1483 | */ |
1485 | error = locks_verify_locked(inode); | 1484 | error = locks_verify_locked(inode); |
1486 | if (!error) | 1485 | if (!error) |
1487 | error = security_path_truncate(path, 0, | 1486 | error = security_path_truncate(path); |
1488 | ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); | ||
1489 | if (!error) { | 1487 | if (!error) { |
1490 | error = do_truncate(path->dentry, 0, | 1488 | error = do_truncate(path->dentry, 0, |
1491 | ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, | 1489 | ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, |
@@ -2635,7 +2633,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2635 | { | 2633 | { |
2636 | int error; | 2634 | int error; |
2637 | int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); | 2635 | int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); |
2638 | const char *old_name; | 2636 | const unsigned char *old_name; |
2639 | 2637 | ||
2640 | if (old_dentry->d_inode == new_dentry->d_inode) | 2638 | if (old_dentry->d_inode == new_dentry->d_inode) |
2641 | return 0; | 2639 | return 0; |
diff --git a/fs/namespace.c b/fs/namespace.c index 88058de59c7c..66c4f7e781cb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/log2.h> | 29 | #include <linux/log2.h> |
30 | #include <linux/idr.h> | 30 | #include <linux/idr.h> |
31 | #include <linux/fs_struct.h> | 31 | #include <linux/fs_struct.h> |
32 | #include <linux/fsnotify.h> | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/unistd.h> | 34 | #include <asm/unistd.h> |
34 | #include "pnode.h" | 35 | #include "pnode.h" |
@@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char *name) | |||
150 | INIT_LIST_HEAD(&mnt->mnt_share); | 151 | INIT_LIST_HEAD(&mnt->mnt_share); |
151 | INIT_LIST_HEAD(&mnt->mnt_slave_list); | 152 | INIT_LIST_HEAD(&mnt->mnt_slave_list); |
152 | INIT_LIST_HEAD(&mnt->mnt_slave); | 153 | INIT_LIST_HEAD(&mnt->mnt_slave); |
154 | #ifdef CONFIG_FSNOTIFY | ||
155 | INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); | ||
156 | #endif | ||
153 | #ifdef CONFIG_SMP | 157 | #ifdef CONFIG_SMP |
154 | mnt->mnt_writers = alloc_percpu(int); | 158 | mnt->mnt_writers = alloc_percpu(int); |
155 | if (!mnt->mnt_writers) | 159 | if (!mnt->mnt_writers) |
@@ -610,6 +614,7 @@ static inline void __mntput(struct vfsmount *mnt) | |||
610 | * provides barriers, so count_mnt_writers() below is safe. AV | 614 | * provides barriers, so count_mnt_writers() below is safe. AV |
611 | */ | 615 | */ |
612 | WARN_ON(count_mnt_writers(mnt)); | 616 | WARN_ON(count_mnt_writers(mnt)); |
617 | fsnotify_vfsmount_delete(mnt); | ||
613 | dput(mnt->mnt_root); | 618 | dput(mnt->mnt_root); |
614 | free_vfsmnt(mnt); | 619 | free_vfsmnt(mnt); |
615 | deactivate_super(sb); | 620 | deactivate_super(sb); |
@@ -1984,7 +1989,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1984 | if (flags & MS_RDONLY) | 1989 | if (flags & MS_RDONLY) |
1985 | mnt_flags |= MNT_READONLY; | 1990 | mnt_flags |= MNT_READONLY; |
1986 | 1991 | ||
1987 | flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | | 1992 | flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | |
1988 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | | 1993 | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | |
1989 | MS_STRICTATIME); | 1994 | MS_STRICTATIME); |
1990 | 1995 | ||
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index fa3385154023..b4de38cf49f5 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -43,7 +43,7 @@ | |||
43 | #define NCP_DEFAULT_TIME_OUT 10 | 43 | #define NCP_DEFAULT_TIME_OUT 10 |
44 | #define NCP_DEFAULT_RETRY_COUNT 20 | 44 | #define NCP_DEFAULT_RETRY_COUNT 20 |
45 | 45 | ||
46 | static void ncp_delete_inode(struct inode *); | 46 | static void ncp_evict_inode(struct inode *); |
47 | static void ncp_put_super(struct super_block *); | 47 | static void ncp_put_super(struct super_block *); |
48 | static int ncp_statfs(struct dentry *, struct kstatfs *); | 48 | static int ncp_statfs(struct dentry *, struct kstatfs *); |
49 | static int ncp_show_options(struct seq_file *, struct vfsmount *); | 49 | static int ncp_show_options(struct seq_file *, struct vfsmount *); |
@@ -100,7 +100,7 @@ static const struct super_operations ncp_sops = | |||
100 | .alloc_inode = ncp_alloc_inode, | 100 | .alloc_inode = ncp_alloc_inode, |
101 | .destroy_inode = ncp_destroy_inode, | 101 | .destroy_inode = ncp_destroy_inode, |
102 | .drop_inode = generic_delete_inode, | 102 | .drop_inode = generic_delete_inode, |
103 | .delete_inode = ncp_delete_inode, | 103 | .evict_inode = ncp_evict_inode, |
104 | .put_super = ncp_put_super, | 104 | .put_super = ncp_put_super, |
105 | .statfs = ncp_statfs, | 105 | .statfs = ncp_statfs, |
106 | .remount_fs = ncp_remount, | 106 | .remount_fs = ncp_remount, |
@@ -282,19 +282,19 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) | |||
282 | } | 282 | } |
283 | 283 | ||
284 | static void | 284 | static void |
285 | ncp_delete_inode(struct inode *inode) | 285 | ncp_evict_inode(struct inode *inode) |
286 | { | 286 | { |
287 | truncate_inode_pages(&inode->i_data, 0); | 287 | truncate_inode_pages(&inode->i_data, 0); |
288 | end_writeback(inode); | ||
288 | 289 | ||
289 | if (S_ISDIR(inode->i_mode)) { | 290 | if (S_ISDIR(inode->i_mode)) { |
290 | DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); | 291 | DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino); |
291 | } | 292 | } |
292 | 293 | ||
293 | if (ncp_make_closed(inode) != 0) { | 294 | if (ncp_make_closed(inode) != 0) { |
294 | /* We can't do anything but complain. */ | 295 | /* We can't do anything but complain. */ |
295 | printk(KERN_ERR "ncp_delete_inode: could not close\n"); | 296 | printk(KERN_ERR "ncp_evict_inode: could not close\n"); |
296 | } | 297 | } |
297 | clear_inode(inode); | ||
298 | } | 298 | } |
299 | 299 | ||
300 | static void ncp_stop_tasks(struct ncp_server *server) { | 300 | static void ncp_stop_tasks(struct ncp_server *server) { |
@@ -728,8 +728,8 @@ out_fput: | |||
728 | out_bdi: | 728 | out_bdi: |
729 | /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: | 729 | /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: |
730 | * | 730 | * |
731 | * The previously used put_filp(ncp_filp); was bogous, since | 731 | * The previously used put_filp(ncp_filp); was bogus, since |
732 | * it doesn't proper unlocking. | 732 | * it doesn't perform proper unlocking. |
733 | */ | 733 | */ |
734 | fput(ncp_filp); | 734 | fput(ncp_filp); |
735 | out: | 735 | out: |
@@ -924,9 +924,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
924 | tmpattr.ia_valid = ATTR_MODE; | 924 | tmpattr.ia_valid = ATTR_MODE; |
925 | tmpattr.ia_mode = attr->ia_mode; | 925 | tmpattr.ia_mode = attr->ia_mode; |
926 | 926 | ||
927 | result = inode_setattr(inode, &tmpattr); | 927 | setattr_copy(inode, &tmpattr); |
928 | if (result) | 928 | mark_inode_dirty(inode); |
929 | goto out; | ||
930 | } | 929 | } |
931 | } | 930 | } |
932 | #endif | 931 | #endif |
@@ -954,15 +953,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
954 | result = ncp_make_closed(inode); | 953 | result = ncp_make_closed(inode); |
955 | if (result) | 954 | if (result) |
956 | goto out; | 955 | goto out; |
957 | { | 956 | |
958 | struct iattr tmpattr; | 957 | if (attr->ia_size != i_size_read(inode)) { |
959 | 958 | result = vmtruncate(inode, attr->ia_size); | |
960 | tmpattr.ia_valid = ATTR_SIZE; | ||
961 | tmpattr.ia_size = attr->ia_size; | ||
962 | |||
963 | result = inode_setattr(inode, &tmpattr); | ||
964 | if (result) | 959 | if (result) |
965 | goto out; | 960 | goto out; |
961 | mark_inode_dirty(inode); | ||
966 | } | 962 | } |
967 | } | 963 | } |
968 | if ((attr->ia_valid & ATTR_CTIME) != 0) { | 964 | if ((attr->ia_valid & ATTR_CTIME) != 0) { |
@@ -1002,8 +998,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
1002 | NCP_FINFO(inode)->nwattr = info.attributes; | 998 | NCP_FINFO(inode)->nwattr = info.attributes; |
1003 | #endif | 999 | #endif |
1004 | } | 1000 | } |
1005 | if (!result) | 1001 | if (result) |
1006 | result = inode_setattr(inode, attr); | 1002 | goto out; |
1003 | |||
1004 | setattr_copy(inode, attr); | ||
1005 | mark_inode_dirty(inode); | ||
1006 | |||
1007 | out: | 1007 | out: |
1008 | unlock_kernel(); | 1008 | unlock_kernel(); |
1009 | return result; | 1009 | return result; |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 023c03d02070..84a8cfc4e38e 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/smp_lock.h> | 20 | #include <linux/smp_lock.h> |
21 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/smp_lock.h> | ||
24 | 23 | ||
25 | #include <linux/ncp_fs.h> | 24 | #include <linux/ncp_fs.h> |
26 | 25 | ||
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index a43d07e7b924..cc1bb33b59b8 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -61,8 +61,8 @@ config NFS_V3_ACL | |||
61 | If unsure, say N. | 61 | If unsure, say N. |
62 | 62 | ||
63 | config NFS_V4 | 63 | config NFS_V4 |
64 | bool "NFS client support for NFS version 4 (EXPERIMENTAL)" | 64 | bool "NFS client support for NFS version 4" |
65 | depends on NFS_FS && EXPERIMENTAL | 65 | depends on NFS_FS |
66 | select RPCSEC_GSS_KRB5 | 66 | select RPCSEC_GSS_KRB5 |
67 | help | 67 | help |
68 | This option enables support for version 4 of the NFS protocol | 68 | This option enables support for version 4 of the NFS protocol |
@@ -72,16 +72,16 @@ config NFS_V4 | |||
72 | space programs which can be found in the Linux nfs-utils package, | 72 | space programs which can be found in the Linux nfs-utils package, |
73 | available from http://linux-nfs.org/. | 73 | available from http://linux-nfs.org/. |
74 | 74 | ||
75 | If unsure, say N. | 75 | If unsure, say Y. |
76 | 76 | ||
77 | config NFS_V4_1 | 77 | config NFS_V4_1 |
78 | bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)" | 78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
79 | depends on NFS_V4 && EXPERIMENTAL | 79 | depends on NFS_V4 && EXPERIMENTAL |
80 | help | 80 | help |
81 | This option enables support for minor version 1 of the NFSv4 protocol | 81 | This option enables support for minor version 1 of the NFSv4 protocol |
82 | (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. | 82 | (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. |
83 | 83 | ||
84 | Unless you're an NFS developer, say N. | 84 | If unsure, say N. |
85 | 85 | ||
86 | config ROOT_NFS | 86 | config ROOT_NFS |
87 | bool "Root file system on NFS" | 87 | bool "Root file system on NFS" |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index a08770a7e857..930d10fecdaf 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -37,8 +37,8 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * | |||
37 | if (inode == NULL) | 37 | if (inode == NULL) |
38 | goto out_putclient; | 38 | goto out_putclient; |
39 | nfsi = NFS_I(inode); | 39 | nfsi = NFS_I(inode); |
40 | down_read(&nfsi->rwsem); | 40 | rcu_read_lock(); |
41 | delegation = nfsi->delegation; | 41 | delegation = rcu_dereference(nfsi->delegation); |
42 | if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) | 42 | if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) |
43 | goto out_iput; | 43 | goto out_iput; |
44 | res->size = i_size_read(inode); | 44 | res->size = i_size_read(inode); |
@@ -53,7 +53,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * | |||
53 | args->bitmap[1]; | 53 | args->bitmap[1]; |
54 | res->status = 0; | 54 | res->status = 0; |
55 | out_iput: | 55 | out_iput: |
56 | up_read(&nfsi->rwsem); | 56 | rcu_read_unlock(); |
57 | iput(inode); | 57 | iput(inode); |
58 | out_putclient: | 58 | out_putclient: |
59 | nfs_put_client(clp); | 59 | nfs_put_client(clp); |
@@ -62,16 +62,6 @@ out: | |||
62 | return res->status; | 62 | return res->status; |
63 | } | 63 | } |
64 | 64 | ||
65 | static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *) | ||
66 | { | ||
67 | #if defined(CONFIG_NFS_V4_1) | ||
68 | if (clp->cl_minorversion > 0) | ||
69 | return nfs41_validate_delegation_stateid; | ||
70 | #endif | ||
71 | return nfs4_validate_delegation_stateid; | ||
72 | } | ||
73 | |||
74 | |||
75 | __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) | 65 | __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) |
76 | { | 66 | { |
77 | struct nfs_client *clp; | 67 | struct nfs_client *clp; |
@@ -92,8 +82,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) | |||
92 | inode = nfs_delegation_find_inode(clp, &args->fh); | 82 | inode = nfs_delegation_find_inode(clp, &args->fh); |
93 | if (inode != NULL) { | 83 | if (inode != NULL) { |
94 | /* Set up a helper thread to actually return the delegation */ | 84 | /* Set up a helper thread to actually return the delegation */ |
95 | switch (nfs_async_inode_return_delegation(inode, &args->stateid, | 85 | switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { |
96 | nfs_validate_delegation_stateid(clp))) { | ||
97 | case 0: | 86 | case 0: |
98 | res = 0; | 87 | res = 0; |
99 | break; | 88 | break; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d25b5257b7a1..4e7df2adb212 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -150,6 +150,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
150 | clp->cl_boot_time = CURRENT_TIME; | 150 | clp->cl_boot_time = CURRENT_TIME; |
151 | clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; | 151 | clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; |
152 | clp->cl_minorversion = cl_init->minorversion; | 152 | clp->cl_minorversion = cl_init->minorversion; |
153 | clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; | ||
153 | #endif | 154 | #endif |
154 | cred = rpc_lookup_machine_cred(); | 155 | cred = rpc_lookup_machine_cred(); |
155 | if (!IS_ERR(cred)) | 156 | if (!IS_ERR(cred)) |
@@ -178,7 +179,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp) | |||
178 | clp->cl_session = NULL; | 179 | clp->cl_session = NULL; |
179 | } | 180 | } |
180 | 181 | ||
181 | clp->cl_call_sync = _nfs4_call_sync; | 182 | clp->cl_mvops = nfs_v4_minor_ops[0]; |
182 | #endif /* CONFIG_NFS_V4_1 */ | 183 | #endif /* CONFIG_NFS_V4_1 */ |
183 | } | 184 | } |
184 | 185 | ||
@@ -188,7 +189,7 @@ static void nfs4_clear_client_minor_version(struct nfs_client *clp) | |||
188 | static void nfs4_destroy_callback(struct nfs_client *clp) | 189 | static void nfs4_destroy_callback(struct nfs_client *clp) |
189 | { | 190 | { |
190 | if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) | 191 | if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) |
191 | nfs_callback_down(clp->cl_minorversion); | 192 | nfs_callback_down(clp->cl_mvops->minor_version); |
192 | } | 193 | } |
193 | 194 | ||
194 | static void nfs4_shutdown_client(struct nfs_client *clp) | 195 | static void nfs4_shutdown_client(struct nfs_client *clp) |
@@ -1126,7 +1127,7 @@ static int nfs4_init_callback(struct nfs_client *clp) | |||
1126 | return error; | 1127 | return error; |
1127 | } | 1128 | } |
1128 | 1129 | ||
1129 | error = nfs_callback_up(clp->cl_minorversion, | 1130 | error = nfs_callback_up(clp->cl_mvops->minor_version, |
1130 | clp->cl_rpcclient->cl_xprt); | 1131 | clp->cl_rpcclient->cl_xprt); |
1131 | if (error < 0) { | 1132 | if (error < 0) { |
1132 | dprintk("%s: failed to start callback. Error = %d\n", | 1133 | dprintk("%s: failed to start callback. Error = %d\n", |
@@ -1143,10 +1144,8 @@ static int nfs4_init_callback(struct nfs_client *clp) | |||
1143 | */ | 1144 | */ |
1144 | static int nfs4_init_client_minor_version(struct nfs_client *clp) | 1145 | static int nfs4_init_client_minor_version(struct nfs_client *clp) |
1145 | { | 1146 | { |
1146 | clp->cl_call_sync = _nfs4_call_sync; | ||
1147 | |||
1148 | #if defined(CONFIG_NFS_V4_1) | 1147 | #if defined(CONFIG_NFS_V4_1) |
1149 | if (clp->cl_minorversion) { | 1148 | if (clp->cl_mvops->minor_version) { |
1150 | struct nfs4_session *session = NULL; | 1149 | struct nfs4_session *session = NULL; |
1151 | /* | 1150 | /* |
1152 | * Create the session and mark it expired. | 1151 | * Create the session and mark it expired. |
@@ -1158,7 +1157,13 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) | |||
1158 | return -ENOMEM; | 1157 | return -ENOMEM; |
1159 | 1158 | ||
1160 | clp->cl_session = session; | 1159 | clp->cl_session = session; |
1161 | clp->cl_call_sync = _nfs4_call_sync_session; | 1160 | /* |
1161 | * The create session reply races with the server back | ||
1162 | * channel probe. Mark the client NFS_CS_SESSION_INITING | ||
1163 | * so that the client back channel can find the | ||
1164 | * nfs_client struct | ||
1165 | */ | ||
1166 | clp->cl_cons_state = NFS_CS_SESSION_INITING; | ||
1162 | } | 1167 | } |
1163 | #endif /* CONFIG_NFS_V4_1 */ | 1168 | #endif /* CONFIG_NFS_V4_1 */ |
1164 | 1169 | ||
@@ -1454,7 +1459,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, | |||
1454 | data->authflavor, | 1459 | data->authflavor, |
1455 | parent_server->client->cl_xprt->prot, | 1460 | parent_server->client->cl_xprt->prot, |
1456 | parent_server->client->cl_timeout, | 1461 | parent_server->client->cl_timeout, |
1457 | parent_client->cl_minorversion); | 1462 | parent_client->cl_mvops->minor_version); |
1458 | if (error < 0) | 1463 | if (error < 0) |
1459 | goto error; | 1464 | goto error; |
1460 | 1465 | ||
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 301634543974..b9c3c43cea1d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -268,14 +268,6 @@ out: | |||
268 | return status; | 268 | return status; |
269 | } | 269 | } |
270 | 270 | ||
271 | /* Sync all data to disk upon delegation return */ | ||
272 | static void nfs_msync_inode(struct inode *inode) | ||
273 | { | ||
274 | filemap_fdatawrite(inode->i_mapping); | ||
275 | nfs_wb_all(inode); | ||
276 | filemap_fdatawait(inode->i_mapping); | ||
277 | } | ||
278 | |||
279 | /* | 271 | /* |
280 | * Basic procedure for returning a delegation to the server | 272 | * Basic procedure for returning a delegation to the server |
281 | */ | 273 | */ |
@@ -367,7 +359,7 @@ int nfs_inode_return_delegation(struct inode *inode) | |||
367 | delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); | 359 | delegation = nfs_detach_delegation_locked(nfsi, NULL, clp); |
368 | spin_unlock(&clp->cl_lock); | 360 | spin_unlock(&clp->cl_lock); |
369 | if (delegation != NULL) { | 361 | if (delegation != NULL) { |
370 | nfs_msync_inode(inode); | 362 | nfs_wb_all(inode); |
371 | err = __nfs_inode_return_delegation(inode, delegation, 1); | 363 | err = __nfs_inode_return_delegation(inode, delegation, 1); |
372 | } | 364 | } |
373 | } | 365 | } |
@@ -471,9 +463,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) | |||
471 | /* | 463 | /* |
472 | * Asynchronous delegation recall! | 464 | * Asynchronous delegation recall! |
473 | */ | 465 | */ |
474 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, | 466 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) |
475 | int (*validate_stateid)(struct nfs_delegation *delegation, | ||
476 | const nfs4_stateid *stateid)) | ||
477 | { | 467 | { |
478 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 468 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; |
479 | struct nfs_delegation *delegation; | 469 | struct nfs_delegation *delegation; |
@@ -481,7 +471,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *s | |||
481 | rcu_read_lock(); | 471 | rcu_read_lock(); |
482 | delegation = rcu_dereference(NFS_I(inode)->delegation); | 472 | delegation = rcu_dereference(NFS_I(inode)->delegation); |
483 | 473 | ||
484 | if (!validate_stateid(delegation, stateid)) { | 474 | if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { |
485 | rcu_read_unlock(); | 475 | rcu_read_unlock(); |
486 | return -ENOENT; | 476 | return -ENOENT; |
487 | } | 477 | } |
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 69e7b8140122..2026304bda19 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h | |||
@@ -34,9 +34,7 @@ enum { | |||
34 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 34 | int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
35 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); | 35 | void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); |
36 | int nfs_inode_return_delegation(struct inode *inode); | 36 | int nfs_inode_return_delegation(struct inode *inode); |
37 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, | 37 | int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); |
38 | int (*validate_stateid)(struct nfs_delegation *delegation, | ||
39 | const nfs4_stateid *stateid)); | ||
40 | void nfs_inode_return_delegation_noreclaim(struct inode *inode); | 38 | void nfs_inode_return_delegation_noreclaim(struct inode *inode); |
41 | 39 | ||
42 | struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); | 40 | struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e60416d3f818..29539ceeb745 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1652,16 +1652,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1652 | } | 1652 | } |
1653 | } | 1653 | } |
1654 | 1654 | ||
1655 | /* | ||
1656 | * ... prune child dentries and writebacks if needed. | ||
1657 | */ | ||
1658 | if (atomic_read(&old_dentry->d_count) > 1) { | ||
1659 | if (S_ISREG(old_inode->i_mode)) | ||
1660 | nfs_wb_all(old_inode); | ||
1661 | shrink_dcache_parent(old_dentry); | ||
1662 | } | ||
1663 | nfs_inode_return_delegation(old_inode); | 1655 | nfs_inode_return_delegation(old_inode); |
1664 | |||
1665 | if (new_inode != NULL) | 1656 | if (new_inode != NULL) |
1666 | nfs_inode_return_delegation(new_inode); | 1657 | nfs_inode_return_delegation(new_inode); |
1667 | 1658 | ||
@@ -1953,7 +1944,7 @@ int nfs_permission(struct inode *inode, int mask) | |||
1953 | if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) | 1944 | if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) |
1954 | goto out; | 1945 | goto out; |
1955 | /* Is this sys_access() ? */ | 1946 | /* Is this sys_access() ? */ |
1956 | if (mask & MAY_ACCESS) | 1947 | if (mask & (MAY_ACCESS | MAY_CHDIR)) |
1957 | goto force_lookup; | 1948 | goto force_lookup; |
1958 | 1949 | ||
1959 | switch (inode->i_mode & S_IFMT) { | 1950 | switch (inode->i_mode & S_IFMT) { |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ad4cd31d6050..064a80961677 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -69,6 +69,7 @@ struct nfs_direct_req { | |||
69 | 69 | ||
70 | /* I/O parameters */ | 70 | /* I/O parameters */ |
71 | struct nfs_open_context *ctx; /* file open context info */ | 71 | struct nfs_open_context *ctx; /* file open context info */ |
72 | struct nfs_lock_context *l_ctx; /* Lock context info */ | ||
72 | struct kiocb * iocb; /* controlling i/o request */ | 73 | struct kiocb * iocb; /* controlling i/o request */ |
73 | struct inode * inode; /* target file of i/o */ | 74 | struct inode * inode; /* target file of i/o */ |
74 | 75 | ||
@@ -160,6 +161,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | |||
160 | INIT_LIST_HEAD(&dreq->rewrite_list); | 161 | INIT_LIST_HEAD(&dreq->rewrite_list); |
161 | dreq->iocb = NULL; | 162 | dreq->iocb = NULL; |
162 | dreq->ctx = NULL; | 163 | dreq->ctx = NULL; |
164 | dreq->l_ctx = NULL; | ||
163 | spin_lock_init(&dreq->lock); | 165 | spin_lock_init(&dreq->lock); |
164 | atomic_set(&dreq->io_count, 0); | 166 | atomic_set(&dreq->io_count, 0); |
165 | dreq->count = 0; | 167 | dreq->count = 0; |
@@ -173,6 +175,8 @@ static void nfs_direct_req_free(struct kref *kref) | |||
173 | { | 175 | { |
174 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); | 176 | struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); |
175 | 177 | ||
178 | if (dreq->l_ctx != NULL) | ||
179 | nfs_put_lock_context(dreq->l_ctx); | ||
176 | if (dreq->ctx != NULL) | 180 | if (dreq->ctx != NULL) |
177 | put_nfs_open_context(dreq->ctx); | 181 | put_nfs_open_context(dreq->ctx); |
178 | kmem_cache_free(nfs_direct_cachep, dreq); | 182 | kmem_cache_free(nfs_direct_cachep, dreq); |
@@ -336,6 +340,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
336 | data->cred = msg.rpc_cred; | 340 | data->cred = msg.rpc_cred; |
337 | data->args.fh = NFS_FH(inode); | 341 | data->args.fh = NFS_FH(inode); |
338 | data->args.context = ctx; | 342 | data->args.context = ctx; |
343 | data->args.lock_context = dreq->l_ctx; | ||
339 | data->args.offset = pos; | 344 | data->args.offset = pos; |
340 | data->args.pgbase = pgbase; | 345 | data->args.pgbase = pgbase; |
341 | data->args.pages = data->pagevec; | 346 | data->args.pages = data->pagevec; |
@@ -416,24 +421,28 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
416 | static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, | 421 | static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, |
417 | unsigned long nr_segs, loff_t pos) | 422 | unsigned long nr_segs, loff_t pos) |
418 | { | 423 | { |
419 | ssize_t result = 0; | 424 | ssize_t result = -ENOMEM; |
420 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 425 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
421 | struct nfs_direct_req *dreq; | 426 | struct nfs_direct_req *dreq; |
422 | 427 | ||
423 | dreq = nfs_direct_req_alloc(); | 428 | dreq = nfs_direct_req_alloc(); |
424 | if (!dreq) | 429 | if (dreq == NULL) |
425 | return -ENOMEM; | 430 | goto out; |
426 | 431 | ||
427 | dreq->inode = inode; | 432 | dreq->inode = inode; |
428 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 433 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
434 | dreq->l_ctx = nfs_get_lock_context(dreq->ctx); | ||
435 | if (dreq->l_ctx == NULL) | ||
436 | goto out_release; | ||
429 | if (!is_sync_kiocb(iocb)) | 437 | if (!is_sync_kiocb(iocb)) |
430 | dreq->iocb = iocb; | 438 | dreq->iocb = iocb; |
431 | 439 | ||
432 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); | 440 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); |
433 | if (!result) | 441 | if (!result) |
434 | result = nfs_direct_wait(dreq); | 442 | result = nfs_direct_wait(dreq); |
443 | out_release: | ||
435 | nfs_direct_req_release(dreq); | 444 | nfs_direct_req_release(dreq); |
436 | 445 | out: | |
437 | return result; | 446 | return result; |
438 | } | 447 | } |
439 | 448 | ||
@@ -574,6 +583,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | |||
574 | data->args.offset = 0; | 583 | data->args.offset = 0; |
575 | data->args.count = 0; | 584 | data->args.count = 0; |
576 | data->args.context = dreq->ctx; | 585 | data->args.context = dreq->ctx; |
586 | data->args.lock_context = dreq->l_ctx; | ||
577 | data->res.count = 0; | 587 | data->res.count = 0; |
578 | data->res.fattr = &data->fattr; | 588 | data->res.fattr = &data->fattr; |
579 | data->res.verf = &data->verf; | 589 | data->res.verf = &data->verf; |
@@ -761,6 +771,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, | |||
761 | data->cred = msg.rpc_cred; | 771 | data->cred = msg.rpc_cred; |
762 | data->args.fh = NFS_FH(inode); | 772 | data->args.fh = NFS_FH(inode); |
763 | data->args.context = ctx; | 773 | data->args.context = ctx; |
774 | data->args.lock_context = dreq->l_ctx; | ||
764 | data->args.offset = pos; | 775 | data->args.offset = pos; |
765 | data->args.pgbase = pgbase; | 776 | data->args.pgbase = pgbase; |
766 | data->args.pages = data->pagevec; | 777 | data->args.pages = data->pagevec; |
@@ -845,7 +856,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
845 | unsigned long nr_segs, loff_t pos, | 856 | unsigned long nr_segs, loff_t pos, |
846 | size_t count) | 857 | size_t count) |
847 | { | 858 | { |
848 | ssize_t result = 0; | 859 | ssize_t result = -ENOMEM; |
849 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 860 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
850 | struct nfs_direct_req *dreq; | 861 | struct nfs_direct_req *dreq; |
851 | size_t wsize = NFS_SERVER(inode)->wsize; | 862 | size_t wsize = NFS_SERVER(inode)->wsize; |
@@ -853,7 +864,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
853 | 864 | ||
854 | dreq = nfs_direct_req_alloc(); | 865 | dreq = nfs_direct_req_alloc(); |
855 | if (!dreq) | 866 | if (!dreq) |
856 | return -ENOMEM; | 867 | goto out; |
857 | nfs_alloc_commit_data(dreq); | 868 | nfs_alloc_commit_data(dreq); |
858 | 869 | ||
859 | if (dreq->commit_data == NULL || count < wsize) | 870 | if (dreq->commit_data == NULL || count < wsize) |
@@ -861,14 +872,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
861 | 872 | ||
862 | dreq->inode = inode; | 873 | dreq->inode = inode; |
863 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); | 874 | dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); |
875 | dreq->l_ctx = nfs_get_lock_context(dreq->ctx); | ||
876 | if (dreq->l_ctx != NULL) | ||
877 | goto out_release; | ||
864 | if (!is_sync_kiocb(iocb)) | 878 | if (!is_sync_kiocb(iocb)) |
865 | dreq->iocb = iocb; | 879 | dreq->iocb = iocb; |
866 | 880 | ||
867 | result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); | 881 | result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); |
868 | if (!result) | 882 | if (!result) |
869 | result = nfs_direct_wait(dreq); | 883 | result = nfs_direct_wait(dreq); |
884 | out_release: | ||
870 | nfs_direct_req_release(dreq); | 885 | nfs_direct_req_release(dreq); |
871 | 886 | out: | |
872 | return result; | 887 | return result; |
873 | } | 888 | } |
874 | 889 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f036153d9f50..2d141a74ae82 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -203,37 +203,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
203 | } | 203 | } |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * Helper for nfs_file_flush() and nfs_file_fsync() | ||
207 | * | ||
208 | * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to | ||
209 | * disk, but it retrieves and clears ctx->error after synching, despite | ||
210 | * the two being set at the same time in nfs_context_set_write_error(). | ||
211 | * This is because the former is used to notify the _next_ call to | ||
212 | * nfs_file_write() that a write error occured, and hence cause it to | ||
213 | * fall back to doing a synchronous write. | ||
214 | */ | ||
215 | static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) | ||
216 | { | ||
217 | int have_error, status; | ||
218 | int ret = 0; | ||
219 | |||
220 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | ||
221 | status = nfs_wb_all(inode); | ||
222 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | ||
223 | if (have_error) | ||
224 | ret = xchg(&ctx->error, 0); | ||
225 | if (!ret) | ||
226 | ret = status; | ||
227 | return ret; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * Flush all dirty pages, and check for write errors. | 206 | * Flush all dirty pages, and check for write errors. |
232 | */ | 207 | */ |
233 | static int | 208 | static int |
234 | nfs_file_flush(struct file *file, fl_owner_t id) | 209 | nfs_file_flush(struct file *file, fl_owner_t id) |
235 | { | 210 | { |
236 | struct nfs_open_context *ctx = nfs_file_open_context(file); | ||
237 | struct dentry *dentry = file->f_path.dentry; | 211 | struct dentry *dentry = file->f_path.dentry; |
238 | struct inode *inode = dentry->d_inode; | 212 | struct inode *inode = dentry->d_inode; |
239 | 213 | ||
@@ -246,7 +220,7 @@ nfs_file_flush(struct file *file, fl_owner_t id) | |||
246 | return 0; | 220 | return 0; |
247 | 221 | ||
248 | /* Flush writes to the server and return any errors */ | 222 | /* Flush writes to the server and return any errors */ |
249 | return nfs_do_fsync(ctx, inode); | 223 | return vfs_fsync(file, 0); |
250 | } | 224 | } |
251 | 225 | ||
252 | static ssize_t | 226 | static ssize_t |
@@ -321,6 +295,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
321 | * Flush any dirty pages for this process, and check for write errors. | 295 | * Flush any dirty pages for this process, and check for write errors. |
322 | * The return status from this call provides a reliable indication of | 296 | * The return status from this call provides a reliable indication of |
323 | * whether any write errors occurred for this process. | 297 | * whether any write errors occurred for this process. |
298 | * | ||
299 | * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to | ||
300 | * disk, but it retrieves and clears ctx->error after synching, despite | ||
301 | * the two being set at the same time in nfs_context_set_write_error(). | ||
302 | * This is because the former is used to notify the _next_ call to | ||
303 | * nfs_file_write() that a write error occured, and hence cause it to | ||
304 | * fall back to doing a synchronous write. | ||
324 | */ | 305 | */ |
325 | static int | 306 | static int |
326 | nfs_file_fsync(struct file *file, int datasync) | 307 | nfs_file_fsync(struct file *file, int datasync) |
@@ -328,13 +309,23 @@ nfs_file_fsync(struct file *file, int datasync) | |||
328 | struct dentry *dentry = file->f_path.dentry; | 309 | struct dentry *dentry = file->f_path.dentry; |
329 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 310 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
330 | struct inode *inode = dentry->d_inode; | 311 | struct inode *inode = dentry->d_inode; |
312 | int have_error, status; | ||
313 | int ret = 0; | ||
314 | |||
331 | 315 | ||
332 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", | 316 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", |
333 | dentry->d_parent->d_name.name, dentry->d_name.name, | 317 | dentry->d_parent->d_name.name, dentry->d_name.name, |
334 | datasync); | 318 | datasync); |
335 | 319 | ||
336 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); | 320 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
337 | return nfs_do_fsync(ctx, inode); | 321 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
322 | status = nfs_commit_inode(inode, FLUSH_SYNC); | ||
323 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | ||
324 | if (have_error) | ||
325 | ret = xchg(&ctx->error, 0); | ||
326 | if (!ret) | ||
327 | ret = status; | ||
328 | return ret; | ||
338 | } | 329 | } |
339 | 330 | ||
340 | /* | 331 | /* |
@@ -648,7 +639,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
648 | 639 | ||
649 | /* Return error values for O_DSYNC and IS_SYNC() */ | 640 | /* Return error values for O_DSYNC and IS_SYNC() */ |
650 | if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { | 641 | if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { |
651 | int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); | 642 | int err = vfs_fsync(iocb->ki_filp, 0); |
652 | if (err < 0) | 643 | if (err < 0) |
653 | result = err; | 644 | result = err; |
654 | } | 645 | } |
@@ -684,7 +675,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, | |||
684 | written = ret; | 675 | written = ret; |
685 | 676 | ||
686 | if (ret >= 0 && nfs_need_sync_write(filp, inode)) { | 677 | if (ret >= 0 && nfs_need_sync_write(filp, inode)) { |
687 | int err = nfs_do_fsync(nfs_file_open_context(filp), inode); | 678 | int err = vfs_fsync(filp, 0); |
688 | if (err < 0) | 679 | if (err < 0) |
689 | ret = err; | 680 | ret = err; |
690 | } | 681 | } |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 099b3518feea..7d2d6c72aa78 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -98,7 +98,7 @@ u64 nfs_compat_user_ino64(u64 fileid) | |||
98 | return ino; | 98 | return ino; |
99 | } | 99 | } |
100 | 100 | ||
101 | void nfs_clear_inode(struct inode *inode) | 101 | static void nfs_clear_inode(struct inode *inode) |
102 | { | 102 | { |
103 | /* | 103 | /* |
104 | * The following should never happen... | 104 | * The following should never happen... |
@@ -110,6 +110,13 @@ void nfs_clear_inode(struct inode *inode) | |||
110 | nfs_fscache_release_inode_cookie(inode); | 110 | nfs_fscache_release_inode_cookie(inode); |
111 | } | 111 | } |
112 | 112 | ||
113 | void nfs_evict_inode(struct inode *inode) | ||
114 | { | ||
115 | truncate_inode_pages(&inode->i_data, 0); | ||
116 | end_writeback(inode); | ||
117 | nfs_clear_inode(inode); | ||
118 | } | ||
119 | |||
113 | /** | 120 | /** |
114 | * nfs_sync_mapping - helper to flush all mmapped dirty data to disk | 121 | * nfs_sync_mapping - helper to flush all mmapped dirty data to disk |
115 | */ | 122 | */ |
@@ -413,10 +420,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
413 | return 0; | 420 | return 0; |
414 | 421 | ||
415 | /* Write all dirty data */ | 422 | /* Write all dirty data */ |
416 | if (S_ISREG(inode->i_mode)) { | 423 | if (S_ISREG(inode->i_mode)) |
417 | filemap_write_and_wait(inode->i_mapping); | ||
418 | nfs_wb_all(inode); | 424 | nfs_wb_all(inode); |
419 | } | ||
420 | 425 | ||
421 | fattr = nfs_alloc_fattr(); | 426 | fattr = nfs_alloc_fattr(); |
422 | if (fattr == NULL) | 427 | if (fattr == NULL) |
@@ -530,6 +535,68 @@ out: | |||
530 | return err; | 535 | return err; |
531 | } | 536 | } |
532 | 537 | ||
538 | static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) | ||
539 | { | ||
540 | atomic_set(&l_ctx->count, 1); | ||
541 | l_ctx->lockowner = current->files; | ||
542 | l_ctx->pid = current->tgid; | ||
543 | INIT_LIST_HEAD(&l_ctx->list); | ||
544 | } | ||
545 | |||
546 | static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) | ||
547 | { | ||
548 | struct nfs_lock_context *pos; | ||
549 | |||
550 | list_for_each_entry(pos, &ctx->lock_context.list, list) { | ||
551 | if (pos->lockowner != current->files) | ||
552 | continue; | ||
553 | if (pos->pid != current->tgid) | ||
554 | continue; | ||
555 | atomic_inc(&pos->count); | ||
556 | return pos; | ||
557 | } | ||
558 | return NULL; | ||
559 | } | ||
560 | |||
561 | struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) | ||
562 | { | ||
563 | struct nfs_lock_context *res, *new = NULL; | ||
564 | struct inode *inode = ctx->path.dentry->d_inode; | ||
565 | |||
566 | spin_lock(&inode->i_lock); | ||
567 | res = __nfs_find_lock_context(ctx); | ||
568 | if (res == NULL) { | ||
569 | spin_unlock(&inode->i_lock); | ||
570 | new = kmalloc(sizeof(*new), GFP_KERNEL); | ||
571 | if (new == NULL) | ||
572 | return NULL; | ||
573 | nfs_init_lock_context(new); | ||
574 | spin_lock(&inode->i_lock); | ||
575 | res = __nfs_find_lock_context(ctx); | ||
576 | if (res == NULL) { | ||
577 | list_add_tail(&new->list, &ctx->lock_context.list); | ||
578 | new->open_context = ctx; | ||
579 | res = new; | ||
580 | new = NULL; | ||
581 | } | ||
582 | } | ||
583 | spin_unlock(&inode->i_lock); | ||
584 | kfree(new); | ||
585 | return res; | ||
586 | } | ||
587 | |||
588 | void nfs_put_lock_context(struct nfs_lock_context *l_ctx) | ||
589 | { | ||
590 | struct nfs_open_context *ctx = l_ctx->open_context; | ||
591 | struct inode *inode = ctx->path.dentry->d_inode; | ||
592 | |||
593 | if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) | ||
594 | return; | ||
595 | list_del(&l_ctx->list); | ||
596 | spin_unlock(&inode->i_lock); | ||
597 | kfree(l_ctx); | ||
598 | } | ||
599 | |||
533 | /** | 600 | /** |
534 | * nfs_close_context - Common close_context() routine NFSv2/v3 | 601 | * nfs_close_context - Common close_context() routine NFSv2/v3 |
535 | * @ctx: pointer to context | 602 | * @ctx: pointer to context |
@@ -566,11 +633,11 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct | |||
566 | path_get(&ctx->path); | 633 | path_get(&ctx->path); |
567 | ctx->cred = get_rpccred(cred); | 634 | ctx->cred = get_rpccred(cred); |
568 | ctx->state = NULL; | 635 | ctx->state = NULL; |
569 | ctx->lockowner = current->files; | ||
570 | ctx->flags = 0; | 636 | ctx->flags = 0; |
571 | ctx->error = 0; | 637 | ctx->error = 0; |
572 | ctx->dir_cookie = 0; | 638 | ctx->dir_cookie = 0; |
573 | atomic_set(&ctx->count, 1); | 639 | nfs_init_lock_context(&ctx->lock_context); |
640 | ctx->lock_context.open_context = ctx; | ||
574 | } | 641 | } |
575 | return ctx; | 642 | return ctx; |
576 | } | 643 | } |
@@ -578,7 +645,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct | |||
578 | struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) | 645 | struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) |
579 | { | 646 | { |
580 | if (ctx != NULL) | 647 | if (ctx != NULL) |
581 | atomic_inc(&ctx->count); | 648 | atomic_inc(&ctx->lock_context.count); |
582 | return ctx; | 649 | return ctx; |
583 | } | 650 | } |
584 | 651 | ||
@@ -586,7 +653,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) | |||
586 | { | 653 | { |
587 | struct inode *inode = ctx->path.dentry->d_inode; | 654 | struct inode *inode = ctx->path.dentry->d_inode; |
588 | 655 | ||
589 | if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) | 656 | if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) |
590 | return; | 657 | return; |
591 | list_del(&ctx->list); | 658 | list_del(&ctx->list); |
592 | spin_unlock(&inode->i_lock); | 659 | spin_unlock(&inode->i_lock); |
@@ -1338,8 +1405,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1338 | * to open() calls that passed nfs_atomic_lookup, but failed to call | 1405 | * to open() calls that passed nfs_atomic_lookup, but failed to call |
1339 | * nfs_open(). | 1406 | * nfs_open(). |
1340 | */ | 1407 | */ |
1341 | void nfs4_clear_inode(struct inode *inode) | 1408 | void nfs4_evict_inode(struct inode *inode) |
1342 | { | 1409 | { |
1410 | truncate_inode_pages(&inode->i_data, 0); | ||
1411 | end_writeback(inode); | ||
1343 | /* If we are holding a delegation, return it! */ | 1412 | /* If we are holding a delegation, return it! */ |
1344 | nfs_inode_return_delegation_noreclaim(inode); | 1413 | nfs_inode_return_delegation_noreclaim(inode); |
1345 | /* First call standard NFS clear_inode() code */ | 1414 | /* First call standard NFS clear_inode() code */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e70f44b9b3f4..c961bc92c107 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -213,9 +213,9 @@ extern struct workqueue_struct *nfsiod_workqueue; | |||
213 | extern struct inode *nfs_alloc_inode(struct super_block *sb); | 213 | extern struct inode *nfs_alloc_inode(struct super_block *sb); |
214 | extern void nfs_destroy_inode(struct inode *); | 214 | extern void nfs_destroy_inode(struct inode *); |
215 | extern int nfs_write_inode(struct inode *, struct writeback_control *); | 215 | extern int nfs_write_inode(struct inode *, struct writeback_control *); |
216 | extern void nfs_clear_inode(struct inode *); | 216 | extern void nfs_evict_inode(struct inode *); |
217 | #ifdef CONFIG_NFS_V4 | 217 | #ifdef CONFIG_NFS_V4 |
218 | extern void nfs4_clear_inode(struct inode *); | 218 | extern void nfs4_evict_inode(struct inode *); |
219 | #endif | 219 | #endif |
220 | void nfs_zap_acl_cache(struct inode *inode); | 220 | void nfs_zap_acl_cache(struct inode *inode); |
221 | extern int nfs_wait_bit_killable(void *word); | 221 | extern int nfs_wait_bit_killable(void *word); |
@@ -370,10 +370,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
370 | * Helper for restarting RPC calls in the possible presence of NFSv4.1 | 370 | * Helper for restarting RPC calls in the possible presence of NFSv4.1 |
371 | * sessions. | 371 | * sessions. |
372 | */ | 372 | */ |
373 | static inline void nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) | 373 | static inline int nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) |
374 | { | 374 | { |
375 | if (nfs4_has_session(clp)) | 375 | if (nfs4_has_session(clp)) |
376 | rpc_restart_call_prepare(task); | 376 | return rpc_restart_call_prepare(task); |
377 | else | 377 | return rpc_restart_call(task); |
378 | rpc_restart_call(task); | ||
379 | } | 378 | } |
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 81cf14257916..db8846a0e82e 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -233,7 +233,7 @@ nfs_xdr_removeargs(struct rpc_rqst *req, __be32 *p, const struct nfs_removeargs | |||
233 | static int | 233 | static int |
234 | nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) | 234 | nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) |
235 | { | 235 | { |
236 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 236 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
237 | unsigned int replen; | 237 | unsigned int replen; |
238 | u32 offset = (u32)args->offset; | 238 | u32 offset = (u32)args->offset; |
239 | u32 count = args->count; | 239 | u32 count = args->count; |
@@ -393,8 +393,7 @@ nfs_xdr_symlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_symlinkargs *arg | |||
393 | static int | 393 | static int |
394 | nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) | 394 | nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args) |
395 | { | 395 | { |
396 | struct rpc_task *task = req->rq_task; | 396 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
397 | struct rpc_auth *auth = task->tk_msg.rpc_cred->cr_auth; | ||
398 | unsigned int replen; | 397 | unsigned int replen; |
399 | u32 count = args->count; | 398 | u32 count = args->count; |
400 | 399 | ||
@@ -575,7 +574,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) | |||
575 | static int | 574 | static int |
576 | nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) | 575 | nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args) |
577 | { | 576 | { |
578 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 577 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
579 | unsigned int replen; | 578 | unsigned int replen; |
580 | 579 | ||
581 | p = xdr_encode_fhandle(p, args->fh); | 580 | p = xdr_encode_fhandle(p, args->fh); |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 75dcfc7da365..9769704f8ce6 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -330,7 +330,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg | |||
330 | static int | 330 | static int |
331 | nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) | 331 | nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) |
332 | { | 332 | { |
333 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 333 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
334 | unsigned int replen; | 334 | unsigned int replen; |
335 | u32 count = args->count; | 335 | u32 count = args->count; |
336 | 336 | ||
@@ -471,7 +471,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args) | |||
471 | static int | 471 | static int |
472 | nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) | 472 | nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args) |
473 | { | 473 | { |
474 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 474 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
475 | unsigned int replen; | 475 | unsigned int replen; |
476 | u32 count = args->count; | 476 | u32 count = args->count; |
477 | 477 | ||
@@ -675,7 +675,7 @@ static int | |||
675 | nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, | 675 | nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p, |
676 | struct nfs3_getaclargs *args) | 676 | struct nfs3_getaclargs *args) |
677 | { | 677 | { |
678 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 678 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
679 | unsigned int replen; | 679 | unsigned int replen; |
680 | 680 | ||
681 | p = xdr_encode_fhandle(p, args->fh); | 681 | p = xdr_encode_fhandle(p, args->fh); |
@@ -802,7 +802,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) | |||
802 | static int | 802 | static int |
803 | nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) | 803 | nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args) |
804 | { | 804 | { |
805 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 805 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
806 | unsigned int replen; | 806 | unsigned int replen; |
807 | 807 | ||
808 | p = xdr_encode_fhandle(p, args->fh); | 808 | p = xdr_encode_fhandle(p, args->fh); |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c538c6106e16..311e15cc8af0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -45,10 +45,29 @@ enum nfs4_client_state { | |||
45 | NFS4CLNT_RECLAIM_NOGRACE, | 45 | NFS4CLNT_RECLAIM_NOGRACE, |
46 | NFS4CLNT_DELEGRETURN, | 46 | NFS4CLNT_DELEGRETURN, |
47 | NFS4CLNT_SESSION_RESET, | 47 | NFS4CLNT_SESSION_RESET, |
48 | NFS4CLNT_SESSION_DRAINING, | ||
49 | NFS4CLNT_RECALL_SLOT, | 48 | NFS4CLNT_RECALL_SLOT, |
50 | }; | 49 | }; |
51 | 50 | ||
51 | enum nfs4_session_state { | ||
52 | NFS4_SESSION_INITING, | ||
53 | NFS4_SESSION_DRAINING, | ||
54 | }; | ||
55 | |||
56 | struct nfs4_minor_version_ops { | ||
57 | u32 minor_version; | ||
58 | |||
59 | int (*call_sync)(struct nfs_server *server, | ||
60 | struct rpc_message *msg, | ||
61 | struct nfs4_sequence_args *args, | ||
62 | struct nfs4_sequence_res *res, | ||
63 | int cache_reply); | ||
64 | int (*validate_stateid)(struct nfs_delegation *, | ||
65 | const nfs4_stateid *); | ||
66 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; | ||
67 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; | ||
68 | const struct nfs4_state_maintenance_ops *state_renewal_ops; | ||
69 | }; | ||
70 | |||
52 | /* | 71 | /* |
53 | * struct rpc_sequence ensures that RPC calls are sent in the exact | 72 | * struct rpc_sequence ensures that RPC calls are sent in the exact |
54 | * order that they appear on the list. | 73 | * order that they appear on the list. |
@@ -89,7 +108,6 @@ struct nfs_unique_id { | |||
89 | */ | 108 | */ |
90 | struct nfs4_state_owner { | 109 | struct nfs4_state_owner { |
91 | struct nfs_unique_id so_owner_id; | 110 | struct nfs_unique_id so_owner_id; |
92 | struct nfs_client *so_client; | ||
93 | struct nfs_server *so_server; | 111 | struct nfs_server *so_server; |
94 | struct rb_node so_client_node; | 112 | struct rb_node so_client_node; |
95 | 113 | ||
@@ -99,7 +117,6 @@ struct nfs4_state_owner { | |||
99 | atomic_t so_count; | 117 | atomic_t so_count; |
100 | unsigned long so_flags; | 118 | unsigned long so_flags; |
101 | struct list_head so_states; | 119 | struct list_head so_states; |
102 | struct list_head so_delegations; | ||
103 | struct nfs_seqid_counter so_seqid; | 120 | struct nfs_seqid_counter so_seqid; |
104 | struct rpc_sequence so_sequence; | 121 | struct rpc_sequence so_sequence; |
105 | }; | 122 | }; |
@@ -125,10 +142,20 @@ enum { | |||
125 | * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) | 142 | * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) |
126 | */ | 143 | */ |
127 | 144 | ||
145 | struct nfs4_lock_owner { | ||
146 | unsigned int lo_type; | ||
147 | #define NFS4_ANY_LOCK_TYPE (0U) | ||
148 | #define NFS4_FLOCK_LOCK_TYPE (1U << 0) | ||
149 | #define NFS4_POSIX_LOCK_TYPE (1U << 1) | ||
150 | union { | ||
151 | fl_owner_t posix_owner; | ||
152 | pid_t flock_owner; | ||
153 | } lo_u; | ||
154 | }; | ||
155 | |||
128 | struct nfs4_lock_state { | 156 | struct nfs4_lock_state { |
129 | struct list_head ls_locks; /* Other lock stateids */ | 157 | struct list_head ls_locks; /* Other lock stateids */ |
130 | struct nfs4_state * ls_state; /* Pointer to open state */ | 158 | struct nfs4_state * ls_state; /* Pointer to open state */ |
131 | fl_owner_t ls_owner; /* POSIX lock owner */ | ||
132 | #define NFS_LOCK_INITIALIZED 1 | 159 | #define NFS_LOCK_INITIALIZED 1 |
133 | int ls_flags; | 160 | int ls_flags; |
134 | struct nfs_seqid_counter ls_seqid; | 161 | struct nfs_seqid_counter ls_seqid; |
@@ -136,6 +163,7 @@ struct nfs4_lock_state { | |||
136 | struct nfs_unique_id ls_id; | 163 | struct nfs_unique_id ls_id; |
137 | nfs4_stateid ls_stateid; | 164 | nfs4_stateid ls_stateid; |
138 | atomic_t ls_count; | 165 | atomic_t ls_count; |
166 | struct nfs4_lock_owner ls_owner; | ||
139 | }; | 167 | }; |
140 | 168 | ||
141 | /* bits for nfs4_state->flags */ | 169 | /* bits for nfs4_state->flags */ |
@@ -219,11 +247,15 @@ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nam | |||
219 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); | 247 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); |
220 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | 248 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, |
221 | struct nfs4_fs_locations *fs_locations, struct page *page); | 249 | struct nfs4_fs_locations *fs_locations, struct page *page); |
250 | extern void nfs4_release_lockowner(const struct nfs4_lock_state *); | ||
222 | 251 | ||
223 | extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[]; | ||
224 | extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[]; | ||
225 | #if defined(CONFIG_NFS_V4_1) | 252 | #if defined(CONFIG_NFS_V4_1) |
226 | extern int nfs4_setup_sequence(struct nfs_client *clp, | 253 | static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) |
254 | { | ||
255 | return server->nfs_client->cl_session; | ||
256 | } | ||
257 | |||
258 | extern int nfs4_setup_sequence(const struct nfs_server *server, | ||
227 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, | 259 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, |
228 | int cache_reply, struct rpc_task *task); | 260 | int cache_reply, struct rpc_task *task); |
229 | extern void nfs4_destroy_session(struct nfs4_session *session); | 261 | extern void nfs4_destroy_session(struct nfs4_session *session); |
@@ -234,7 +266,12 @@ extern int nfs4_init_session(struct nfs_server *server); | |||
234 | extern int nfs4_proc_get_lease_time(struct nfs_client *clp, | 266 | extern int nfs4_proc_get_lease_time(struct nfs_client *clp, |
235 | struct nfs_fsinfo *fsinfo); | 267 | struct nfs_fsinfo *fsinfo); |
236 | #else /* CONFIG_NFS_v4_1 */ | 268 | #else /* CONFIG_NFS_v4_1 */ |
237 | static inline int nfs4_setup_sequence(struct nfs_client *clp, | 269 | static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) |
270 | { | ||
271 | return NULL; | ||
272 | } | ||
273 | |||
274 | static inline int nfs4_setup_sequence(const struct nfs_server *server, | ||
238 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, | 275 | struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, |
239 | int cache_reply, struct rpc_task *task) | 276 | int cache_reply, struct rpc_task *task) |
240 | { | 277 | { |
@@ -247,7 +284,7 @@ static inline int nfs4_init_session(struct nfs_server *server) | |||
247 | } | 284 | } |
248 | #endif /* CONFIG_NFS_V4_1 */ | 285 | #endif /* CONFIG_NFS_V4_1 */ |
249 | 286 | ||
250 | extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; | 287 | extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; |
251 | 288 | ||
252 | extern const u32 nfs4_fattr_bitmap[2]; | 289 | extern const u32 nfs4_fattr_bitmap[2]; |
253 | extern const u32 nfs4_statfs_bitmap[2]; | 290 | extern const u32 nfs4_statfs_bitmap[2]; |
@@ -284,7 +321,7 @@ extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) | |||
284 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); | 321 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); |
285 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | 322 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); |
286 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 323 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
287 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); | 324 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); |
288 | 325 | ||
289 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); | 326 | extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); |
290 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); | 327 | extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 70015dd60a98..7ffbb98ddec3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -303,15 +303,19 @@ do_state_recovery: | |||
303 | } | 303 | } |
304 | 304 | ||
305 | 305 | ||
306 | static void renew_lease(const struct nfs_server *server, unsigned long timestamp) | 306 | static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) |
307 | { | 307 | { |
308 | struct nfs_client *clp = server->nfs_client; | ||
309 | spin_lock(&clp->cl_lock); | 308 | spin_lock(&clp->cl_lock); |
310 | if (time_before(clp->cl_last_renewal,timestamp)) | 309 | if (time_before(clp->cl_last_renewal,timestamp)) |
311 | clp->cl_last_renewal = timestamp; | 310 | clp->cl_last_renewal = timestamp; |
312 | spin_unlock(&clp->cl_lock); | 311 | spin_unlock(&clp->cl_lock); |
313 | } | 312 | } |
314 | 313 | ||
314 | static void renew_lease(const struct nfs_server *server, unsigned long timestamp) | ||
315 | { | ||
316 | do_renew_lease(server->nfs_client, timestamp); | ||
317 | } | ||
318 | |||
315 | #if defined(CONFIG_NFS_V4_1) | 319 | #if defined(CONFIG_NFS_V4_1) |
316 | 320 | ||
317 | /* | 321 | /* |
@@ -356,7 +360,7 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses) | |||
356 | { | 360 | { |
357 | struct rpc_task *task; | 361 | struct rpc_task *task; |
358 | 362 | ||
359 | if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) { | 363 | if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { |
360 | task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); | 364 | task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); |
361 | if (task) | 365 | if (task) |
362 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); | 366 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); |
@@ -370,12 +374,11 @@ static void nfs41_check_drain_session_complete(struct nfs4_session *ses) | |||
370 | complete(&ses->complete); | 374 | complete(&ses->complete); |
371 | } | 375 | } |
372 | 376 | ||
373 | static void nfs41_sequence_free_slot(const struct nfs_client *clp, | 377 | static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) |
374 | struct nfs4_sequence_res *res) | ||
375 | { | 378 | { |
376 | struct nfs4_slot_table *tbl; | 379 | struct nfs4_slot_table *tbl; |
377 | 380 | ||
378 | tbl = &clp->cl_session->fc_slot_table; | 381 | tbl = &res->sr_session->fc_slot_table; |
379 | if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { | 382 | if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { |
380 | /* just wake up the next guy waiting since | 383 | /* just wake up the next guy waiting since |
381 | * we may have not consumed a slot after all */ | 384 | * we may have not consumed a slot after all */ |
@@ -385,18 +388,17 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp, | |||
385 | 388 | ||
386 | spin_lock(&tbl->slot_tbl_lock); | 389 | spin_lock(&tbl->slot_tbl_lock); |
387 | nfs4_free_slot(tbl, res->sr_slotid); | 390 | nfs4_free_slot(tbl, res->sr_slotid); |
388 | nfs41_check_drain_session_complete(clp->cl_session); | 391 | nfs41_check_drain_session_complete(res->sr_session); |
389 | spin_unlock(&tbl->slot_tbl_lock); | 392 | spin_unlock(&tbl->slot_tbl_lock); |
390 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; | 393 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; |
391 | } | 394 | } |
392 | 395 | ||
393 | static void nfs41_sequence_done(struct nfs_client *clp, | 396 | static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) |
394 | struct nfs4_sequence_res *res, | ||
395 | int rpc_status) | ||
396 | { | 397 | { |
397 | unsigned long timestamp; | 398 | unsigned long timestamp; |
398 | struct nfs4_slot_table *tbl; | 399 | struct nfs4_slot_table *tbl; |
399 | struct nfs4_slot *slot; | 400 | struct nfs4_slot *slot; |
401 | struct nfs_client *clp; | ||
400 | 402 | ||
401 | /* | 403 | /* |
402 | * sr_status remains 1 if an RPC level error occurred. The server | 404 | * sr_status remains 1 if an RPC level error occurred. The server |
@@ -411,25 +413,51 @@ static void nfs41_sequence_done(struct nfs_client *clp, | |||
411 | if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) | 413 | if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) |
412 | goto out; | 414 | goto out; |
413 | 415 | ||
416 | tbl = &res->sr_session->fc_slot_table; | ||
417 | slot = tbl->slots + res->sr_slotid; | ||
418 | |||
414 | /* Check the SEQUENCE operation status */ | 419 | /* Check the SEQUENCE operation status */ |
415 | if (res->sr_status == 0) { | 420 | switch (res->sr_status) { |
416 | tbl = &clp->cl_session->fc_slot_table; | 421 | case 0: |
417 | slot = tbl->slots + res->sr_slotid; | ||
418 | /* Update the slot's sequence and clientid lease timer */ | 422 | /* Update the slot's sequence and clientid lease timer */ |
419 | ++slot->seq_nr; | 423 | ++slot->seq_nr; |
420 | timestamp = res->sr_renewal_time; | 424 | timestamp = res->sr_renewal_time; |
421 | spin_lock(&clp->cl_lock); | 425 | clp = res->sr_session->clp; |
422 | if (time_before(clp->cl_last_renewal, timestamp)) | 426 | do_renew_lease(clp, timestamp); |
423 | clp->cl_last_renewal = timestamp; | ||
424 | spin_unlock(&clp->cl_lock); | ||
425 | /* Check sequence flags */ | 427 | /* Check sequence flags */ |
426 | if (atomic_read(&clp->cl_count) > 1) | 428 | if (atomic_read(&clp->cl_count) > 1) |
427 | nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); | 429 | nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); |
430 | break; | ||
431 | case -NFS4ERR_DELAY: | ||
432 | /* The server detected a resend of the RPC call and | ||
433 | * returned NFS4ERR_DELAY as per Section 2.10.6.2 | ||
434 | * of RFC5661. | ||
435 | */ | ||
436 | dprintk("%s: slot=%d seq=%d: Operation in progress\n", | ||
437 | __func__, res->sr_slotid, slot->seq_nr); | ||
438 | goto out_retry; | ||
439 | default: | ||
440 | /* Just update the slot sequence no. */ | ||
441 | ++slot->seq_nr; | ||
428 | } | 442 | } |
429 | out: | 443 | out: |
430 | /* The session may be reset by one of the error handlers. */ | 444 | /* The session may be reset by one of the error handlers. */ |
431 | dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); | 445 | dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); |
432 | nfs41_sequence_free_slot(clp, res); | 446 | nfs41_sequence_free_slot(res); |
447 | return 1; | ||
448 | out_retry: | ||
449 | if (!rpc_restart_call(task)) | ||
450 | goto out; | ||
451 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | ||
452 | return 0; | ||
453 | } | ||
454 | |||
455 | static int nfs4_sequence_done(struct rpc_task *task, | ||
456 | struct nfs4_sequence_res *res) | ||
457 | { | ||
458 | if (res->sr_session == NULL) | ||
459 | return 1; | ||
460 | return nfs41_sequence_done(task, res); | ||
433 | } | 461 | } |
434 | 462 | ||
435 | /* | 463 | /* |
@@ -480,12 +508,11 @@ static int nfs41_setup_sequence(struct nfs4_session *session, | |||
480 | if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) | 508 | if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) |
481 | return 0; | 509 | return 0; |
482 | 510 | ||
483 | memset(res, 0, sizeof(*res)); | ||
484 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; | 511 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; |
485 | tbl = &session->fc_slot_table; | 512 | tbl = &session->fc_slot_table; |
486 | 513 | ||
487 | spin_lock(&tbl->slot_tbl_lock); | 514 | spin_lock(&tbl->slot_tbl_lock); |
488 | if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) && | 515 | if (test_bit(NFS4_SESSION_DRAINING, &session->session_state) && |
489 | !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { | 516 | !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) { |
490 | /* | 517 | /* |
491 | * The state manager will wait until the slot table is empty. | 518 | * The state manager will wait until the slot table is empty. |
@@ -525,6 +552,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, | |||
525 | res->sr_session = session; | 552 | res->sr_session = session; |
526 | res->sr_slotid = slotid; | 553 | res->sr_slotid = slotid; |
527 | res->sr_renewal_time = jiffies; | 554 | res->sr_renewal_time = jiffies; |
555 | res->sr_status_flags = 0; | ||
528 | /* | 556 | /* |
529 | * sr_status is only set in decode_sequence, and so will remain | 557 | * sr_status is only set in decode_sequence, and so will remain |
530 | * set to 1 if an rpc level failure occurs. | 558 | * set to 1 if an rpc level failure occurs. |
@@ -533,33 +561,33 @@ static int nfs41_setup_sequence(struct nfs4_session *session, | |||
533 | return 0; | 561 | return 0; |
534 | } | 562 | } |
535 | 563 | ||
536 | int nfs4_setup_sequence(struct nfs_client *clp, | 564 | int nfs4_setup_sequence(const struct nfs_server *server, |
537 | struct nfs4_sequence_args *args, | 565 | struct nfs4_sequence_args *args, |
538 | struct nfs4_sequence_res *res, | 566 | struct nfs4_sequence_res *res, |
539 | int cache_reply, | 567 | int cache_reply, |
540 | struct rpc_task *task) | 568 | struct rpc_task *task) |
541 | { | 569 | { |
570 | struct nfs4_session *session = nfs4_get_session(server); | ||
542 | int ret = 0; | 571 | int ret = 0; |
543 | 572 | ||
573 | if (session == NULL) { | ||
574 | args->sa_session = NULL; | ||
575 | res->sr_session = NULL; | ||
576 | goto out; | ||
577 | } | ||
578 | |||
544 | dprintk("--> %s clp %p session %p sr_slotid %d\n", | 579 | dprintk("--> %s clp %p session %p sr_slotid %d\n", |
545 | __func__, clp, clp->cl_session, res->sr_slotid); | 580 | __func__, session->clp, session, res->sr_slotid); |
546 | 581 | ||
547 | if (!nfs4_has_session(clp)) | 582 | ret = nfs41_setup_sequence(session, args, res, cache_reply, |
548 | goto out; | ||
549 | ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply, | ||
550 | task); | 583 | task); |
551 | if (ret && ret != -EAGAIN) { | ||
552 | /* terminate rpc task */ | ||
553 | task->tk_status = ret; | ||
554 | task->tk_action = NULL; | ||
555 | } | ||
556 | out: | 584 | out: |
557 | dprintk("<-- %s status=%d\n", __func__, ret); | 585 | dprintk("<-- %s status=%d\n", __func__, ret); |
558 | return ret; | 586 | return ret; |
559 | } | 587 | } |
560 | 588 | ||
561 | struct nfs41_call_sync_data { | 589 | struct nfs41_call_sync_data { |
562 | struct nfs_client *clp; | 590 | const struct nfs_server *seq_server; |
563 | struct nfs4_sequence_args *seq_args; | 591 | struct nfs4_sequence_args *seq_args; |
564 | struct nfs4_sequence_res *seq_res; | 592 | struct nfs4_sequence_res *seq_res; |
565 | int cache_reply; | 593 | int cache_reply; |
@@ -569,9 +597,9 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) | |||
569 | { | 597 | { |
570 | struct nfs41_call_sync_data *data = calldata; | 598 | struct nfs41_call_sync_data *data = calldata; |
571 | 599 | ||
572 | dprintk("--> %s data->clp->cl_session %p\n", __func__, | 600 | dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); |
573 | data->clp->cl_session); | 601 | |
574 | if (nfs4_setup_sequence(data->clp, data->seq_args, | 602 | if (nfs4_setup_sequence(data->seq_server, data->seq_args, |
575 | data->seq_res, data->cache_reply, task)) | 603 | data->seq_res, data->cache_reply, task)) |
576 | return; | 604 | return; |
577 | rpc_call_start(task); | 605 | rpc_call_start(task); |
@@ -587,7 +615,7 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) | |||
587 | { | 615 | { |
588 | struct nfs41_call_sync_data *data = calldata; | 616 | struct nfs41_call_sync_data *data = calldata; |
589 | 617 | ||
590 | nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); | 618 | nfs41_sequence_done(task, data->seq_res); |
591 | } | 619 | } |
592 | 620 | ||
593 | struct rpc_call_ops nfs41_call_sync_ops = { | 621 | struct rpc_call_ops nfs41_call_sync_ops = { |
@@ -600,8 +628,7 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = { | |||
600 | .rpc_call_done = nfs41_call_sync_done, | 628 | .rpc_call_done = nfs41_call_sync_done, |
601 | }; | 629 | }; |
602 | 630 | ||
603 | static int nfs4_call_sync_sequence(struct nfs_client *clp, | 631 | static int nfs4_call_sync_sequence(struct nfs_server *server, |
604 | struct rpc_clnt *clnt, | ||
605 | struct rpc_message *msg, | 632 | struct rpc_message *msg, |
606 | struct nfs4_sequence_args *args, | 633 | struct nfs4_sequence_args *args, |
607 | struct nfs4_sequence_res *res, | 634 | struct nfs4_sequence_res *res, |
@@ -611,13 +638,13 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp, | |||
611 | int ret; | 638 | int ret; |
612 | struct rpc_task *task; | 639 | struct rpc_task *task; |
613 | struct nfs41_call_sync_data data = { | 640 | struct nfs41_call_sync_data data = { |
614 | .clp = clp, | 641 | .seq_server = server, |
615 | .seq_args = args, | 642 | .seq_args = args, |
616 | .seq_res = res, | 643 | .seq_res = res, |
617 | .cache_reply = cache_reply, | 644 | .cache_reply = cache_reply, |
618 | }; | 645 | }; |
619 | struct rpc_task_setup task_setup = { | 646 | struct rpc_task_setup task_setup = { |
620 | .rpc_client = clnt, | 647 | .rpc_client = server->client, |
621 | .rpc_message = msg, | 648 | .rpc_message = msg, |
622 | .callback_ops = &nfs41_call_sync_ops, | 649 | .callback_ops = &nfs41_call_sync_ops, |
623 | .callback_data = &data | 650 | .callback_data = &data |
@@ -642,10 +669,15 @@ int _nfs4_call_sync_session(struct nfs_server *server, | |||
642 | struct nfs4_sequence_res *res, | 669 | struct nfs4_sequence_res *res, |
643 | int cache_reply) | 670 | int cache_reply) |
644 | { | 671 | { |
645 | return nfs4_call_sync_sequence(server->nfs_client, server->client, | 672 | return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0); |
646 | msg, args, res, cache_reply, 0); | ||
647 | } | 673 | } |
648 | 674 | ||
675 | #else | ||
676 | static int nfs4_sequence_done(struct rpc_task *task, | ||
677 | struct nfs4_sequence_res *res) | ||
678 | { | ||
679 | return 1; | ||
680 | } | ||
649 | #endif /* CONFIG_NFS_V4_1 */ | 681 | #endif /* CONFIG_NFS_V4_1 */ |
650 | 682 | ||
651 | int _nfs4_call_sync(struct nfs_server *server, | 683 | int _nfs4_call_sync(struct nfs_server *server, |
@@ -659,18 +691,9 @@ int _nfs4_call_sync(struct nfs_server *server, | |||
659 | } | 691 | } |
660 | 692 | ||
661 | #define nfs4_call_sync(server, msg, args, res, cache_reply) \ | 693 | #define nfs4_call_sync(server, msg, args, res, cache_reply) \ |
662 | (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \ | 694 | (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ |
663 | &(res)->seq_res, (cache_reply)) | 695 | &(res)->seq_res, (cache_reply)) |
664 | 696 | ||
665 | static void nfs4_sequence_done(const struct nfs_server *server, | ||
666 | struct nfs4_sequence_res *res, int rpc_status) | ||
667 | { | ||
668 | #ifdef CONFIG_NFS_V4_1 | ||
669 | if (nfs4_has_session(server->nfs_client)) | ||
670 | nfs41_sequence_done(server->nfs_client, res, rpc_status); | ||
671 | #endif /* CONFIG_NFS_V4_1 */ | ||
672 | } | ||
673 | |||
674 | static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) | 697 | static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) |
675 | { | 698 | { |
676 | struct nfs_inode *nfsi = NFS_I(dir); | 699 | struct nfs_inode *nfsi = NFS_I(dir); |
@@ -745,19 +768,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, | |||
745 | p->o_arg.server = server; | 768 | p->o_arg.server = server; |
746 | p->o_arg.bitmask = server->attr_bitmask; | 769 | p->o_arg.bitmask = server->attr_bitmask; |
747 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; | 770 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; |
748 | if (flags & O_EXCL) { | 771 | if (flags & O_CREAT) { |
749 | if (nfs4_has_persistent_session(server->nfs_client)) { | 772 | u32 *s; |
750 | /* GUARDED */ | 773 | |
751 | p->o_arg.u.attrs = &p->attrs; | ||
752 | memcpy(&p->attrs, attrs, sizeof(p->attrs)); | ||
753 | } else { /* EXCLUSIVE4_1 */ | ||
754 | u32 *s = (u32 *) p->o_arg.u.verifier.data; | ||
755 | s[0] = jiffies; | ||
756 | s[1] = current->pid; | ||
757 | } | ||
758 | } else if (flags & O_CREAT) { | ||
759 | p->o_arg.u.attrs = &p->attrs; | 774 | p->o_arg.u.attrs = &p->attrs; |
760 | memcpy(&p->attrs, attrs, sizeof(p->attrs)); | 775 | memcpy(&p->attrs, attrs, sizeof(p->attrs)); |
776 | s = (u32 *) p->o_arg.u.verifier.data; | ||
777 | s[0] = jiffies; | ||
778 | s[1] = current->pid; | ||
761 | } | 779 | } |
762 | p->c_arg.fh = &p->o_res.fh; | 780 | p->c_arg.fh = &p->o_res.fh; |
763 | p->c_arg.stateid = &p->o_res.stateid; | 781 | p->c_arg.stateid = &p->o_res.stateid; |
@@ -1255,8 +1273,6 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) | |||
1255 | struct nfs4_opendata *data = calldata; | 1273 | struct nfs4_opendata *data = calldata; |
1256 | 1274 | ||
1257 | data->rpc_status = task->tk_status; | 1275 | data->rpc_status = task->tk_status; |
1258 | if (RPC_ASSASSINATED(task)) | ||
1259 | return; | ||
1260 | if (data->rpc_status == 0) { | 1276 | if (data->rpc_status == 0) { |
1261 | memcpy(data->o_res.stateid.data, data->c_res.stateid.data, | 1277 | memcpy(data->o_res.stateid.data, data->c_res.stateid.data, |
1262 | sizeof(data->o_res.stateid.data)); | 1278 | sizeof(data->o_res.stateid.data)); |
@@ -1356,13 +1372,13 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) | |||
1356 | } | 1372 | } |
1357 | /* Update sequence id. */ | 1373 | /* Update sequence id. */ |
1358 | data->o_arg.id = sp->so_owner_id.id; | 1374 | data->o_arg.id = sp->so_owner_id.id; |
1359 | data->o_arg.clientid = sp->so_client->cl_clientid; | 1375 | data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; |
1360 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { | 1376 | if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { |
1361 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; | 1377 | task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; |
1362 | nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); | 1378 | nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); |
1363 | } | 1379 | } |
1364 | data->timestamp = jiffies; | 1380 | data->timestamp = jiffies; |
1365 | if (nfs4_setup_sequence(data->o_arg.server->nfs_client, | 1381 | if (nfs4_setup_sequence(data->o_arg.server, |
1366 | &data->o_arg.seq_args, | 1382 | &data->o_arg.seq_args, |
1367 | &data->o_res.seq_res, 1, task)) | 1383 | &data->o_res.seq_res, 1, task)) |
1368 | return; | 1384 | return; |
@@ -1385,11 +1401,9 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) | |||
1385 | 1401 | ||
1386 | data->rpc_status = task->tk_status; | 1402 | data->rpc_status = task->tk_status; |
1387 | 1403 | ||
1388 | nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res, | 1404 | if (!nfs4_sequence_done(task, &data->o_res.seq_res)) |
1389 | task->tk_status); | ||
1390 | |||
1391 | if (RPC_ASSASSINATED(task)) | ||
1392 | return; | 1405 | return; |
1406 | |||
1393 | if (task->tk_status == 0) { | 1407 | if (task->tk_status == 0) { |
1394 | switch (data->o_res.f_attr->mode & S_IFMT) { | 1408 | switch (data->o_res.f_attr->mode & S_IFMT) { |
1395 | case S_IFREG: | 1409 | case S_IFREG: |
@@ -1773,7 +1787,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
1773 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { | 1787 | if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { |
1774 | /* Use that stateid */ | 1788 | /* Use that stateid */ |
1775 | } else if (state != NULL) { | 1789 | } else if (state != NULL) { |
1776 | nfs4_copy_stateid(&arg.stateid, state, current->files); | 1790 | nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid); |
1777 | } else | 1791 | } else |
1778 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); | 1792 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); |
1779 | 1793 | ||
@@ -1838,8 +1852,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
1838 | struct nfs4_state *state = calldata->state; | 1852 | struct nfs4_state *state = calldata->state; |
1839 | struct nfs_server *server = NFS_SERVER(calldata->inode); | 1853 | struct nfs_server *server = NFS_SERVER(calldata->inode); |
1840 | 1854 | ||
1841 | nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status); | 1855 | if (!nfs4_sequence_done(task, &calldata->res.seq_res)) |
1842 | if (RPC_ASSASSINATED(task)) | ||
1843 | return; | 1856 | return; |
1844 | /* hmm. we are done with the inode, and in the process of freeing | 1857 | /* hmm. we are done with the inode, and in the process of freeing |
1845 | * the state_owner. we keep this around to process errors | 1858 | * the state_owner. we keep this around to process errors |
@@ -1903,7 +1916,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) | |||
1903 | 1916 | ||
1904 | nfs_fattr_init(calldata->res.fattr); | 1917 | nfs_fattr_init(calldata->res.fattr); |
1905 | calldata->timestamp = jiffies; | 1918 | calldata->timestamp = jiffies; |
1906 | if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, | 1919 | if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), |
1907 | &calldata->arg.seq_args, &calldata->res.seq_res, | 1920 | &calldata->arg.seq_args, &calldata->res.seq_res, |
1908 | 1, task)) | 1921 | 1, task)) |
1909 | return; | 1922 | return; |
@@ -2648,7 +2661,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
2648 | { | 2661 | { |
2649 | struct nfs_removeres *res = task->tk_msg.rpc_resp; | 2662 | struct nfs_removeres *res = task->tk_msg.rpc_resp; |
2650 | 2663 | ||
2651 | nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); | 2664 | if (!nfs4_sequence_done(task, &res->seq_res)) |
2665 | return 0; | ||
2652 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 2666 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) |
2653 | return 0; | 2667 | return 0; |
2654 | update_changeattr(dir, &res->cinfo); | 2668 | update_changeattr(dir, &res->cinfo); |
@@ -3093,7 +3107,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
3093 | 3107 | ||
3094 | dprintk("--> %s\n", __func__); | 3108 | dprintk("--> %s\n", __func__); |
3095 | 3109 | ||
3096 | nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); | 3110 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
3111 | return -EAGAIN; | ||
3097 | 3112 | ||
3098 | if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { | 3113 | if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { |
3099 | nfs_restart_rpc(task, server->nfs_client); | 3114 | nfs_restart_rpc(task, server->nfs_client); |
@@ -3116,8 +3131,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
3116 | { | 3131 | { |
3117 | struct inode *inode = data->inode; | 3132 | struct inode *inode = data->inode; |
3118 | 3133 | ||
3119 | nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, | 3134 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
3120 | task->tk_status); | 3135 | return -EAGAIN; |
3121 | 3136 | ||
3122 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { | 3137 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { |
3123 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); | 3138 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); |
@@ -3145,8 +3160,9 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
3145 | { | 3160 | { |
3146 | struct inode *inode = data->inode; | 3161 | struct inode *inode = data->inode; |
3147 | 3162 | ||
3148 | nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, | 3163 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
3149 | task->tk_status); | 3164 | return -EAGAIN; |
3165 | |||
3150 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 3166 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { |
3151 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); | 3167 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); |
3152 | return -EAGAIN; | 3168 | return -EAGAIN; |
@@ -3196,10 +3212,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) | |||
3196 | nfs4_schedule_state_recovery(clp); | 3212 | nfs4_schedule_state_recovery(clp); |
3197 | return; | 3213 | return; |
3198 | } | 3214 | } |
3199 | spin_lock(&clp->cl_lock); | 3215 | do_renew_lease(clp, timestamp); |
3200 | if (time_before(clp->cl_last_renewal,timestamp)) | ||
3201 | clp->cl_last_renewal = timestamp; | ||
3202 | spin_unlock(&clp->cl_lock); | ||
3203 | } | 3216 | } |
3204 | 3217 | ||
3205 | static const struct rpc_call_ops nfs4_renew_ops = { | 3218 | static const struct rpc_call_ops nfs4_renew_ops = { |
@@ -3240,10 +3253,7 @@ int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3240 | status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); | 3253 | status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); |
3241 | if (status < 0) | 3254 | if (status < 0) |
3242 | return status; | 3255 | return status; |
3243 | spin_lock(&clp->cl_lock); | 3256 | do_renew_lease(clp, now); |
3244 | if (time_before(clp->cl_last_renewal,now)) | ||
3245 | clp->cl_last_renewal = now; | ||
3246 | spin_unlock(&clp->cl_lock); | ||
3247 | return 0; | 3257 | return 0; |
3248 | } | 3258 | } |
3249 | 3259 | ||
@@ -3464,9 +3474,11 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen | |||
3464 | } | 3474 | } |
3465 | 3475 | ||
3466 | static int | 3476 | static int |
3467 | _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state) | 3477 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) |
3468 | { | 3478 | { |
3469 | if (!clp || task->tk_status >= 0) | 3479 | struct nfs_client *clp = server->nfs_client; |
3480 | |||
3481 | if (task->tk_status >= 0) | ||
3470 | return 0; | 3482 | return 0; |
3471 | switch(task->tk_status) { | 3483 | switch(task->tk_status) { |
3472 | case -NFS4ERR_ADMIN_REVOKED: | 3484 | case -NFS4ERR_ADMIN_REVOKED: |
@@ -3498,8 +3510,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
3498 | return -EAGAIN; | 3510 | return -EAGAIN; |
3499 | #endif /* CONFIG_NFS_V4_1 */ | 3511 | #endif /* CONFIG_NFS_V4_1 */ |
3500 | case -NFS4ERR_DELAY: | 3512 | case -NFS4ERR_DELAY: |
3501 | if (server) | 3513 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
3502 | nfs_inc_server_stats(server, NFSIOS_DELAY); | ||
3503 | case -NFS4ERR_GRACE: | 3514 | case -NFS4ERR_GRACE: |
3504 | case -EKEYEXPIRED: | 3515 | case -EKEYEXPIRED: |
3505 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 3516 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
@@ -3520,12 +3531,6 @@ do_state_recovery: | |||
3520 | return -EAGAIN; | 3531 | return -EAGAIN; |
3521 | } | 3532 | } |
3522 | 3533 | ||
3523 | static int | ||
3524 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) | ||
3525 | { | ||
3526 | return _nfs4_async_handle_error(task, server, server->nfs_client, state); | ||
3527 | } | ||
3528 | |||
3529 | int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | 3534 | int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, |
3530 | unsigned short port, struct rpc_cred *cred, | 3535 | unsigned short port, struct rpc_cred *cred, |
3531 | struct nfs4_setclientid_res *res) | 3536 | struct nfs4_setclientid_res *res) |
@@ -3641,8 +3646,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
3641 | { | 3646 | { |
3642 | struct nfs4_delegreturndata *data = calldata; | 3647 | struct nfs4_delegreturndata *data = calldata; |
3643 | 3648 | ||
3644 | nfs4_sequence_done(data->res.server, &data->res.seq_res, | 3649 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
3645 | task->tk_status); | 3650 | return; |
3646 | 3651 | ||
3647 | switch (task->tk_status) { | 3652 | switch (task->tk_status) { |
3648 | case -NFS4ERR_STALE_STATEID: | 3653 | case -NFS4ERR_STALE_STATEID: |
@@ -3672,7 +3677,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) | |||
3672 | 3677 | ||
3673 | d_data = (struct nfs4_delegreturndata *)data; | 3678 | d_data = (struct nfs4_delegreturndata *)data; |
3674 | 3679 | ||
3675 | if (nfs4_setup_sequence(d_data->res.server->nfs_client, | 3680 | if (nfs4_setup_sequence(d_data->res.server, |
3676 | &d_data->args.seq_args, | 3681 | &d_data->args.seq_args, |
3677 | &d_data->res.seq_res, 1, task)) | 3682 | &d_data->res.seq_res, 1, task)) |
3678 | return; | 3683 | return; |
@@ -3892,9 +3897,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
3892 | { | 3897 | { |
3893 | struct nfs4_unlockdata *calldata = data; | 3898 | struct nfs4_unlockdata *calldata = data; |
3894 | 3899 | ||
3895 | nfs4_sequence_done(calldata->server, &calldata->res.seq_res, | 3900 | if (!nfs4_sequence_done(task, &calldata->res.seq_res)) |
3896 | task->tk_status); | ||
3897 | if (RPC_ASSASSINATED(task)) | ||
3898 | return; | 3901 | return; |
3899 | switch (task->tk_status) { | 3902 | switch (task->tk_status) { |
3900 | case 0: | 3903 | case 0: |
@@ -3927,7 +3930,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) | |||
3927 | return; | 3930 | return; |
3928 | } | 3931 | } |
3929 | calldata->timestamp = jiffies; | 3932 | calldata->timestamp = jiffies; |
3930 | if (nfs4_setup_sequence(calldata->server->nfs_client, | 3933 | if (nfs4_setup_sequence(calldata->server, |
3931 | &calldata->arg.seq_args, | 3934 | &calldata->arg.seq_args, |
3932 | &calldata->res.seq_res, 1, task)) | 3935 | &calldata->res.seq_res, 1, task)) |
3933 | return; | 3936 | return; |
@@ -4082,7 +4085,8 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) | |||
4082 | } else | 4085 | } else |
4083 | data->arg.new_lock_owner = 0; | 4086 | data->arg.new_lock_owner = 0; |
4084 | data->timestamp = jiffies; | 4087 | data->timestamp = jiffies; |
4085 | if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, | 4088 | if (nfs4_setup_sequence(data->server, |
4089 | &data->arg.seq_args, | ||
4086 | &data->res.seq_res, 1, task)) | 4090 | &data->res.seq_res, 1, task)) |
4087 | return; | 4091 | return; |
4088 | rpc_call_start(task); | 4092 | rpc_call_start(task); |
@@ -4101,12 +4105,10 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) | |||
4101 | 4105 | ||
4102 | dprintk("%s: begin!\n", __func__); | 4106 | dprintk("%s: begin!\n", __func__); |
4103 | 4107 | ||
4104 | nfs4_sequence_done(data->server, &data->res.seq_res, | 4108 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
4105 | task->tk_status); | 4109 | return; |
4106 | 4110 | ||
4107 | data->rpc_status = task->tk_status; | 4111 | data->rpc_status = task->tk_status; |
4108 | if (RPC_ASSASSINATED(task)) | ||
4109 | goto out; | ||
4110 | if (data->arg.new_lock_owner != 0) { | 4112 | if (data->arg.new_lock_owner != 0) { |
4111 | if (data->rpc_status == 0) | 4113 | if (data->rpc_status == 0) |
4112 | nfs_confirm_seqid(&data->lsp->ls_seqid, 0); | 4114 | nfs_confirm_seqid(&data->lsp->ls_seqid, 0); |
@@ -4424,6 +4426,34 @@ out: | |||
4424 | return err; | 4426 | return err; |
4425 | } | 4427 | } |
4426 | 4428 | ||
4429 | static void nfs4_release_lockowner_release(void *calldata) | ||
4430 | { | ||
4431 | kfree(calldata); | ||
4432 | } | ||
4433 | |||
4434 | const struct rpc_call_ops nfs4_release_lockowner_ops = { | ||
4435 | .rpc_release = nfs4_release_lockowner_release, | ||
4436 | }; | ||
4437 | |||
4438 | void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) | ||
4439 | { | ||
4440 | struct nfs_server *server = lsp->ls_state->owner->so_server; | ||
4441 | struct nfs_release_lockowner_args *args; | ||
4442 | struct rpc_message msg = { | ||
4443 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], | ||
4444 | }; | ||
4445 | |||
4446 | if (server->nfs_client->cl_mvops->minor_version != 0) | ||
4447 | return; | ||
4448 | args = kmalloc(sizeof(*args), GFP_NOFS); | ||
4449 | if (!args) | ||
4450 | return; | ||
4451 | args->lock_owner.clientid = server->nfs_client->cl_clientid; | ||
4452 | args->lock_owner.id = lsp->ls_id.id; | ||
4453 | msg.rpc_argp = args; | ||
4454 | rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); | ||
4455 | } | ||
4456 | |||
4427 | #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" | 4457 | #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" |
4428 | 4458 | ||
4429 | int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, | 4459 | int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, |
@@ -4611,7 +4641,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) | |||
4611 | (struct nfs4_get_lease_time_data *)calldata; | 4641 | (struct nfs4_get_lease_time_data *)calldata; |
4612 | 4642 | ||
4613 | dprintk("--> %s\n", __func__); | 4643 | dprintk("--> %s\n", __func__); |
4614 | nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status); | 4644 | if (!nfs41_sequence_done(task, &data->res->lr_seq_res)) |
4645 | return; | ||
4615 | switch (task->tk_status) { | 4646 | switch (task->tk_status) { |
4616 | case -NFS4ERR_DELAY: | 4647 | case -NFS4ERR_DELAY: |
4617 | case -NFS4ERR_GRACE: | 4648 | case -NFS4ERR_GRACE: |
@@ -4805,13 +4836,6 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) | |||
4805 | if (!session) | 4836 | if (!session) |
4806 | return NULL; | 4837 | return NULL; |
4807 | 4838 | ||
4808 | /* | ||
4809 | * The create session reply races with the server back | ||
4810 | * channel probe. Mark the client NFS_CS_SESSION_INITING | ||
4811 | * so that the client back channel can find the | ||
4812 | * nfs_client struct | ||
4813 | */ | ||
4814 | clp->cl_cons_state = NFS_CS_SESSION_INITING; | ||
4815 | init_completion(&session->complete); | 4839 | init_completion(&session->complete); |
4816 | 4840 | ||
4817 | tbl = &session->fc_slot_table; | 4841 | tbl = &session->fc_slot_table; |
@@ -4824,6 +4848,8 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) | |||
4824 | spin_lock_init(&tbl->slot_tbl_lock); | 4848 | spin_lock_init(&tbl->slot_tbl_lock); |
4825 | rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); | 4849 | rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); |
4826 | 4850 | ||
4851 | session->session_state = 1<<NFS4_SESSION_INITING; | ||
4852 | |||
4827 | session->clp = clp; | 4853 | session->clp = clp; |
4828 | return session; | 4854 | return session; |
4829 | } | 4855 | } |
@@ -5040,6 +5066,10 @@ int nfs4_init_session(struct nfs_server *server) | |||
5040 | if (!nfs4_has_session(clp)) | 5066 | if (!nfs4_has_session(clp)) |
5041 | return 0; | 5067 | return 0; |
5042 | 5068 | ||
5069 | session = clp->cl_session; | ||
5070 | if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) | ||
5071 | return 0; | ||
5072 | |||
5043 | rsize = server->rsize; | 5073 | rsize = server->rsize; |
5044 | if (rsize == 0) | 5074 | if (rsize == 0) |
5045 | rsize = NFS_MAX_FILE_IO_SIZE; | 5075 | rsize = NFS_MAX_FILE_IO_SIZE; |
@@ -5047,7 +5077,6 @@ int nfs4_init_session(struct nfs_server *server) | |||
5047 | if (wsize == 0) | 5077 | if (wsize == 0) |
5048 | wsize = NFS_MAX_FILE_IO_SIZE; | 5078 | wsize = NFS_MAX_FILE_IO_SIZE; |
5049 | 5079 | ||
5050 | session = clp->cl_session; | ||
5051 | session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; | 5080 | session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead; |
5052 | session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; | 5081 | session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead; |
5053 | 5082 | ||
@@ -5060,69 +5089,70 @@ int nfs4_init_session(struct nfs_server *server) | |||
5060 | /* | 5089 | /* |
5061 | * Renew the cl_session lease. | 5090 | * Renew the cl_session lease. |
5062 | */ | 5091 | */ |
5063 | static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) | 5092 | struct nfs4_sequence_data { |
5064 | { | 5093 | struct nfs_client *clp; |
5065 | struct nfs4_sequence_args args; | 5094 | struct nfs4_sequence_args args; |
5066 | struct nfs4_sequence_res res; | 5095 | struct nfs4_sequence_res res; |
5067 | 5096 | }; | |
5068 | struct rpc_message msg = { | ||
5069 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], | ||
5070 | .rpc_argp = &args, | ||
5071 | .rpc_resp = &res, | ||
5072 | .rpc_cred = cred, | ||
5073 | }; | ||
5074 | |||
5075 | args.sa_cache_this = 0; | ||
5076 | |||
5077 | return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, | ||
5078 | &res, args.sa_cache_this, 1); | ||
5079 | } | ||
5080 | 5097 | ||
5081 | static void nfs41_sequence_release(void *data) | 5098 | static void nfs41_sequence_release(void *data) |
5082 | { | 5099 | { |
5083 | struct nfs_client *clp = (struct nfs_client *)data; | 5100 | struct nfs4_sequence_data *calldata = data; |
5101 | struct nfs_client *clp = calldata->clp; | ||
5084 | 5102 | ||
5085 | if (atomic_read(&clp->cl_count) > 1) | 5103 | if (atomic_read(&clp->cl_count) > 1) |
5086 | nfs4_schedule_state_renewal(clp); | 5104 | nfs4_schedule_state_renewal(clp); |
5087 | nfs_put_client(clp); | 5105 | nfs_put_client(clp); |
5106 | kfree(calldata); | ||
5107 | } | ||
5108 | |||
5109 | static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client *clp) | ||
5110 | { | ||
5111 | switch(task->tk_status) { | ||
5112 | case -NFS4ERR_DELAY: | ||
5113 | case -EKEYEXPIRED: | ||
5114 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | ||
5115 | return -EAGAIN; | ||
5116 | default: | ||
5117 | nfs4_schedule_state_recovery(clp); | ||
5118 | } | ||
5119 | return 0; | ||
5088 | } | 5120 | } |
5089 | 5121 | ||
5090 | static void nfs41_sequence_call_done(struct rpc_task *task, void *data) | 5122 | static void nfs41_sequence_call_done(struct rpc_task *task, void *data) |
5091 | { | 5123 | { |
5092 | struct nfs_client *clp = (struct nfs_client *)data; | 5124 | struct nfs4_sequence_data *calldata = data; |
5125 | struct nfs_client *clp = calldata->clp; | ||
5093 | 5126 | ||
5094 | nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status); | 5127 | if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp)) |
5128 | return; | ||
5095 | 5129 | ||
5096 | if (task->tk_status < 0) { | 5130 | if (task->tk_status < 0) { |
5097 | dprintk("%s ERROR %d\n", __func__, task->tk_status); | 5131 | dprintk("%s ERROR %d\n", __func__, task->tk_status); |
5098 | if (atomic_read(&clp->cl_count) == 1) | 5132 | if (atomic_read(&clp->cl_count) == 1) |
5099 | goto out; | 5133 | goto out; |
5100 | 5134 | ||
5101 | if (_nfs4_async_handle_error(task, NULL, clp, NULL) | 5135 | if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { |
5102 | == -EAGAIN) { | 5136 | rpc_restart_call_prepare(task); |
5103 | nfs_restart_rpc(task, clp); | ||
5104 | return; | 5137 | return; |
5105 | } | 5138 | } |
5106 | } | 5139 | } |
5107 | dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); | 5140 | dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); |
5108 | out: | 5141 | out: |
5109 | kfree(task->tk_msg.rpc_argp); | ||
5110 | kfree(task->tk_msg.rpc_resp); | ||
5111 | |||
5112 | dprintk("<-- %s\n", __func__); | 5142 | dprintk("<-- %s\n", __func__); |
5113 | } | 5143 | } |
5114 | 5144 | ||
5115 | static void nfs41_sequence_prepare(struct rpc_task *task, void *data) | 5145 | static void nfs41_sequence_prepare(struct rpc_task *task, void *data) |
5116 | { | 5146 | { |
5117 | struct nfs_client *clp; | 5147 | struct nfs4_sequence_data *calldata = data; |
5148 | struct nfs_client *clp = calldata->clp; | ||
5118 | struct nfs4_sequence_args *args; | 5149 | struct nfs4_sequence_args *args; |
5119 | struct nfs4_sequence_res *res; | 5150 | struct nfs4_sequence_res *res; |
5120 | 5151 | ||
5121 | clp = (struct nfs_client *)data; | ||
5122 | args = task->tk_msg.rpc_argp; | 5152 | args = task->tk_msg.rpc_argp; |
5123 | res = task->tk_msg.rpc_resp; | 5153 | res = task->tk_msg.rpc_resp; |
5124 | 5154 | ||
5125 | if (nfs4_setup_sequence(clp, args, res, 0, task)) | 5155 | if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) |
5126 | return; | 5156 | return; |
5127 | rpc_call_start(task); | 5157 | rpc_call_start(task); |
5128 | } | 5158 | } |
@@ -5133,32 +5163,67 @@ static const struct rpc_call_ops nfs41_sequence_ops = { | |||
5133 | .rpc_release = nfs41_sequence_release, | 5163 | .rpc_release = nfs41_sequence_release, |
5134 | }; | 5164 | }; |
5135 | 5165 | ||
5136 | static int nfs41_proc_async_sequence(struct nfs_client *clp, | 5166 | static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) |
5137 | struct rpc_cred *cred) | ||
5138 | { | 5167 | { |
5139 | struct nfs4_sequence_args *args; | 5168 | struct nfs4_sequence_data *calldata; |
5140 | struct nfs4_sequence_res *res; | ||
5141 | struct rpc_message msg = { | 5169 | struct rpc_message msg = { |
5142 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], | 5170 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], |
5143 | .rpc_cred = cred, | 5171 | .rpc_cred = cred, |
5144 | }; | 5172 | }; |
5173 | struct rpc_task_setup task_setup_data = { | ||
5174 | .rpc_client = clp->cl_rpcclient, | ||
5175 | .rpc_message = &msg, | ||
5176 | .callback_ops = &nfs41_sequence_ops, | ||
5177 | .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, | ||
5178 | }; | ||
5145 | 5179 | ||
5146 | if (!atomic_inc_not_zero(&clp->cl_count)) | 5180 | if (!atomic_inc_not_zero(&clp->cl_count)) |
5147 | return -EIO; | 5181 | return ERR_PTR(-EIO); |
5148 | args = kzalloc(sizeof(*args), GFP_NOFS); | 5182 | calldata = kmalloc(sizeof(*calldata), GFP_NOFS); |
5149 | res = kzalloc(sizeof(*res), GFP_NOFS); | 5183 | if (calldata == NULL) { |
5150 | if (!args || !res) { | ||
5151 | kfree(args); | ||
5152 | kfree(res); | ||
5153 | nfs_put_client(clp); | 5184 | nfs_put_client(clp); |
5154 | return -ENOMEM; | 5185 | return ERR_PTR(-ENOMEM); |
5155 | } | 5186 | } |
5156 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; | 5187 | calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE; |
5157 | msg.rpc_argp = args; | 5188 | msg.rpc_argp = &calldata->args; |
5158 | msg.rpc_resp = res; | 5189 | msg.rpc_resp = &calldata->res; |
5190 | calldata->clp = clp; | ||
5191 | task_setup_data.callback_data = calldata; | ||
5159 | 5192 | ||
5160 | return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, | 5193 | return rpc_run_task(&task_setup_data); |
5161 | &nfs41_sequence_ops, (void *)clp); | 5194 | } |
5195 | |||
5196 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) | ||
5197 | { | ||
5198 | struct rpc_task *task; | ||
5199 | int ret = 0; | ||
5200 | |||
5201 | task = _nfs41_proc_sequence(clp, cred); | ||
5202 | if (IS_ERR(task)) | ||
5203 | ret = PTR_ERR(task); | ||
5204 | else | ||
5205 | rpc_put_task(task); | ||
5206 | dprintk("<-- %s status=%d\n", __func__, ret); | ||
5207 | return ret; | ||
5208 | } | ||
5209 | |||
5210 | static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) | ||
5211 | { | ||
5212 | struct rpc_task *task; | ||
5213 | int ret; | ||
5214 | |||
5215 | task = _nfs41_proc_sequence(clp, cred); | ||
5216 | if (IS_ERR(task)) { | ||
5217 | ret = PTR_ERR(task); | ||
5218 | goto out; | ||
5219 | } | ||
5220 | ret = rpc_wait_for_completion_task(task); | ||
5221 | if (!ret) | ||
5222 | ret = task->tk_status; | ||
5223 | rpc_put_task(task); | ||
5224 | out: | ||
5225 | dprintk("<-- %s status=%d\n", __func__, ret); | ||
5226 | return ret; | ||
5162 | } | 5227 | } |
5163 | 5228 | ||
5164 | struct nfs4_reclaim_complete_data { | 5229 | struct nfs4_reclaim_complete_data { |
@@ -5172,13 +5237,31 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) | |||
5172 | struct nfs4_reclaim_complete_data *calldata = data; | 5237 | struct nfs4_reclaim_complete_data *calldata = data; |
5173 | 5238 | ||
5174 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); | 5239 | rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); |
5175 | if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, | 5240 | if (nfs41_setup_sequence(calldata->clp->cl_session, |
5241 | &calldata->arg.seq_args, | ||
5176 | &calldata->res.seq_res, 0, task)) | 5242 | &calldata->res.seq_res, 0, task)) |
5177 | return; | 5243 | return; |
5178 | 5244 | ||
5179 | rpc_call_start(task); | 5245 | rpc_call_start(task); |
5180 | } | 5246 | } |
5181 | 5247 | ||
5248 | static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nfs_client *clp) | ||
5249 | { | ||
5250 | switch(task->tk_status) { | ||
5251 | case 0: | ||
5252 | case -NFS4ERR_COMPLETE_ALREADY: | ||
5253 | case -NFS4ERR_WRONG_CRED: /* What to do here? */ | ||
5254 | break; | ||
5255 | case -NFS4ERR_DELAY: | ||
5256 | case -EKEYEXPIRED: | ||
5257 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | ||
5258 | return -EAGAIN; | ||
5259 | default: | ||
5260 | nfs4_schedule_state_recovery(clp); | ||
5261 | } | ||
5262 | return 0; | ||
5263 | } | ||
5264 | |||
5182 | static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) | 5265 | static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) |
5183 | { | 5266 | { |
5184 | struct nfs4_reclaim_complete_data *calldata = data; | 5267 | struct nfs4_reclaim_complete_data *calldata = data; |
@@ -5186,32 +5269,13 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) | |||
5186 | struct nfs4_sequence_res *res = &calldata->res.seq_res; | 5269 | struct nfs4_sequence_res *res = &calldata->res.seq_res; |
5187 | 5270 | ||
5188 | dprintk("--> %s\n", __func__); | 5271 | dprintk("--> %s\n", __func__); |
5189 | nfs41_sequence_done(clp, res, task->tk_status); | 5272 | if (!nfs41_sequence_done(task, res)) |
5190 | switch (task->tk_status) { | 5273 | return; |
5191 | case 0: | ||
5192 | case -NFS4ERR_COMPLETE_ALREADY: | ||
5193 | break; | ||
5194 | case -NFS4ERR_BADSESSION: | ||
5195 | case -NFS4ERR_DEADSESSION: | ||
5196 | /* | ||
5197 | * Handle the session error, but do not retry the operation, as | ||
5198 | * we have no way of telling whether the clientid had to be | ||
5199 | * reset before we got our reply. If reset, a new wave of | ||
5200 | * reclaim operations will follow, containing their own reclaim | ||
5201 | * complete. We don't want our retry to get on the way of | ||
5202 | * recovery by incorrectly indicating to the server that we're | ||
5203 | * done reclaiming state since the process had to be restarted. | ||
5204 | */ | ||
5205 | _nfs4_async_handle_error(task, NULL, clp, NULL); | ||
5206 | break; | ||
5207 | default: | ||
5208 | if (_nfs4_async_handle_error( | ||
5209 | task, NULL, clp, NULL) == -EAGAIN) { | ||
5210 | rpc_restart_call_prepare(task); | ||
5211 | return; | ||
5212 | } | ||
5213 | } | ||
5214 | 5274 | ||
5275 | if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) { | ||
5276 | rpc_restart_call_prepare(task); | ||
5277 | return; | ||
5278 | } | ||
5215 | dprintk("<-- %s\n", __func__); | 5279 | dprintk("<-- %s\n", __func__); |
5216 | } | 5280 | } |
5217 | 5281 | ||
@@ -5325,28 +5389,30 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { | |||
5325 | }; | 5389 | }; |
5326 | #endif | 5390 | #endif |
5327 | 5391 | ||
5328 | /* | 5392 | static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { |
5329 | * Per minor version reboot and network partition recovery ops | 5393 | .minor_version = 0, |
5330 | */ | 5394 | .call_sync = _nfs4_call_sync, |
5331 | 5395 | .validate_stateid = nfs4_validate_delegation_stateid, | |
5332 | struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = { | 5396 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, |
5333 | &nfs40_reboot_recovery_ops, | 5397 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, |
5334 | #if defined(CONFIG_NFS_V4_1) | 5398 | .state_renewal_ops = &nfs40_state_renewal_ops, |
5335 | &nfs41_reboot_recovery_ops, | ||
5336 | #endif | ||
5337 | }; | 5399 | }; |
5338 | 5400 | ||
5339 | struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = { | ||
5340 | &nfs40_nograce_recovery_ops, | ||
5341 | #if defined(CONFIG_NFS_V4_1) | 5401 | #if defined(CONFIG_NFS_V4_1) |
5342 | &nfs41_nograce_recovery_ops, | 5402 | static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { |
5343 | #endif | 5403 | .minor_version = 1, |
5404 | .call_sync = _nfs4_call_sync_session, | ||
5405 | .validate_stateid = nfs41_validate_delegation_stateid, | ||
5406 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | ||
5407 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | ||
5408 | .state_renewal_ops = &nfs41_state_renewal_ops, | ||
5344 | }; | 5409 | }; |
5410 | #endif | ||
5345 | 5411 | ||
5346 | struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = { | 5412 | const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { |
5347 | &nfs40_state_renewal_ops, | 5413 | [0] = &nfs_v4_0_minor_ops, |
5348 | #if defined(CONFIG_NFS_V4_1) | 5414 | #if defined(CONFIG_NFS_V4_1) |
5349 | &nfs41_state_renewal_ops, | 5415 | [1] = &nfs_v4_1_minor_ops, |
5350 | #endif | 5416 | #endif |
5351 | }; | 5417 | }; |
5352 | 5418 | ||
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index d87f10327b72..72b6c580af13 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -54,14 +54,14 @@ | |||
54 | void | 54 | void |
55 | nfs4_renew_state(struct work_struct *work) | 55 | nfs4_renew_state(struct work_struct *work) |
56 | { | 56 | { |
57 | struct nfs4_state_maintenance_ops *ops; | 57 | const struct nfs4_state_maintenance_ops *ops; |
58 | struct nfs_client *clp = | 58 | struct nfs_client *clp = |
59 | container_of(work, struct nfs_client, cl_renewd.work); | 59 | container_of(work, struct nfs_client, cl_renewd.work); |
60 | struct rpc_cred *cred; | 60 | struct rpc_cred *cred; |
61 | long lease; | 61 | long lease; |
62 | unsigned long last, now; | 62 | unsigned long last, now; |
63 | 63 | ||
64 | ops = nfs4_state_renewal_ops[clp->cl_minorversion]; | 64 | ops = clp->cl_mvops->state_renewal_ops; |
65 | dprintk("%s: start\n", __func__); | 65 | dprintk("%s: start\n", __func__); |
66 | /* Are there any active superblocks? */ | 66 | /* Are there any active superblocks? */ |
67 | if (list_empty(&clp->cl_superblocks)) | 67 | if (list_empty(&clp->cl_superblocks)) |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 34acf5926fdc..3e2f19b04c06 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -145,7 +145,9 @@ static void nfs4_end_drain_session(struct nfs_client *clp) | |||
145 | struct nfs4_session *ses = clp->cl_session; | 145 | struct nfs4_session *ses = clp->cl_session; |
146 | int max_slots; | 146 | int max_slots; |
147 | 147 | ||
148 | if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { | 148 | if (ses == NULL) |
149 | return; | ||
150 | if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { | ||
149 | spin_lock(&ses->fc_slot_table.slot_tbl_lock); | 151 | spin_lock(&ses->fc_slot_table.slot_tbl_lock); |
150 | max_slots = ses->fc_slot_table.max_slots; | 152 | max_slots = ses->fc_slot_table.max_slots; |
151 | while (max_slots--) { | 153 | while (max_slots--) { |
@@ -167,7 +169,7 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) | |||
167 | struct nfs4_slot_table *tbl = &ses->fc_slot_table; | 169 | struct nfs4_slot_table *tbl = &ses->fc_slot_table; |
168 | 170 | ||
169 | spin_lock(&tbl->slot_tbl_lock); | 171 | spin_lock(&tbl->slot_tbl_lock); |
170 | set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); | 172 | set_bit(NFS4_SESSION_DRAINING, &ses->session_state); |
171 | if (tbl->highest_used_slotid != -1) { | 173 | if (tbl->highest_used_slotid != -1) { |
172 | INIT_COMPLETION(ses->complete); | 174 | INIT_COMPLETION(ses->complete); |
173 | spin_unlock(&tbl->slot_tbl_lock); | 175 | spin_unlock(&tbl->slot_tbl_lock); |
@@ -371,7 +373,6 @@ nfs4_alloc_state_owner(void) | |||
371 | return NULL; | 373 | return NULL; |
372 | spin_lock_init(&sp->so_lock); | 374 | spin_lock_init(&sp->so_lock); |
373 | INIT_LIST_HEAD(&sp->so_states); | 375 | INIT_LIST_HEAD(&sp->so_states); |
374 | INIT_LIST_HEAD(&sp->so_delegations); | ||
375 | rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); | 376 | rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); |
376 | sp->so_seqid.sequence = &sp->so_sequence; | 377 | sp->so_seqid.sequence = &sp->so_sequence; |
377 | spin_lock_init(&sp->so_sequence.lock); | 378 | spin_lock_init(&sp->so_sequence.lock); |
@@ -384,7 +385,7 @@ static void | |||
384 | nfs4_drop_state_owner(struct nfs4_state_owner *sp) | 385 | nfs4_drop_state_owner(struct nfs4_state_owner *sp) |
385 | { | 386 | { |
386 | if (!RB_EMPTY_NODE(&sp->so_client_node)) { | 387 | if (!RB_EMPTY_NODE(&sp->so_client_node)) { |
387 | struct nfs_client *clp = sp->so_client; | 388 | struct nfs_client *clp = sp->so_server->nfs_client; |
388 | 389 | ||
389 | spin_lock(&clp->cl_lock); | 390 | spin_lock(&clp->cl_lock); |
390 | rb_erase(&sp->so_client_node, &clp->cl_state_owners); | 391 | rb_erase(&sp->so_client_node, &clp->cl_state_owners); |
@@ -406,7 +407,6 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct | |||
406 | new = nfs4_alloc_state_owner(); | 407 | new = nfs4_alloc_state_owner(); |
407 | if (new == NULL) | 408 | if (new == NULL) |
408 | return NULL; | 409 | return NULL; |
409 | new->so_client = clp; | ||
410 | new->so_server = server; | 410 | new->so_server = server; |
411 | new->so_cred = cred; | 411 | new->so_cred = cred; |
412 | spin_lock(&clp->cl_lock); | 412 | spin_lock(&clp->cl_lock); |
@@ -423,7 +423,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct | |||
423 | 423 | ||
424 | void nfs4_put_state_owner(struct nfs4_state_owner *sp) | 424 | void nfs4_put_state_owner(struct nfs4_state_owner *sp) |
425 | { | 425 | { |
426 | struct nfs_client *clp = sp->so_client; | 426 | struct nfs_client *clp = sp->so_server->nfs_client; |
427 | struct rpc_cred *cred = sp->so_cred; | 427 | struct rpc_cred *cred = sp->so_cred; |
428 | 428 | ||
429 | if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) | 429 | if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) |
@@ -602,12 +602,21 @@ void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) | |||
602 | * that is compatible with current->files | 602 | * that is compatible with current->files |
603 | */ | 603 | */ |
604 | static struct nfs4_lock_state * | 604 | static struct nfs4_lock_state * |
605 | __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) | 605 | __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) |
606 | { | 606 | { |
607 | struct nfs4_lock_state *pos; | 607 | struct nfs4_lock_state *pos; |
608 | list_for_each_entry(pos, &state->lock_states, ls_locks) { | 608 | list_for_each_entry(pos, &state->lock_states, ls_locks) { |
609 | if (pos->ls_owner != fl_owner) | 609 | if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) |
610 | continue; | 610 | continue; |
611 | switch (pos->ls_owner.lo_type) { | ||
612 | case NFS4_POSIX_LOCK_TYPE: | ||
613 | if (pos->ls_owner.lo_u.posix_owner != fl_owner) | ||
614 | continue; | ||
615 | break; | ||
616 | case NFS4_FLOCK_LOCK_TYPE: | ||
617 | if (pos->ls_owner.lo_u.flock_owner != fl_pid) | ||
618 | continue; | ||
619 | } | ||
611 | atomic_inc(&pos->ls_count); | 620 | atomic_inc(&pos->ls_count); |
612 | return pos; | 621 | return pos; |
613 | } | 622 | } |
@@ -619,10 +628,10 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) | |||
619 | * exists, return an uninitialized one. | 628 | * exists, return an uninitialized one. |
620 | * | 629 | * |
621 | */ | 630 | */ |
622 | static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) | 631 | static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) |
623 | { | 632 | { |
624 | struct nfs4_lock_state *lsp; | 633 | struct nfs4_lock_state *lsp; |
625 | struct nfs_client *clp = state->owner->so_client; | 634 | struct nfs_client *clp = state->owner->so_server->nfs_client; |
626 | 635 | ||
627 | lsp = kzalloc(sizeof(*lsp), GFP_NOFS); | 636 | lsp = kzalloc(sizeof(*lsp), GFP_NOFS); |
628 | if (lsp == NULL) | 637 | if (lsp == NULL) |
@@ -633,7 +642,18 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
633 | lsp->ls_seqid.sequence = &lsp->ls_sequence; | 642 | lsp->ls_seqid.sequence = &lsp->ls_sequence; |
634 | atomic_set(&lsp->ls_count, 1); | 643 | atomic_set(&lsp->ls_count, 1); |
635 | lsp->ls_state = state; | 644 | lsp->ls_state = state; |
636 | lsp->ls_owner = fl_owner; | 645 | lsp->ls_owner.lo_type = type; |
646 | switch (lsp->ls_owner.lo_type) { | ||
647 | case NFS4_FLOCK_LOCK_TYPE: | ||
648 | lsp->ls_owner.lo_u.flock_owner = fl_pid; | ||
649 | break; | ||
650 | case NFS4_POSIX_LOCK_TYPE: | ||
651 | lsp->ls_owner.lo_u.posix_owner = fl_owner; | ||
652 | break; | ||
653 | default: | ||
654 | kfree(lsp); | ||
655 | return NULL; | ||
656 | } | ||
637 | spin_lock(&clp->cl_lock); | 657 | spin_lock(&clp->cl_lock); |
638 | nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); | 658 | nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); |
639 | spin_unlock(&clp->cl_lock); | 659 | spin_unlock(&clp->cl_lock); |
@@ -643,7 +663,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f | |||
643 | 663 | ||
644 | static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) | 664 | static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) |
645 | { | 665 | { |
646 | struct nfs_client *clp = lsp->ls_state->owner->so_client; | 666 | struct nfs_client *clp = lsp->ls_state->owner->so_server->nfs_client; |
647 | 667 | ||
648 | spin_lock(&clp->cl_lock); | 668 | spin_lock(&clp->cl_lock); |
649 | nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); | 669 | nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); |
@@ -657,13 +677,13 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) | |||
657 | * exists, return an uninitialized one. | 677 | * exists, return an uninitialized one. |
658 | * | 678 | * |
659 | */ | 679 | */ |
660 | static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) | 680 | static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) |
661 | { | 681 | { |
662 | struct nfs4_lock_state *lsp, *new = NULL; | 682 | struct nfs4_lock_state *lsp, *new = NULL; |
663 | 683 | ||
664 | for(;;) { | 684 | for(;;) { |
665 | spin_lock(&state->state_lock); | 685 | spin_lock(&state->state_lock); |
666 | lsp = __nfs4_find_lock_state(state, owner); | 686 | lsp = __nfs4_find_lock_state(state, owner, pid, type); |
667 | if (lsp != NULL) | 687 | if (lsp != NULL) |
668 | break; | 688 | break; |
669 | if (new != NULL) { | 689 | if (new != NULL) { |
@@ -674,7 +694,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ | |||
674 | break; | 694 | break; |
675 | } | 695 | } |
676 | spin_unlock(&state->state_lock); | 696 | spin_unlock(&state->state_lock); |
677 | new = nfs4_alloc_lock_state(state, owner); | 697 | new = nfs4_alloc_lock_state(state, owner, pid, type); |
678 | if (new == NULL) | 698 | if (new == NULL) |
679 | return NULL; | 699 | return NULL; |
680 | } | 700 | } |
@@ -701,6 +721,8 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) | |||
701 | if (list_empty(&state->lock_states)) | 721 | if (list_empty(&state->lock_states)) |
702 | clear_bit(LK_STATE_IN_USE, &state->flags); | 722 | clear_bit(LK_STATE_IN_USE, &state->flags); |
703 | spin_unlock(&state->state_lock); | 723 | spin_unlock(&state->state_lock); |
724 | if (lsp->ls_flags & NFS_LOCK_INITIALIZED) | ||
725 | nfs4_release_lockowner(lsp); | ||
704 | nfs4_free_lock_state(lsp); | 726 | nfs4_free_lock_state(lsp); |
705 | } | 727 | } |
706 | 728 | ||
@@ -728,7 +750,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) | |||
728 | 750 | ||
729 | if (fl->fl_ops != NULL) | 751 | if (fl->fl_ops != NULL) |
730 | return 0; | 752 | return 0; |
731 | lsp = nfs4_get_lock_state(state, fl->fl_owner); | 753 | if (fl->fl_flags & FL_POSIX) |
754 | lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); | ||
755 | else if (fl->fl_flags & FL_FLOCK) | ||
756 | lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); | ||
757 | else | ||
758 | return -EINVAL; | ||
732 | if (lsp == NULL) | 759 | if (lsp == NULL) |
733 | return -ENOMEM; | 760 | return -ENOMEM; |
734 | fl->fl_u.nfs4_fl.owner = lsp; | 761 | fl->fl_u.nfs4_fl.owner = lsp; |
@@ -740,7 +767,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) | |||
740 | * Byte-range lock aware utility to initialize the stateid of read/write | 767 | * Byte-range lock aware utility to initialize the stateid of read/write |
741 | * requests. | 768 | * requests. |
742 | */ | 769 | */ |
743 | void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) | 770 | void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) |
744 | { | 771 | { |
745 | struct nfs4_lock_state *lsp; | 772 | struct nfs4_lock_state *lsp; |
746 | int seq; | 773 | int seq; |
@@ -753,7 +780,7 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f | |||
753 | return; | 780 | return; |
754 | 781 | ||
755 | spin_lock(&state->state_lock); | 782 | spin_lock(&state->state_lock); |
756 | lsp = __nfs4_find_lock_state(state, fl_owner); | 783 | lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); |
757 | if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) | 784 | if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) |
758 | memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); | 785 | memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); |
759 | spin_unlock(&state->state_lock); | 786 | spin_unlock(&state->state_lock); |
@@ -1041,11 +1068,11 @@ restart: | |||
1041 | case -NFS4ERR_BAD_STATEID: | 1068 | case -NFS4ERR_BAD_STATEID: |
1042 | case -NFS4ERR_RECLAIM_BAD: | 1069 | case -NFS4ERR_RECLAIM_BAD: |
1043 | case -NFS4ERR_RECLAIM_CONFLICT: | 1070 | case -NFS4ERR_RECLAIM_CONFLICT: |
1044 | nfs4_state_mark_reclaim_nograce(sp->so_client, state); | 1071 | nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); |
1045 | break; | 1072 | break; |
1046 | case -NFS4ERR_EXPIRED: | 1073 | case -NFS4ERR_EXPIRED: |
1047 | case -NFS4ERR_NO_GRACE: | 1074 | case -NFS4ERR_NO_GRACE: |
1048 | nfs4_state_mark_reclaim_nograce(sp->so_client, state); | 1075 | nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); |
1049 | case -NFS4ERR_STALE_CLIENTID: | 1076 | case -NFS4ERR_STALE_CLIENTID: |
1050 | case -NFS4ERR_BADSESSION: | 1077 | case -NFS4ERR_BADSESSION: |
1051 | case -NFS4ERR_BADSLOT: | 1078 | case -NFS4ERR_BADSLOT: |
@@ -1120,8 +1147,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) | |||
1120 | if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) | 1147 | if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) |
1121 | return; | 1148 | return; |
1122 | 1149 | ||
1123 | nfs4_reclaim_complete(clp, | 1150 | nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); |
1124 | nfs4_reboot_recovery_ops[clp->cl_minorversion]); | ||
1125 | 1151 | ||
1126 | for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { | 1152 | for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { |
1127 | sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); | 1153 | sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); |
@@ -1211,8 +1237,8 @@ restart: | |||
1211 | static int nfs4_check_lease(struct nfs_client *clp) | 1237 | static int nfs4_check_lease(struct nfs_client *clp) |
1212 | { | 1238 | { |
1213 | struct rpc_cred *cred; | 1239 | struct rpc_cred *cred; |
1214 | struct nfs4_state_maintenance_ops *ops = | 1240 | const struct nfs4_state_maintenance_ops *ops = |
1215 | nfs4_state_renewal_ops[clp->cl_minorversion]; | 1241 | clp->cl_mvops->state_renewal_ops; |
1216 | int status = -NFS4ERR_EXPIRED; | 1242 | int status = -NFS4ERR_EXPIRED; |
1217 | 1243 | ||
1218 | /* Is the client already known to have an expired lease? */ | 1244 | /* Is the client already known to have an expired lease? */ |
@@ -1235,8 +1261,8 @@ out: | |||
1235 | static int nfs4_reclaim_lease(struct nfs_client *clp) | 1261 | static int nfs4_reclaim_lease(struct nfs_client *clp) |
1236 | { | 1262 | { |
1237 | struct rpc_cred *cred; | 1263 | struct rpc_cred *cred; |
1238 | struct nfs4_state_recovery_ops *ops = | 1264 | const struct nfs4_state_recovery_ops *ops = |
1239 | nfs4_reboot_recovery_ops[clp->cl_minorversion]; | 1265 | clp->cl_mvops->reboot_recovery_ops; |
1240 | int status = -ENOENT; | 1266 | int status = -ENOENT; |
1241 | 1267 | ||
1242 | cred = ops->get_clid_cred(clp); | 1268 | cred = ops->get_clid_cred(clp); |
@@ -1444,7 +1470,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1444 | /* First recover reboot state... */ | 1470 | /* First recover reboot state... */ |
1445 | if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { | 1471 | if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { |
1446 | status = nfs4_do_reclaim(clp, | 1472 | status = nfs4_do_reclaim(clp, |
1447 | nfs4_reboot_recovery_ops[clp->cl_minorversion]); | 1473 | clp->cl_mvops->reboot_recovery_ops); |
1448 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || | 1474 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || |
1449 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | 1475 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) |
1450 | continue; | 1476 | continue; |
@@ -1458,7 +1484,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1458 | /* Now recover expired state... */ | 1484 | /* Now recover expired state... */ |
1459 | if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { | 1485 | if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { |
1460 | status = nfs4_do_reclaim(clp, | 1486 | status = nfs4_do_reclaim(clp, |
1461 | nfs4_nograce_recovery_ops[clp->cl_minorversion]); | 1487 | clp->cl_mvops->nograce_recovery_ops); |
1462 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || | 1488 | if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || |
1463 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || | 1489 | test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || |
1464 | test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) | 1490 | test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 65c8dae4b267..08ef91291132 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -202,14 +202,17 @@ static int nfs4_stat_to_errno(int); | |||
202 | #define encode_link_maxsz (op_encode_hdr_maxsz + \ | 202 | #define encode_link_maxsz (op_encode_hdr_maxsz + \ |
203 | nfs4_name_maxsz) | 203 | nfs4_name_maxsz) |
204 | #define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) | 204 | #define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) |
205 | #define encode_lockowner_maxsz (7) | ||
205 | #define encode_lock_maxsz (op_encode_hdr_maxsz + \ | 206 | #define encode_lock_maxsz (op_encode_hdr_maxsz + \ |
206 | 7 + \ | 207 | 7 + \ |
207 | 1 + encode_stateid_maxsz + 8) | 208 | 1 + encode_stateid_maxsz + 1 + \ |
209 | encode_lockowner_maxsz) | ||
208 | #define decode_lock_denied_maxsz \ | 210 | #define decode_lock_denied_maxsz \ |
209 | (8 + decode_lockowner_maxsz) | 211 | (8 + decode_lockowner_maxsz) |
210 | #define decode_lock_maxsz (op_decode_hdr_maxsz + \ | 212 | #define decode_lock_maxsz (op_decode_hdr_maxsz + \ |
211 | decode_lock_denied_maxsz) | 213 | decode_lock_denied_maxsz) |
212 | #define encode_lockt_maxsz (op_encode_hdr_maxsz + 12) | 214 | #define encode_lockt_maxsz (op_encode_hdr_maxsz + 5 + \ |
215 | encode_lockowner_maxsz) | ||
213 | #define decode_lockt_maxsz (op_decode_hdr_maxsz + \ | 216 | #define decode_lockt_maxsz (op_decode_hdr_maxsz + \ |
214 | decode_lock_denied_maxsz) | 217 | decode_lock_denied_maxsz) |
215 | #define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \ | 218 | #define encode_locku_maxsz (op_encode_hdr_maxsz + 3 + \ |
@@ -217,6 +220,11 @@ static int nfs4_stat_to_errno(int); | |||
217 | 4) | 220 | 4) |
218 | #define decode_locku_maxsz (op_decode_hdr_maxsz + \ | 221 | #define decode_locku_maxsz (op_decode_hdr_maxsz + \ |
219 | decode_stateid_maxsz) | 222 | decode_stateid_maxsz) |
223 | #define encode_release_lockowner_maxsz \ | ||
224 | (op_encode_hdr_maxsz + \ | ||
225 | encode_lockowner_maxsz) | ||
226 | #define decode_release_lockowner_maxsz \ | ||
227 | (op_decode_hdr_maxsz) | ||
220 | #define encode_access_maxsz (op_encode_hdr_maxsz + 1) | 228 | #define encode_access_maxsz (op_encode_hdr_maxsz + 1) |
221 | #define decode_access_maxsz (op_decode_hdr_maxsz + 2) | 229 | #define decode_access_maxsz (op_decode_hdr_maxsz + 2) |
222 | #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ | 230 | #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ |
@@ -471,6 +479,12 @@ static int nfs4_stat_to_errno(int); | |||
471 | decode_sequence_maxsz + \ | 479 | decode_sequence_maxsz + \ |
472 | decode_putfh_maxsz + \ | 480 | decode_putfh_maxsz + \ |
473 | decode_locku_maxsz) | 481 | decode_locku_maxsz) |
482 | #define NFS4_enc_release_lockowner_sz \ | ||
483 | (compound_encode_hdr_maxsz + \ | ||
484 | encode_lockowner_maxsz) | ||
485 | #define NFS4_dec_release_lockowner_sz \ | ||
486 | (compound_decode_hdr_maxsz + \ | ||
487 | decode_lockowner_maxsz) | ||
474 | #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ | 488 | #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ |
475 | encode_sequence_maxsz + \ | 489 | encode_sequence_maxsz + \ |
476 | encode_putfh_maxsz + \ | 490 | encode_putfh_maxsz + \ |
@@ -744,7 +758,7 @@ static void encode_compound_hdr(struct xdr_stream *xdr, | |||
744 | struct compound_hdr *hdr) | 758 | struct compound_hdr *hdr) |
745 | { | 759 | { |
746 | __be32 *p; | 760 | __be32 *p; |
747 | struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; | 761 | struct rpc_auth *auth = req->rq_cred->cr_auth; |
748 | 762 | ||
749 | /* initialize running count of expected bytes in reply. | 763 | /* initialize running count of expected bytes in reply. |
750 | * NOTE: the replied tag SHOULD be the same is the one sent, | 764 | * NOTE: the replied tag SHOULD be the same is the one sent, |
@@ -1042,6 +1056,17 @@ static inline uint64_t nfs4_lock_length(struct file_lock *fl) | |||
1042 | return fl->fl_end - fl->fl_start + 1; | 1056 | return fl->fl_end - fl->fl_start + 1; |
1043 | } | 1057 | } |
1044 | 1058 | ||
1059 | static void encode_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner) | ||
1060 | { | ||
1061 | __be32 *p; | ||
1062 | |||
1063 | p = reserve_space(xdr, 28); | ||
1064 | p = xdr_encode_hyper(p, lowner->clientid); | ||
1065 | *p++ = cpu_to_be32(16); | ||
1066 | p = xdr_encode_opaque_fixed(p, "lock id:", 8); | ||
1067 | xdr_encode_hyper(p, lowner->id); | ||
1068 | } | ||
1069 | |||
1045 | /* | 1070 | /* |
1046 | * opcode,type,reclaim,offset,length,new_lock_owner = 32 | 1071 | * opcode,type,reclaim,offset,length,new_lock_owner = 32 |
1047 | * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 | 1072 | * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 |
@@ -1058,14 +1083,11 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args | |||
1058 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); | 1083 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1059 | *p = cpu_to_be32(args->new_lock_owner); | 1084 | *p = cpu_to_be32(args->new_lock_owner); |
1060 | if (args->new_lock_owner){ | 1085 | if (args->new_lock_owner){ |
1061 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32); | 1086 | p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); |
1062 | *p++ = cpu_to_be32(args->open_seqid->sequence->counter); | 1087 | *p++ = cpu_to_be32(args->open_seqid->sequence->counter); |
1063 | p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); | 1088 | p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); |
1064 | *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); | 1089 | *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); |
1065 | p = xdr_encode_hyper(p, args->lock_owner.clientid); | 1090 | encode_lockowner(xdr, &args->lock_owner); |
1066 | *p++ = cpu_to_be32(16); | ||
1067 | p = xdr_encode_opaque_fixed(p, "lock id:", 8); | ||
1068 | xdr_encode_hyper(p, args->lock_owner.id); | ||
1069 | } | 1091 | } |
1070 | else { | 1092 | else { |
1071 | p = reserve_space(xdr, NFS4_STATEID_SIZE+4); | 1093 | p = reserve_space(xdr, NFS4_STATEID_SIZE+4); |
@@ -1080,15 +1102,12 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar | |||
1080 | { | 1102 | { |
1081 | __be32 *p; | 1103 | __be32 *p; |
1082 | 1104 | ||
1083 | p = reserve_space(xdr, 52); | 1105 | p = reserve_space(xdr, 24); |
1084 | *p++ = cpu_to_be32(OP_LOCKT); | 1106 | *p++ = cpu_to_be32(OP_LOCKT); |
1085 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); | 1107 | *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); |
1086 | p = xdr_encode_hyper(p, args->fl->fl_start); | 1108 | p = xdr_encode_hyper(p, args->fl->fl_start); |
1087 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); | 1109 | p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); |
1088 | p = xdr_encode_hyper(p, args->lock_owner.clientid); | 1110 | encode_lockowner(xdr, &args->lock_owner); |
1089 | *p++ = cpu_to_be32(16); | ||
1090 | p = xdr_encode_opaque_fixed(p, "lock id:", 8); | ||
1091 | xdr_encode_hyper(p, args->lock_owner.id); | ||
1092 | hdr->nops++; | 1111 | hdr->nops++; |
1093 | hdr->replen += decode_lockt_maxsz; | 1112 | hdr->replen += decode_lockt_maxsz; |
1094 | } | 1113 | } |
@@ -1108,6 +1127,17 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar | |||
1108 | hdr->replen += decode_locku_maxsz; | 1127 | hdr->replen += decode_locku_maxsz; |
1109 | } | 1128 | } |
1110 | 1129 | ||
1130 | static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) | ||
1131 | { | ||
1132 | __be32 *p; | ||
1133 | |||
1134 | p = reserve_space(xdr, 4); | ||
1135 | *p = cpu_to_be32(OP_RELEASE_LOCKOWNER); | ||
1136 | encode_lockowner(xdr, lowner); | ||
1137 | hdr->nops++; | ||
1138 | hdr->replen += decode_release_lockowner_maxsz; | ||
1139 | } | ||
1140 | |||
1111 | static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | 1141 | static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) |
1112 | { | 1142 | { |
1113 | int len = name->len; | 1143 | int len = name->len; |
@@ -1172,7 +1202,7 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op | |||
1172 | break; | 1202 | break; |
1173 | default: | 1203 | default: |
1174 | clp = arg->server->nfs_client; | 1204 | clp = arg->server->nfs_client; |
1175 | if (clp->cl_minorversion > 0) { | 1205 | if (clp->cl_mvops->minor_version > 0) { |
1176 | if (nfs4_has_persistent_session(clp)) { | 1206 | if (nfs4_has_persistent_session(clp)) { |
1177 | *p = cpu_to_be32(NFS4_CREATE_GUARDED); | 1207 | *p = cpu_to_be32(NFS4_CREATE_GUARDED); |
1178 | encode_attrs(xdr, arg->u.attrs, arg->server); | 1208 | encode_attrs(xdr, arg->u.attrs, arg->server); |
@@ -1324,14 +1354,14 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
1324 | hdr->replen += decode_putrootfh_maxsz; | 1354 | hdr->replen += decode_putrootfh_maxsz; |
1325 | } | 1355 | } |
1326 | 1356 | ||
1327 | static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) | 1357 | static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) |
1328 | { | 1358 | { |
1329 | nfs4_stateid stateid; | 1359 | nfs4_stateid stateid; |
1330 | __be32 *p; | 1360 | __be32 *p; |
1331 | 1361 | ||
1332 | p = reserve_space(xdr, NFS4_STATEID_SIZE); | 1362 | p = reserve_space(xdr, NFS4_STATEID_SIZE); |
1333 | if (ctx->state != NULL) { | 1363 | if (ctx->state != NULL) { |
1334 | nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); | 1364 | nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); |
1335 | xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); | 1365 | xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); |
1336 | } else | 1366 | } else |
1337 | xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); | 1367 | xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); |
@@ -1344,7 +1374,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, | |||
1344 | p = reserve_space(xdr, 4); | 1374 | p = reserve_space(xdr, 4); |
1345 | *p = cpu_to_be32(OP_READ); | 1375 | *p = cpu_to_be32(OP_READ); |
1346 | 1376 | ||
1347 | encode_stateid(xdr, args->context); | 1377 | encode_stateid(xdr, args->context, args->lock_context); |
1348 | 1378 | ||
1349 | p = reserve_space(xdr, 12); | 1379 | p = reserve_space(xdr, 12); |
1350 | p = xdr_encode_hyper(p, args->offset); | 1380 | p = xdr_encode_hyper(p, args->offset); |
@@ -1523,7 +1553,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg | |||
1523 | p = reserve_space(xdr, 4); | 1553 | p = reserve_space(xdr, 4); |
1524 | *p = cpu_to_be32(OP_WRITE); | 1554 | *p = cpu_to_be32(OP_WRITE); |
1525 | 1555 | ||
1526 | encode_stateid(xdr, args->context); | 1556 | encode_stateid(xdr, args->context, args->lock_context); |
1527 | 1557 | ||
1528 | p = reserve_space(xdr, 16); | 1558 | p = reserve_space(xdr, 16); |
1529 | p = xdr_encode_hyper(p, args->offset); | 1559 | p = xdr_encode_hyper(p, args->offset); |
@@ -1704,7 +1734,7 @@ static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) | |||
1704 | { | 1734 | { |
1705 | #if defined(CONFIG_NFS_V4_1) | 1735 | #if defined(CONFIG_NFS_V4_1) |
1706 | if (args->sa_session) | 1736 | if (args->sa_session) |
1707 | return args->sa_session->clp->cl_minorversion; | 1737 | return args->sa_session->clp->cl_mvops->minor_version; |
1708 | #endif /* CONFIG_NFS_V4_1 */ | 1738 | #endif /* CONFIG_NFS_V4_1 */ |
1709 | return 0; | 1739 | return 0; |
1710 | } | 1740 | } |
@@ -2048,6 +2078,20 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_ | |||
2048 | return 0; | 2078 | return 0; |
2049 | } | 2079 | } |
2050 | 2080 | ||
2081 | static int nfs4_xdr_enc_release_lockowner(struct rpc_rqst *req, __be32 *p, struct nfs_release_lockowner_args *args) | ||
2082 | { | ||
2083 | struct xdr_stream xdr; | ||
2084 | struct compound_hdr hdr = { | ||
2085 | .minorversion = 0, | ||
2086 | }; | ||
2087 | |||
2088 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
2089 | encode_compound_hdr(&xdr, req, &hdr); | ||
2090 | encode_release_lockowner(&xdr, &args->lock_owner, &hdr); | ||
2091 | encode_nops(&hdr); | ||
2092 | return 0; | ||
2093 | } | ||
2094 | |||
2051 | /* | 2095 | /* |
2052 | * Encode a READLINK request | 2096 | * Encode a READLINK request |
2053 | */ | 2097 | */ |
@@ -2395,7 +2439,7 @@ static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p, | |||
2395 | { | 2439 | { |
2396 | struct xdr_stream xdr; | 2440 | struct xdr_stream xdr; |
2397 | struct compound_hdr hdr = { | 2441 | struct compound_hdr hdr = { |
2398 | .minorversion = args->client->cl_minorversion, | 2442 | .minorversion = args->client->cl_mvops->minor_version, |
2399 | }; | 2443 | }; |
2400 | 2444 | ||
2401 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 2445 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
@@ -2413,7 +2457,7 @@ static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p, | |||
2413 | { | 2457 | { |
2414 | struct xdr_stream xdr; | 2458 | struct xdr_stream xdr; |
2415 | struct compound_hdr hdr = { | 2459 | struct compound_hdr hdr = { |
2416 | .minorversion = args->client->cl_minorversion, | 2460 | .minorversion = args->client->cl_mvops->minor_version, |
2417 | }; | 2461 | }; |
2418 | 2462 | ||
2419 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 2463 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
@@ -2431,7 +2475,7 @@ static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p, | |||
2431 | { | 2475 | { |
2432 | struct xdr_stream xdr; | 2476 | struct xdr_stream xdr; |
2433 | struct compound_hdr hdr = { | 2477 | struct compound_hdr hdr = { |
2434 | .minorversion = session->clp->cl_minorversion, | 2478 | .minorversion = session->clp->cl_mvops->minor_version, |
2435 | }; | 2479 | }; |
2436 | 2480 | ||
2437 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 2481 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
@@ -3973,6 +4017,11 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) | |||
3973 | return status; | 4017 | return status; |
3974 | } | 4018 | } |
3975 | 4019 | ||
4020 | static int decode_release_lockowner(struct xdr_stream *xdr) | ||
4021 | { | ||
4022 | return decode_op_hdr(xdr, OP_RELEASE_LOCKOWNER); | ||
4023 | } | ||
4024 | |||
3976 | static int decode_lookup(struct xdr_stream *xdr) | 4025 | static int decode_lookup(struct xdr_stream *xdr) |
3977 | { | 4026 | { |
3978 | return decode_op_hdr(xdr, OP_LOOKUP); | 4027 | return decode_op_hdr(xdr, OP_LOOKUP); |
@@ -5259,6 +5308,19 @@ out: | |||
5259 | return status; | 5308 | return status; |
5260 | } | 5309 | } |
5261 | 5310 | ||
5311 | static int nfs4_xdr_dec_release_lockowner(struct rpc_rqst *rqstp, __be32 *p, void *dummy) | ||
5312 | { | ||
5313 | struct xdr_stream xdr; | ||
5314 | struct compound_hdr hdr; | ||
5315 | int status; | ||
5316 | |||
5317 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
5318 | status = decode_compound_hdr(&xdr, &hdr); | ||
5319 | if (!status) | ||
5320 | status = decode_release_lockowner(&xdr); | ||
5321 | return status; | ||
5322 | } | ||
5323 | |||
5262 | /* | 5324 | /* |
5263 | * Decode READLINK response | 5325 | * Decode READLINK response |
5264 | */ | 5326 | */ |
@@ -5866,6 +5928,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
5866 | PROC(GETACL, enc_getacl, dec_getacl), | 5928 | PROC(GETACL, enc_getacl, dec_getacl), |
5867 | PROC(SETACL, enc_setacl, dec_setacl), | 5929 | PROC(SETACL, enc_setacl, dec_setacl), |
5868 | PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), | 5930 | PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), |
5931 | PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), | ||
5869 | #if defined(CONFIG_NFS_V4_1) | 5932 | #if defined(CONFIG_NFS_V4_1) |
5870 | PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), | 5933 | PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), |
5871 | PROC(CREATE_SESSION, enc_create_session, dec_create_session), | 5934 | PROC(CREATE_SESSION, enc_create_session, dec_create_session), |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a3654e57b589..919490232e17 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -79,6 +79,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | |||
79 | req->wb_pgbase = offset; | 79 | req->wb_pgbase = offset; |
80 | req->wb_bytes = count; | 80 | req->wb_bytes = count; |
81 | req->wb_context = get_nfs_open_context(ctx); | 81 | req->wb_context = get_nfs_open_context(ctx); |
82 | req->wb_lock_context = nfs_get_lock_context(ctx); | ||
82 | kref_init(&req->wb_kref); | 83 | kref_init(&req->wb_kref); |
83 | return req; | 84 | return req; |
84 | } | 85 | } |
@@ -141,11 +142,16 @@ void nfs_clear_request(struct nfs_page *req) | |||
141 | { | 142 | { |
142 | struct page *page = req->wb_page; | 143 | struct page *page = req->wb_page; |
143 | struct nfs_open_context *ctx = req->wb_context; | 144 | struct nfs_open_context *ctx = req->wb_context; |
145 | struct nfs_lock_context *l_ctx = req->wb_lock_context; | ||
144 | 146 | ||
145 | if (page != NULL) { | 147 | if (page != NULL) { |
146 | page_cache_release(page); | 148 | page_cache_release(page); |
147 | req->wb_page = NULL; | 149 | req->wb_page = NULL; |
148 | } | 150 | } |
151 | if (l_ctx != NULL) { | ||
152 | nfs_put_lock_context(l_ctx); | ||
153 | req->wb_lock_context = NULL; | ||
154 | } | ||
149 | if (ctx != NULL) { | 155 | if (ctx != NULL) { |
150 | put_nfs_open_context(ctx); | 156 | put_nfs_open_context(ctx); |
151 | req->wb_context = NULL; | 157 | req->wb_context = NULL; |
@@ -235,7 +241,7 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev, | |||
235 | { | 241 | { |
236 | if (req->wb_context->cred != prev->wb_context->cred) | 242 | if (req->wb_context->cred != prev->wb_context->cred) |
237 | return 0; | 243 | return 0; |
238 | if (req->wb_context->lockowner != prev->wb_context->lockowner) | 244 | if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) |
239 | return 0; | 245 | return 0; |
240 | if (req->wb_context->state != prev->wb_context->state) | 246 | if (req->wb_context->state != prev->wb_context->state) |
241 | return 0; | 247 | return 0; |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6e2b06e6ca79..87adc2744246 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -190,6 +190,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | |||
190 | data->args.pages = data->pagevec; | 190 | data->args.pages = data->pagevec; |
191 | data->args.count = count; | 191 | data->args.count = count; |
192 | data->args.context = get_nfs_open_context(req->wb_context); | 192 | data->args.context = get_nfs_open_context(req->wb_context); |
193 | data->args.lock_context = req->wb_lock_context; | ||
193 | 194 | ||
194 | data->res.fattr = &data->fattr; | 195 | data->res.fattr = &data->fattr; |
195 | data->res.count = count; | 196 | data->res.count = count; |
@@ -410,7 +411,7 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) | |||
410 | { | 411 | { |
411 | struct nfs_read_data *data = calldata; | 412 | struct nfs_read_data *data = calldata; |
412 | 413 | ||
413 | if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, | 414 | if (nfs4_setup_sequence(NFS_SERVER(data->inode), |
414 | &data->args.seq_args, &data->res.seq_res, | 415 | &data->args.seq_args, &data->res.seq_res, |
415 | 0, task)) | 416 | 0, task)) |
416 | return; | 417 | return; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f9df16de4a56..ee26316ad1f4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -270,7 +270,7 @@ static const struct super_operations nfs_sops = { | |||
270 | .write_inode = nfs_write_inode, | 270 | .write_inode = nfs_write_inode, |
271 | .put_super = nfs_put_super, | 271 | .put_super = nfs_put_super, |
272 | .statfs = nfs_statfs, | 272 | .statfs = nfs_statfs, |
273 | .clear_inode = nfs_clear_inode, | 273 | .evict_inode = nfs_evict_inode, |
274 | .umount_begin = nfs_umount_begin, | 274 | .umount_begin = nfs_umount_begin, |
275 | .show_options = nfs_show_options, | 275 | .show_options = nfs_show_options, |
276 | .show_stats = nfs_show_stats, | 276 | .show_stats = nfs_show_stats, |
@@ -340,7 +340,7 @@ static const struct super_operations nfs4_sops = { | |||
340 | .write_inode = nfs_write_inode, | 340 | .write_inode = nfs_write_inode, |
341 | .put_super = nfs_put_super, | 341 | .put_super = nfs_put_super, |
342 | .statfs = nfs_statfs, | 342 | .statfs = nfs_statfs, |
343 | .clear_inode = nfs4_clear_inode, | 343 | .evict_inode = nfs4_evict_inode, |
344 | .umount_begin = nfs_umount_begin, | 344 | .umount_begin = nfs_umount_begin, |
345 | .show_options = nfs_show_options, | 345 | .show_options = nfs_show_options, |
346 | .show_stats = nfs_show_stats, | 346 | .show_stats = nfs_show_stats, |
@@ -546,6 +546,9 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, | |||
546 | { | 546 | { |
547 | struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; | 547 | struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; |
548 | 548 | ||
549 | if (nfss->flags & NFS_MOUNT_LEGACY_INTERFACE) | ||
550 | return; | ||
551 | |||
549 | switch (sap->sa_family) { | 552 | switch (sap->sa_family) { |
550 | case AF_INET: { | 553 | case AF_INET: { |
551 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | 554 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; |
@@ -1780,6 +1783,7 @@ static int nfs_validate_mount_data(void *options, | |||
1780 | * can deal with. | 1783 | * can deal with. |
1781 | */ | 1784 | */ |
1782 | args->flags = data->flags & NFS_MOUNT_FLAGMASK; | 1785 | args->flags = data->flags & NFS_MOUNT_FLAGMASK; |
1786 | args->flags |= NFS_MOUNT_LEGACY_INTERFACE; | ||
1783 | args->rsize = data->rsize; | 1787 | args->rsize = data->rsize; |
1784 | args->wsize = data->wsize; | 1788 | args->wsize = data->wsize; |
1785 | args->timeo = data->timeo; | 1789 | args->timeo = data->timeo; |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index a2242af6a17d..2f84adaad427 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -110,7 +110,7 @@ void nfs_unlink_prepare(struct rpc_task *task, void *calldata) | |||
110 | struct nfs_unlinkdata *data = calldata; | 110 | struct nfs_unlinkdata *data = calldata; |
111 | struct nfs_server *server = NFS_SERVER(data->dir); | 111 | struct nfs_server *server = NFS_SERVER(data->dir); |
112 | 112 | ||
113 | if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, | 113 | if (nfs4_setup_sequence(server, &data->args.seq_args, |
114 | &data->res.seq_res, 1, task)) | 114 | &data->res.seq_res, 1, task)) |
115 | return; | 115 | return; |
116 | rpc_call_start(task); | 116 | rpc_call_start(task); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9f81bdd91c55..874972d9427c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -700,7 +700,9 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
700 | req = nfs_page_find_request(page); | 700 | req = nfs_page_find_request(page); |
701 | if (req == NULL) | 701 | if (req == NULL) |
702 | return 0; | 702 | return 0; |
703 | do_flush = req->wb_page != page || req->wb_context != ctx; | 703 | do_flush = req->wb_page != page || req->wb_context != ctx || |
704 | req->wb_lock_context->lockowner != current->files || | ||
705 | req->wb_lock_context->pid != current->tgid; | ||
704 | nfs_release_request(req); | 706 | nfs_release_request(req); |
705 | if (!do_flush) | 707 | if (!do_flush) |
706 | return 0; | 708 | return 0; |
@@ -824,6 +826,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
824 | data->args.pages = data->pagevec; | 826 | data->args.pages = data->pagevec; |
825 | data->args.count = count; | 827 | data->args.count = count; |
826 | data->args.context = get_nfs_open_context(req->wb_context); | 828 | data->args.context = get_nfs_open_context(req->wb_context); |
829 | data->args.lock_context = req->wb_lock_context; | ||
827 | data->args.stable = NFS_UNSTABLE; | 830 | data->args.stable = NFS_UNSTABLE; |
828 | if (how & FLUSH_STABLE) { | 831 | if (how & FLUSH_STABLE) { |
829 | data->args.stable = NFS_DATA_SYNC; | 832 | data->args.stable = NFS_DATA_SYNC; |
@@ -1047,9 +1050,9 @@ out: | |||
1047 | void nfs_write_prepare(struct rpc_task *task, void *calldata) | 1050 | void nfs_write_prepare(struct rpc_task *task, void *calldata) |
1048 | { | 1051 | { |
1049 | struct nfs_write_data *data = calldata; | 1052 | struct nfs_write_data *data = calldata; |
1050 | struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client; | ||
1051 | 1053 | ||
1052 | if (nfs4_setup_sequence(clp, &data->args.seq_args, | 1054 | if (nfs4_setup_sequence(NFS_SERVER(data->inode), |
1055 | &data->args.seq_args, | ||
1053 | &data->res.seq_res, 1, task)) | 1056 | &data->res.seq_res, 1, task)) |
1054 | return; | 1057 | return; |
1055 | rpc_call_start(task); | 1058 | rpc_call_start(task); |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 3d68f45a37b9..5b7e3021e06b 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -168,7 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, | |||
168 | svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); | 168 | svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); |
169 | 169 | ||
170 | fh_copy(&resp->fh, &argp->fh); | 170 | fh_copy(&resp->fh, &argp->fh); |
171 | nfserr = nfsd_read(rqstp, &resp->fh, NULL, | 171 | nfserr = nfsd_read(rqstp, &resp->fh, |
172 | argp->offset, | 172 | argp->offset, |
173 | rqstp->rq_vec, argp->vlen, | 173 | rqstp->rq_vec, argp->vlen, |
174 | &resp->count); | 174 | &resp->count); |
@@ -271,7 +271,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, | |||
271 | fh_init(&resp->fh, NFS3_FHSIZE); | 271 | fh_init(&resp->fh, NFS3_FHSIZE); |
272 | nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, | 272 | nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, |
273 | &argp->attrs, S_IFDIR, 0, &resp->fh); | 273 | &argp->attrs, S_IFDIR, 0, &resp->fh); |
274 | 274 | fh_unlock(&resp->dirfh); | |
275 | RETURN_STATUS(nfserr); | 275 | RETURN_STATUS(nfserr); |
276 | } | 276 | } |
277 | 277 | ||
@@ -327,7 +327,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, | |||
327 | type = nfs3_ftypes[argp->ftype]; | 327 | type = nfs3_ftypes[argp->ftype]; |
328 | nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, | 328 | nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, |
329 | &argp->attrs, type, rdev, &resp->fh); | 329 | &argp->attrs, type, rdev, &resp->fh); |
330 | 330 | fh_unlock(&resp->dirfh); | |
331 | RETURN_STATUS(nfserr); | 331 | RETURN_STATUS(nfserr); |
332 | } | 332 | } |
333 | 333 | ||
@@ -348,6 +348,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, | |||
348 | /* Unlink. -S_IFDIR means file must not be a directory */ | 348 | /* Unlink. -S_IFDIR means file must not be a directory */ |
349 | fh_copy(&resp->fh, &argp->fh); | 349 | fh_copy(&resp->fh, &argp->fh); |
350 | nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); | 350 | nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); |
351 | fh_unlock(&resp->fh); | ||
351 | RETURN_STATUS(nfserr); | 352 | RETURN_STATUS(nfserr); |
352 | } | 353 | } |
353 | 354 | ||
@@ -367,6 +368,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, | |||
367 | 368 | ||
368 | fh_copy(&resp->fh, &argp->fh); | 369 | fh_copy(&resp->fh, &argp->fh); |
369 | nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); | 370 | nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); |
371 | fh_unlock(&resp->fh); | ||
370 | RETURN_STATUS(nfserr); | 372 | RETURN_STATUS(nfserr); |
371 | } | 373 | } |
372 | 374 | ||
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index eb78e7e22077..988cbb3a19b6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -143,8 +143,6 @@ struct nfs4_cb_compound_hdr { | |||
143 | u32 minorversion; | 143 | u32 minorversion; |
144 | /* res */ | 144 | /* res */ |
145 | int status; | 145 | int status; |
146 | u32 taglen; | ||
147 | char *tag; | ||
148 | }; | 146 | }; |
149 | 147 | ||
150 | static struct { | 148 | static struct { |
@@ -205,6 +203,16 @@ nfs_cb_stat_to_errno(int stat) | |||
205 | */ | 203 | */ |
206 | 204 | ||
207 | static void | 205 | static void |
206 | encode_stateid(struct xdr_stream *xdr, stateid_t *sid) | ||
207 | { | ||
208 | __be32 *p; | ||
209 | |||
210 | RESERVE_SPACE(sizeof(stateid_t)); | ||
211 | WRITE32(sid->si_generation); | ||
212 | WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
213 | } | ||
214 | |||
215 | static void | ||
208 | encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) | 216 | encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) |
209 | { | 217 | { |
210 | __be32 * p; | 218 | __be32 * p; |
@@ -229,10 +237,10 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, | |||
229 | __be32 *p; | 237 | __be32 *p; |
230 | int len = dp->dl_fh.fh_size; | 238 | int len = dp->dl_fh.fh_size; |
231 | 239 | ||
232 | RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len); | 240 | RESERVE_SPACE(4); |
233 | WRITE32(OP_CB_RECALL); | 241 | WRITE32(OP_CB_RECALL); |
234 | WRITE32(dp->dl_stateid.si_generation); | 242 | encode_stateid(xdr, &dp->dl_stateid); |
235 | WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); | 243 | RESERVE_SPACE(8 + (XDR_QUADLEN(len) << 2)); |
236 | WRITE32(0); /* truncate optimization not implemented */ | 244 | WRITE32(0); /* truncate optimization not implemented */ |
237 | WRITE32(len); | 245 | WRITE32(len); |
238 | WRITEMEM(&dp->dl_fh.fh_base, len); | 246 | WRITEMEM(&dp->dl_fh.fh_base, len); |
@@ -293,13 +301,14 @@ nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, | |||
293 | static int | 301 | static int |
294 | decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ | 302 | decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ |
295 | __be32 *p; | 303 | __be32 *p; |
304 | u32 taglen; | ||
296 | 305 | ||
297 | READ_BUF(8); | 306 | READ_BUF(8); |
298 | READ32(hdr->status); | 307 | READ32(hdr->status); |
299 | READ32(hdr->taglen); | 308 | /* We've got no use for the tag; ignore it: */ |
300 | READ_BUF(hdr->taglen + 4); | 309 | READ32(taglen); |
301 | hdr->tag = (char *)p; | 310 | READ_BUF(taglen + 4); |
302 | p += XDR_QUADLEN(hdr->taglen); | 311 | p += XDR_QUADLEN(taglen); |
303 | READ32(hdr->nops); | 312 | READ32(hdr->nops); |
304 | return 0; | 313 | return 0; |
305 | } | 314 | } |
@@ -667,28 +676,28 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
667 | } | 676 | } |
668 | 677 | ||
669 | switch (task->tk_status) { | 678 | switch (task->tk_status) { |
670 | case -EIO: | 679 | case 0: |
680 | return; | ||
681 | case -EBADHANDLE: | ||
682 | case -NFS4ERR_BAD_STATEID: | ||
683 | /* Race: client probably got cb_recall | ||
684 | * before open reply granting delegation */ | ||
685 | break; | ||
686 | default: | ||
671 | /* Network partition? */ | 687 | /* Network partition? */ |
672 | atomic_set(&clp->cl_cb_set, 0); | 688 | atomic_set(&clp->cl_cb_set, 0); |
673 | warn_no_callback_path(clp, task->tk_status); | 689 | warn_no_callback_path(clp, task->tk_status); |
674 | if (current_rpc_client != task->tk_client) { | 690 | if (current_rpc_client != task->tk_client) { |
675 | /* queue a callback on the new connection: */ | 691 | /* queue a callback on the new connection: */ |
692 | atomic_inc(&dp->dl_count); | ||
676 | nfsd4_cb_recall(dp); | 693 | nfsd4_cb_recall(dp); |
677 | return; | 694 | return; |
678 | } | 695 | } |
679 | case -EBADHANDLE: | ||
680 | case -NFS4ERR_BAD_STATEID: | ||
681 | /* Race: client probably got cb_recall | ||
682 | * before open reply granting delegation */ | ||
683 | break; | ||
684 | default: | ||
685 | /* success, or error we can't handle */ | ||
686 | return; | ||
687 | } | 696 | } |
688 | if (dp->dl_retries--) { | 697 | if (dp->dl_retries--) { |
689 | rpc_delay(task, 2*HZ); | 698 | rpc_delay(task, 2*HZ); |
690 | task->tk_status = 0; | 699 | task->tk_status = 0; |
691 | rpc_restart_call(task); | 700 | rpc_restart_call_prepare(task); |
692 | return; | 701 | return; |
693 | } else { | 702 | } else { |
694 | atomic_set(&clp->cl_cb_set, 0); | 703 | atomic_set(&clp->cl_cb_set, 0); |
@@ -752,18 +761,16 @@ static void _nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
752 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], | 761 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], |
753 | .rpc_cred = callback_cred | 762 | .rpc_cred = callback_cred |
754 | }; | 763 | }; |
755 | int status; | ||
756 | 764 | ||
757 | if (clnt == NULL) | 765 | if (clnt == NULL) { |
766 | nfs4_put_delegation(dp); | ||
758 | return; /* Client is shutting down; give up. */ | 767 | return; /* Client is shutting down; give up. */ |
768 | } | ||
759 | 769 | ||
760 | args->args_op = dp; | 770 | args->args_op = dp; |
761 | msg.rpc_argp = args; | 771 | msg.rpc_argp = args; |
762 | dp->dl_retries = 1; | 772 | dp->dl_retries = 1; |
763 | status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, | 773 | rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); |
764 | &nfsd4_cb_recall_ops, dp); | ||
765 | if (status) | ||
766 | nfs4_put_delegation(dp); | ||
767 | } | 774 | } |
768 | 775 | ||
769 | void nfsd4_do_callback_rpc(struct work_struct *w) | 776 | void nfsd4_do_callback_rpc(struct work_struct *w) |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4a2734758778..2e7357104cfd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -51,7 +51,6 @@ static time_t boot_time; | |||
51 | static u32 current_ownerid = 1; | 51 | static u32 current_ownerid = 1; |
52 | static u32 current_fileid = 1; | 52 | static u32 current_fileid = 1; |
53 | static u32 current_delegid = 1; | 53 | static u32 current_delegid = 1; |
54 | static u32 nfs4_init; | ||
55 | static stateid_t zerostateid; /* bits all 0 */ | 54 | static stateid_t zerostateid; /* bits all 0 */ |
56 | static stateid_t onestateid; /* bits all 1 */ | 55 | static stateid_t onestateid; /* bits all 1 */ |
57 | static u64 current_sessionid = 1; | 56 | static u64 current_sessionid = 1; |
@@ -163,6 +162,46 @@ static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; | |||
163 | static struct list_head file_hashtbl[FILE_HASH_SIZE]; | 162 | static struct list_head file_hashtbl[FILE_HASH_SIZE]; |
164 | static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; | 163 | static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; |
165 | 164 | ||
165 | static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) | ||
166 | { | ||
167 | BUG_ON(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); | ||
168 | atomic_inc(&fp->fi_access[oflag]); | ||
169 | } | ||
170 | |||
171 | static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) | ||
172 | { | ||
173 | if (oflag == O_RDWR) { | ||
174 | __nfs4_file_get_access(fp, O_RDONLY); | ||
175 | __nfs4_file_get_access(fp, O_WRONLY); | ||
176 | } else | ||
177 | __nfs4_file_get_access(fp, oflag); | ||
178 | } | ||
179 | |||
180 | static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) | ||
181 | { | ||
182 | if (fp->fi_fds[oflag]) { | ||
183 | fput(fp->fi_fds[oflag]); | ||
184 | fp->fi_fds[oflag] = NULL; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) | ||
189 | { | ||
190 | if (atomic_dec_and_test(&fp->fi_access[oflag])) { | ||
191 | nfs4_file_put_fd(fp, O_RDWR); | ||
192 | nfs4_file_put_fd(fp, oflag); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) | ||
197 | { | ||
198 | if (oflag == O_RDWR) { | ||
199 | __nfs4_file_put_access(fp, O_RDONLY); | ||
200 | __nfs4_file_put_access(fp, O_WRONLY); | ||
201 | } else | ||
202 | __nfs4_file_put_access(fp, oflag); | ||
203 | } | ||
204 | |||
166 | static struct nfs4_delegation * | 205 | static struct nfs4_delegation * |
167 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) | 206 | alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) |
168 | { | 207 | { |
@@ -171,6 +210,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
171 | struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; | 210 | struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; |
172 | 211 | ||
173 | dprintk("NFSD alloc_init_deleg\n"); | 212 | dprintk("NFSD alloc_init_deleg\n"); |
213 | /* | ||
214 | * Major work on the lease subsystem (for example, to support | ||
215 | * calbacks on stat) will be required before we can support | ||
216 | * write delegations properly. | ||
217 | */ | ||
218 | if (type != NFS4_OPEN_DELEGATE_READ) | ||
219 | return NULL; | ||
174 | if (fp->fi_had_conflict) | 220 | if (fp->fi_had_conflict) |
175 | return NULL; | 221 | return NULL; |
176 | if (num_delegations > max_delegations) | 222 | if (num_delegations > max_delegations) |
@@ -185,9 +231,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
185 | dp->dl_client = clp; | 231 | dp->dl_client = clp; |
186 | get_nfs4_file(fp); | 232 | get_nfs4_file(fp); |
187 | dp->dl_file = fp; | 233 | dp->dl_file = fp; |
234 | nfs4_file_get_access(fp, O_RDONLY); | ||
188 | dp->dl_flock = NULL; | 235 | dp->dl_flock = NULL; |
189 | get_file(stp->st_vfs_file); | ||
190 | dp->dl_vfs_file = stp->st_vfs_file; | ||
191 | dp->dl_type = type; | 236 | dp->dl_type = type; |
192 | dp->dl_ident = cb->cb_ident; | 237 | dp->dl_ident = cb->cb_ident; |
193 | dp->dl_stateid.si_boot = boot_time; | 238 | dp->dl_stateid.si_boot = boot_time; |
@@ -222,15 +267,12 @@ nfs4_put_delegation(struct nfs4_delegation *dp) | |||
222 | static void | 267 | static void |
223 | nfs4_close_delegation(struct nfs4_delegation *dp) | 268 | nfs4_close_delegation(struct nfs4_delegation *dp) |
224 | { | 269 | { |
225 | struct file *filp = dp->dl_vfs_file; | 270 | struct file *filp = find_readable_file(dp->dl_file); |
226 | 271 | ||
227 | dprintk("NFSD: close_delegation dp %p\n",dp); | 272 | dprintk("NFSD: close_delegation dp %p\n",dp); |
228 | dp->dl_vfs_file = NULL; | ||
229 | /* The following nfsd_close may not actually close the file, | ||
230 | * but we want to remove the lease in any case. */ | ||
231 | if (dp->dl_flock) | 273 | if (dp->dl_flock) |
232 | vfs_setlease(filp, F_UNLCK, &dp->dl_flock); | 274 | vfs_setlease(filp, F_UNLCK, &dp->dl_flock); |
233 | nfsd_close(filp); | 275 | nfs4_file_put_access(dp->dl_file, O_RDONLY); |
234 | } | 276 | } |
235 | 277 | ||
236 | /* Called under the state lock. */ | 278 | /* Called under the state lock. */ |
@@ -302,8 +344,12 @@ static void free_generic_stateid(struct nfs4_stateid *stp) | |||
302 | 344 | ||
303 | static void release_lock_stateid(struct nfs4_stateid *stp) | 345 | static void release_lock_stateid(struct nfs4_stateid *stp) |
304 | { | 346 | { |
347 | struct file *file; | ||
348 | |||
305 | unhash_generic_stateid(stp); | 349 | unhash_generic_stateid(stp); |
306 | locks_remove_posix(stp->st_vfs_file, (fl_owner_t)stp->st_stateowner); | 350 | file = find_any_file(stp->st_file); |
351 | if (file) | ||
352 | locks_remove_posix(file, (fl_owner_t)stp->st_stateowner); | ||
307 | free_generic_stateid(stp); | 353 | free_generic_stateid(stp); |
308 | } | 354 | } |
309 | 355 | ||
@@ -341,11 +387,85 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp) | |||
341 | } | 387 | } |
342 | } | 388 | } |
343 | 389 | ||
390 | /* | ||
391 | * We store the NONE, READ, WRITE, and BOTH bits separately in the | ||
392 | * st_{access,deny}_bmap field of the stateid, in order to track not | ||
393 | * only what share bits are currently in force, but also what | ||
394 | * combinations of share bits previous opens have used. This allows us | ||
395 | * to enforce the recommendation of rfc 3530 14.2.19 that the server | ||
396 | * return an error if the client attempt to downgrade to a combination | ||
397 | * of share bits not explicable by closing some of its previous opens. | ||
398 | * | ||
399 | * XXX: This enforcement is actually incomplete, since we don't keep | ||
400 | * track of access/deny bit combinations; so, e.g., we allow: | ||
401 | * | ||
402 | * OPEN allow read, deny write | ||
403 | * OPEN allow both, deny none | ||
404 | * DOWNGRADE allow read, deny none | ||
405 | * | ||
406 | * which we should reject. | ||
407 | */ | ||
408 | static void | ||
409 | set_access(unsigned int *access, unsigned long bmap) { | ||
410 | int i; | ||
411 | |||
412 | *access = 0; | ||
413 | for (i = 1; i < 4; i++) { | ||
414 | if (test_bit(i, &bmap)) | ||
415 | *access |= i; | ||
416 | } | ||
417 | } | ||
418 | |||
419 | static void | ||
420 | set_deny(unsigned int *deny, unsigned long bmap) { | ||
421 | int i; | ||
422 | |||
423 | *deny = 0; | ||
424 | for (i = 0; i < 4; i++) { | ||
425 | if (test_bit(i, &bmap)) | ||
426 | *deny |= i ; | ||
427 | } | ||
428 | } | ||
429 | |||
430 | static int | ||
431 | test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { | ||
432 | unsigned int access, deny; | ||
433 | |||
434 | set_access(&access, stp->st_access_bmap); | ||
435 | set_deny(&deny, stp->st_deny_bmap); | ||
436 | if ((access & open->op_share_deny) || (deny & open->op_share_access)) | ||
437 | return 0; | ||
438 | return 1; | ||
439 | } | ||
440 | |||
441 | static int nfs4_access_to_omode(u32 access) | ||
442 | { | ||
443 | switch (access) { | ||
444 | case NFS4_SHARE_ACCESS_READ: | ||
445 | return O_RDONLY; | ||
446 | case NFS4_SHARE_ACCESS_WRITE: | ||
447 | return O_WRONLY; | ||
448 | case NFS4_SHARE_ACCESS_BOTH: | ||
449 | return O_RDWR; | ||
450 | } | ||
451 | BUG(); | ||
452 | } | ||
453 | |||
454 | static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp) | ||
455 | { | ||
456 | unsigned int access; | ||
457 | |||
458 | set_access(&access, stp->st_access_bmap); | ||
459 | return nfs4_access_to_omode(access); | ||
460 | } | ||
461 | |||
344 | static void release_open_stateid(struct nfs4_stateid *stp) | 462 | static void release_open_stateid(struct nfs4_stateid *stp) |
345 | { | 463 | { |
464 | int oflag = nfs4_access_bmap_to_omode(stp); | ||
465 | |||
346 | unhash_generic_stateid(stp); | 466 | unhash_generic_stateid(stp); |
347 | release_stateid_lockowners(stp); | 467 | release_stateid_lockowners(stp); |
348 | nfsd_close(stp->st_vfs_file); | 468 | nfs4_file_put_access(stp->st_file, oflag); |
349 | free_generic_stateid(stp); | 469 | free_generic_stateid(stp); |
350 | } | 470 | } |
351 | 471 | ||
@@ -457,7 +577,7 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) | |||
457 | spin_unlock(&nfsd_drc_lock); | 577 | spin_unlock(&nfsd_drc_lock); |
458 | 578 | ||
459 | if (fchan->maxreqs == 0) | 579 | if (fchan->maxreqs == 0) |
460 | return nfserr_serverfault; | 580 | return nfserr_jukebox; |
461 | 581 | ||
462 | fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; | 582 | fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; |
463 | return 0; | 583 | return 0; |
@@ -542,7 +662,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | |||
542 | BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) | 662 | BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) |
543 | + sizeof(struct nfsd4_session) > PAGE_SIZE); | 663 | + sizeof(struct nfsd4_session) > PAGE_SIZE); |
544 | 664 | ||
545 | status = nfserr_serverfault; | 665 | status = nfserr_jukebox; |
546 | /* allocate struct nfsd4_session and slot table pointers in one piece */ | 666 | /* allocate struct nfsd4_session and slot table pointers in one piece */ |
547 | slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); | 667 | slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); |
548 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); | 668 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); |
@@ -591,10 +711,8 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) | |||
591 | 711 | ||
592 | dump_sessionid(__func__, sessionid); | 712 | dump_sessionid(__func__, sessionid); |
593 | idx = hash_sessionid(sessionid); | 713 | idx = hash_sessionid(sessionid); |
594 | dprintk("%s: idx is %d\n", __func__, idx); | ||
595 | /* Search in the appropriate list */ | 714 | /* Search in the appropriate list */ |
596 | list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { | 715 | list_for_each_entry(elem, &sessionid_hashtbl[idx], se_hash) { |
597 | dump_sessionid("list traversal", &elem->se_sessionid); | ||
598 | if (!memcmp(elem->se_sessionid.data, sessionid->data, | 716 | if (!memcmp(elem->se_sessionid.data, sessionid->data, |
599 | NFS4_MAX_SESSIONID_LEN)) { | 717 | NFS4_MAX_SESSIONID_LEN)) { |
600 | return elem; | 718 | return elem; |
@@ -714,7 +832,6 @@ release_session_client(struct nfsd4_session *session) | |||
714 | } else | 832 | } else |
715 | renew_client_locked(clp); | 833 | renew_client_locked(clp); |
716 | spin_unlock(&client_lock); | 834 | spin_unlock(&client_lock); |
717 | nfsd4_put_session(session); | ||
718 | } | 835 | } |
719 | 836 | ||
720 | /* must be called under the client_lock */ | 837 | /* must be called under the client_lock */ |
@@ -1220,7 +1337,7 @@ out_new: | |||
1220 | /* Normal case */ | 1337 | /* Normal case */ |
1221 | new = create_client(exid->clname, dname, rqstp, &verf); | 1338 | new = create_client(exid->clname, dname, rqstp, &verf); |
1222 | if (new == NULL) { | 1339 | if (new == NULL) { |
1223 | status = nfserr_serverfault; | 1340 | status = nfserr_jukebox; |
1224 | goto out; | 1341 | goto out; |
1225 | } | 1342 | } |
1226 | 1343 | ||
@@ -1760,6 +1877,8 @@ alloc_init_file(struct inode *ino) | |||
1760 | fp->fi_inode = igrab(ino); | 1877 | fp->fi_inode = igrab(ino); |
1761 | fp->fi_id = current_fileid++; | 1878 | fp->fi_id = current_fileid++; |
1762 | fp->fi_had_conflict = false; | 1879 | fp->fi_had_conflict = false; |
1880 | memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); | ||
1881 | memset(fp->fi_access, 0, sizeof(fp->fi_access)); | ||
1763 | spin_lock(&recall_lock); | 1882 | spin_lock(&recall_lock); |
1764 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); | 1883 | list_add(&fp->fi_hash, &file_hashtbl[hashval]); |
1765 | spin_unlock(&recall_lock); | 1884 | spin_unlock(&recall_lock); |
@@ -1971,57 +2090,6 @@ static inline int deny_valid(u32 x) | |||
1971 | } | 2090 | } |
1972 | 2091 | ||
1973 | /* | 2092 | /* |
1974 | * We store the NONE, READ, WRITE, and BOTH bits separately in the | ||
1975 | * st_{access,deny}_bmap field of the stateid, in order to track not | ||
1976 | * only what share bits are currently in force, but also what | ||
1977 | * combinations of share bits previous opens have used. This allows us | ||
1978 | * to enforce the recommendation of rfc 3530 14.2.19 that the server | ||
1979 | * return an error if the client attempt to downgrade to a combination | ||
1980 | * of share bits not explicable by closing some of its previous opens. | ||
1981 | * | ||
1982 | * XXX: This enforcement is actually incomplete, since we don't keep | ||
1983 | * track of access/deny bit combinations; so, e.g., we allow: | ||
1984 | * | ||
1985 | * OPEN allow read, deny write | ||
1986 | * OPEN allow both, deny none | ||
1987 | * DOWNGRADE allow read, deny none | ||
1988 | * | ||
1989 | * which we should reject. | ||
1990 | */ | ||
1991 | static void | ||
1992 | set_access(unsigned int *access, unsigned long bmap) { | ||
1993 | int i; | ||
1994 | |||
1995 | *access = 0; | ||
1996 | for (i = 1; i < 4; i++) { | ||
1997 | if (test_bit(i, &bmap)) | ||
1998 | *access |= i; | ||
1999 | } | ||
2000 | } | ||
2001 | |||
2002 | static void | ||
2003 | set_deny(unsigned int *deny, unsigned long bmap) { | ||
2004 | int i; | ||
2005 | |||
2006 | *deny = 0; | ||
2007 | for (i = 0; i < 4; i++) { | ||
2008 | if (test_bit(i, &bmap)) | ||
2009 | *deny |= i ; | ||
2010 | } | ||
2011 | } | ||
2012 | |||
2013 | static int | ||
2014 | test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { | ||
2015 | unsigned int access, deny; | ||
2016 | |||
2017 | set_access(&access, stp->st_access_bmap); | ||
2018 | set_deny(&deny, stp->st_deny_bmap); | ||
2019 | if ((access & open->op_share_deny) || (deny & open->op_share_access)) | ||
2020 | return 0; | ||
2021 | return 1; | ||
2022 | } | ||
2023 | |||
2024 | /* | ||
2025 | * Called to check deny when READ with all zero stateid or | 2093 | * Called to check deny when READ with all zero stateid or |
2026 | * WRITE with all zero or all one stateid | 2094 | * WRITE with all zero or all one stateid |
2027 | */ | 2095 | */ |
@@ -2052,14 +2120,12 @@ out: | |||
2052 | } | 2120 | } |
2053 | 2121 | ||
2054 | static inline void | 2122 | static inline void |
2055 | nfs4_file_downgrade(struct file *filp, unsigned int share_access) | 2123 | nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access) |
2056 | { | 2124 | { |
2057 | if (share_access & NFS4_SHARE_ACCESS_WRITE) { | 2125 | if (share_access & NFS4_SHARE_ACCESS_WRITE) |
2058 | drop_file_write_access(filp); | 2126 | nfs4_file_put_access(fp, O_WRONLY); |
2059 | spin_lock(&filp->f_lock); | 2127 | if (share_access & NFS4_SHARE_ACCESS_READ) |
2060 | filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; | 2128 | nfs4_file_put_access(fp, O_RDONLY); |
2061 | spin_unlock(&filp->f_lock); | ||
2062 | } | ||
2063 | } | 2129 | } |
2064 | 2130 | ||
2065 | /* | 2131 | /* |
@@ -2255,6 +2321,13 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) | |||
2255 | return NULL; | 2321 | return NULL; |
2256 | } | 2322 | } |
2257 | 2323 | ||
2324 | int share_access_to_flags(u32 share_access) | ||
2325 | { | ||
2326 | share_access &= ~NFS4_SHARE_WANT_MASK; | ||
2327 | |||
2328 | return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; | ||
2329 | } | ||
2330 | |||
2258 | static __be32 | 2331 | static __be32 |
2259 | nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, | 2332 | nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, |
2260 | struct nfs4_delegation **dp) | 2333 | struct nfs4_delegation **dp) |
@@ -2265,8 +2338,7 @@ nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, | |||
2265 | *dp = find_delegation_file(fp, &open->op_delegate_stateid); | 2338 | *dp = find_delegation_file(fp, &open->op_delegate_stateid); |
2266 | if (*dp == NULL) | 2339 | if (*dp == NULL) |
2267 | goto out; | 2340 | goto out; |
2268 | flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ? | 2341 | flags = share_access_to_flags(open->op_share_access); |
2269 | RD_STATE : WR_STATE; | ||
2270 | status = nfs4_check_delegmode(*dp, flags); | 2342 | status = nfs4_check_delegmode(*dp, flags); |
2271 | if (status) | 2343 | if (status) |
2272 | *dp = NULL; | 2344 | *dp = NULL; |
@@ -2308,30 +2380,53 @@ nfs4_alloc_stateid(void) | |||
2308 | return kmem_cache_alloc(stateid_slab, GFP_KERNEL); | 2380 | return kmem_cache_alloc(stateid_slab, GFP_KERNEL); |
2309 | } | 2381 | } |
2310 | 2382 | ||
2383 | static inline int nfs4_access_to_access(u32 nfs4_access) | ||
2384 | { | ||
2385 | int flags = 0; | ||
2386 | |||
2387 | if (nfs4_access & NFS4_SHARE_ACCESS_READ) | ||
2388 | flags |= NFSD_MAY_READ; | ||
2389 | if (nfs4_access & NFS4_SHARE_ACCESS_WRITE) | ||
2390 | flags |= NFSD_MAY_WRITE; | ||
2391 | return flags; | ||
2392 | } | ||
2393 | |||
2394 | static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file | ||
2395 | *fp, struct svc_fh *cur_fh, u32 nfs4_access) | ||
2396 | { | ||
2397 | __be32 status; | ||
2398 | int oflag = nfs4_access_to_omode(nfs4_access); | ||
2399 | int access = nfs4_access_to_access(nfs4_access); | ||
2400 | |||
2401 | if (!fp->fi_fds[oflag]) { | ||
2402 | status = nfsd_open(rqstp, cur_fh, S_IFREG, access, | ||
2403 | &fp->fi_fds[oflag]); | ||
2404 | if (status == nfserr_dropit) | ||
2405 | status = nfserr_jukebox; | ||
2406 | if (status) | ||
2407 | return status; | ||
2408 | } | ||
2409 | nfs4_file_get_access(fp, oflag); | ||
2410 | |||
2411 | return nfs_ok; | ||
2412 | } | ||
2413 | |||
2311 | static __be32 | 2414 | static __be32 |
2312 | nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, | 2415 | nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, |
2313 | struct nfs4_delegation *dp, | 2416 | struct nfs4_file *fp, struct svc_fh *cur_fh, |
2314 | struct svc_fh *cur_fh, int flags) | 2417 | struct nfsd4_open *open) |
2315 | { | 2418 | { |
2316 | struct nfs4_stateid *stp; | 2419 | struct nfs4_stateid *stp; |
2420 | __be32 status; | ||
2317 | 2421 | ||
2318 | stp = nfs4_alloc_stateid(); | 2422 | stp = nfs4_alloc_stateid(); |
2319 | if (stp == NULL) | 2423 | if (stp == NULL) |
2320 | return nfserr_resource; | 2424 | return nfserr_resource; |
2321 | 2425 | ||
2322 | if (dp) { | 2426 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open->op_share_access); |
2323 | get_file(dp->dl_vfs_file); | 2427 | if (status) { |
2324 | stp->st_vfs_file = dp->dl_vfs_file; | 2428 | kmem_cache_free(stateid_slab, stp); |
2325 | } else { | 2429 | return status; |
2326 | __be32 status; | ||
2327 | status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, | ||
2328 | &stp->st_vfs_file); | ||
2329 | if (status) { | ||
2330 | if (status == nfserr_dropit) | ||
2331 | status = nfserr_jukebox; | ||
2332 | kmem_cache_free(stateid_slab, stp); | ||
2333 | return status; | ||
2334 | } | ||
2335 | } | 2430 | } |
2336 | *stpp = stp; | 2431 | *stpp = stp; |
2337 | return 0; | 2432 | return 0; |
@@ -2353,35 +2448,30 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, | |||
2353 | } | 2448 | } |
2354 | 2449 | ||
2355 | static __be32 | 2450 | static __be32 |
2356 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) | 2451 | nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) |
2357 | { | 2452 | { |
2358 | struct file *filp = stp->st_vfs_file; | 2453 | u32 op_share_access, new_access; |
2359 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
2360 | unsigned int share_access, new_writer; | ||
2361 | __be32 status; | 2454 | __be32 status; |
2362 | 2455 | ||
2363 | set_access(&share_access, stp->st_access_bmap); | 2456 | set_access(&new_access, stp->st_access_bmap); |
2364 | new_writer = (~share_access) & open->op_share_access | 2457 | new_access = (~new_access) & open->op_share_access & ~NFS4_SHARE_WANT_MASK; |
2365 | & NFS4_SHARE_ACCESS_WRITE; | 2458 | |
2366 | 2459 | if (new_access) { | |
2367 | if (new_writer) { | 2460 | status = nfs4_get_vfs_file(rqstp, fp, cur_fh, new_access); |
2368 | int err = get_write_access(inode); | 2461 | if (status) |
2369 | if (err) | 2462 | return status; |
2370 | return nfserrno(err); | ||
2371 | err = mnt_want_write(cur_fh->fh_export->ex_path.mnt); | ||
2372 | if (err) | ||
2373 | return nfserrno(err); | ||
2374 | file_take_write(filp); | ||
2375 | } | 2463 | } |
2376 | status = nfsd4_truncate(rqstp, cur_fh, open); | 2464 | status = nfsd4_truncate(rqstp, cur_fh, open); |
2377 | if (status) { | 2465 | if (status) { |
2378 | if (new_writer) | 2466 | if (new_access) { |
2379 | put_write_access(inode); | 2467 | int oflag = nfs4_access_to_omode(new_access); |
2468 | nfs4_file_put_access(fp, oflag); | ||
2469 | } | ||
2380 | return status; | 2470 | return status; |
2381 | } | 2471 | } |
2382 | /* remember the open */ | 2472 | /* remember the open */ |
2383 | filp->f_mode |= open->op_share_access; | 2473 | op_share_access = open->op_share_access & ~NFS4_SHARE_WANT_MASK; |
2384 | __set_bit(open->op_share_access, &stp->st_access_bmap); | 2474 | __set_bit(op_share_access, &stp->st_access_bmap); |
2385 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); | 2475 | __set_bit(open->op_share_deny, &stp->st_deny_bmap); |
2386 | 2476 | ||
2387 | return nfs_ok; | 2477 | return nfs_ok; |
@@ -2444,13 +2534,14 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2444 | fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; | 2534 | fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; |
2445 | fl.fl_end = OFFSET_MAX; | 2535 | fl.fl_end = OFFSET_MAX; |
2446 | fl.fl_owner = (fl_owner_t)dp; | 2536 | fl.fl_owner = (fl_owner_t)dp; |
2447 | fl.fl_file = stp->st_vfs_file; | 2537 | fl.fl_file = find_readable_file(stp->st_file); |
2538 | BUG_ON(!fl.fl_file); | ||
2448 | fl.fl_pid = current->tgid; | 2539 | fl.fl_pid = current->tgid; |
2449 | 2540 | ||
2450 | /* vfs_setlease checks to see if delegation should be handed out. | 2541 | /* vfs_setlease checks to see if delegation should be handed out. |
2451 | * the lock_manager callbacks fl_mylease and fl_change are used | 2542 | * the lock_manager callbacks fl_mylease and fl_change are used |
2452 | */ | 2543 | */ |
2453 | if ((status = vfs_setlease(stp->st_vfs_file, fl.fl_type, &flp))) { | 2544 | if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { |
2454 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); | 2545 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); |
2455 | unhash_delegation(dp); | 2546 | unhash_delegation(dp); |
2456 | flag = NFS4_OPEN_DELEGATE_NONE; | 2547 | flag = NFS4_OPEN_DELEGATE_NONE; |
@@ -2514,18 +2605,12 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf | |||
2514 | */ | 2605 | */ |
2515 | if (stp) { | 2606 | if (stp) { |
2516 | /* Stateid was found, this is an OPEN upgrade */ | 2607 | /* Stateid was found, this is an OPEN upgrade */ |
2517 | status = nfs4_upgrade_open(rqstp, current_fh, stp, open); | 2608 | status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); |
2518 | if (status) | 2609 | if (status) |
2519 | goto out; | 2610 | goto out; |
2520 | update_stateid(&stp->st_stateid); | 2611 | update_stateid(&stp->st_stateid); |
2521 | } else { | 2612 | } else { |
2522 | /* Stateid was not found, this is a new OPEN */ | 2613 | status = nfs4_new_open(rqstp, &stp, fp, current_fh, open); |
2523 | int flags = 0; | ||
2524 | if (open->op_share_access & NFS4_SHARE_ACCESS_READ) | ||
2525 | flags |= NFSD_MAY_READ; | ||
2526 | if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) | ||
2527 | flags |= NFSD_MAY_WRITE; | ||
2528 | status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); | ||
2529 | if (status) | 2614 | if (status) |
2530 | goto out; | 2615 | goto out; |
2531 | init_stateid(stp, fp, open); | 2616 | init_stateid(stp, fp, open); |
@@ -2727,7 +2812,7 @@ search_close_lru(u32 st_id, int flags) | |||
2727 | static inline int | 2812 | static inline int |
2728 | nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) | 2813 | nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) |
2729 | { | 2814 | { |
2730 | return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_path.dentry->d_inode; | 2815 | return fhp->fh_dentry->d_inode != stp->st_file->fi_inode; |
2731 | } | 2816 | } |
2732 | 2817 | ||
2733 | static int | 2818 | static int |
@@ -2760,6 +2845,9 @@ __be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) | |||
2760 | { | 2845 | { |
2761 | __be32 status = nfserr_openmode; | 2846 | __be32 status = nfserr_openmode; |
2762 | 2847 | ||
2848 | /* For lock stateid's, we test the parent open, not the lock: */ | ||
2849 | if (stp->st_openstp) | ||
2850 | stp = stp->st_openstp; | ||
2763 | if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) | 2851 | if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) |
2764 | goto out; | 2852 | goto out; |
2765 | if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) | 2853 | if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) |
@@ -2872,7 +2960,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
2872 | goto out; | 2960 | goto out; |
2873 | renew_client(dp->dl_client); | 2961 | renew_client(dp->dl_client); |
2874 | if (filpp) | 2962 | if (filpp) |
2875 | *filpp = dp->dl_vfs_file; | 2963 | *filpp = find_readable_file(dp->dl_file); |
2964 | BUG_ON(!*filpp); | ||
2876 | } else { /* open or lock stateid */ | 2965 | } else { /* open or lock stateid */ |
2877 | stp = find_stateid(stateid, flags); | 2966 | stp = find_stateid(stateid, flags); |
2878 | if (!stp) | 2967 | if (!stp) |
@@ -2889,8 +2978,13 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
2889 | if (status) | 2978 | if (status) |
2890 | goto out; | 2979 | goto out; |
2891 | renew_client(stp->st_stateowner->so_client); | 2980 | renew_client(stp->st_stateowner->so_client); |
2892 | if (filpp) | 2981 | if (filpp) { |
2893 | *filpp = stp->st_vfs_file; | 2982 | if (flags & RD_STATE) |
2983 | *filpp = find_readable_file(stp->st_file); | ||
2984 | else | ||
2985 | *filpp = find_writeable_file(stp->st_file); | ||
2986 | BUG_ON(!*filpp); /* assured by check_openmode */ | ||
2987 | } | ||
2894 | } | 2988 | } |
2895 | status = nfs_ok; | 2989 | status = nfs_ok; |
2896 | out: | 2990 | out: |
@@ -3126,8 +3220,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, | |||
3126 | goto out; | 3220 | goto out; |
3127 | } | 3221 | } |
3128 | set_access(&share_access, stp->st_access_bmap); | 3222 | set_access(&share_access, stp->st_access_bmap); |
3129 | nfs4_file_downgrade(stp->st_vfs_file, | 3223 | nfs4_file_downgrade(stp->st_file, share_access & ~od->od_share_access); |
3130 | share_access & ~od->od_share_access); | ||
3131 | 3224 | ||
3132 | reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); | 3225 | reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); |
3133 | reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); | 3226 | reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); |
@@ -3346,11 +3439,9 @@ static inline void | |||
3346 | nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) | 3439 | nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) |
3347 | { | 3440 | { |
3348 | struct nfs4_stateowner *sop; | 3441 | struct nfs4_stateowner *sop; |
3349 | unsigned int hval; | ||
3350 | 3442 | ||
3351 | if (fl->fl_lmops == &nfsd_posix_mng_ops) { | 3443 | if (fl->fl_lmops == &nfsd_posix_mng_ops) { |
3352 | sop = (struct nfs4_stateowner *) fl->fl_owner; | 3444 | sop = (struct nfs4_stateowner *) fl->fl_owner; |
3353 | hval = lockownerid_hashval(sop->so_id); | ||
3354 | kref_get(&sop->so_ref); | 3445 | kref_get(&sop->so_ref); |
3355 | deny->ld_sop = sop; | 3446 | deny->ld_sop = sop; |
3356 | deny->ld_clientid = sop->so_client->cl_clientid; | 3447 | deny->ld_clientid = sop->so_client->cl_clientid; |
@@ -3446,8 +3537,6 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc | |||
3446 | stp->st_stateid.si_stateownerid = sop->so_id; | 3537 | stp->st_stateid.si_stateownerid = sop->so_id; |
3447 | stp->st_stateid.si_fileid = fp->fi_id; | 3538 | stp->st_stateid.si_fileid = fp->fi_id; |
3448 | stp->st_stateid.si_generation = 0; | 3539 | stp->st_stateid.si_generation = 0; |
3449 | stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ | ||
3450 | stp->st_access_bmap = open_stp->st_access_bmap; | ||
3451 | stp->st_deny_bmap = open_stp->st_deny_bmap; | 3540 | stp->st_deny_bmap = open_stp->st_deny_bmap; |
3452 | stp->st_openstp = open_stp; | 3541 | stp->st_openstp = open_stp; |
3453 | 3542 | ||
@@ -3547,7 +3636,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3547 | lock_sop = lock->lk_replay_owner; | 3636 | lock_sop = lock->lk_replay_owner; |
3548 | } | 3637 | } |
3549 | /* lock->lk_replay_owner and lock_stp have been created or found */ | 3638 | /* lock->lk_replay_owner and lock_stp have been created or found */ |
3550 | filp = lock_stp->st_vfs_file; | ||
3551 | 3639 | ||
3552 | status = nfserr_grace; | 3640 | status = nfserr_grace; |
3553 | if (locks_in_grace() && !lock->lk_reclaim) | 3641 | if (locks_in_grace() && !lock->lk_reclaim) |
@@ -3560,11 +3648,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3560 | switch (lock->lk_type) { | 3648 | switch (lock->lk_type) { |
3561 | case NFS4_READ_LT: | 3649 | case NFS4_READ_LT: |
3562 | case NFS4_READW_LT: | 3650 | case NFS4_READW_LT: |
3651 | filp = find_readable_file(lock_stp->st_file); | ||
3563 | file_lock.fl_type = F_RDLCK; | 3652 | file_lock.fl_type = F_RDLCK; |
3564 | cmd = F_SETLK; | 3653 | cmd = F_SETLK; |
3565 | break; | 3654 | break; |
3566 | case NFS4_WRITE_LT: | 3655 | case NFS4_WRITE_LT: |
3567 | case NFS4_WRITEW_LT: | 3656 | case NFS4_WRITEW_LT: |
3657 | filp = find_writeable_file(lock_stp->st_file); | ||
3568 | file_lock.fl_type = F_WRLCK; | 3658 | file_lock.fl_type = F_WRLCK; |
3569 | cmd = F_SETLK; | 3659 | cmd = F_SETLK; |
3570 | break; | 3660 | break; |
@@ -3572,6 +3662,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3572 | status = nfserr_inval; | 3662 | status = nfserr_inval; |
3573 | goto out; | 3663 | goto out; |
3574 | } | 3664 | } |
3665 | if (!filp) { | ||
3666 | status = nfserr_openmode; | ||
3667 | goto out; | ||
3668 | } | ||
3575 | file_lock.fl_owner = (fl_owner_t)lock_sop; | 3669 | file_lock.fl_owner = (fl_owner_t)lock_sop; |
3576 | file_lock.fl_pid = current->tgid; | 3670 | file_lock.fl_pid = current->tgid; |
3577 | file_lock.fl_file = filp; | 3671 | file_lock.fl_file = filp; |
@@ -3740,7 +3834,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3740 | &locku->lu_stateowner, &stp, NULL))) | 3834 | &locku->lu_stateowner, &stp, NULL))) |
3741 | goto out; | 3835 | goto out; |
3742 | 3836 | ||
3743 | filp = stp->st_vfs_file; | 3837 | filp = find_any_file(stp->st_file); |
3838 | if (!filp) { | ||
3839 | status = nfserr_lock_range; | ||
3840 | goto out; | ||
3841 | } | ||
3744 | BUG_ON(!filp); | 3842 | BUG_ON(!filp); |
3745 | locks_init_lock(&file_lock); | 3843 | locks_init_lock(&file_lock); |
3746 | file_lock.fl_type = F_UNLCK; | 3844 | file_lock.fl_type = F_UNLCK; |
@@ -3787,10 +3885,10 @@ out_nfserr: | |||
3787 | * 0: no locks held by lockowner | 3885 | * 0: no locks held by lockowner |
3788 | */ | 3886 | */ |
3789 | static int | 3887 | static int |
3790 | check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) | 3888 | check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) |
3791 | { | 3889 | { |
3792 | struct file_lock **flpp; | 3890 | struct file_lock **flpp; |
3793 | struct inode *inode = filp->f_path.dentry->d_inode; | 3891 | struct inode *inode = filp->fi_inode; |
3794 | int status = 0; | 3892 | int status = 0; |
3795 | 3893 | ||
3796 | lock_kernel(); | 3894 | lock_kernel(); |
@@ -3841,7 +3939,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, | |||
3841 | continue; | 3939 | continue; |
3842 | list_for_each_entry(stp, &sop->so_stateids, | 3940 | list_for_each_entry(stp, &sop->so_stateids, |
3843 | st_perstateowner) { | 3941 | st_perstateowner) { |
3844 | if (check_for_locks(stp->st_vfs_file, sop)) | 3942 | if (check_for_locks(stp->st_file, sop)) |
3845 | goto out; | 3943 | goto out; |
3846 | /* Note: so_perclient unused for lockowners, | 3944 | /* Note: so_perclient unused for lockowners, |
3847 | * so it's OK to fool with here. */ | 3945 | * so it's OK to fool with here. */ |
@@ -4066,16 +4164,8 @@ out_free_laundry: | |||
4066 | int | 4164 | int |
4067 | nfs4_state_start(void) | 4165 | nfs4_state_start(void) |
4068 | { | 4166 | { |
4069 | int ret; | ||
4070 | |||
4071 | if (nfs4_init) | ||
4072 | return 0; | ||
4073 | nfsd4_load_reboot_recovery_data(); | 4167 | nfsd4_load_reboot_recovery_data(); |
4074 | ret = __nfs4_state_start(); | 4168 | return __nfs4_state_start(); |
4075 | if (ret) | ||
4076 | return ret; | ||
4077 | nfs4_init = 1; | ||
4078 | return 0; | ||
4079 | } | 4169 | } |
4080 | 4170 | ||
4081 | static void | 4171 | static void |
@@ -4110,7 +4200,6 @@ __nfs4_state_shutdown(void) | |||
4110 | } | 4200 | } |
4111 | 4201 | ||
4112 | nfsd4_shutdown_recdir(); | 4202 | nfsd4_shutdown_recdir(); |
4113 | nfs4_init = 0; | ||
4114 | } | 4203 | } |
4115 | 4204 | ||
4116 | void | 4205 | void |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ac17a7080239..1a468bbd330f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1756,6 +1756,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1756 | struct nfs4_acl *acl = NULL; | 1756 | struct nfs4_acl *acl = NULL; |
1757 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | 1757 | struct nfsd4_compoundres *resp = rqstp->rq_resp; |
1758 | u32 minorversion = resp->cstate.minorversion; | 1758 | u32 minorversion = resp->cstate.minorversion; |
1759 | struct path path = { | ||
1760 | .mnt = exp->ex_path.mnt, | ||
1761 | .dentry = dentry, | ||
1762 | }; | ||
1759 | 1763 | ||
1760 | BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); | 1764 | BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); |
1761 | BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); | 1765 | BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); |
@@ -1776,7 +1780,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1776 | FATTR4_WORD0_MAXNAME)) || | 1780 | FATTR4_WORD0_MAXNAME)) || |
1777 | (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | | 1781 | (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | |
1778 | FATTR4_WORD1_SPACE_TOTAL))) { | 1782 | FATTR4_WORD1_SPACE_TOTAL))) { |
1779 | err = vfs_statfs(dentry, &statfs); | 1783 | err = vfs_statfs(&path, &statfs); |
1780 | if (err) | 1784 | if (err) |
1781 | goto out_nfserr; | 1785 | goto out_nfserr; |
1782 | } | 1786 | } |
@@ -2630,7 +2634,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, | |||
2630 | } | 2634 | } |
2631 | read->rd_vlen = v; | 2635 | read->rd_vlen = v; |
2632 | 2636 | ||
2633 | nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, | 2637 | nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, |
2634 | read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, | 2638 | read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, |
2635 | &maxcount); | 2639 | &maxcount); |
2636 | 2640 | ||
@@ -3325,6 +3329,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo | |||
3325 | } | 3329 | } |
3326 | /* Renew the clientid on success and on replay */ | 3330 | /* Renew the clientid on success and on replay */ |
3327 | release_session_client(cs->session); | 3331 | release_session_client(cs->session); |
3332 | nfsd4_put_session(cs->session); | ||
3328 | } | 3333 | } |
3329 | return 1; | 3334 | return 1; |
3330 | } | 3335 | } |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 508941c23af7..b53b1d042f1f 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -949,15 +949,12 @@ static ssize_t __write_ports_addfd(char *buf) | |||
949 | if (err != 0) | 949 | if (err != 0) |
950 | return err; | 950 | return err; |
951 | 951 | ||
952 | err = lockd_up(); | ||
953 | if (err != 0) | ||
954 | goto out; | ||
955 | |||
956 | err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); | 952 | err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); |
957 | if (err < 0) | 953 | if (err < 0) { |
958 | lockd_down(); | 954 | svc_destroy(nfsd_serv); |
955 | return err; | ||
956 | } | ||
959 | 957 | ||
960 | out: | ||
961 | /* Decrease the count, but don't shut down the service */ | 958 | /* Decrease the count, but don't shut down the service */ |
962 | nfsd_serv->sv_nrthreads--; | 959 | nfsd_serv->sv_nrthreads--; |
963 | return err; | 960 | return err; |
@@ -978,9 +975,6 @@ static ssize_t __write_ports_delfd(char *buf) | |||
978 | if (nfsd_serv != NULL) | 975 | if (nfsd_serv != NULL) |
979 | len = svc_sock_names(nfsd_serv, buf, | 976 | len = svc_sock_names(nfsd_serv, buf, |
980 | SIMPLE_TRANSACTION_LIMIT, toclose); | 977 | SIMPLE_TRANSACTION_LIMIT, toclose); |
981 | if (len >= 0) | ||
982 | lockd_down(); | ||
983 | |||
984 | kfree(toclose); | 978 | kfree(toclose); |
985 | return len; | 979 | return len; |
986 | } | 980 | } |
@@ -1014,6 +1008,9 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
1014 | PF_INET6, port, SVC_SOCK_ANONYMOUS); | 1008 | PF_INET6, port, SVC_SOCK_ANONYMOUS); |
1015 | if (err < 0 && err != -EAFNOSUPPORT) | 1009 | if (err < 0 && err != -EAFNOSUPPORT) |
1016 | goto out_close; | 1010 | goto out_close; |
1011 | |||
1012 | /* Decrease the count, but don't shut down the service */ | ||
1013 | nfsd_serv->sv_nrthreads--; | ||
1017 | return 0; | 1014 | return 0; |
1018 | out_close: | 1015 | out_close: |
1019 | xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); | 1016 | xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); |
@@ -1022,8 +1019,7 @@ out_close: | |||
1022 | svc_xprt_put(xprt); | 1019 | svc_xprt_put(xprt); |
1023 | } | 1020 | } |
1024 | out_err: | 1021 | out_err: |
1025 | /* Decrease the count, but don't shut down the service */ | 1022 | svc_destroy(nfsd_serv); |
1026 | nfsd_serv->sv_nrthreads--; | ||
1027 | return err; | 1023 | return err; |
1028 | } | 1024 | } |
1029 | 1025 | ||
@@ -1194,7 +1190,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) | |||
1194 | bsize = NFSSVC_MAXBLKSIZE; | 1190 | bsize = NFSSVC_MAXBLKSIZE; |
1195 | bsize &= ~(1024-1); | 1191 | bsize &= ~(1024-1); |
1196 | mutex_lock(&nfsd_mutex); | 1192 | mutex_lock(&nfsd_mutex); |
1197 | if (nfsd_serv && nfsd_serv->sv_nrthreads) { | 1193 | if (nfsd_serv) { |
1198 | mutex_unlock(&nfsd_mutex); | 1194 | mutex_unlock(&nfsd_mutex); |
1199 | return -EBUSY; | 1195 | return -EBUSY; |
1200 | } | 1196 | } |
@@ -1310,6 +1306,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) | |||
1310 | return -EINVAL; | 1306 | return -EINVAL; |
1311 | 1307 | ||
1312 | status = nfs4_reset_recoverydir(recdir); | 1308 | status = nfs4_reset_recoverydir(recdir); |
1309 | if (status) | ||
1310 | return status; | ||
1313 | } | 1311 | } |
1314 | 1312 | ||
1315 | return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", | 1313 | return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 72377761270e..b76ac3a82e39 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -153,6 +153,7 @@ void nfsd_lockd_shutdown(void); | |||
153 | #define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) | 153 | #define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID) |
154 | #define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) | 154 | #define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK) |
155 | #define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) | 155 | #define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME) |
156 | #define nfserr_lock_range cpu_to_be32(NFSERR_LOCK_RANGE) | ||
156 | #define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) | 157 | #define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH) |
157 | #define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) | 158 | #define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP) |
158 | #define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) | 159 | #define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR) |
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a047ad6111ef..08e17264784b 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
@@ -144,7 +144,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, | |||
144 | svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); | 144 | svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); |
145 | 145 | ||
146 | resp->count = argp->count; | 146 | resp->count = argp->count; |
147 | nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, | 147 | nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), |
148 | argp->offset, | 148 | argp->offset, |
149 | rqstp->rq_vec, argp->vlen, | 149 | rqstp->rq_vec, argp->vlen, |
150 | &resp->count); | 150 | &resp->count); |
@@ -290,7 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
290 | * gospel of sun micro | 290 | * gospel of sun micro |
291 | */ | 291 | */ |
292 | if (type != S_IFREG) { | 292 | if (type != S_IFREG) { |
293 | int is_borc = 0; | ||
294 | if (type != S_IFBLK && type != S_IFCHR) { | 293 | if (type != S_IFBLK && type != S_IFCHR) { |
295 | rdev = 0; | 294 | rdev = 0; |
296 | } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) { | 295 | } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) { |
@@ -298,7 +297,6 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, | |||
298 | type = S_IFIFO; | 297 | type = S_IFIFO; |
299 | } else { | 298 | } else { |
300 | /* Okay, char or block special */ | 299 | /* Okay, char or block special */ |
301 | is_borc = 1; | ||
302 | if (!rdev) | 300 | if (!rdev) |
303 | rdev = wanted; | 301 | rdev = wanted; |
304 | } | 302 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 06b2a26edfe0..e2c43464f237 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -180,15 +180,80 @@ int nfsd_nrthreads(void) | |||
180 | return rv; | 180 | return rv; |
181 | } | 181 | } |
182 | 182 | ||
183 | static int nfsd_init_socks(int port) | ||
184 | { | ||
185 | int error; | ||
186 | if (!list_empty(&nfsd_serv->sv_permsocks)) | ||
187 | return 0; | ||
188 | |||
189 | error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, | ||
190 | SVC_SOCK_DEFAULTS); | ||
191 | if (error < 0) | ||
192 | return error; | ||
193 | |||
194 | error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, | ||
195 | SVC_SOCK_DEFAULTS); | ||
196 | if (error < 0) | ||
197 | return error; | ||
198 | |||
199 | return 0; | ||
200 | } | ||
201 | |||
202 | static bool nfsd_up = false; | ||
203 | |||
204 | static int nfsd_startup(unsigned short port, int nrservs) | ||
205 | { | ||
206 | int ret; | ||
207 | |||
208 | if (nfsd_up) | ||
209 | return 0; | ||
210 | /* | ||
211 | * Readahead param cache - will no-op if it already exists. | ||
212 | * (Note therefore results will be suboptimal if number of | ||
213 | * threads is modified after nfsd start.) | ||
214 | */ | ||
215 | ret = nfsd_racache_init(2*nrservs); | ||
216 | if (ret) | ||
217 | return ret; | ||
218 | ret = nfsd_init_socks(port); | ||
219 | if (ret) | ||
220 | goto out_racache; | ||
221 | ret = lockd_up(); | ||
222 | if (ret) | ||
223 | goto out_racache; | ||
224 | ret = nfs4_state_start(); | ||
225 | if (ret) | ||
226 | goto out_lockd; | ||
227 | nfsd_up = true; | ||
228 | return 0; | ||
229 | out_lockd: | ||
230 | lockd_down(); | ||
231 | out_racache: | ||
232 | nfsd_racache_shutdown(); | ||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | static void nfsd_shutdown(void) | ||
237 | { | ||
238 | /* | ||
239 | * write_ports can create the server without actually starting | ||
240 | * any threads--if we get shut down before any threads are | ||
241 | * started, then nfsd_last_thread will be run before any of this | ||
242 | * other initialization has been done. | ||
243 | */ | ||
244 | if (!nfsd_up) | ||
245 | return; | ||
246 | nfs4_state_shutdown(); | ||
247 | lockd_down(); | ||
248 | nfsd_racache_shutdown(); | ||
249 | nfsd_up = false; | ||
250 | } | ||
251 | |||
183 | static void nfsd_last_thread(struct svc_serv *serv) | 252 | static void nfsd_last_thread(struct svc_serv *serv) |
184 | { | 253 | { |
185 | /* When last nfsd thread exits we need to do some clean-up */ | 254 | /* When last nfsd thread exits we need to do some clean-up */ |
186 | struct svc_xprt *xprt; | ||
187 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) | ||
188 | lockd_down(); | ||
189 | nfsd_serv = NULL; | 255 | nfsd_serv = NULL; |
190 | nfsd_racache_shutdown(); | 256 | nfsd_shutdown(); |
191 | nfs4_state_shutdown(); | ||
192 | 257 | ||
193 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " | 258 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " |
194 | "cache\n"); | 259 | "cache\n"); |
@@ -263,45 +328,18 @@ int nfsd_create_serv(void) | |||
263 | nfsd_max_blksize >= 8*1024*2) | 328 | nfsd_max_blksize >= 8*1024*2) |
264 | nfsd_max_blksize /= 2; | 329 | nfsd_max_blksize /= 2; |
265 | } | 330 | } |
331 | nfsd_reset_versions(); | ||
266 | 332 | ||
267 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, | 333 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, |
268 | nfsd_last_thread, nfsd, THIS_MODULE); | 334 | nfsd_last_thread, nfsd, THIS_MODULE); |
269 | if (nfsd_serv == NULL) | 335 | if (nfsd_serv == NULL) |
270 | err = -ENOMEM; | 336 | return -ENOMEM; |
271 | else | ||
272 | set_max_drc(); | ||
273 | 337 | ||
338 | set_max_drc(); | ||
274 | do_gettimeofday(&nfssvc_boot); /* record boot time */ | 339 | do_gettimeofday(&nfssvc_boot); /* record boot time */ |
275 | return err; | 340 | return err; |
276 | } | 341 | } |
277 | 342 | ||
278 | static int nfsd_init_socks(int port) | ||
279 | { | ||
280 | int error; | ||
281 | if (!list_empty(&nfsd_serv->sv_permsocks)) | ||
282 | return 0; | ||
283 | |||
284 | error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, | ||
285 | SVC_SOCK_DEFAULTS); | ||
286 | if (error < 0) | ||
287 | return error; | ||
288 | |||
289 | error = lockd_up(); | ||
290 | if (error < 0) | ||
291 | return error; | ||
292 | |||
293 | error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, | ||
294 | SVC_SOCK_DEFAULTS); | ||
295 | if (error < 0) | ||
296 | return error; | ||
297 | |||
298 | error = lockd_up(); | ||
299 | if (error < 0) | ||
300 | return error; | ||
301 | |||
302 | return 0; | ||
303 | } | ||
304 | |||
305 | int nfsd_nrpools(void) | 343 | int nfsd_nrpools(void) |
306 | { | 344 | { |
307 | if (nfsd_serv == NULL) | 345 | if (nfsd_serv == NULL) |
@@ -376,10 +414,16 @@ int nfsd_set_nrthreads(int n, int *nthreads) | |||
376 | return err; | 414 | return err; |
377 | } | 415 | } |
378 | 416 | ||
417 | /* | ||
418 | * Adjust the number of threads and return the new number of threads. | ||
419 | * This is also the function that starts the server if necessary, if | ||
420 | * this is the first time nrservs is nonzero. | ||
421 | */ | ||
379 | int | 422 | int |
380 | nfsd_svc(unsigned short port, int nrservs) | 423 | nfsd_svc(unsigned short port, int nrservs) |
381 | { | 424 | { |
382 | int error; | 425 | int error; |
426 | bool nfsd_up_before; | ||
383 | 427 | ||
384 | mutex_lock(&nfsd_mutex); | 428 | mutex_lock(&nfsd_mutex); |
385 | dprintk("nfsd: creating service\n"); | 429 | dprintk("nfsd: creating service\n"); |
@@ -391,34 +435,29 @@ nfsd_svc(unsigned short port, int nrservs) | |||
391 | if (nrservs == 0 && nfsd_serv == NULL) | 435 | if (nrservs == 0 && nfsd_serv == NULL) |
392 | goto out; | 436 | goto out; |
393 | 437 | ||
394 | /* Readahead param cache - will no-op if it already exists */ | 438 | error = nfsd_create_serv(); |
395 | error = nfsd_racache_init(2*nrservs); | ||
396 | if (error<0) | ||
397 | goto out; | ||
398 | error = nfs4_state_start(); | ||
399 | if (error) | 439 | if (error) |
400 | goto out; | 440 | goto out; |
401 | 441 | ||
402 | nfsd_reset_versions(); | 442 | nfsd_up_before = nfsd_up; |
403 | |||
404 | error = nfsd_create_serv(); | ||
405 | 443 | ||
444 | error = nfsd_startup(port, nrservs); | ||
406 | if (error) | 445 | if (error) |
407 | goto out; | 446 | goto out_destroy; |
408 | error = nfsd_init_socks(port); | ||
409 | if (error) | ||
410 | goto failure; | ||
411 | |||
412 | error = svc_set_num_threads(nfsd_serv, NULL, nrservs); | 447 | error = svc_set_num_threads(nfsd_serv, NULL, nrservs); |
413 | if (error == 0) | 448 | if (error) |
414 | /* We are holding a reference to nfsd_serv which | 449 | goto out_shutdown; |
415 | * we don't want to count in the return value, | 450 | /* We are holding a reference to nfsd_serv which |
416 | * so subtract 1 | 451 | * we don't want to count in the return value, |
417 | */ | 452 | * so subtract 1 |
418 | error = nfsd_serv->sv_nrthreads - 1; | 453 | */ |
419 | failure: | 454 | error = nfsd_serv->sv_nrthreads - 1; |
455 | out_shutdown: | ||
456 | if (error < 0 && !nfsd_up_before) | ||
457 | nfsd_shutdown(); | ||
458 | out_destroy: | ||
420 | svc_destroy(nfsd_serv); /* Release server */ | 459 | svc_destroy(nfsd_serv); /* Release server */ |
421 | out: | 460 | out: |
422 | mutex_unlock(&nfsd_mutex); | 461 | mutex_unlock(&nfsd_mutex); |
423 | return error; | 462 | return error; |
424 | } | 463 | } |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 006c84230c7c..7731a75971dd 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -88,7 +88,6 @@ struct nfs4_delegation { | |||
88 | struct nfs4_client *dl_client; | 88 | struct nfs4_client *dl_client; |
89 | struct nfs4_file *dl_file; | 89 | struct nfs4_file *dl_file; |
90 | struct file_lock *dl_flock; | 90 | struct file_lock *dl_flock; |
91 | struct file *dl_vfs_file; | ||
92 | u32 dl_type; | 91 | u32 dl_type; |
93 | time_t dl_time; | 92 | time_t dl_time; |
94 | /* For recall: */ | 93 | /* For recall: */ |
@@ -342,12 +341,50 @@ struct nfs4_file { | |||
342 | struct list_head fi_hash; /* hash by "struct inode *" */ | 341 | struct list_head fi_hash; /* hash by "struct inode *" */ |
343 | struct list_head fi_stateids; | 342 | struct list_head fi_stateids; |
344 | struct list_head fi_delegations; | 343 | struct list_head fi_delegations; |
344 | /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ | ||
345 | struct file * fi_fds[3]; | ||
346 | /* One each for O_RDONLY, O_WRONLY: */ | ||
347 | atomic_t fi_access[2]; | ||
348 | /* | ||
349 | * Each open stateid contributes 1 to either fi_readers or | ||
350 | * fi_writers, or both, depending on the open mode. A | ||
351 | * delegation also takes an fi_readers reference. Lock | ||
352 | * stateid's take none. | ||
353 | */ | ||
354 | atomic_t fi_readers; | ||
355 | atomic_t fi_writers; | ||
345 | struct inode *fi_inode; | 356 | struct inode *fi_inode; |
346 | u32 fi_id; /* used with stateowner->so_id | 357 | u32 fi_id; /* used with stateowner->so_id |
347 | * for stateid_hashtbl hash */ | 358 | * for stateid_hashtbl hash */ |
348 | bool fi_had_conflict; | 359 | bool fi_had_conflict; |
349 | }; | 360 | }; |
350 | 361 | ||
362 | /* XXX: for first cut may fall back on returning file that doesn't work | ||
363 | * at all? */ | ||
364 | static inline struct file *find_writeable_file(struct nfs4_file *f) | ||
365 | { | ||
366 | if (f->fi_fds[O_RDWR]) | ||
367 | return f->fi_fds[O_RDWR]; | ||
368 | return f->fi_fds[O_WRONLY]; | ||
369 | } | ||
370 | |||
371 | static inline struct file *find_readable_file(struct nfs4_file *f) | ||
372 | { | ||
373 | if (f->fi_fds[O_RDWR]) | ||
374 | return f->fi_fds[O_RDWR]; | ||
375 | return f->fi_fds[O_RDONLY]; | ||
376 | } | ||
377 | |||
378 | static inline struct file *find_any_file(struct nfs4_file *f) | ||
379 | { | ||
380 | if (f->fi_fds[O_RDWR]) | ||
381 | return f->fi_fds[O_RDWR]; | ||
382 | else if (f->fi_fds[O_RDWR]) | ||
383 | return f->fi_fds[O_WRONLY]; | ||
384 | else | ||
385 | return f->fi_fds[O_RDONLY]; | ||
386 | } | ||
387 | |||
351 | /* | 388 | /* |
352 | * nfs4_stateid can either be an open stateid or (eventually) a lock stateid | 389 | * nfs4_stateid can either be an open stateid or (eventually) a lock stateid |
353 | * | 390 | * |
@@ -373,7 +410,6 @@ struct nfs4_stateid { | |||
373 | struct nfs4_stateowner * st_stateowner; | 410 | struct nfs4_stateowner * st_stateowner; |
374 | struct nfs4_file * st_file; | 411 | struct nfs4_file * st_file; |
375 | stateid_t st_stateid; | 412 | stateid_t st_stateid; |
376 | struct file * st_vfs_file; | ||
377 | unsigned long st_access_bmap; | 413 | unsigned long st_access_bmap; |
378 | unsigned long st_deny_bmap; | 414 | unsigned long st_deny_bmap; |
379 | struct nfs4_stateid * st_openstp; | 415 | struct nfs4_stateid * st_openstp; |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 3c111120b619..96360a83cb91 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -604,7 +604,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac | |||
604 | return error; | 604 | return error; |
605 | } | 605 | } |
606 | 606 | ||
607 | #endif /* defined(CONFIG_NFS_V4) */ | 607 | #endif /* defined(CONFIG_NFSD_V4) */ |
608 | 608 | ||
609 | #ifdef CONFIG_NFSD_V3 | 609 | #ifdef CONFIG_NFSD_V3 |
610 | /* | 610 | /* |
@@ -903,7 +903,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
903 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) | 903 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) |
904 | { | 904 | { |
905 | struct inode *inode; | 905 | struct inode *inode; |
906 | struct raparms *ra; | ||
907 | mm_segment_t oldfs; | 906 | mm_segment_t oldfs; |
908 | __be32 err; | 907 | __be32 err; |
909 | int host_err; | 908 | int host_err; |
@@ -914,12 +913,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
914 | if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) | 913 | if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count)) |
915 | goto out; | 914 | goto out; |
916 | 915 | ||
917 | /* Get readahead parameters */ | ||
918 | ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); | ||
919 | |||
920 | if (ra && ra->p_set) | ||
921 | file->f_ra = ra->p_ra; | ||
922 | |||
923 | if (file->f_op->splice_read && rqstp->rq_splice_ok) { | 916 | if (file->f_op->splice_read && rqstp->rq_splice_ok) { |
924 | struct splice_desc sd = { | 917 | struct splice_desc sd = { |
925 | .len = 0, | 918 | .len = 0, |
@@ -937,21 +930,11 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
937 | set_fs(oldfs); | 930 | set_fs(oldfs); |
938 | } | 931 | } |
939 | 932 | ||
940 | /* Write back readahead params */ | ||
941 | if (ra) { | ||
942 | struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; | ||
943 | spin_lock(&rab->pb_lock); | ||
944 | ra->p_ra = file->f_ra; | ||
945 | ra->p_set = 1; | ||
946 | ra->p_count--; | ||
947 | spin_unlock(&rab->pb_lock); | ||
948 | } | ||
949 | |||
950 | if (host_err >= 0) { | 933 | if (host_err >= 0) { |
951 | nfsdstats.io_read += host_err; | 934 | nfsdstats.io_read += host_err; |
952 | *count = host_err; | 935 | *count = host_err; |
953 | err = 0; | 936 | err = 0; |
954 | fsnotify_access(file->f_path.dentry); | 937 | fsnotify_access(file); |
955 | } else | 938 | } else |
956 | err = nfserrno(host_err); | 939 | err = nfserrno(host_err); |
957 | out: | 940 | out: |
@@ -1062,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1062 | goto out_nfserr; | 1045 | goto out_nfserr; |
1063 | *cnt = host_err; | 1046 | *cnt = host_err; |
1064 | nfsdstats.io_write += host_err; | 1047 | nfsdstats.io_write += host_err; |
1065 | fsnotify_modify(file->f_path.dentry); | 1048 | fsnotify_modify(file); |
1066 | 1049 | ||
1067 | /* clear setuid/setgid flag after write */ | 1050 | /* clear setuid/setgid flag after write */ |
1068 | if (inode->i_mode & (S_ISUID | S_ISGID)) | 1051 | if (inode->i_mode & (S_ISUID | S_ISGID)) |
@@ -1086,8 +1069,45 @@ out: | |||
1086 | * on entry. On return, *count contains the number of bytes actually read. | 1069 | * on entry. On return, *count contains the number of bytes actually read. |
1087 | * N.B. After this call fhp needs an fh_put | 1070 | * N.B. After this call fhp needs an fh_put |
1088 | */ | 1071 | */ |
1072 | __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
1073 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) | ||
1074 | { | ||
1075 | struct file *file; | ||
1076 | struct inode *inode; | ||
1077 | struct raparms *ra; | ||
1078 | __be32 err; | ||
1079 | |||
1080 | err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); | ||
1081 | if (err) | ||
1082 | return err; | ||
1083 | |||
1084 | inode = file->f_path.dentry->d_inode; | ||
1085 | |||
1086 | /* Get readahead parameters */ | ||
1087 | ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); | ||
1088 | |||
1089 | if (ra && ra->p_set) | ||
1090 | file->f_ra = ra->p_ra; | ||
1091 | |||
1092 | err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); | ||
1093 | |||
1094 | /* Write back readahead params */ | ||
1095 | if (ra) { | ||
1096 | struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex]; | ||
1097 | spin_lock(&rab->pb_lock); | ||
1098 | ra->p_ra = file->f_ra; | ||
1099 | ra->p_set = 1; | ||
1100 | ra->p_count--; | ||
1101 | spin_unlock(&rab->pb_lock); | ||
1102 | } | ||
1103 | |||
1104 | nfsd_close(file); | ||
1105 | return err; | ||
1106 | } | ||
1107 | |||
1108 | /* As above, but use the provided file descriptor. */ | ||
1089 | __be32 | 1109 | __be32 |
1090 | nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 1110 | nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
1091 | loff_t offset, struct kvec *vec, int vlen, | 1111 | loff_t offset, struct kvec *vec, int vlen, |
1092 | unsigned long *count) | 1112 | unsigned long *count) |
1093 | { | 1113 | { |
@@ -1099,13 +1119,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
1099 | if (err) | 1119 | if (err) |
1100 | goto out; | 1120 | goto out; |
1101 | err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); | 1121 | err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); |
1102 | } else { | 1122 | } else /* Note file may still be NULL in NFSv4 special stateid case: */ |
1103 | err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); | 1123 | err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); |
1104 | if (err) | ||
1105 | goto out; | ||
1106 | err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); | ||
1107 | nfsd_close(file); | ||
1108 | } | ||
1109 | out: | 1124 | out: |
1110 | return err; | 1125 | return err; |
1111 | } | 1126 | } |
@@ -1631,7 +1646,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1631 | char *name, int len, struct svc_fh *tfhp) | 1646 | char *name, int len, struct svc_fh *tfhp) |
1632 | { | 1647 | { |
1633 | struct dentry *ddir, *dnew, *dold; | 1648 | struct dentry *ddir, *dnew, *dold; |
1634 | struct inode *dirp, *dest; | 1649 | struct inode *dirp; |
1635 | __be32 err; | 1650 | __be32 err; |
1636 | int host_err; | 1651 | int host_err; |
1637 | 1652 | ||
@@ -1659,7 +1674,6 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, | |||
1659 | goto out_nfserr; | 1674 | goto out_nfserr; |
1660 | 1675 | ||
1661 | dold = tfhp->fh_dentry; | 1676 | dold = tfhp->fh_dentry; |
1662 | dest = dold->d_inode; | ||
1663 | 1677 | ||
1664 | host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); | 1678 | host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); |
1665 | if (host_err) { | 1679 | if (host_err) { |
@@ -2019,8 +2033,14 @@ out: | |||
2019 | __be32 | 2033 | __be32 |
2020 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) | 2034 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) |
2021 | { | 2035 | { |
2022 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); | 2036 | struct path path = { |
2023 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) | 2037 | .mnt = fhp->fh_export->ex_path.mnt, |
2038 | .dentry = fhp->fh_dentry, | ||
2039 | }; | ||
2040 | __be32 err; | ||
2041 | |||
2042 | err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); | ||
2043 | if (!err && vfs_statfs(&path, stat)) | ||
2024 | err = nfserr_io; | 2044 | err = nfserr_io; |
2025 | return err; | 2045 | return err; |
2026 | } | 2046 | } |
@@ -2038,7 +2058,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
2038 | struct dentry *dentry, int acc) | 2058 | struct dentry *dentry, int acc) |
2039 | { | 2059 | { |
2040 | struct inode *inode = dentry->d_inode; | 2060 | struct inode *inode = dentry->d_inode; |
2041 | struct path path; | ||
2042 | int err; | 2061 | int err; |
2043 | 2062 | ||
2044 | if (acc == NFSD_MAY_NOP) | 2063 | if (acc == NFSD_MAY_NOP) |
@@ -2111,15 +2130,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
2111 | if (err == -EACCES && S_ISREG(inode->i_mode) && | 2130 | if (err == -EACCES && S_ISREG(inode->i_mode) && |
2112 | acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) | 2131 | acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) |
2113 | err = inode_permission(inode, MAY_EXEC); | 2132 | err = inode_permission(inode, MAY_EXEC); |
2114 | if (err) | ||
2115 | goto nfsd_out; | ||
2116 | 2133 | ||
2117 | /* Do integrity (permission) checking now, but defer incrementing | ||
2118 | * IMA counts to the actual file open. | ||
2119 | */ | ||
2120 | path.mnt = exp->ex_path.mnt; | ||
2121 | path.dentry = dentry; | ||
2122 | nfsd_out: | ||
2123 | return err? nfserrno(err) : 0; | 2134 | return err? nfserrno(err) : 0; |
2124 | } | 2135 | } |
2125 | 2136 | ||
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 217a62c2a357..9a370a5e36b7 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h | |||
@@ -64,7 +64,9 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, | |||
64 | __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, | 64 | __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, |
65 | int, struct file **); | 65 | int, struct file **); |
66 | void nfsd_close(struct file *); | 66 | void nfsd_close(struct file *); |
67 | __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, | 67 | __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, |
68 | loff_t, struct kvec *, int, unsigned long *); | ||
69 | __be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, | ||
68 | loff_t, struct kvec *, int, unsigned long *); | 70 | loff_t, struct kvec *, int, unsigned long *); |
69 | __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, | 71 | __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, |
70 | loff_t, struct kvec *,int, unsigned long *, int *); | 72 | loff_t, struct kvec *,int, unsigned long *, int *); |
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index effdbdbe6c11..3dbdc1d356bf 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include "nilfs.h" | 26 | #include "nilfs.h" |
27 | #include "bmap.h" | 27 | #include "bmap.h" |
28 | #include "sb.h" | 28 | #include "sb.h" |
29 | #include "btree.h" | ||
30 | #include "direct.h" | ||
29 | #include "btnode.h" | 31 | #include "btnode.h" |
30 | #include "mdt.h" | 32 | #include "mdt.h" |
31 | #include "dat.h" | 33 | #include "dat.h" |
@@ -533,7 +535,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) | |||
533 | 535 | ||
534 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | 536 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) |
535 | { | 537 | { |
536 | memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); | 538 | memcpy(gcbmap, bmap, sizeof(*bmap)); |
537 | init_rwsem(&gcbmap->b_sem); | 539 | init_rwsem(&gcbmap->b_sem); |
538 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | 540 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); |
539 | gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; | 541 | gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; |
@@ -541,7 +543,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | |||
541 | 543 | ||
542 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | 544 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) |
543 | { | 545 | { |
544 | memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); | 546 | memcpy(bmap, gcbmap, sizeof(*bmap)); |
545 | init_rwsem(&bmap->b_sem); | 547 | init_rwsem(&bmap->b_sem); |
546 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | 548 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); |
547 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; | 549 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; |
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 9980d7dbab91..a20569b19929 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h | |||
@@ -32,11 +32,6 @@ | |||
32 | 32 | ||
33 | #define NILFS_BMAP_INVALID_PTR 0 | 33 | #define NILFS_BMAP_INVALID_PTR 0 |
34 | 34 | ||
35 | #define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey) | ||
36 | #define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key) | ||
37 | #define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr) | ||
38 | #define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr) | ||
39 | |||
40 | #define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) | 35 | #define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) |
41 | 36 | ||
42 | 37 | ||
@@ -71,7 +66,7 @@ struct nilfs_bmap_operations { | |||
71 | int (*bop_delete)(struct nilfs_bmap *, __u64); | 66 | int (*bop_delete)(struct nilfs_bmap *, __u64); |
72 | void (*bop_clear)(struct nilfs_bmap *); | 67 | void (*bop_clear)(struct nilfs_bmap *); |
73 | 68 | ||
74 | int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); | 69 | int (*bop_propagate)(struct nilfs_bmap *, struct buffer_head *); |
75 | void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, | 70 | void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, |
76 | struct list_head *); | 71 | struct list_head *); |
77 | 72 | ||
@@ -110,6 +105,7 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) | |||
110 | * @b_last_allocated_ptr: last allocated ptr for data block | 105 | * @b_last_allocated_ptr: last allocated ptr for data block |
111 | * @b_ptr_type: pointer type | 106 | * @b_ptr_type: pointer type |
112 | * @b_state: state | 107 | * @b_state: state |
108 | * @b_nchildren_per_block: maximum number of child nodes for non-root nodes | ||
113 | */ | 109 | */ |
114 | struct nilfs_bmap { | 110 | struct nilfs_bmap { |
115 | union { | 111 | union { |
@@ -123,6 +119,7 @@ struct nilfs_bmap { | |||
123 | __u64 b_last_allocated_ptr; | 119 | __u64 b_last_allocated_ptr; |
124 | int b_ptr_type; | 120 | int b_ptr_type; |
125 | int b_state; | 121 | int b_state; |
122 | __u16 b_nchildren_per_block; | ||
126 | }; | 123 | }; |
127 | 124 | ||
128 | /* pointer type */ | 125 | /* pointer type */ |
@@ -224,6 +221,13 @@ static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, | |||
224 | nilfs_dat_abort_end(dat, &req->bpr_req); | 221 | nilfs_dat_abort_end(dat, &req->bpr_req); |
225 | } | 222 | } |
226 | 223 | ||
224 | static inline void nilfs_bmap_set_target_v(struct nilfs_bmap *bmap, __u64 key, | ||
225 | __u64 ptr) | ||
226 | { | ||
227 | bmap->b_last_allocated_key = key; | ||
228 | bmap->b_last_allocated_ptr = ptr; | ||
229 | } | ||
230 | |||
227 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, | 231 | __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, |
228 | const struct buffer_head *); | 232 | const struct buffer_head *); |
229 | 233 | ||
diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h deleted file mode 100644 index d41509bff47b..000000000000 --- a/fs/nilfs2/bmap_union.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | /* | ||
2 | * bmap_union.h - NILFS block mapping. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Koji Sato <koji@osrg.net>. | ||
21 | */ | ||
22 | |||
23 | #ifndef _NILFS_BMAP_UNION_H | ||
24 | #define _NILFS_BMAP_UNION_H | ||
25 | |||
26 | #include "bmap.h" | ||
27 | #include "direct.h" | ||
28 | #include "btree.h" | ||
29 | |||
30 | /** | ||
31 | * nilfs_bmap_union - | ||
32 | * @bi_bmap: bmap structure | ||
33 | * @bi_btree: direct map structure | ||
34 | * @bi_direct: B-tree structure | ||
35 | */ | ||
36 | union nilfs_bmap_union { | ||
37 | struct nilfs_bmap bi_bmap; | ||
38 | struct nilfs_direct bi_direct; | ||
39 | struct nilfs_btree bi_btree; | ||
40 | }; | ||
41 | |||
42 | #endif /* _NILFS_BMAP_UNION_H */ | ||
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 447ce47a3306..f78ab1044d1d 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -96,10 +96,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) | |||
96 | } | 96 | } |
97 | 97 | ||
98 | int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | 98 | int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, |
99 | sector_t pblocknr, struct buffer_head **pbh) | 99 | sector_t pblocknr, int mode, |
100 | struct buffer_head **pbh, sector_t *submit_ptr) | ||
100 | { | 101 | { |
101 | struct buffer_head *bh; | 102 | struct buffer_head *bh; |
102 | struct inode *inode = NILFS_BTNC_I(btnc); | 103 | struct inode *inode = NILFS_BTNC_I(btnc); |
104 | struct page *page; | ||
103 | int err; | 105 | int err; |
104 | 106 | ||
105 | bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); | 107 | bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); |
@@ -107,6 +109,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | |||
107 | return -ENOMEM; | 109 | return -ENOMEM; |
108 | 110 | ||
109 | err = -EEXIST; /* internal code */ | 111 | err = -EEXIST; /* internal code */ |
112 | page = bh->b_page; | ||
110 | 113 | ||
111 | if (buffer_uptodate(bh) || buffer_dirty(bh)) | 114 | if (buffer_uptodate(bh) || buffer_dirty(bh)) |
112 | goto found; | 115 | goto found; |
@@ -125,7 +128,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | |||
125 | } | 128 | } |
126 | } | 129 | } |
127 | } | 130 | } |
128 | lock_buffer(bh); | 131 | |
132 | if (mode == READA) { | ||
133 | if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { | ||
134 | err = -EBUSY; /* internal code */ | ||
135 | brelse(bh); | ||
136 | goto out_locked; | ||
137 | } | ||
138 | } else { /* mode == READ */ | ||
139 | lock_buffer(bh); | ||
140 | } | ||
129 | if (buffer_uptodate(bh)) { | 141 | if (buffer_uptodate(bh)) { |
130 | unlock_buffer(bh); | 142 | unlock_buffer(bh); |
131 | err = -EEXIST; /* internal code */ | 143 | err = -EEXIST; /* internal code */ |
@@ -136,15 +148,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, | |||
136 | bh->b_blocknr = pblocknr; /* set block address for read */ | 148 | bh->b_blocknr = pblocknr; /* set block address for read */ |
137 | bh->b_end_io = end_buffer_read_sync; | 149 | bh->b_end_io = end_buffer_read_sync; |
138 | get_bh(bh); | 150 | get_bh(bh); |
139 | submit_bh(READ, bh); | 151 | submit_bh(mode, bh); |
140 | bh->b_blocknr = blocknr; /* set back to the given block address */ | 152 | bh->b_blocknr = blocknr; /* set back to the given block address */ |
153 | *submit_ptr = pblocknr; | ||
141 | err = 0; | 154 | err = 0; |
142 | found: | 155 | found: |
143 | *pbh = bh; | 156 | *pbh = bh; |
144 | 157 | ||
145 | out_locked: | 158 | out_locked: |
146 | unlock_page(bh->b_page); | 159 | unlock_page(page); |
147 | page_cache_release(bh->b_page); | 160 | page_cache_release(page); |
148 | return err; | 161 | return err; |
149 | } | 162 | } |
150 | 163 | ||
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 07da83f07712..79037494f1e0 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h | |||
@@ -42,8 +42,8 @@ void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); | |||
42 | void nilfs_btnode_cache_clear(struct address_space *); | 42 | void nilfs_btnode_cache_clear(struct address_space *); |
43 | struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, | 43 | struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, |
44 | __u64 blocknr); | 44 | __u64 blocknr); |
45 | int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, | 45 | int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int, |
46 | struct buffer_head **); | 46 | struct buffer_head **, sector_t *); |
47 | void nilfs_btnode_delete(struct buffer_head *); | 47 | void nilfs_btnode_delete(struct buffer_head *); |
48 | int nilfs_btnode_prepare_change_key(struct address_space *, | 48 | int nilfs_btnode_prepare_change_key(struct address_space *, |
49 | struct nilfs_btnode_chkey_ctxt *); | 49 | struct nilfs_btnode_chkey_ctxt *); |
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b27a342c5af6..300c2bc00c3f 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c | |||
@@ -66,30 +66,10 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) | |||
66 | /* | 66 | /* |
67 | * B-tree node operations | 67 | * B-tree node operations |
68 | */ | 68 | */ |
69 | static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, | 69 | static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, |
70 | struct buffer_head **bhp) | ||
71 | { | ||
72 | struct address_space *btnc = | ||
73 | &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; | ||
74 | int err; | ||
75 | |||
76 | err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp); | ||
77 | if (err) | ||
78 | return err == -EEXIST ? 0 : err; | ||
79 | |||
80 | wait_on_buffer(*bhp); | ||
81 | if (!buffer_uptodate(*bhp)) { | ||
82 | brelse(*bhp); | ||
83 | return -EIO; | ||
84 | } | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, | ||
89 | __u64 ptr, struct buffer_head **bhp) | 70 | __u64 ptr, struct buffer_head **bhp) |
90 | { | 71 | { |
91 | struct address_space *btnc = | 72 | struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; |
92 | &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; | ||
93 | struct buffer_head *bh; | 73 | struct buffer_head *bh; |
94 | 74 | ||
95 | bh = nilfs_btnode_create_block(btnc, ptr); | 75 | bh = nilfs_btnode_create_block(btnc, ptr); |
@@ -101,71 +81,55 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, | |||
101 | return 0; | 81 | return 0; |
102 | } | 82 | } |
103 | 83 | ||
104 | static inline int | 84 | static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) |
105 | nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) | ||
106 | { | 85 | { |
107 | return node->bn_flags; | 86 | return node->bn_flags; |
108 | } | 87 | } |
109 | 88 | ||
110 | static inline void | 89 | static void |
111 | nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) | 90 | nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) |
112 | { | 91 | { |
113 | node->bn_flags = flags; | 92 | node->bn_flags = flags; |
114 | } | 93 | } |
115 | 94 | ||
116 | static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node) | 95 | static int nilfs_btree_node_root(const struct nilfs_btree_node *node) |
117 | { | 96 | { |
118 | return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; | 97 | return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; |
119 | } | 98 | } |
120 | 99 | ||
121 | static inline int | 100 | static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) |
122 | nilfs_btree_node_get_level(const struct nilfs_btree_node *node) | ||
123 | { | 101 | { |
124 | return node->bn_level; | 102 | return node->bn_level; |
125 | } | 103 | } |
126 | 104 | ||
127 | static inline void | 105 | static void |
128 | nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) | 106 | nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) |
129 | { | 107 | { |
130 | node->bn_level = level; | 108 | node->bn_level = level; |
131 | } | 109 | } |
132 | 110 | ||
133 | static inline int | 111 | static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) |
134 | nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) | ||
135 | { | 112 | { |
136 | return le16_to_cpu(node->bn_nchildren); | 113 | return le16_to_cpu(node->bn_nchildren); |
137 | } | 114 | } |
138 | 115 | ||
139 | static inline void | 116 | static void |
140 | nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) | 117 | nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) |
141 | { | 118 | { |
142 | node->bn_nchildren = cpu_to_le16(nchildren); | 119 | node->bn_nchildren = cpu_to_le16(nchildren); |
143 | } | 120 | } |
144 | 121 | ||
145 | static inline int nilfs_btree_node_size(const struct nilfs_btree *btree) | 122 | static int nilfs_btree_node_size(const struct nilfs_bmap *btree) |
146 | { | 123 | { |
147 | return 1 << btree->bt_bmap.b_inode->i_blkbits; | 124 | return 1 << btree->b_inode->i_blkbits; |
148 | } | 125 | } |
149 | 126 | ||
150 | static inline int | 127 | static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) |
151 | nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node, | ||
152 | const struct nilfs_btree *btree) | ||
153 | { | 128 | { |
154 | return nilfs_btree_node_root(node) ? | 129 | return btree->b_nchildren_per_block; |
155 | NILFS_BTREE_ROOT_NCHILDREN_MIN : | ||
156 | NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); | ||
157 | } | 130 | } |
158 | 131 | ||
159 | static inline int | 132 | static __le64 * |
160 | nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node, | ||
161 | const struct nilfs_btree *btree) | ||
162 | { | ||
163 | return nilfs_btree_node_root(node) ? | ||
164 | NILFS_BTREE_ROOT_NCHILDREN_MAX : | ||
165 | NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); | ||
166 | } | ||
167 | |||
168 | static inline __le64 * | ||
169 | nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) | 133 | nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) |
170 | { | 134 | { |
171 | return (__le64 *)((char *)(node + 1) + | 135 | return (__le64 *)((char *)(node + 1) + |
@@ -173,45 +137,40 @@ nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) | |||
173 | 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); | 137 | 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); |
174 | } | 138 | } |
175 | 139 | ||
176 | static inline __le64 * | 140 | static __le64 * |
177 | nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, | 141 | nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) |
178 | const struct nilfs_btree *btree) | ||
179 | { | 142 | { |
180 | return (__le64 *)(nilfs_btree_node_dkeys(node) + | 143 | return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); |
181 | nilfs_btree_node_nchildren_max(node, btree)); | ||
182 | } | 144 | } |
183 | 145 | ||
184 | static inline __u64 | 146 | static __u64 |
185 | nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) | 147 | nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) |
186 | { | 148 | { |
187 | return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index)); | 149 | return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); |
188 | } | 150 | } |
189 | 151 | ||
190 | static inline void | 152 | static void |
191 | nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) | 153 | nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) |
192 | { | 154 | { |
193 | *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key); | 155 | *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); |
194 | } | 156 | } |
195 | 157 | ||
196 | static inline __u64 | 158 | static __u64 |
197 | nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, | 159 | nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, |
198 | const struct nilfs_btree_node *node, int index) | 160 | int ncmax) |
199 | { | 161 | { |
200 | return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) + | 162 | return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); |
201 | index)); | ||
202 | } | 163 | } |
203 | 164 | ||
204 | static inline void | 165 | static void |
205 | nilfs_btree_node_set_ptr(struct nilfs_btree *btree, | 166 | nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, |
206 | struct nilfs_btree_node *node, int index, __u64 ptr) | 167 | int ncmax) |
207 | { | 168 | { |
208 | *(nilfs_btree_node_dptrs(node, btree) + index) = | 169 | *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); |
209 | nilfs_bmap_ptr_to_dptr(ptr); | ||
210 | } | 170 | } |
211 | 171 | ||
212 | static void nilfs_btree_node_init(struct nilfs_btree *btree, | 172 | static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, |
213 | struct nilfs_btree_node *node, | 173 | int level, int nchildren, int ncmax, |
214 | int flags, int level, int nchildren, | ||
215 | const __u64 *keys, const __u64 *ptrs) | 174 | const __u64 *keys, const __u64 *ptrs) |
216 | { | 175 | { |
217 | __le64 *dkeys; | 176 | __le64 *dkeys; |
@@ -223,29 +182,28 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree, | |||
223 | nilfs_btree_node_set_nchildren(node, nchildren); | 182 | nilfs_btree_node_set_nchildren(node, nchildren); |
224 | 183 | ||
225 | dkeys = nilfs_btree_node_dkeys(node); | 184 | dkeys = nilfs_btree_node_dkeys(node); |
226 | dptrs = nilfs_btree_node_dptrs(node, btree); | 185 | dptrs = nilfs_btree_node_dptrs(node, ncmax); |
227 | for (i = 0; i < nchildren; i++) { | 186 | for (i = 0; i < nchildren; i++) { |
228 | dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); | 187 | dkeys[i] = cpu_to_le64(keys[i]); |
229 | dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); | 188 | dptrs[i] = cpu_to_le64(ptrs[i]); |
230 | } | 189 | } |
231 | } | 190 | } |
232 | 191 | ||
233 | /* Assume the buffer heads corresponding to left and right are locked. */ | 192 | /* Assume the buffer heads corresponding to left and right are locked. */ |
234 | static void nilfs_btree_node_move_left(struct nilfs_btree *btree, | 193 | static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, |
235 | struct nilfs_btree_node *left, | ||
236 | struct nilfs_btree_node *right, | 194 | struct nilfs_btree_node *right, |
237 | int n) | 195 | int n, int lncmax, int rncmax) |
238 | { | 196 | { |
239 | __le64 *ldkeys, *rdkeys; | 197 | __le64 *ldkeys, *rdkeys; |
240 | __le64 *ldptrs, *rdptrs; | 198 | __le64 *ldptrs, *rdptrs; |
241 | int lnchildren, rnchildren; | 199 | int lnchildren, rnchildren; |
242 | 200 | ||
243 | ldkeys = nilfs_btree_node_dkeys(left); | 201 | ldkeys = nilfs_btree_node_dkeys(left); |
244 | ldptrs = nilfs_btree_node_dptrs(left, btree); | 202 | ldptrs = nilfs_btree_node_dptrs(left, lncmax); |
245 | lnchildren = nilfs_btree_node_get_nchildren(left); | 203 | lnchildren = nilfs_btree_node_get_nchildren(left); |
246 | 204 | ||
247 | rdkeys = nilfs_btree_node_dkeys(right); | 205 | rdkeys = nilfs_btree_node_dkeys(right); |
248 | rdptrs = nilfs_btree_node_dptrs(right, btree); | 206 | rdptrs = nilfs_btree_node_dptrs(right, rncmax); |
249 | rnchildren = nilfs_btree_node_get_nchildren(right); | 207 | rnchildren = nilfs_btree_node_get_nchildren(right); |
250 | 208 | ||
251 | memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); | 209 | memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); |
@@ -260,21 +218,20 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree, | |||
260 | } | 218 | } |
261 | 219 | ||
262 | /* Assume that the buffer heads corresponding to left and right are locked. */ | 220 | /* Assume that the buffer heads corresponding to left and right are locked. */ |
263 | static void nilfs_btree_node_move_right(struct nilfs_btree *btree, | 221 | static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, |
264 | struct nilfs_btree_node *left, | ||
265 | struct nilfs_btree_node *right, | 222 | struct nilfs_btree_node *right, |
266 | int n) | 223 | int n, int lncmax, int rncmax) |
267 | { | 224 | { |
268 | __le64 *ldkeys, *rdkeys; | 225 | __le64 *ldkeys, *rdkeys; |
269 | __le64 *ldptrs, *rdptrs; | 226 | __le64 *ldptrs, *rdptrs; |
270 | int lnchildren, rnchildren; | 227 | int lnchildren, rnchildren; |
271 | 228 | ||
272 | ldkeys = nilfs_btree_node_dkeys(left); | 229 | ldkeys = nilfs_btree_node_dkeys(left); |
273 | ldptrs = nilfs_btree_node_dptrs(left, btree); | 230 | ldptrs = nilfs_btree_node_dptrs(left, lncmax); |
274 | lnchildren = nilfs_btree_node_get_nchildren(left); | 231 | lnchildren = nilfs_btree_node_get_nchildren(left); |
275 | 232 | ||
276 | rdkeys = nilfs_btree_node_dkeys(right); | 233 | rdkeys = nilfs_btree_node_dkeys(right); |
277 | rdptrs = nilfs_btree_node_dptrs(right, btree); | 234 | rdptrs = nilfs_btree_node_dptrs(right, rncmax); |
278 | rnchildren = nilfs_btree_node_get_nchildren(right); | 235 | rnchildren = nilfs_btree_node_get_nchildren(right); |
279 | 236 | ||
280 | memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); | 237 | memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); |
@@ -289,16 +246,15 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree, | |||
289 | } | 246 | } |
290 | 247 | ||
291 | /* Assume that the buffer head corresponding to node is locked. */ | 248 | /* Assume that the buffer head corresponding to node is locked. */ |
292 | static void nilfs_btree_node_insert(struct nilfs_btree *btree, | 249 | static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, |
293 | struct nilfs_btree_node *node, | 250 | __u64 key, __u64 ptr, int ncmax) |
294 | __u64 key, __u64 ptr, int index) | ||
295 | { | 251 | { |
296 | __le64 *dkeys; | 252 | __le64 *dkeys; |
297 | __le64 *dptrs; | 253 | __le64 *dptrs; |
298 | int nchildren; | 254 | int nchildren; |
299 | 255 | ||
300 | dkeys = nilfs_btree_node_dkeys(node); | 256 | dkeys = nilfs_btree_node_dkeys(node); |
301 | dptrs = nilfs_btree_node_dptrs(node, btree); | 257 | dptrs = nilfs_btree_node_dptrs(node, ncmax); |
302 | nchildren = nilfs_btree_node_get_nchildren(node); | 258 | nchildren = nilfs_btree_node_get_nchildren(node); |
303 | if (index < nchildren) { | 259 | if (index < nchildren) { |
304 | memmove(dkeys + index + 1, dkeys + index, | 260 | memmove(dkeys + index + 1, dkeys + index, |
@@ -306,16 +262,15 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree, | |||
306 | memmove(dptrs + index + 1, dptrs + index, | 262 | memmove(dptrs + index + 1, dptrs + index, |
307 | (nchildren - index) * sizeof(*dptrs)); | 263 | (nchildren - index) * sizeof(*dptrs)); |
308 | } | 264 | } |
309 | dkeys[index] = nilfs_bmap_key_to_dkey(key); | 265 | dkeys[index] = cpu_to_le64(key); |
310 | dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); | 266 | dptrs[index] = cpu_to_le64(ptr); |
311 | nchildren++; | 267 | nchildren++; |
312 | nilfs_btree_node_set_nchildren(node, nchildren); | 268 | nilfs_btree_node_set_nchildren(node, nchildren); |
313 | } | 269 | } |
314 | 270 | ||
315 | /* Assume that the buffer head corresponding to node is locked. */ | 271 | /* Assume that the buffer head corresponding to node is locked. */ |
316 | static void nilfs_btree_node_delete(struct nilfs_btree *btree, | 272 | static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, |
317 | struct nilfs_btree_node *node, | 273 | __u64 *keyp, __u64 *ptrp, int ncmax) |
318 | __u64 *keyp, __u64 *ptrp, int index) | ||
319 | { | 274 | { |
320 | __u64 key; | 275 | __u64 key; |
321 | __u64 ptr; | 276 | __u64 ptr; |
@@ -324,9 +279,9 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree, | |||
324 | int nchildren; | 279 | int nchildren; |
325 | 280 | ||
326 | dkeys = nilfs_btree_node_dkeys(node); | 281 | dkeys = nilfs_btree_node_dkeys(node); |
327 | dptrs = nilfs_btree_node_dptrs(node, btree); | 282 | dptrs = nilfs_btree_node_dptrs(node, ncmax); |
328 | key = nilfs_bmap_dkey_to_key(dkeys[index]); | 283 | key = le64_to_cpu(dkeys[index]); |
329 | ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); | 284 | ptr = le64_to_cpu(dptrs[index]); |
330 | nchildren = nilfs_btree_node_get_nchildren(node); | 285 | nchildren = nilfs_btree_node_get_nchildren(node); |
331 | if (keyp != NULL) | 286 | if (keyp != NULL) |
332 | *keyp = key; | 287 | *keyp = key; |
@@ -382,40 +337,92 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, | |||
382 | return s == 0; | 337 | return s == 0; |
383 | } | 338 | } |
384 | 339 | ||
385 | static inline struct nilfs_btree_node * | 340 | /** |
386 | nilfs_btree_get_root(const struct nilfs_btree *btree) | 341 | * nilfs_btree_node_broken - verify consistency of btree node |
342 | * @node: btree node block to be examined | ||
343 | * @size: node size (in bytes) | ||
344 | * @blocknr: block number | ||
345 | * | ||
346 | * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. | ||
347 | */ | ||
348 | static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, | ||
349 | size_t size, sector_t blocknr) | ||
387 | { | 350 | { |
388 | return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data; | 351 | int level, flags, nchildren; |
352 | int ret = 0; | ||
353 | |||
354 | level = nilfs_btree_node_get_level(node); | ||
355 | flags = nilfs_btree_node_get_flags(node); | ||
356 | nchildren = nilfs_btree_node_get_nchildren(node); | ||
357 | |||
358 | if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || | ||
359 | level >= NILFS_BTREE_LEVEL_MAX || | ||
360 | (flags & NILFS_BTREE_NODE_ROOT) || | ||
361 | nchildren < 0 || | ||
362 | nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { | ||
363 | printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " | ||
364 | "level = %d, flags = 0x%x, nchildren = %d\n", | ||
365 | (unsigned long long)blocknr, level, flags, nchildren); | ||
366 | ret = 1; | ||
367 | } | ||
368 | return ret; | ||
389 | } | 369 | } |
390 | 370 | ||
391 | static inline struct nilfs_btree_node * | 371 | int nilfs_btree_broken_node_block(struct buffer_head *bh) |
372 | { | ||
373 | int ret; | ||
374 | |||
375 | if (buffer_nilfs_checked(bh)) | ||
376 | return 0; | ||
377 | |||
378 | ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, | ||
379 | bh->b_size, bh->b_blocknr); | ||
380 | if (likely(!ret)) | ||
381 | set_buffer_nilfs_checked(bh); | ||
382 | return ret; | ||
383 | } | ||
384 | |||
385 | static struct nilfs_btree_node * | ||
386 | nilfs_btree_get_root(const struct nilfs_bmap *btree) | ||
387 | { | ||
388 | return (struct nilfs_btree_node *)btree->b_u.u_data; | ||
389 | } | ||
390 | |||
391 | static struct nilfs_btree_node * | ||
392 | nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) | 392 | nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) |
393 | { | 393 | { |
394 | return (struct nilfs_btree_node *)path[level].bp_bh->b_data; | 394 | return (struct nilfs_btree_node *)path[level].bp_bh->b_data; |
395 | } | 395 | } |
396 | 396 | ||
397 | static inline struct nilfs_btree_node * | 397 | static struct nilfs_btree_node * |
398 | nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) | 398 | nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) |
399 | { | 399 | { |
400 | return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; | 400 | return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; |
401 | } | 401 | } |
402 | 402 | ||
403 | static inline int nilfs_btree_height(const struct nilfs_btree *btree) | 403 | static int nilfs_btree_height(const struct nilfs_bmap *btree) |
404 | { | 404 | { |
405 | return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; | 405 | return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; |
406 | } | 406 | } |
407 | 407 | ||
408 | static inline struct nilfs_btree_node * | 408 | static struct nilfs_btree_node * |
409 | nilfs_btree_get_node(const struct nilfs_btree *btree, | 409 | nilfs_btree_get_node(const struct nilfs_bmap *btree, |
410 | const struct nilfs_btree_path *path, | 410 | const struct nilfs_btree_path *path, |
411 | int level) | 411 | int level, int *ncmaxp) |
412 | { | 412 | { |
413 | return (level == nilfs_btree_height(btree) - 1) ? | 413 | struct nilfs_btree_node *node; |
414 | nilfs_btree_get_root(btree) : | 414 | |
415 | nilfs_btree_get_nonroot_node(path, level); | 415 | if (level == nilfs_btree_height(btree) - 1) { |
416 | node = nilfs_btree_get_root(btree); | ||
417 | *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; | ||
418 | } else { | ||
419 | node = nilfs_btree_get_nonroot_node(path, level); | ||
420 | *ncmaxp = nilfs_btree_nchildren_per_block(btree); | ||
421 | } | ||
422 | return node; | ||
416 | } | 423 | } |
417 | 424 | ||
418 | static inline int | 425 | static int |
419 | nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) | 426 | nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) |
420 | { | 427 | { |
421 | if (unlikely(nilfs_btree_node_get_level(node) != level)) { | 428 | if (unlikely(nilfs_btree_node_get_level(node) != level)) { |
@@ -427,13 +434,83 @@ nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) | |||
427 | return 0; | 434 | return 0; |
428 | } | 435 | } |
429 | 436 | ||
430 | static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | 437 | struct nilfs_btree_readahead_info { |
438 | struct nilfs_btree_node *node; /* parent node */ | ||
439 | int max_ra_blocks; /* max nof blocks to read ahead */ | ||
440 | int index; /* current index on the parent node */ | ||
441 | int ncmax; /* nof children in the parent node */ | ||
442 | }; | ||
443 | |||
444 | static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, | ||
445 | struct buffer_head **bhp, | ||
446 | const struct nilfs_btree_readahead_info *ra) | ||
447 | { | ||
448 | struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; | ||
449 | struct buffer_head *bh, *ra_bh; | ||
450 | sector_t submit_ptr = 0; | ||
451 | int ret; | ||
452 | |||
453 | ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); | ||
454 | if (ret) { | ||
455 | if (ret != -EEXIST) | ||
456 | return ret; | ||
457 | goto out_check; | ||
458 | } | ||
459 | |||
460 | if (ra) { | ||
461 | int i, n; | ||
462 | __u64 ptr2; | ||
463 | |||
464 | /* read ahead sibling nodes */ | ||
465 | for (n = ra->max_ra_blocks, i = ra->index + 1; | ||
466 | n > 0 && i < ra->ncmax; n--, i++) { | ||
467 | ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); | ||
468 | |||
469 | ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, | ||
470 | &ra_bh, &submit_ptr); | ||
471 | if (likely(!ret || ret == -EEXIST)) | ||
472 | brelse(ra_bh); | ||
473 | else if (ret != -EBUSY) | ||
474 | break; | ||
475 | if (!buffer_locked(bh)) | ||
476 | goto out_no_wait; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | wait_on_buffer(bh); | ||
481 | |||
482 | out_no_wait: | ||
483 | if (!buffer_uptodate(bh)) { | ||
484 | brelse(bh); | ||
485 | return -EIO; | ||
486 | } | ||
487 | |||
488 | out_check: | ||
489 | if (nilfs_btree_broken_node_block(bh)) { | ||
490 | clear_buffer_uptodate(bh); | ||
491 | brelse(bh); | ||
492 | return -EINVAL; | ||
493 | } | ||
494 | |||
495 | *bhp = bh; | ||
496 | return 0; | ||
497 | } | ||
498 | |||
499 | static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, | ||
500 | struct buffer_head **bhp) | ||
501 | { | ||
502 | return __nilfs_btree_get_block(btree, ptr, bhp, NULL); | ||
503 | } | ||
504 | |||
505 | static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, | ||
431 | struct nilfs_btree_path *path, | 506 | struct nilfs_btree_path *path, |
432 | __u64 key, __u64 *ptrp, int minlevel) | 507 | __u64 key, __u64 *ptrp, int minlevel, |
508 | int readahead) | ||
433 | { | 509 | { |
434 | struct nilfs_btree_node *node; | 510 | struct nilfs_btree_node *node; |
511 | struct nilfs_btree_readahead_info p, *ra; | ||
435 | __u64 ptr; | 512 | __u64 ptr; |
436 | int level, index, found, ret; | 513 | int level, index, found, ncmax, ret; |
437 | 514 | ||
438 | node = nilfs_btree_get_root(btree); | 515 | node = nilfs_btree_get_root(btree); |
439 | level = nilfs_btree_node_get_level(node); | 516 | level = nilfs_btree_node_get_level(node); |
@@ -441,14 +518,27 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | |||
441 | return -ENOENT; | 518 | return -ENOENT; |
442 | 519 | ||
443 | found = nilfs_btree_node_lookup(node, key, &index); | 520 | found = nilfs_btree_node_lookup(node, key, &index); |
444 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 521 | ptr = nilfs_btree_node_get_ptr(node, index, |
522 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
445 | path[level].bp_bh = NULL; | 523 | path[level].bp_bh = NULL; |
446 | path[level].bp_index = index; | 524 | path[level].bp_index = index; |
447 | 525 | ||
448 | for (level--; level >= minlevel; level--) { | 526 | ncmax = nilfs_btree_nchildren_per_block(btree); |
449 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); | 527 | |
528 | while (--level >= minlevel) { | ||
529 | ra = NULL; | ||
530 | if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { | ||
531 | p.node = nilfs_btree_get_node(btree, path, level + 1, | ||
532 | &p.ncmax); | ||
533 | p.index = index; | ||
534 | p.max_ra_blocks = 7; | ||
535 | ra = &p; | ||
536 | } | ||
537 | ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, | ||
538 | ra); | ||
450 | if (ret < 0) | 539 | if (ret < 0) |
451 | return ret; | 540 | return ret; |
541 | |||
452 | node = nilfs_btree_get_nonroot_node(path, level); | 542 | node = nilfs_btree_get_nonroot_node(path, level); |
453 | if (nilfs_btree_bad_node(node, level)) | 543 | if (nilfs_btree_bad_node(node, level)) |
454 | return -EINVAL; | 544 | return -EINVAL; |
@@ -456,9 +546,9 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | |||
456 | found = nilfs_btree_node_lookup(node, key, &index); | 546 | found = nilfs_btree_node_lookup(node, key, &index); |
457 | else | 547 | else |
458 | index = 0; | 548 | index = 0; |
459 | if (index < nilfs_btree_node_nchildren_max(node, btree)) | 549 | if (index < ncmax) { |
460 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 550 | ptr = nilfs_btree_node_get_ptr(node, index, ncmax); |
461 | else { | 551 | } else { |
462 | WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); | 552 | WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); |
463 | /* insert */ | 553 | /* insert */ |
464 | ptr = NILFS_BMAP_INVALID_PTR; | 554 | ptr = NILFS_BMAP_INVALID_PTR; |
@@ -474,22 +564,24 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, | |||
474 | return 0; | 564 | return 0; |
475 | } | 565 | } |
476 | 566 | ||
477 | static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, | 567 | static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, |
478 | struct nilfs_btree_path *path, | 568 | struct nilfs_btree_path *path, |
479 | __u64 *keyp, __u64 *ptrp) | 569 | __u64 *keyp, __u64 *ptrp) |
480 | { | 570 | { |
481 | struct nilfs_btree_node *node; | 571 | struct nilfs_btree_node *node; |
482 | __u64 ptr; | 572 | __u64 ptr; |
483 | int index, level, ret; | 573 | int index, level, ncmax, ret; |
484 | 574 | ||
485 | node = nilfs_btree_get_root(btree); | 575 | node = nilfs_btree_get_root(btree); |
486 | index = nilfs_btree_node_get_nchildren(node) - 1; | 576 | index = nilfs_btree_node_get_nchildren(node) - 1; |
487 | if (index < 0) | 577 | if (index < 0) |
488 | return -ENOENT; | 578 | return -ENOENT; |
489 | level = nilfs_btree_node_get_level(node); | 579 | level = nilfs_btree_node_get_level(node); |
490 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 580 | ptr = nilfs_btree_node_get_ptr(node, index, |
581 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
491 | path[level].bp_bh = NULL; | 582 | path[level].bp_bh = NULL; |
492 | path[level].bp_index = index; | 583 | path[level].bp_index = index; |
584 | ncmax = nilfs_btree_nchildren_per_block(btree); | ||
493 | 585 | ||
494 | for (level--; level > 0; level--) { | 586 | for (level--; level > 0; level--) { |
495 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); | 587 | ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); |
@@ -499,7 +591,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, | |||
499 | if (nilfs_btree_bad_node(node, level)) | 591 | if (nilfs_btree_bad_node(node, level)) |
500 | return -EINVAL; | 592 | return -EINVAL; |
501 | index = nilfs_btree_node_get_nchildren(node) - 1; | 593 | index = nilfs_btree_node_get_nchildren(node) - 1; |
502 | ptr = nilfs_btree_node_get_ptr(btree, node, index); | 594 | ptr = nilfs_btree_node_get_ptr(node, index, ncmax); |
503 | path[level].bp_index = index; | 595 | path[level].bp_index = index; |
504 | } | 596 | } |
505 | 597 | ||
@@ -511,51 +603,45 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, | |||
511 | return 0; | 603 | return 0; |
512 | } | 604 | } |
513 | 605 | ||
514 | static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, | 606 | static int nilfs_btree_lookup(const struct nilfs_bmap *btree, |
515 | __u64 key, int level, __u64 *ptrp) | 607 | __u64 key, int level, __u64 *ptrp) |
516 | { | 608 | { |
517 | struct nilfs_btree *btree; | ||
518 | struct nilfs_btree_path *path; | 609 | struct nilfs_btree_path *path; |
519 | __u64 ptr; | ||
520 | int ret; | 610 | int ret; |
521 | 611 | ||
522 | btree = (struct nilfs_btree *)bmap; | ||
523 | path = nilfs_btree_alloc_path(); | 612 | path = nilfs_btree_alloc_path(); |
524 | if (path == NULL) | 613 | if (path == NULL) |
525 | return -ENOMEM; | 614 | return -ENOMEM; |
526 | 615 | ||
527 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); | 616 | ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); |
528 | |||
529 | if (ptrp != NULL) | ||
530 | *ptrp = ptr; | ||
531 | 617 | ||
532 | nilfs_btree_free_path(path); | 618 | nilfs_btree_free_path(path); |
533 | 619 | ||
534 | return ret; | 620 | return ret; |
535 | } | 621 | } |
536 | 622 | ||
537 | static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | 623 | static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, |
538 | __u64 key, __u64 *ptrp, unsigned maxblocks) | 624 | __u64 key, __u64 *ptrp, unsigned maxblocks) |
539 | { | 625 | { |
540 | struct nilfs_btree *btree = (struct nilfs_btree *)bmap; | ||
541 | struct nilfs_btree_path *path; | 626 | struct nilfs_btree_path *path; |
542 | struct nilfs_btree_node *node; | 627 | struct nilfs_btree_node *node; |
543 | struct inode *dat = NULL; | 628 | struct inode *dat = NULL; |
544 | __u64 ptr, ptr2; | 629 | __u64 ptr, ptr2; |
545 | sector_t blocknr; | 630 | sector_t blocknr; |
546 | int level = NILFS_BTREE_LEVEL_NODE_MIN; | 631 | int level = NILFS_BTREE_LEVEL_NODE_MIN; |
547 | int ret, cnt, index, maxlevel; | 632 | int ret, cnt, index, maxlevel, ncmax; |
633 | struct nilfs_btree_readahead_info p; | ||
548 | 634 | ||
549 | path = nilfs_btree_alloc_path(); | 635 | path = nilfs_btree_alloc_path(); |
550 | if (path == NULL) | 636 | if (path == NULL) |
551 | return -ENOMEM; | 637 | return -ENOMEM; |
552 | 638 | ||
553 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); | 639 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); |
554 | if (ret < 0) | 640 | if (ret < 0) |
555 | goto out; | 641 | goto out; |
556 | 642 | ||
557 | if (NILFS_BMAP_USE_VBN(bmap)) { | 643 | if (NILFS_BMAP_USE_VBN(btree)) { |
558 | dat = nilfs_bmap_get_dat(bmap); | 644 | dat = nilfs_bmap_get_dat(btree); |
559 | ret = nilfs_dat_translate(dat, ptr, &blocknr); | 645 | ret = nilfs_dat_translate(dat, ptr, &blocknr); |
560 | if (ret < 0) | 646 | if (ret < 0) |
561 | goto out; | 647 | goto out; |
@@ -566,14 +652,14 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
566 | goto end; | 652 | goto end; |
567 | 653 | ||
568 | maxlevel = nilfs_btree_height(btree) - 1; | 654 | maxlevel = nilfs_btree_height(btree) - 1; |
569 | node = nilfs_btree_get_node(btree, path, level); | 655 | node = nilfs_btree_get_node(btree, path, level, &ncmax); |
570 | index = path[level].bp_index + 1; | 656 | index = path[level].bp_index + 1; |
571 | for (;;) { | 657 | for (;;) { |
572 | while (index < nilfs_btree_node_get_nchildren(node)) { | 658 | while (index < nilfs_btree_node_get_nchildren(node)) { |
573 | if (nilfs_btree_node_get_key(node, index) != | 659 | if (nilfs_btree_node_get_key(node, index) != |
574 | key + cnt) | 660 | key + cnt) |
575 | goto end; | 661 | goto end; |
576 | ptr2 = nilfs_btree_node_get_ptr(btree, node, index); | 662 | ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); |
577 | if (dat) { | 663 | if (dat) { |
578 | ret = nilfs_dat_translate(dat, ptr2, &blocknr); | 664 | ret = nilfs_dat_translate(dat, ptr2, &blocknr); |
579 | if (ret < 0) | 665 | if (ret < 0) |
@@ -589,20 +675,24 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
589 | break; | 675 | break; |
590 | 676 | ||
591 | /* look-up right sibling node */ | 677 | /* look-up right sibling node */ |
592 | node = nilfs_btree_get_node(btree, path, level + 1); | 678 | p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); |
593 | index = path[level + 1].bp_index + 1; | 679 | p.index = path[level + 1].bp_index + 1; |
594 | if (index >= nilfs_btree_node_get_nchildren(node) || | 680 | p.max_ra_blocks = 7; |
595 | nilfs_btree_node_get_key(node, index) != key + cnt) | 681 | if (p.index >= nilfs_btree_node_get_nchildren(p.node) || |
682 | nilfs_btree_node_get_key(p.node, p.index) != key + cnt) | ||
596 | break; | 683 | break; |
597 | ptr2 = nilfs_btree_node_get_ptr(btree, node, index); | 684 | ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); |
598 | path[level + 1].bp_index = index; | 685 | path[level + 1].bp_index = p.index; |
599 | 686 | ||
600 | brelse(path[level].bp_bh); | 687 | brelse(path[level].bp_bh); |
601 | path[level].bp_bh = NULL; | 688 | path[level].bp_bh = NULL; |
602 | ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); | 689 | |
690 | ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, | ||
691 | &p); | ||
603 | if (ret < 0) | 692 | if (ret < 0) |
604 | goto out; | 693 | goto out; |
605 | node = nilfs_btree_get_nonroot_node(path, level); | 694 | node = nilfs_btree_get_nonroot_node(path, level); |
695 | ncmax = nilfs_btree_nchildren_per_block(btree); | ||
606 | index = 0; | 696 | index = 0; |
607 | path[level].bp_index = index; | 697 | path[level].bp_index = index; |
608 | } | 698 | } |
@@ -614,7 +704,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, | |||
614 | return ret; | 704 | return ret; |
615 | } | 705 | } |
616 | 706 | ||
617 | static void nilfs_btree_promote_key(struct nilfs_btree *btree, | 707 | static void nilfs_btree_promote_key(struct nilfs_bmap *btree, |
618 | struct nilfs_btree_path *path, | 708 | struct nilfs_btree_path *path, |
619 | int level, __u64 key) | 709 | int level, __u64 key) |
620 | { | 710 | { |
@@ -636,16 +726,18 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree, | |||
636 | } | 726 | } |
637 | } | 727 | } |
638 | 728 | ||
639 | static void nilfs_btree_do_insert(struct nilfs_btree *btree, | 729 | static void nilfs_btree_do_insert(struct nilfs_bmap *btree, |
640 | struct nilfs_btree_path *path, | 730 | struct nilfs_btree_path *path, |
641 | int level, __u64 *keyp, __u64 *ptrp) | 731 | int level, __u64 *keyp, __u64 *ptrp) |
642 | { | 732 | { |
643 | struct nilfs_btree_node *node; | 733 | struct nilfs_btree_node *node; |
734 | int ncblk; | ||
644 | 735 | ||
645 | if (level < nilfs_btree_height(btree) - 1) { | 736 | if (level < nilfs_btree_height(btree) - 1) { |
646 | node = nilfs_btree_get_nonroot_node(path, level); | 737 | node = nilfs_btree_get_nonroot_node(path, level); |
647 | nilfs_btree_node_insert(btree, node, *keyp, *ptrp, | 738 | ncblk = nilfs_btree_nchildren_per_block(btree); |
648 | path[level].bp_index); | 739 | nilfs_btree_node_insert(node, path[level].bp_index, |
740 | *keyp, *ptrp, ncblk); | ||
649 | if (!buffer_dirty(path[level].bp_bh)) | 741 | if (!buffer_dirty(path[level].bp_bh)) |
650 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 742 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
651 | 743 | ||
@@ -655,22 +747,24 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree, | |||
655 | 0)); | 747 | 0)); |
656 | } else { | 748 | } else { |
657 | node = nilfs_btree_get_root(btree); | 749 | node = nilfs_btree_get_root(btree); |
658 | nilfs_btree_node_insert(btree, node, *keyp, *ptrp, | 750 | nilfs_btree_node_insert(node, path[level].bp_index, |
659 | path[level].bp_index); | 751 | *keyp, *ptrp, |
752 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
660 | } | 753 | } |
661 | } | 754 | } |
662 | 755 | ||
663 | static void nilfs_btree_carry_left(struct nilfs_btree *btree, | 756 | static void nilfs_btree_carry_left(struct nilfs_bmap *btree, |
664 | struct nilfs_btree_path *path, | 757 | struct nilfs_btree_path *path, |
665 | int level, __u64 *keyp, __u64 *ptrp) | 758 | int level, __u64 *keyp, __u64 *ptrp) |
666 | { | 759 | { |
667 | struct nilfs_btree_node *node, *left; | 760 | struct nilfs_btree_node *node, *left; |
668 | int nchildren, lnchildren, n, move; | 761 | int nchildren, lnchildren, n, move, ncblk; |
669 | 762 | ||
670 | node = nilfs_btree_get_nonroot_node(path, level); | 763 | node = nilfs_btree_get_nonroot_node(path, level); |
671 | left = nilfs_btree_get_sib_node(path, level); | 764 | left = nilfs_btree_get_sib_node(path, level); |
672 | nchildren = nilfs_btree_node_get_nchildren(node); | 765 | nchildren = nilfs_btree_node_get_nchildren(node); |
673 | lnchildren = nilfs_btree_node_get_nchildren(left); | 766 | lnchildren = nilfs_btree_node_get_nchildren(left); |
767 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
674 | move = 0; | 768 | move = 0; |
675 | 769 | ||
676 | n = (nchildren + lnchildren + 1) / 2 - lnchildren; | 770 | n = (nchildren + lnchildren + 1) / 2 - lnchildren; |
@@ -680,7 +774,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, | |||
680 | move = 1; | 774 | move = 1; |
681 | } | 775 | } |
682 | 776 | ||
683 | nilfs_btree_node_move_left(btree, left, node, n); | 777 | nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); |
684 | 778 | ||
685 | if (!buffer_dirty(path[level].bp_bh)) | 779 | if (!buffer_dirty(path[level].bp_bh)) |
686 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 780 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -705,17 +799,18 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, | |||
705 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); | 799 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); |
706 | } | 800 | } |
707 | 801 | ||
708 | static void nilfs_btree_carry_right(struct nilfs_btree *btree, | 802 | static void nilfs_btree_carry_right(struct nilfs_bmap *btree, |
709 | struct nilfs_btree_path *path, | 803 | struct nilfs_btree_path *path, |
710 | int level, __u64 *keyp, __u64 *ptrp) | 804 | int level, __u64 *keyp, __u64 *ptrp) |
711 | { | 805 | { |
712 | struct nilfs_btree_node *node, *right; | 806 | struct nilfs_btree_node *node, *right; |
713 | int nchildren, rnchildren, n, move; | 807 | int nchildren, rnchildren, n, move, ncblk; |
714 | 808 | ||
715 | node = nilfs_btree_get_nonroot_node(path, level); | 809 | node = nilfs_btree_get_nonroot_node(path, level); |
716 | right = nilfs_btree_get_sib_node(path, level); | 810 | right = nilfs_btree_get_sib_node(path, level); |
717 | nchildren = nilfs_btree_node_get_nchildren(node); | 811 | nchildren = nilfs_btree_node_get_nchildren(node); |
718 | rnchildren = nilfs_btree_node_get_nchildren(right); | 812 | rnchildren = nilfs_btree_node_get_nchildren(right); |
813 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
719 | move = 0; | 814 | move = 0; |
720 | 815 | ||
721 | n = (nchildren + rnchildren + 1) / 2 - rnchildren; | 816 | n = (nchildren + rnchildren + 1) / 2 - rnchildren; |
@@ -725,7 +820,7 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, | |||
725 | move = 1; | 820 | move = 1; |
726 | } | 821 | } |
727 | 822 | ||
728 | nilfs_btree_node_move_right(btree, node, right, n); | 823 | nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); |
729 | 824 | ||
730 | if (!buffer_dirty(path[level].bp_bh)) | 825 | if (!buffer_dirty(path[level].bp_bh)) |
731 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 826 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -751,18 +846,19 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, | |||
751 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); | 846 | nilfs_btree_do_insert(btree, path, level, keyp, ptrp); |
752 | } | 847 | } |
753 | 848 | ||
754 | static void nilfs_btree_split(struct nilfs_btree *btree, | 849 | static void nilfs_btree_split(struct nilfs_bmap *btree, |
755 | struct nilfs_btree_path *path, | 850 | struct nilfs_btree_path *path, |
756 | int level, __u64 *keyp, __u64 *ptrp) | 851 | int level, __u64 *keyp, __u64 *ptrp) |
757 | { | 852 | { |
758 | struct nilfs_btree_node *node, *right; | 853 | struct nilfs_btree_node *node, *right; |
759 | __u64 newkey; | 854 | __u64 newkey; |
760 | __u64 newptr; | 855 | __u64 newptr; |
761 | int nchildren, n, move; | 856 | int nchildren, n, move, ncblk; |
762 | 857 | ||
763 | node = nilfs_btree_get_nonroot_node(path, level); | 858 | node = nilfs_btree_get_nonroot_node(path, level); |
764 | right = nilfs_btree_get_sib_node(path, level); | 859 | right = nilfs_btree_get_sib_node(path, level); |
765 | nchildren = nilfs_btree_node_get_nchildren(node); | 860 | nchildren = nilfs_btree_node_get_nchildren(node); |
861 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
766 | move = 0; | 862 | move = 0; |
767 | 863 | ||
768 | n = (nchildren + 1) / 2; | 864 | n = (nchildren + 1) / 2; |
@@ -771,7 +867,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, | |||
771 | move = 1; | 867 | move = 1; |
772 | } | 868 | } |
773 | 869 | ||
774 | nilfs_btree_node_move_right(btree, node, right, n); | 870 | nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); |
775 | 871 | ||
776 | if (!buffer_dirty(path[level].bp_bh)) | 872 | if (!buffer_dirty(path[level].bp_bh)) |
777 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 873 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -783,8 +879,8 @@ static void nilfs_btree_split(struct nilfs_btree *btree, | |||
783 | 879 | ||
784 | if (move) { | 880 | if (move) { |
785 | path[level].bp_index -= nilfs_btree_node_get_nchildren(node); | 881 | path[level].bp_index -= nilfs_btree_node_get_nchildren(node); |
786 | nilfs_btree_node_insert(btree, right, *keyp, *ptrp, | 882 | nilfs_btree_node_insert(right, path[level].bp_index, |
787 | path[level].bp_index); | 883 | *keyp, *ptrp, ncblk); |
788 | 884 | ||
789 | *keyp = nilfs_btree_node_get_key(right, 0); | 885 | *keyp = nilfs_btree_node_get_key(right, 0); |
790 | *ptrp = path[level].bp_newreq.bpr_ptr; | 886 | *ptrp = path[level].bp_newreq.bpr_ptr; |
@@ -805,19 +901,21 @@ static void nilfs_btree_split(struct nilfs_btree *btree, | |||
805 | path[level + 1].bp_index++; | 901 | path[level + 1].bp_index++; |
806 | } | 902 | } |
807 | 903 | ||
808 | static void nilfs_btree_grow(struct nilfs_btree *btree, | 904 | static void nilfs_btree_grow(struct nilfs_bmap *btree, |
809 | struct nilfs_btree_path *path, | 905 | struct nilfs_btree_path *path, |
810 | int level, __u64 *keyp, __u64 *ptrp) | 906 | int level, __u64 *keyp, __u64 *ptrp) |
811 | { | 907 | { |
812 | struct nilfs_btree_node *root, *child; | 908 | struct nilfs_btree_node *root, *child; |
813 | int n; | 909 | int n, ncblk; |
814 | 910 | ||
815 | root = nilfs_btree_get_root(btree); | 911 | root = nilfs_btree_get_root(btree); |
816 | child = nilfs_btree_get_sib_node(path, level); | 912 | child = nilfs_btree_get_sib_node(path, level); |
913 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
817 | 914 | ||
818 | n = nilfs_btree_node_get_nchildren(root); | 915 | n = nilfs_btree_node_get_nchildren(root); |
819 | 916 | ||
820 | nilfs_btree_node_move_right(btree, root, child, n); | 917 | nilfs_btree_node_move_right(root, child, n, |
918 | NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); | ||
821 | nilfs_btree_node_set_level(root, level + 1); | 919 | nilfs_btree_node_set_level(root, level + 1); |
822 | 920 | ||
823 | if (!buffer_dirty(path[level].bp_sib_bh)) | 921 | if (!buffer_dirty(path[level].bp_sib_bh)) |
@@ -832,11 +930,11 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, | |||
832 | *ptrp = path[level].bp_newreq.bpr_ptr; | 930 | *ptrp = path[level].bp_newreq.bpr_ptr; |
833 | } | 931 | } |
834 | 932 | ||
835 | static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, | 933 | static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, |
836 | const struct nilfs_btree_path *path) | 934 | const struct nilfs_btree_path *path) |
837 | { | 935 | { |
838 | struct nilfs_btree_node *node; | 936 | struct nilfs_btree_node *node; |
839 | int level; | 937 | int level, ncmax; |
840 | 938 | ||
841 | if (path == NULL) | 939 | if (path == NULL) |
842 | return NILFS_BMAP_INVALID_PTR; | 940 | return NILFS_BMAP_INVALID_PTR; |
@@ -844,29 +942,30 @@ static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, | |||
844 | /* left sibling */ | 942 | /* left sibling */ |
845 | level = NILFS_BTREE_LEVEL_NODE_MIN; | 943 | level = NILFS_BTREE_LEVEL_NODE_MIN; |
846 | if (path[level].bp_index > 0) { | 944 | if (path[level].bp_index > 0) { |
847 | node = nilfs_btree_get_node(btree, path, level); | 945 | node = nilfs_btree_get_node(btree, path, level, &ncmax); |
848 | return nilfs_btree_node_get_ptr(btree, node, | 946 | return nilfs_btree_node_get_ptr(node, |
849 | path[level].bp_index - 1); | 947 | path[level].bp_index - 1, |
948 | ncmax); | ||
850 | } | 949 | } |
851 | 950 | ||
852 | /* parent */ | 951 | /* parent */ |
853 | level = NILFS_BTREE_LEVEL_NODE_MIN + 1; | 952 | level = NILFS_BTREE_LEVEL_NODE_MIN + 1; |
854 | if (level <= nilfs_btree_height(btree) - 1) { | 953 | if (level <= nilfs_btree_height(btree) - 1) { |
855 | node = nilfs_btree_get_node(btree, path, level); | 954 | node = nilfs_btree_get_node(btree, path, level, &ncmax); |
856 | return nilfs_btree_node_get_ptr(btree, node, | 955 | return nilfs_btree_node_get_ptr(node, path[level].bp_index, |
857 | path[level].bp_index); | 956 | ncmax); |
858 | } | 957 | } |
859 | 958 | ||
860 | return NILFS_BMAP_INVALID_PTR; | 959 | return NILFS_BMAP_INVALID_PTR; |
861 | } | 960 | } |
862 | 961 | ||
863 | static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, | 962 | static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, |
864 | const struct nilfs_btree_path *path, | 963 | const struct nilfs_btree_path *path, |
865 | __u64 key) | 964 | __u64 key) |
866 | { | 965 | { |
867 | __u64 ptr; | 966 | __u64 ptr; |
868 | 967 | ||
869 | ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); | 968 | ptr = nilfs_bmap_find_target_seq(btree, key); |
870 | if (ptr != NILFS_BMAP_INVALID_PTR) | 969 | if (ptr != NILFS_BMAP_INVALID_PTR) |
871 | /* sequential access */ | 970 | /* sequential access */ |
872 | return ptr; | 971 | return ptr; |
@@ -877,17 +976,10 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, | |||
877 | return ptr; | 976 | return ptr; |
878 | } | 977 | } |
879 | /* block group */ | 978 | /* block group */ |
880 | return nilfs_bmap_find_target_in_group(&btree->bt_bmap); | 979 | return nilfs_bmap_find_target_in_group(btree); |
881 | } | ||
882 | |||
883 | static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key, | ||
884 | __u64 ptr) | ||
885 | { | ||
886 | btree->bt_bmap.b_last_allocated_key = key; | ||
887 | btree->bt_bmap.b_last_allocated_ptr = ptr; | ||
888 | } | 980 | } |
889 | 981 | ||
890 | static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | 982 | static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, |
891 | struct nilfs_btree_path *path, | 983 | struct nilfs_btree_path *path, |
892 | int *levelp, __u64 key, __u64 ptr, | 984 | int *levelp, __u64 key, __u64 ptr, |
893 | struct nilfs_bmap_stats *stats) | 985 | struct nilfs_bmap_stats *stats) |
@@ -895,79 +987,78 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
895 | struct buffer_head *bh; | 987 | struct buffer_head *bh; |
896 | struct nilfs_btree_node *node, *parent, *sib; | 988 | struct nilfs_btree_node *node, *parent, *sib; |
897 | __u64 sibptr; | 989 | __u64 sibptr; |
898 | int pindex, level, ret; | 990 | int pindex, level, ncmax, ncblk, ret; |
899 | struct inode *dat = NULL; | 991 | struct inode *dat = NULL; |
900 | 992 | ||
901 | stats->bs_nblocks = 0; | 993 | stats->bs_nblocks = 0; |
902 | level = NILFS_BTREE_LEVEL_DATA; | 994 | level = NILFS_BTREE_LEVEL_DATA; |
903 | 995 | ||
904 | /* allocate a new ptr for data block */ | 996 | /* allocate a new ptr for data block */ |
905 | if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { | 997 | if (NILFS_BMAP_USE_VBN(btree)) { |
906 | path[level].bp_newreq.bpr_ptr = | 998 | path[level].bp_newreq.bpr_ptr = |
907 | nilfs_btree_find_target_v(btree, path, key); | 999 | nilfs_btree_find_target_v(btree, path, key); |
908 | dat = nilfs_bmap_get_dat(&btree->bt_bmap); | 1000 | dat = nilfs_bmap_get_dat(btree); |
909 | } | 1001 | } |
910 | 1002 | ||
911 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, | 1003 | ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); |
912 | &path[level].bp_newreq, dat); | ||
913 | if (ret < 0) | 1004 | if (ret < 0) |
914 | goto err_out_data; | 1005 | goto err_out_data; |
915 | 1006 | ||
1007 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1008 | |||
916 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; | 1009 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; |
917 | level < nilfs_btree_height(btree) - 1; | 1010 | level < nilfs_btree_height(btree) - 1; |
918 | level++) { | 1011 | level++) { |
919 | node = nilfs_btree_get_nonroot_node(path, level); | 1012 | node = nilfs_btree_get_nonroot_node(path, level); |
920 | if (nilfs_btree_node_get_nchildren(node) < | 1013 | if (nilfs_btree_node_get_nchildren(node) < ncblk) { |
921 | nilfs_btree_node_nchildren_max(node, btree)) { | ||
922 | path[level].bp_op = nilfs_btree_do_insert; | 1014 | path[level].bp_op = nilfs_btree_do_insert; |
923 | stats->bs_nblocks++; | 1015 | stats->bs_nblocks++; |
924 | goto out; | 1016 | goto out; |
925 | } | 1017 | } |
926 | 1018 | ||
927 | parent = nilfs_btree_get_node(btree, path, level + 1); | 1019 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
928 | pindex = path[level + 1].bp_index; | 1020 | pindex = path[level + 1].bp_index; |
929 | 1021 | ||
930 | /* left sibling */ | 1022 | /* left sibling */ |
931 | if (pindex > 0) { | 1023 | if (pindex > 0) { |
932 | sibptr = nilfs_btree_node_get_ptr(btree, parent, | 1024 | sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, |
933 | pindex - 1); | 1025 | ncmax); |
934 | ret = nilfs_btree_get_block(btree, sibptr, &bh); | 1026 | ret = nilfs_btree_get_block(btree, sibptr, &bh); |
935 | if (ret < 0) | 1027 | if (ret < 0) |
936 | goto err_out_child_node; | 1028 | goto err_out_child_node; |
937 | sib = (struct nilfs_btree_node *)bh->b_data; | 1029 | sib = (struct nilfs_btree_node *)bh->b_data; |
938 | if (nilfs_btree_node_get_nchildren(sib) < | 1030 | if (nilfs_btree_node_get_nchildren(sib) < ncblk) { |
939 | nilfs_btree_node_nchildren_max(sib, btree)) { | ||
940 | path[level].bp_sib_bh = bh; | 1031 | path[level].bp_sib_bh = bh; |
941 | path[level].bp_op = nilfs_btree_carry_left; | 1032 | path[level].bp_op = nilfs_btree_carry_left; |
942 | stats->bs_nblocks++; | 1033 | stats->bs_nblocks++; |
943 | goto out; | 1034 | goto out; |
944 | } else | 1035 | } else { |
945 | brelse(bh); | 1036 | brelse(bh); |
1037 | } | ||
946 | } | 1038 | } |
947 | 1039 | ||
948 | /* right sibling */ | 1040 | /* right sibling */ |
949 | if (pindex < | 1041 | if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { |
950 | nilfs_btree_node_get_nchildren(parent) - 1) { | 1042 | sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, |
951 | sibptr = nilfs_btree_node_get_ptr(btree, parent, | 1043 | ncmax); |
952 | pindex + 1); | ||
953 | ret = nilfs_btree_get_block(btree, sibptr, &bh); | 1044 | ret = nilfs_btree_get_block(btree, sibptr, &bh); |
954 | if (ret < 0) | 1045 | if (ret < 0) |
955 | goto err_out_child_node; | 1046 | goto err_out_child_node; |
956 | sib = (struct nilfs_btree_node *)bh->b_data; | 1047 | sib = (struct nilfs_btree_node *)bh->b_data; |
957 | if (nilfs_btree_node_get_nchildren(sib) < | 1048 | if (nilfs_btree_node_get_nchildren(sib) < ncblk) { |
958 | nilfs_btree_node_nchildren_max(sib, btree)) { | ||
959 | path[level].bp_sib_bh = bh; | 1049 | path[level].bp_sib_bh = bh; |
960 | path[level].bp_op = nilfs_btree_carry_right; | 1050 | path[level].bp_op = nilfs_btree_carry_right; |
961 | stats->bs_nblocks++; | 1051 | stats->bs_nblocks++; |
962 | goto out; | 1052 | goto out; |
963 | } else | 1053 | } else { |
964 | brelse(bh); | 1054 | brelse(bh); |
1055 | } | ||
965 | } | 1056 | } |
966 | 1057 | ||
967 | /* split */ | 1058 | /* split */ |
968 | path[level].bp_newreq.bpr_ptr = | 1059 | path[level].bp_newreq.bpr_ptr = |
969 | path[level - 1].bp_newreq.bpr_ptr + 1; | 1060 | path[level - 1].bp_newreq.bpr_ptr + 1; |
970 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, | 1061 | ret = nilfs_bmap_prepare_alloc_ptr(btree, |
971 | &path[level].bp_newreq, dat); | 1062 | &path[level].bp_newreq, dat); |
972 | if (ret < 0) | 1063 | if (ret < 0) |
973 | goto err_out_child_node; | 1064 | goto err_out_child_node; |
@@ -979,9 +1070,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
979 | 1070 | ||
980 | stats->bs_nblocks++; | 1071 | stats->bs_nblocks++; |
981 | 1072 | ||
982 | nilfs_btree_node_init(btree, | 1073 | sib = (struct nilfs_btree_node *)bh->b_data; |
983 | (struct nilfs_btree_node *)bh->b_data, | 1074 | nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); |
984 | 0, level, 0, NULL, NULL); | ||
985 | path[level].bp_sib_bh = bh; | 1075 | path[level].bp_sib_bh = bh; |
986 | path[level].bp_op = nilfs_btree_split; | 1076 | path[level].bp_op = nilfs_btree_split; |
987 | } | 1077 | } |
@@ -989,7 +1079,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
989 | /* root */ | 1079 | /* root */ |
990 | node = nilfs_btree_get_root(btree); | 1080 | node = nilfs_btree_get_root(btree); |
991 | if (nilfs_btree_node_get_nchildren(node) < | 1081 | if (nilfs_btree_node_get_nchildren(node) < |
992 | nilfs_btree_node_nchildren_max(node, btree)) { | 1082 | NILFS_BTREE_ROOT_NCHILDREN_MAX) { |
993 | path[level].bp_op = nilfs_btree_do_insert; | 1083 | path[level].bp_op = nilfs_btree_do_insert; |
994 | stats->bs_nblocks++; | 1084 | stats->bs_nblocks++; |
995 | goto out; | 1085 | goto out; |
@@ -997,8 +1087,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
997 | 1087 | ||
998 | /* grow */ | 1088 | /* grow */ |
999 | path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; | 1089 | path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; |
1000 | ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, | 1090 | ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); |
1001 | &path[level].bp_newreq, dat); | ||
1002 | if (ret < 0) | 1091 | if (ret < 0) |
1003 | goto err_out_child_node; | 1092 | goto err_out_child_node; |
1004 | ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, | 1093 | ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, |
@@ -1006,8 +1095,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1006 | if (ret < 0) | 1095 | if (ret < 0) |
1007 | goto err_out_curr_node; | 1096 | goto err_out_curr_node; |
1008 | 1097 | ||
1009 | nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, | 1098 | nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, |
1010 | 0, level, 0, NULL, NULL); | 1099 | 0, level, 0, ncblk, NULL, NULL); |
1011 | path[level].bp_sib_bh = bh; | 1100 | path[level].bp_sib_bh = bh; |
1012 | path[level].bp_op = nilfs_btree_grow; | 1101 | path[level].bp_op = nilfs_btree_grow; |
1013 | 1102 | ||
@@ -1024,25 +1113,22 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, | |||
1024 | 1113 | ||
1025 | /* error */ | 1114 | /* error */ |
1026 | err_out_curr_node: | 1115 | err_out_curr_node: |
1027 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, | 1116 | nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); |
1028 | dat); | ||
1029 | err_out_child_node: | 1117 | err_out_child_node: |
1030 | for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { | 1118 | for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { |
1031 | nilfs_btnode_delete(path[level].bp_sib_bh); | 1119 | nilfs_btnode_delete(path[level].bp_sib_bh); |
1032 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, | 1120 | nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); |
1033 | &path[level].bp_newreq, dat); | ||
1034 | 1121 | ||
1035 | } | 1122 | } |
1036 | 1123 | ||
1037 | nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, | 1124 | nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); |
1038 | dat); | ||
1039 | err_out_data: | 1125 | err_out_data: |
1040 | *levelp = level; | 1126 | *levelp = level; |
1041 | stats->bs_nblocks = 0; | 1127 | stats->bs_nblocks = 0; |
1042 | return ret; | 1128 | return ret; |
1043 | } | 1129 | } |
1044 | 1130 | ||
1045 | static void nilfs_btree_commit_insert(struct nilfs_btree *btree, | 1131 | static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, |
1046 | struct nilfs_btree_path *path, | 1132 | struct nilfs_btree_path *path, |
1047 | int maxlevel, __u64 key, __u64 ptr) | 1133 | int maxlevel, __u64 key, __u64 ptr) |
1048 | { | 1134 | { |
@@ -1051,35 +1137,33 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree, | |||
1051 | 1137 | ||
1052 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); | 1138 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); |
1053 | ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; | 1139 | ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; |
1054 | if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { | 1140 | if (NILFS_BMAP_USE_VBN(btree)) { |
1055 | nilfs_btree_set_target_v(btree, key, ptr); | 1141 | nilfs_bmap_set_target_v(btree, key, ptr); |
1056 | dat = nilfs_bmap_get_dat(&btree->bt_bmap); | 1142 | dat = nilfs_bmap_get_dat(btree); |
1057 | } | 1143 | } |
1058 | 1144 | ||
1059 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { | 1145 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { |
1060 | nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, | 1146 | nilfs_bmap_commit_alloc_ptr(btree, |
1061 | &path[level - 1].bp_newreq, dat); | 1147 | &path[level - 1].bp_newreq, dat); |
1062 | path[level].bp_op(btree, path, level, &key, &ptr); | 1148 | path[level].bp_op(btree, path, level, &key, &ptr); |
1063 | } | 1149 | } |
1064 | 1150 | ||
1065 | if (!nilfs_bmap_dirty(&btree->bt_bmap)) | 1151 | if (!nilfs_bmap_dirty(btree)) |
1066 | nilfs_bmap_set_dirty(&btree->bt_bmap); | 1152 | nilfs_bmap_set_dirty(btree); |
1067 | } | 1153 | } |
1068 | 1154 | ||
1069 | static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | 1155 | static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) |
1070 | { | 1156 | { |
1071 | struct nilfs_btree *btree; | ||
1072 | struct nilfs_btree_path *path; | 1157 | struct nilfs_btree_path *path; |
1073 | struct nilfs_bmap_stats stats; | 1158 | struct nilfs_bmap_stats stats; |
1074 | int level, ret; | 1159 | int level, ret; |
1075 | 1160 | ||
1076 | btree = (struct nilfs_btree *)bmap; | ||
1077 | path = nilfs_btree_alloc_path(); | 1161 | path = nilfs_btree_alloc_path(); |
1078 | if (path == NULL) | 1162 | if (path == NULL) |
1079 | return -ENOMEM; | 1163 | return -ENOMEM; |
1080 | 1164 | ||
1081 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | 1165 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, |
1082 | NILFS_BTREE_LEVEL_NODE_MIN); | 1166 | NILFS_BTREE_LEVEL_NODE_MIN, 0); |
1083 | if (ret != -ENOENT) { | 1167 | if (ret != -ENOENT) { |
1084 | if (ret == 0) | 1168 | if (ret == 0) |
1085 | ret = -EEXIST; | 1169 | ret = -EEXIST; |
@@ -1090,23 +1174,25 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | |||
1090 | if (ret < 0) | 1174 | if (ret < 0) |
1091 | goto out; | 1175 | goto out; |
1092 | nilfs_btree_commit_insert(btree, path, level, key, ptr); | 1176 | nilfs_btree_commit_insert(btree, path, level, key, ptr); |
1093 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | 1177 | nilfs_bmap_add_blocks(btree, stats.bs_nblocks); |
1094 | 1178 | ||
1095 | out: | 1179 | out: |
1096 | nilfs_btree_free_path(path); | 1180 | nilfs_btree_free_path(path); |
1097 | return ret; | 1181 | return ret; |
1098 | } | 1182 | } |
1099 | 1183 | ||
1100 | static void nilfs_btree_do_delete(struct nilfs_btree *btree, | 1184 | static void nilfs_btree_do_delete(struct nilfs_bmap *btree, |
1101 | struct nilfs_btree_path *path, | 1185 | struct nilfs_btree_path *path, |
1102 | int level, __u64 *keyp, __u64 *ptrp) | 1186 | int level, __u64 *keyp, __u64 *ptrp) |
1103 | { | 1187 | { |
1104 | struct nilfs_btree_node *node; | 1188 | struct nilfs_btree_node *node; |
1189 | int ncblk; | ||
1105 | 1190 | ||
1106 | if (level < nilfs_btree_height(btree) - 1) { | 1191 | if (level < nilfs_btree_height(btree) - 1) { |
1107 | node = nilfs_btree_get_nonroot_node(path, level); | 1192 | node = nilfs_btree_get_nonroot_node(path, level); |
1108 | nilfs_btree_node_delete(btree, node, keyp, ptrp, | 1193 | ncblk = nilfs_btree_nchildren_per_block(btree); |
1109 | path[level].bp_index); | 1194 | nilfs_btree_node_delete(node, path[level].bp_index, |
1195 | keyp, ptrp, ncblk); | ||
1110 | if (!buffer_dirty(path[level].bp_bh)) | 1196 | if (!buffer_dirty(path[level].bp_bh)) |
1111 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 1197 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
1112 | if (path[level].bp_index == 0) | 1198 | if (path[level].bp_index == 0) |
@@ -1114,17 +1200,18 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree, | |||
1114 | nilfs_btree_node_get_key(node, 0)); | 1200 | nilfs_btree_node_get_key(node, 0)); |
1115 | } else { | 1201 | } else { |
1116 | node = nilfs_btree_get_root(btree); | 1202 | node = nilfs_btree_get_root(btree); |
1117 | nilfs_btree_node_delete(btree, node, keyp, ptrp, | 1203 | nilfs_btree_node_delete(node, path[level].bp_index, |
1118 | path[level].bp_index); | 1204 | keyp, ptrp, |
1205 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
1119 | } | 1206 | } |
1120 | } | 1207 | } |
1121 | 1208 | ||
1122 | static void nilfs_btree_borrow_left(struct nilfs_btree *btree, | 1209 | static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, |
1123 | struct nilfs_btree_path *path, | 1210 | struct nilfs_btree_path *path, |
1124 | int level, __u64 *keyp, __u64 *ptrp) | 1211 | int level, __u64 *keyp, __u64 *ptrp) |
1125 | { | 1212 | { |
1126 | struct nilfs_btree_node *node, *left; | 1213 | struct nilfs_btree_node *node, *left; |
1127 | int nchildren, lnchildren, n; | 1214 | int nchildren, lnchildren, n, ncblk; |
1128 | 1215 | ||
1129 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); | 1216 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); |
1130 | 1217 | ||
@@ -1132,10 +1219,11 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, | |||
1132 | left = nilfs_btree_get_sib_node(path, level); | 1219 | left = nilfs_btree_get_sib_node(path, level); |
1133 | nchildren = nilfs_btree_node_get_nchildren(node); | 1220 | nchildren = nilfs_btree_node_get_nchildren(node); |
1134 | lnchildren = nilfs_btree_node_get_nchildren(left); | 1221 | lnchildren = nilfs_btree_node_get_nchildren(left); |
1222 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1135 | 1223 | ||
1136 | n = (nchildren + lnchildren) / 2 - nchildren; | 1224 | n = (nchildren + lnchildren) / 2 - nchildren; |
1137 | 1225 | ||
1138 | nilfs_btree_node_move_right(btree, left, node, n); | 1226 | nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); |
1139 | 1227 | ||
1140 | if (!buffer_dirty(path[level].bp_bh)) | 1228 | if (!buffer_dirty(path[level].bp_bh)) |
1141 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 1229 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -1150,12 +1238,12 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, | |||
1150 | path[level].bp_index += n; | 1238 | path[level].bp_index += n; |
1151 | } | 1239 | } |
1152 | 1240 | ||
1153 | static void nilfs_btree_borrow_right(struct nilfs_btree *btree, | 1241 | static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, |
1154 | struct nilfs_btree_path *path, | 1242 | struct nilfs_btree_path *path, |
1155 | int level, __u64 *keyp, __u64 *ptrp) | 1243 | int level, __u64 *keyp, __u64 *ptrp) |
1156 | { | 1244 | { |
1157 | struct nilfs_btree_node *node, *right; | 1245 | struct nilfs_btree_node *node, *right; |
1158 | int nchildren, rnchildren, n; | 1246 | int nchildren, rnchildren, n, ncblk; |
1159 | 1247 | ||
1160 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); | 1248 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); |
1161 | 1249 | ||
@@ -1163,10 +1251,11 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, | |||
1163 | right = nilfs_btree_get_sib_node(path, level); | 1251 | right = nilfs_btree_get_sib_node(path, level); |
1164 | nchildren = nilfs_btree_node_get_nchildren(node); | 1252 | nchildren = nilfs_btree_node_get_nchildren(node); |
1165 | rnchildren = nilfs_btree_node_get_nchildren(right); | 1253 | rnchildren = nilfs_btree_node_get_nchildren(right); |
1254 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1166 | 1255 | ||
1167 | n = (nchildren + rnchildren) / 2 - nchildren; | 1256 | n = (nchildren + rnchildren) / 2 - nchildren; |
1168 | 1257 | ||
1169 | nilfs_btree_node_move_left(btree, node, right, n); | 1258 | nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); |
1170 | 1259 | ||
1171 | if (!buffer_dirty(path[level].bp_bh)) | 1260 | if (!buffer_dirty(path[level].bp_bh)) |
1172 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 1261 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -1182,21 +1271,22 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, | |||
1182 | path[level].bp_sib_bh = NULL; | 1271 | path[level].bp_sib_bh = NULL; |
1183 | } | 1272 | } |
1184 | 1273 | ||
1185 | static void nilfs_btree_concat_left(struct nilfs_btree *btree, | 1274 | static void nilfs_btree_concat_left(struct nilfs_bmap *btree, |
1186 | struct nilfs_btree_path *path, | 1275 | struct nilfs_btree_path *path, |
1187 | int level, __u64 *keyp, __u64 *ptrp) | 1276 | int level, __u64 *keyp, __u64 *ptrp) |
1188 | { | 1277 | { |
1189 | struct nilfs_btree_node *node, *left; | 1278 | struct nilfs_btree_node *node, *left; |
1190 | int n; | 1279 | int n, ncblk; |
1191 | 1280 | ||
1192 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); | 1281 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); |
1193 | 1282 | ||
1194 | node = nilfs_btree_get_nonroot_node(path, level); | 1283 | node = nilfs_btree_get_nonroot_node(path, level); |
1195 | left = nilfs_btree_get_sib_node(path, level); | 1284 | left = nilfs_btree_get_sib_node(path, level); |
1285 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1196 | 1286 | ||
1197 | n = nilfs_btree_node_get_nchildren(node); | 1287 | n = nilfs_btree_node_get_nchildren(node); |
1198 | 1288 | ||
1199 | nilfs_btree_node_move_left(btree, left, node, n); | 1289 | nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); |
1200 | 1290 | ||
1201 | if (!buffer_dirty(path[level].bp_sib_bh)) | 1291 | if (!buffer_dirty(path[level].bp_sib_bh)) |
1202 | nilfs_btnode_mark_dirty(path[level].bp_sib_bh); | 1292 | nilfs_btnode_mark_dirty(path[level].bp_sib_bh); |
@@ -1207,21 +1297,22 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, | |||
1207 | path[level].bp_index += nilfs_btree_node_get_nchildren(left); | 1297 | path[level].bp_index += nilfs_btree_node_get_nchildren(left); |
1208 | } | 1298 | } |
1209 | 1299 | ||
1210 | static void nilfs_btree_concat_right(struct nilfs_btree *btree, | 1300 | static void nilfs_btree_concat_right(struct nilfs_bmap *btree, |
1211 | struct nilfs_btree_path *path, | 1301 | struct nilfs_btree_path *path, |
1212 | int level, __u64 *keyp, __u64 *ptrp) | 1302 | int level, __u64 *keyp, __u64 *ptrp) |
1213 | { | 1303 | { |
1214 | struct nilfs_btree_node *node, *right; | 1304 | struct nilfs_btree_node *node, *right; |
1215 | int n; | 1305 | int n, ncblk; |
1216 | 1306 | ||
1217 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); | 1307 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); |
1218 | 1308 | ||
1219 | node = nilfs_btree_get_nonroot_node(path, level); | 1309 | node = nilfs_btree_get_nonroot_node(path, level); |
1220 | right = nilfs_btree_get_sib_node(path, level); | 1310 | right = nilfs_btree_get_sib_node(path, level); |
1311 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1221 | 1312 | ||
1222 | n = nilfs_btree_node_get_nchildren(right); | 1313 | n = nilfs_btree_node_get_nchildren(right); |
1223 | 1314 | ||
1224 | nilfs_btree_node_move_left(btree, node, right, n); | 1315 | nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); |
1225 | 1316 | ||
1226 | if (!buffer_dirty(path[level].bp_bh)) | 1317 | if (!buffer_dirty(path[level].bp_bh)) |
1227 | nilfs_btnode_mark_dirty(path[level].bp_bh); | 1318 | nilfs_btnode_mark_dirty(path[level].bp_bh); |
@@ -1231,29 +1322,32 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, | |||
1231 | path[level + 1].bp_index++; | 1322 | path[level + 1].bp_index++; |
1232 | } | 1323 | } |
1233 | 1324 | ||
1234 | static void nilfs_btree_shrink(struct nilfs_btree *btree, | 1325 | static void nilfs_btree_shrink(struct nilfs_bmap *btree, |
1235 | struct nilfs_btree_path *path, | 1326 | struct nilfs_btree_path *path, |
1236 | int level, __u64 *keyp, __u64 *ptrp) | 1327 | int level, __u64 *keyp, __u64 *ptrp) |
1237 | { | 1328 | { |
1238 | struct nilfs_btree_node *root, *child; | 1329 | struct nilfs_btree_node *root, *child; |
1239 | int n; | 1330 | int n, ncblk; |
1240 | 1331 | ||
1241 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); | 1332 | nilfs_btree_do_delete(btree, path, level, keyp, ptrp); |
1242 | 1333 | ||
1243 | root = nilfs_btree_get_root(btree); | 1334 | root = nilfs_btree_get_root(btree); |
1244 | child = nilfs_btree_get_nonroot_node(path, level); | 1335 | child = nilfs_btree_get_nonroot_node(path, level); |
1336 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1245 | 1337 | ||
1246 | nilfs_btree_node_delete(btree, root, NULL, NULL, 0); | 1338 | nilfs_btree_node_delete(root, 0, NULL, NULL, |
1339 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
1247 | nilfs_btree_node_set_level(root, level); | 1340 | nilfs_btree_node_set_level(root, level); |
1248 | n = nilfs_btree_node_get_nchildren(child); | 1341 | n = nilfs_btree_node_get_nchildren(child); |
1249 | nilfs_btree_node_move_left(btree, root, child, n); | 1342 | nilfs_btree_node_move_left(root, child, n, |
1343 | NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); | ||
1250 | 1344 | ||
1251 | nilfs_btnode_delete(path[level].bp_bh); | 1345 | nilfs_btnode_delete(path[level].bp_bh); |
1252 | path[level].bp_bh = NULL; | 1346 | path[level].bp_bh = NULL; |
1253 | } | 1347 | } |
1254 | 1348 | ||
1255 | 1349 | ||
1256 | static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | 1350 | static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, |
1257 | struct nilfs_btree_path *path, | 1351 | struct nilfs_btree_path *path, |
1258 | int *levelp, | 1352 | int *levelp, |
1259 | struct nilfs_bmap_stats *stats, | 1353 | struct nilfs_bmap_stats *stats, |
@@ -1262,42 +1356,43 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1262 | struct buffer_head *bh; | 1356 | struct buffer_head *bh; |
1263 | struct nilfs_btree_node *node, *parent, *sib; | 1357 | struct nilfs_btree_node *node, *parent, *sib; |
1264 | __u64 sibptr; | 1358 | __u64 sibptr; |
1265 | int pindex, level, ret; | 1359 | int pindex, level, ncmin, ncmax, ncblk, ret; |
1266 | 1360 | ||
1267 | ret = 0; | 1361 | ret = 0; |
1268 | stats->bs_nblocks = 0; | 1362 | stats->bs_nblocks = 0; |
1363 | ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); | ||
1364 | ncblk = nilfs_btree_nchildren_per_block(btree); | ||
1365 | |||
1269 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; | 1366 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; |
1270 | level < nilfs_btree_height(btree) - 1; | 1367 | level < nilfs_btree_height(btree) - 1; |
1271 | level++) { | 1368 | level++) { |
1272 | node = nilfs_btree_get_nonroot_node(path, level); | 1369 | node = nilfs_btree_get_nonroot_node(path, level); |
1273 | path[level].bp_oldreq.bpr_ptr = | 1370 | path[level].bp_oldreq.bpr_ptr = |
1274 | nilfs_btree_node_get_ptr(btree, node, | 1371 | nilfs_btree_node_get_ptr(node, path[level].bp_index, |
1275 | path[level].bp_index); | 1372 | ncblk); |
1276 | ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, | 1373 | ret = nilfs_bmap_prepare_end_ptr(btree, |
1277 | &path[level].bp_oldreq, dat); | 1374 | &path[level].bp_oldreq, dat); |
1278 | if (ret < 0) | 1375 | if (ret < 0) |
1279 | goto err_out_child_node; | 1376 | goto err_out_child_node; |
1280 | 1377 | ||
1281 | if (nilfs_btree_node_get_nchildren(node) > | 1378 | if (nilfs_btree_node_get_nchildren(node) > ncmin) { |
1282 | nilfs_btree_node_nchildren_min(node, btree)) { | ||
1283 | path[level].bp_op = nilfs_btree_do_delete; | 1379 | path[level].bp_op = nilfs_btree_do_delete; |
1284 | stats->bs_nblocks++; | 1380 | stats->bs_nblocks++; |
1285 | goto out; | 1381 | goto out; |
1286 | } | 1382 | } |
1287 | 1383 | ||
1288 | parent = nilfs_btree_get_node(btree, path, level + 1); | 1384 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
1289 | pindex = path[level + 1].bp_index; | 1385 | pindex = path[level + 1].bp_index; |
1290 | 1386 | ||
1291 | if (pindex > 0) { | 1387 | if (pindex > 0) { |
1292 | /* left sibling */ | 1388 | /* left sibling */ |
1293 | sibptr = nilfs_btree_node_get_ptr(btree, parent, | 1389 | sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, |
1294 | pindex - 1); | 1390 | ncmax); |
1295 | ret = nilfs_btree_get_block(btree, sibptr, &bh); | 1391 | ret = nilfs_btree_get_block(btree, sibptr, &bh); |
1296 | if (ret < 0) | 1392 | if (ret < 0) |
1297 | goto err_out_curr_node; | 1393 | goto err_out_curr_node; |
1298 | sib = (struct nilfs_btree_node *)bh->b_data; | 1394 | sib = (struct nilfs_btree_node *)bh->b_data; |
1299 | if (nilfs_btree_node_get_nchildren(sib) > | 1395 | if (nilfs_btree_node_get_nchildren(sib) > ncmin) { |
1300 | nilfs_btree_node_nchildren_min(sib, btree)) { | ||
1301 | path[level].bp_sib_bh = bh; | 1396 | path[level].bp_sib_bh = bh; |
1302 | path[level].bp_op = nilfs_btree_borrow_left; | 1397 | path[level].bp_op = nilfs_btree_borrow_left; |
1303 | stats->bs_nblocks++; | 1398 | stats->bs_nblocks++; |
@@ -1311,14 +1406,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1311 | } else if (pindex < | 1406 | } else if (pindex < |
1312 | nilfs_btree_node_get_nchildren(parent) - 1) { | 1407 | nilfs_btree_node_get_nchildren(parent) - 1) { |
1313 | /* right sibling */ | 1408 | /* right sibling */ |
1314 | sibptr = nilfs_btree_node_get_ptr(btree, parent, | 1409 | sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, |
1315 | pindex + 1); | 1410 | ncmax); |
1316 | ret = nilfs_btree_get_block(btree, sibptr, &bh); | 1411 | ret = nilfs_btree_get_block(btree, sibptr, &bh); |
1317 | if (ret < 0) | 1412 | if (ret < 0) |
1318 | goto err_out_curr_node; | 1413 | goto err_out_curr_node; |
1319 | sib = (struct nilfs_btree_node *)bh->b_data; | 1414 | sib = (struct nilfs_btree_node *)bh->b_data; |
1320 | if (nilfs_btree_node_get_nchildren(sib) > | 1415 | if (nilfs_btree_node_get_nchildren(sib) > ncmin) { |
1321 | nilfs_btree_node_nchildren_min(sib, btree)) { | ||
1322 | path[level].bp_sib_bh = bh; | 1416 | path[level].bp_sib_bh = bh; |
1323 | path[level].bp_op = nilfs_btree_borrow_right; | 1417 | path[level].bp_op = nilfs_btree_borrow_right; |
1324 | stats->bs_nblocks++; | 1418 | stats->bs_nblocks++; |
@@ -1349,10 +1443,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1349 | 1443 | ||
1350 | node = nilfs_btree_get_root(btree); | 1444 | node = nilfs_btree_get_root(btree); |
1351 | path[level].bp_oldreq.bpr_ptr = | 1445 | path[level].bp_oldreq.bpr_ptr = |
1352 | nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); | 1446 | nilfs_btree_node_get_ptr(node, path[level].bp_index, |
1447 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
1353 | 1448 | ||
1354 | ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, | 1449 | ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); |
1355 | &path[level].bp_oldreq, dat); | ||
1356 | if (ret < 0) | 1450 | if (ret < 0) |
1357 | goto err_out_child_node; | 1451 | goto err_out_child_node; |
1358 | 1452 | ||
@@ -1367,75 +1461,68 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, | |||
1367 | 1461 | ||
1368 | /* error */ | 1462 | /* error */ |
1369 | err_out_curr_node: | 1463 | err_out_curr_node: |
1370 | nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat); | 1464 | nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); |
1371 | err_out_child_node: | 1465 | err_out_child_node: |
1372 | for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { | 1466 | for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { |
1373 | brelse(path[level].bp_sib_bh); | 1467 | brelse(path[level].bp_sib_bh); |
1374 | nilfs_bmap_abort_end_ptr(&btree->bt_bmap, | 1468 | nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); |
1375 | &path[level].bp_oldreq, dat); | ||
1376 | } | 1469 | } |
1377 | *levelp = level; | 1470 | *levelp = level; |
1378 | stats->bs_nblocks = 0; | 1471 | stats->bs_nblocks = 0; |
1379 | return ret; | 1472 | return ret; |
1380 | } | 1473 | } |
1381 | 1474 | ||
1382 | static void nilfs_btree_commit_delete(struct nilfs_btree *btree, | 1475 | static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, |
1383 | struct nilfs_btree_path *path, | 1476 | struct nilfs_btree_path *path, |
1384 | int maxlevel, struct inode *dat) | 1477 | int maxlevel, struct inode *dat) |
1385 | { | 1478 | { |
1386 | int level; | 1479 | int level; |
1387 | 1480 | ||
1388 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { | 1481 | for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { |
1389 | nilfs_bmap_commit_end_ptr(&btree->bt_bmap, | 1482 | nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); |
1390 | &path[level].bp_oldreq, dat); | ||
1391 | path[level].bp_op(btree, path, level, NULL, NULL); | 1483 | path[level].bp_op(btree, path, level, NULL, NULL); |
1392 | } | 1484 | } |
1393 | 1485 | ||
1394 | if (!nilfs_bmap_dirty(&btree->bt_bmap)) | 1486 | if (!nilfs_bmap_dirty(btree)) |
1395 | nilfs_bmap_set_dirty(&btree->bt_bmap); | 1487 | nilfs_bmap_set_dirty(btree); |
1396 | } | 1488 | } |
1397 | 1489 | ||
1398 | static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) | 1490 | static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) |
1399 | 1491 | ||
1400 | { | 1492 | { |
1401 | struct nilfs_btree *btree; | ||
1402 | struct nilfs_btree_path *path; | 1493 | struct nilfs_btree_path *path; |
1403 | struct nilfs_bmap_stats stats; | 1494 | struct nilfs_bmap_stats stats; |
1404 | struct inode *dat; | 1495 | struct inode *dat; |
1405 | int level, ret; | 1496 | int level, ret; |
1406 | 1497 | ||
1407 | btree = (struct nilfs_btree *)bmap; | ||
1408 | path = nilfs_btree_alloc_path(); | 1498 | path = nilfs_btree_alloc_path(); |
1409 | if (path == NULL) | 1499 | if (path == NULL) |
1410 | return -ENOMEM; | 1500 | return -ENOMEM; |
1411 | 1501 | ||
1412 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, | 1502 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, |
1413 | NILFS_BTREE_LEVEL_NODE_MIN); | 1503 | NILFS_BTREE_LEVEL_NODE_MIN, 0); |
1414 | if (ret < 0) | 1504 | if (ret < 0) |
1415 | goto out; | 1505 | goto out; |
1416 | 1506 | ||
1417 | 1507 | ||
1418 | dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ? | 1508 | dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; |
1419 | nilfs_bmap_get_dat(&btree->bt_bmap) : NULL; | ||
1420 | 1509 | ||
1421 | ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); | 1510 | ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); |
1422 | if (ret < 0) | 1511 | if (ret < 0) |
1423 | goto out; | 1512 | goto out; |
1424 | nilfs_btree_commit_delete(btree, path, level, dat); | 1513 | nilfs_btree_commit_delete(btree, path, level, dat); |
1425 | nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); | 1514 | nilfs_bmap_sub_blocks(btree, stats.bs_nblocks); |
1426 | 1515 | ||
1427 | out: | 1516 | out: |
1428 | nilfs_btree_free_path(path); | 1517 | nilfs_btree_free_path(path); |
1429 | return ret; | 1518 | return ret; |
1430 | } | 1519 | } |
1431 | 1520 | ||
1432 | static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | 1521 | static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) |
1433 | { | 1522 | { |
1434 | struct nilfs_btree *btree; | ||
1435 | struct nilfs_btree_path *path; | 1523 | struct nilfs_btree_path *path; |
1436 | int ret; | 1524 | int ret; |
1437 | 1525 | ||
1438 | btree = (struct nilfs_btree *)bmap; | ||
1439 | path = nilfs_btree_alloc_path(); | 1526 | path = nilfs_btree_alloc_path(); |
1440 | if (path == NULL) | 1527 | if (path == NULL) |
1441 | return -ENOMEM; | 1528 | return -ENOMEM; |
@@ -1447,16 +1534,14 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | |||
1447 | return ret; | 1534 | return ret; |
1448 | } | 1535 | } |
1449 | 1536 | ||
1450 | static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) | 1537 | static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) |
1451 | { | 1538 | { |
1452 | struct buffer_head *bh; | 1539 | struct buffer_head *bh; |
1453 | struct nilfs_btree *btree; | ||
1454 | struct nilfs_btree_node *root, *node; | 1540 | struct nilfs_btree_node *root, *node; |
1455 | __u64 maxkey, nextmaxkey; | 1541 | __u64 maxkey, nextmaxkey; |
1456 | __u64 ptr; | 1542 | __u64 ptr; |
1457 | int nchildren, ret; | 1543 | int nchildren, ret; |
1458 | 1544 | ||
1459 | btree = (struct nilfs_btree *)bmap; | ||
1460 | root = nilfs_btree_get_root(btree); | 1545 | root = nilfs_btree_get_root(btree); |
1461 | switch (nilfs_btree_height(btree)) { | 1546 | switch (nilfs_btree_height(btree)) { |
1462 | case 2: | 1547 | case 2: |
@@ -1467,7 +1552,8 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) | |||
1467 | nchildren = nilfs_btree_node_get_nchildren(root); | 1552 | nchildren = nilfs_btree_node_get_nchildren(root); |
1468 | if (nchildren > 1) | 1553 | if (nchildren > 1) |
1469 | return 0; | 1554 | return 0; |
1470 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); | 1555 | ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, |
1556 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
1471 | ret = nilfs_btree_get_block(btree, ptr, &bh); | 1557 | ret = nilfs_btree_get_block(btree, ptr, &bh); |
1472 | if (ret < 0) | 1558 | if (ret < 0) |
1473 | return ret; | 1559 | return ret; |
@@ -1487,32 +1573,33 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) | |||
1487 | return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); | 1573 | return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); |
1488 | } | 1574 | } |
1489 | 1575 | ||
1490 | static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, | 1576 | static int nilfs_btree_gather_data(struct nilfs_bmap *btree, |
1491 | __u64 *keys, __u64 *ptrs, int nitems) | 1577 | __u64 *keys, __u64 *ptrs, int nitems) |
1492 | { | 1578 | { |
1493 | struct buffer_head *bh; | 1579 | struct buffer_head *bh; |
1494 | struct nilfs_btree *btree; | ||
1495 | struct nilfs_btree_node *node, *root; | 1580 | struct nilfs_btree_node *node, *root; |
1496 | __le64 *dkeys; | 1581 | __le64 *dkeys; |
1497 | __le64 *dptrs; | 1582 | __le64 *dptrs; |
1498 | __u64 ptr; | 1583 | __u64 ptr; |
1499 | int nchildren, i, ret; | 1584 | int nchildren, ncmax, i, ret; |
1500 | 1585 | ||
1501 | btree = (struct nilfs_btree *)bmap; | ||
1502 | root = nilfs_btree_get_root(btree); | 1586 | root = nilfs_btree_get_root(btree); |
1503 | switch (nilfs_btree_height(btree)) { | 1587 | switch (nilfs_btree_height(btree)) { |
1504 | case 2: | 1588 | case 2: |
1505 | bh = NULL; | 1589 | bh = NULL; |
1506 | node = root; | 1590 | node = root; |
1591 | ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; | ||
1507 | break; | 1592 | break; |
1508 | case 3: | 1593 | case 3: |
1509 | nchildren = nilfs_btree_node_get_nchildren(root); | 1594 | nchildren = nilfs_btree_node_get_nchildren(root); |
1510 | WARN_ON(nchildren > 1); | 1595 | WARN_ON(nchildren > 1); |
1511 | ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); | 1596 | ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, |
1597 | NILFS_BTREE_ROOT_NCHILDREN_MAX); | ||
1512 | ret = nilfs_btree_get_block(btree, ptr, &bh); | 1598 | ret = nilfs_btree_get_block(btree, ptr, &bh); |
1513 | if (ret < 0) | 1599 | if (ret < 0) |
1514 | return ret; | 1600 | return ret; |
1515 | node = (struct nilfs_btree_node *)bh->b_data; | 1601 | node = (struct nilfs_btree_node *)bh->b_data; |
1602 | ncmax = nilfs_btree_nchildren_per_block(btree); | ||
1516 | break; | 1603 | break; |
1517 | default: | 1604 | default: |
1518 | node = NULL; | 1605 | node = NULL; |
@@ -1523,10 +1610,10 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, | |||
1523 | if (nchildren < nitems) | 1610 | if (nchildren < nitems) |
1524 | nitems = nchildren; | 1611 | nitems = nchildren; |
1525 | dkeys = nilfs_btree_node_dkeys(node); | 1612 | dkeys = nilfs_btree_node_dkeys(node); |
1526 | dptrs = nilfs_btree_node_dptrs(node, btree); | 1613 | dptrs = nilfs_btree_node_dptrs(node, ncmax); |
1527 | for (i = 0; i < nitems; i++) { | 1614 | for (i = 0; i < nitems; i++) { |
1528 | keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); | 1615 | keys[i] = le64_to_cpu(dkeys[i]); |
1529 | ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); | 1616 | ptrs[i] = le64_to_cpu(dptrs[i]); |
1530 | } | 1617 | } |
1531 | 1618 | ||
1532 | if (bh != NULL) | 1619 | if (bh != NULL) |
@@ -1536,14 +1623,13 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, | |||
1536 | } | 1623 | } |
1537 | 1624 | ||
1538 | static int | 1625 | static int |
1539 | nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | 1626 | nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, |
1540 | union nilfs_bmap_ptr_req *dreq, | 1627 | union nilfs_bmap_ptr_req *dreq, |
1541 | union nilfs_bmap_ptr_req *nreq, | 1628 | union nilfs_bmap_ptr_req *nreq, |
1542 | struct buffer_head **bhp, | 1629 | struct buffer_head **bhp, |
1543 | struct nilfs_bmap_stats *stats) | 1630 | struct nilfs_bmap_stats *stats) |
1544 | { | 1631 | { |
1545 | struct buffer_head *bh; | 1632 | struct buffer_head *bh; |
1546 | struct nilfs_btree *btree = (struct nilfs_btree *)bmap; | ||
1547 | struct inode *dat = NULL; | 1633 | struct inode *dat = NULL; |
1548 | int ret; | 1634 | int ret; |
1549 | 1635 | ||
@@ -1551,12 +1637,12 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | |||
1551 | 1637 | ||
1552 | /* for data */ | 1638 | /* for data */ |
1553 | /* cannot find near ptr */ | 1639 | /* cannot find near ptr */ |
1554 | if (NILFS_BMAP_USE_VBN(bmap)) { | 1640 | if (NILFS_BMAP_USE_VBN(btree)) { |
1555 | dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); | 1641 | dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); |
1556 | dat = nilfs_bmap_get_dat(bmap); | 1642 | dat = nilfs_bmap_get_dat(btree); |
1557 | } | 1643 | } |
1558 | 1644 | ||
1559 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat); | 1645 | ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); |
1560 | if (ret < 0) | 1646 | if (ret < 0) |
1561 | return ret; | 1647 | return ret; |
1562 | 1648 | ||
@@ -1564,7 +1650,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | |||
1564 | stats->bs_nblocks++; | 1650 | stats->bs_nblocks++; |
1565 | if (nreq != NULL) { | 1651 | if (nreq != NULL) { |
1566 | nreq->bpr_ptr = dreq->bpr_ptr + 1; | 1652 | nreq->bpr_ptr = dreq->bpr_ptr + 1; |
1567 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat); | 1653 | ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); |
1568 | if (ret < 0) | 1654 | if (ret < 0) |
1569 | goto err_out_dreq; | 1655 | goto err_out_dreq; |
1570 | 1656 | ||
@@ -1581,16 +1667,16 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, | |||
1581 | 1667 | ||
1582 | /* error */ | 1668 | /* error */ |
1583 | err_out_nreq: | 1669 | err_out_nreq: |
1584 | nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat); | 1670 | nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); |
1585 | err_out_dreq: | 1671 | err_out_dreq: |
1586 | nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat); | 1672 | nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); |
1587 | stats->bs_nblocks = 0; | 1673 | stats->bs_nblocks = 0; |
1588 | return ret; | 1674 | return ret; |
1589 | 1675 | ||
1590 | } | 1676 | } |
1591 | 1677 | ||
1592 | static void | 1678 | static void |
1593 | nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, | 1679 | nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, |
1594 | __u64 key, __u64 ptr, | 1680 | __u64 key, __u64 ptr, |
1595 | const __u64 *keys, const __u64 *ptrs, | 1681 | const __u64 *keys, const __u64 *ptrs, |
1596 | int n, | 1682 | int n, |
@@ -1598,57 +1684,59 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, | |||
1598 | union nilfs_bmap_ptr_req *nreq, | 1684 | union nilfs_bmap_ptr_req *nreq, |
1599 | struct buffer_head *bh) | 1685 | struct buffer_head *bh) |
1600 | { | 1686 | { |
1601 | struct nilfs_btree *btree = (struct nilfs_btree *)bmap; | ||
1602 | struct nilfs_btree_node *node; | 1687 | struct nilfs_btree_node *node; |
1603 | struct inode *dat; | 1688 | struct inode *dat; |
1604 | __u64 tmpptr; | 1689 | __u64 tmpptr; |
1690 | int ncblk; | ||
1605 | 1691 | ||
1606 | /* free resources */ | 1692 | /* free resources */ |
1607 | if (bmap->b_ops->bop_clear != NULL) | 1693 | if (btree->b_ops->bop_clear != NULL) |
1608 | bmap->b_ops->bop_clear(bmap); | 1694 | btree->b_ops->bop_clear(btree); |
1609 | 1695 | ||
1610 | /* ptr must be a pointer to a buffer head. */ | 1696 | /* ptr must be a pointer to a buffer head. */ |
1611 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); | 1697 | set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); |
1612 | 1698 | ||
1613 | /* convert and insert */ | 1699 | /* convert and insert */ |
1614 | dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; | 1700 | dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; |
1615 | nilfs_btree_init(bmap); | 1701 | nilfs_btree_init(btree); |
1616 | if (nreq != NULL) { | 1702 | if (nreq != NULL) { |
1617 | nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); | 1703 | nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); |
1618 | nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); | 1704 | nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); |
1619 | 1705 | ||
1620 | /* create child node at level 1 */ | 1706 | /* create child node at level 1 */ |
1621 | node = (struct nilfs_btree_node *)bh->b_data; | 1707 | node = (struct nilfs_btree_node *)bh->b_data; |
1622 | nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); | 1708 | ncblk = nilfs_btree_nchildren_per_block(btree); |
1623 | nilfs_btree_node_insert(btree, node, | 1709 | nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); |
1624 | key, dreq->bpr_ptr, n); | 1710 | nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); |
1625 | if (!buffer_dirty(bh)) | 1711 | if (!buffer_dirty(bh)) |
1626 | nilfs_btnode_mark_dirty(bh); | 1712 | nilfs_btnode_mark_dirty(bh); |
1627 | if (!nilfs_bmap_dirty(bmap)) | 1713 | if (!nilfs_bmap_dirty(btree)) |
1628 | nilfs_bmap_set_dirty(bmap); | 1714 | nilfs_bmap_set_dirty(btree); |
1629 | 1715 | ||
1630 | brelse(bh); | 1716 | brelse(bh); |
1631 | 1717 | ||
1632 | /* create root node at level 2 */ | 1718 | /* create root node at level 2 */ |
1633 | node = nilfs_btree_get_root(btree); | 1719 | node = nilfs_btree_get_root(btree); |
1634 | tmpptr = nreq->bpr_ptr; | 1720 | tmpptr = nreq->bpr_ptr; |
1635 | nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, | 1721 | nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, |
1636 | 2, 1, &keys[0], &tmpptr); | 1722 | NILFS_BTREE_ROOT_NCHILDREN_MAX, |
1723 | &keys[0], &tmpptr); | ||
1637 | } else { | 1724 | } else { |
1638 | nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); | 1725 | nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); |
1639 | 1726 | ||
1640 | /* create root node at level 1 */ | 1727 | /* create root node at level 1 */ |
1641 | node = nilfs_btree_get_root(btree); | 1728 | node = nilfs_btree_get_root(btree); |
1642 | nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, | 1729 | nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, |
1643 | 1, n, keys, ptrs); | 1730 | NILFS_BTREE_ROOT_NCHILDREN_MAX, |
1644 | nilfs_btree_node_insert(btree, node, | 1731 | keys, ptrs); |
1645 | key, dreq->bpr_ptr, n); | 1732 | nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, |
1646 | if (!nilfs_bmap_dirty(bmap)) | 1733 | NILFS_BTREE_ROOT_NCHILDREN_MAX); |
1647 | nilfs_bmap_set_dirty(bmap); | 1734 | if (!nilfs_bmap_dirty(btree)) |
1735 | nilfs_bmap_set_dirty(btree); | ||
1648 | } | 1736 | } |
1649 | 1737 | ||
1650 | if (NILFS_BMAP_USE_VBN(bmap)) | 1738 | if (NILFS_BMAP_USE_VBN(btree)) |
1651 | nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr); | 1739 | nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); |
1652 | } | 1740 | } |
1653 | 1741 | ||
1654 | /** | 1742 | /** |
@@ -1660,7 +1748,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, | |||
1660 | * @ptrs: | 1748 | * @ptrs: |
1661 | * @n: | 1749 | * @n: |
1662 | */ | 1750 | */ |
1663 | int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, | 1751 | int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, |
1664 | __u64 key, __u64 ptr, | 1752 | __u64 key, __u64 ptr, |
1665 | const __u64 *keys, const __u64 *ptrs, int n) | 1753 | const __u64 *keys, const __u64 *ptrs, int n) |
1666 | { | 1754 | { |
@@ -1673,7 +1761,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, | |||
1673 | di = &dreq; | 1761 | di = &dreq; |
1674 | ni = NULL; | 1762 | ni = NULL; |
1675 | } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( | 1763 | } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( |
1676 | 1 << bmap->b_inode->i_blkbits)) { | 1764 | 1 << btree->b_inode->i_blkbits)) { |
1677 | di = &dreq; | 1765 | di = &dreq; |
1678 | ni = &nreq; | 1766 | ni = &nreq; |
1679 | } else { | 1767 | } else { |
@@ -1682,17 +1770,17 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, | |||
1682 | BUG(); | 1770 | BUG(); |
1683 | } | 1771 | } |
1684 | 1772 | ||
1685 | ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh, | 1773 | ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, |
1686 | &stats); | 1774 | &stats); |
1687 | if (ret < 0) | 1775 | if (ret < 0) |
1688 | return ret; | 1776 | return ret; |
1689 | nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, | 1777 | nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, |
1690 | di, ni, bh); | 1778 | di, ni, bh); |
1691 | nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); | 1779 | nilfs_bmap_add_blocks(btree, stats.bs_nblocks); |
1692 | return 0; | 1780 | return 0; |
1693 | } | 1781 | } |
1694 | 1782 | ||
1695 | static int nilfs_btree_propagate_p(struct nilfs_btree *btree, | 1783 | static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, |
1696 | struct nilfs_btree_path *path, | 1784 | struct nilfs_btree_path *path, |
1697 | int level, | 1785 | int level, |
1698 | struct buffer_head *bh) | 1786 | struct buffer_head *bh) |
@@ -1704,17 +1792,17 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree, | |||
1704 | return 0; | 1792 | return 0; |
1705 | } | 1793 | } |
1706 | 1794 | ||
1707 | static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | 1795 | static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, |
1708 | struct nilfs_btree_path *path, | 1796 | struct nilfs_btree_path *path, |
1709 | int level, struct inode *dat) | 1797 | int level, struct inode *dat) |
1710 | { | 1798 | { |
1711 | struct nilfs_btree_node *parent; | 1799 | struct nilfs_btree_node *parent; |
1712 | int ret; | 1800 | int ncmax, ret; |
1713 | 1801 | ||
1714 | parent = nilfs_btree_get_node(btree, path, level + 1); | 1802 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
1715 | path[level].bp_oldreq.bpr_ptr = | 1803 | path[level].bp_oldreq.bpr_ptr = |
1716 | nilfs_btree_node_get_ptr(btree, parent, | 1804 | nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, |
1717 | path[level + 1].bp_index); | 1805 | ncmax); |
1718 | path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; | 1806 | path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; |
1719 | ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, | 1807 | ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, |
1720 | &path[level].bp_newreq.bpr_req); | 1808 | &path[level].bp_newreq.bpr_req); |
@@ -1726,7 +1814,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | |||
1726 | path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; | 1814 | path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; |
1727 | path[level].bp_ctxt.bh = path[level].bp_bh; | 1815 | path[level].bp_ctxt.bh = path[level].bp_bh; |
1728 | ret = nilfs_btnode_prepare_change_key( | 1816 | ret = nilfs_btnode_prepare_change_key( |
1729 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 1817 | &NILFS_BMAP_I(btree)->i_btnode_cache, |
1730 | &path[level].bp_ctxt); | 1818 | &path[level].bp_ctxt); |
1731 | if (ret < 0) { | 1819 | if (ret < 0) { |
1732 | nilfs_dat_abort_update(dat, | 1820 | nilfs_dat_abort_update(dat, |
@@ -1739,30 +1827,31 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, | |||
1739 | return 0; | 1827 | return 0; |
1740 | } | 1828 | } |
1741 | 1829 | ||
1742 | static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, | 1830 | static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, |
1743 | struct nilfs_btree_path *path, | 1831 | struct nilfs_btree_path *path, |
1744 | int level, struct inode *dat) | 1832 | int level, struct inode *dat) |
1745 | { | 1833 | { |
1746 | struct nilfs_btree_node *parent; | 1834 | struct nilfs_btree_node *parent; |
1835 | int ncmax; | ||
1747 | 1836 | ||
1748 | nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, | 1837 | nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, |
1749 | &path[level].bp_newreq.bpr_req, | 1838 | &path[level].bp_newreq.bpr_req, |
1750 | btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS); | 1839 | btree->b_ptr_type == NILFS_BMAP_PTR_VS); |
1751 | 1840 | ||
1752 | if (buffer_nilfs_node(path[level].bp_bh)) { | 1841 | if (buffer_nilfs_node(path[level].bp_bh)) { |
1753 | nilfs_btnode_commit_change_key( | 1842 | nilfs_btnode_commit_change_key( |
1754 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 1843 | &NILFS_BMAP_I(btree)->i_btnode_cache, |
1755 | &path[level].bp_ctxt); | 1844 | &path[level].bp_ctxt); |
1756 | path[level].bp_bh = path[level].bp_ctxt.bh; | 1845 | path[level].bp_bh = path[level].bp_ctxt.bh; |
1757 | } | 1846 | } |
1758 | set_buffer_nilfs_volatile(path[level].bp_bh); | 1847 | set_buffer_nilfs_volatile(path[level].bp_bh); |
1759 | 1848 | ||
1760 | parent = nilfs_btree_get_node(btree, path, level + 1); | 1849 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
1761 | nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index, | 1850 | nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, |
1762 | path[level].bp_newreq.bpr_ptr); | 1851 | path[level].bp_newreq.bpr_ptr, ncmax); |
1763 | } | 1852 | } |
1764 | 1853 | ||
1765 | static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, | 1854 | static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, |
1766 | struct nilfs_btree_path *path, | 1855 | struct nilfs_btree_path *path, |
1767 | int level, struct inode *dat) | 1856 | int level, struct inode *dat) |
1768 | { | 1857 | { |
@@ -1770,11 +1859,11 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, | |||
1770 | &path[level].bp_newreq.bpr_req); | 1859 | &path[level].bp_newreq.bpr_req); |
1771 | if (buffer_nilfs_node(path[level].bp_bh)) | 1860 | if (buffer_nilfs_node(path[level].bp_bh)) |
1772 | nilfs_btnode_abort_change_key( | 1861 | nilfs_btnode_abort_change_key( |
1773 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 1862 | &NILFS_BMAP_I(btree)->i_btnode_cache, |
1774 | &path[level].bp_ctxt); | 1863 | &path[level].bp_ctxt); |
1775 | } | 1864 | } |
1776 | 1865 | ||
1777 | static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, | 1866 | static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, |
1778 | struct nilfs_btree_path *path, | 1867 | struct nilfs_btree_path *path, |
1779 | int minlevel, int *maxlevelp, | 1868 | int minlevel, int *maxlevelp, |
1780 | struct inode *dat) | 1869 | struct inode *dat) |
@@ -1809,7 +1898,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, | |||
1809 | return ret; | 1898 | return ret; |
1810 | } | 1899 | } |
1811 | 1900 | ||
1812 | static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, | 1901 | static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, |
1813 | struct nilfs_btree_path *path, | 1902 | struct nilfs_btree_path *path, |
1814 | int minlevel, int maxlevel, | 1903 | int minlevel, int maxlevel, |
1815 | struct buffer_head *bh, | 1904 | struct buffer_head *bh, |
@@ -1824,14 +1913,15 @@ static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, | |||
1824 | nilfs_btree_commit_update_v(btree, path, level, dat); | 1913 | nilfs_btree_commit_update_v(btree, path, level, dat); |
1825 | } | 1914 | } |
1826 | 1915 | ||
1827 | static int nilfs_btree_propagate_v(struct nilfs_btree *btree, | 1916 | static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, |
1828 | struct nilfs_btree_path *path, | 1917 | struct nilfs_btree_path *path, |
1829 | int level, struct buffer_head *bh) | 1918 | int level, struct buffer_head *bh) |
1830 | { | 1919 | { |
1831 | int maxlevel = 0, ret; | 1920 | int maxlevel = 0, ret; |
1832 | struct nilfs_btree_node *parent; | 1921 | struct nilfs_btree_node *parent; |
1833 | struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); | 1922 | struct inode *dat = nilfs_bmap_get_dat(btree); |
1834 | __u64 ptr; | 1923 | __u64 ptr; |
1924 | int ncmax; | ||
1835 | 1925 | ||
1836 | get_bh(bh); | 1926 | get_bh(bh); |
1837 | path[level].bp_bh = bh; | 1927 | path[level].bp_bh = bh; |
@@ -1841,9 +1931,10 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree, | |||
1841 | goto out; | 1931 | goto out; |
1842 | 1932 | ||
1843 | if (buffer_nilfs_volatile(path[level].bp_bh)) { | 1933 | if (buffer_nilfs_volatile(path[level].bp_bh)) { |
1844 | parent = nilfs_btree_get_node(btree, path, level + 1); | 1934 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
1845 | ptr = nilfs_btree_node_get_ptr(btree, parent, | 1935 | ptr = nilfs_btree_node_get_ptr(parent, |
1846 | path[level + 1].bp_index); | 1936 | path[level + 1].bp_index, |
1937 | ncmax); | ||
1847 | ret = nilfs_dat_mark_dirty(dat, ptr); | 1938 | ret = nilfs_dat_mark_dirty(dat, ptr); |
1848 | if (ret < 0) | 1939 | if (ret < 0) |
1849 | goto out; | 1940 | goto out; |
@@ -1857,10 +1948,9 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree, | |||
1857 | return ret; | 1948 | return ret; |
1858 | } | 1949 | } |
1859 | 1950 | ||
1860 | static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | 1951 | static int nilfs_btree_propagate(struct nilfs_bmap *btree, |
1861 | struct buffer_head *bh) | 1952 | struct buffer_head *bh) |
1862 | { | 1953 | { |
1863 | struct nilfs_btree *btree; | ||
1864 | struct nilfs_btree_path *path; | 1954 | struct nilfs_btree_path *path; |
1865 | struct nilfs_btree_node *node; | 1955 | struct nilfs_btree_node *node; |
1866 | __u64 key; | 1956 | __u64 key; |
@@ -1868,7 +1958,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
1868 | 1958 | ||
1869 | WARN_ON(!buffer_dirty(bh)); | 1959 | WARN_ON(!buffer_dirty(bh)); |
1870 | 1960 | ||
1871 | btree = (struct nilfs_btree *)bmap; | ||
1872 | path = nilfs_btree_alloc_path(); | 1961 | path = nilfs_btree_alloc_path(); |
1873 | if (path == NULL) | 1962 | if (path == NULL) |
1874 | return -ENOMEM; | 1963 | return -ENOMEM; |
@@ -1878,11 +1967,11 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
1878 | key = nilfs_btree_node_get_key(node, 0); | 1967 | key = nilfs_btree_node_get_key(node, 0); |
1879 | level = nilfs_btree_node_get_level(node); | 1968 | level = nilfs_btree_node_get_level(node); |
1880 | } else { | 1969 | } else { |
1881 | key = nilfs_bmap_data_get_key(bmap, bh); | 1970 | key = nilfs_bmap_data_get_key(btree, bh); |
1882 | level = NILFS_BTREE_LEVEL_DATA; | 1971 | level = NILFS_BTREE_LEVEL_DATA; |
1883 | } | 1972 | } |
1884 | 1973 | ||
1885 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); | 1974 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); |
1886 | if (ret < 0) { | 1975 | if (ret < 0) { |
1887 | if (unlikely(ret == -ENOENT)) | 1976 | if (unlikely(ret == -ENOENT)) |
1888 | printk(KERN_CRIT "%s: key = %llu, level == %d\n", | 1977 | printk(KERN_CRIT "%s: key = %llu, level == %d\n", |
@@ -1890,7 +1979,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
1890 | goto out; | 1979 | goto out; |
1891 | } | 1980 | } |
1892 | 1981 | ||
1893 | ret = NILFS_BMAP_USE_VBN(bmap) ? | 1982 | ret = NILFS_BMAP_USE_VBN(btree) ? |
1894 | nilfs_btree_propagate_v(btree, path, level, bh) : | 1983 | nilfs_btree_propagate_v(btree, path, level, bh) : |
1895 | nilfs_btree_propagate_p(btree, path, level, bh); | 1984 | nilfs_btree_propagate_p(btree, path, level, bh); |
1896 | 1985 | ||
@@ -1900,13 +1989,13 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, | |||
1900 | return ret; | 1989 | return ret; |
1901 | } | 1990 | } |
1902 | 1991 | ||
1903 | static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, | 1992 | static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, |
1904 | struct buffer_head *bh) | 1993 | struct buffer_head *bh) |
1905 | { | 1994 | { |
1906 | return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr); | 1995 | return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); |
1907 | } | 1996 | } |
1908 | 1997 | ||
1909 | static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | 1998 | static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, |
1910 | struct list_head *lists, | 1999 | struct list_head *lists, |
1911 | struct buffer_head *bh) | 2000 | struct buffer_head *bh) |
1912 | { | 2001 | { |
@@ -1920,6 +2009,18 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | |||
1920 | node = (struct nilfs_btree_node *)bh->b_data; | 2009 | node = (struct nilfs_btree_node *)bh->b_data; |
1921 | key = nilfs_btree_node_get_key(node, 0); | 2010 | key = nilfs_btree_node_get_key(node, 0); |
1922 | level = nilfs_btree_node_get_level(node); | 2011 | level = nilfs_btree_node_get_level(node); |
2012 | if (level < NILFS_BTREE_LEVEL_NODE_MIN || | ||
2013 | level >= NILFS_BTREE_LEVEL_MAX) { | ||
2014 | dump_stack(); | ||
2015 | printk(KERN_WARNING | ||
2016 | "%s: invalid btree level: %d (key=%llu, ino=%lu, " | ||
2017 | "blocknr=%llu)\n", | ||
2018 | __func__, level, (unsigned long long)key, | ||
2019 | NILFS_BMAP_I(btree)->vfs_inode.i_ino, | ||
2020 | (unsigned long long)bh->b_blocknr); | ||
2021 | return; | ||
2022 | } | ||
2023 | |||
1923 | list_for_each(head, &lists[level]) { | 2024 | list_for_each(head, &lists[level]) { |
1924 | cbh = list_entry(head, struct buffer_head, b_assoc_buffers); | 2025 | cbh = list_entry(head, struct buffer_head, b_assoc_buffers); |
1925 | cnode = (struct nilfs_btree_node *)cbh->b_data; | 2026 | cnode = (struct nilfs_btree_node *)cbh->b_data; |
@@ -1930,11 +2031,10 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, | |||
1930 | list_add_tail(&bh->b_assoc_buffers, head); | 2031 | list_add_tail(&bh->b_assoc_buffers, head); |
1931 | } | 2032 | } |
1932 | 2033 | ||
1933 | static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, | 2034 | static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, |
1934 | struct list_head *listp) | 2035 | struct list_head *listp) |
1935 | { | 2036 | { |
1936 | struct nilfs_btree *btree = (struct nilfs_btree *)bmap; | 2037 | struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; |
1937 | struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache; | ||
1938 | struct list_head lists[NILFS_BTREE_LEVEL_MAX]; | 2038 | struct list_head lists[NILFS_BTREE_LEVEL_MAX]; |
1939 | struct pagevec pvec; | 2039 | struct pagevec pvec; |
1940 | struct buffer_head *bh, *head; | 2040 | struct buffer_head *bh, *head; |
@@ -1968,7 +2068,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, | |||
1968 | list_splice_tail(&lists[level], listp); | 2068 | list_splice_tail(&lists[level], listp); |
1969 | } | 2069 | } |
1970 | 2070 | ||
1971 | static int nilfs_btree_assign_p(struct nilfs_btree *btree, | 2071 | static int nilfs_btree_assign_p(struct nilfs_bmap *btree, |
1972 | struct nilfs_btree_path *path, | 2072 | struct nilfs_btree_path *path, |
1973 | int level, | 2073 | int level, |
1974 | struct buffer_head **bh, | 2074 | struct buffer_head **bh, |
@@ -1978,38 +2078,38 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree, | |||
1978 | struct nilfs_btree_node *parent; | 2078 | struct nilfs_btree_node *parent; |
1979 | __u64 key; | 2079 | __u64 key; |
1980 | __u64 ptr; | 2080 | __u64 ptr; |
1981 | int ret; | 2081 | int ncmax, ret; |
1982 | 2082 | ||
1983 | parent = nilfs_btree_get_node(btree, path, level + 1); | 2083 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
1984 | ptr = nilfs_btree_node_get_ptr(btree, parent, | 2084 | ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, |
1985 | path[level + 1].bp_index); | 2085 | ncmax); |
1986 | if (buffer_nilfs_node(*bh)) { | 2086 | if (buffer_nilfs_node(*bh)) { |
1987 | path[level].bp_ctxt.oldkey = ptr; | 2087 | path[level].bp_ctxt.oldkey = ptr; |
1988 | path[level].bp_ctxt.newkey = blocknr; | 2088 | path[level].bp_ctxt.newkey = blocknr; |
1989 | path[level].bp_ctxt.bh = *bh; | 2089 | path[level].bp_ctxt.bh = *bh; |
1990 | ret = nilfs_btnode_prepare_change_key( | 2090 | ret = nilfs_btnode_prepare_change_key( |
1991 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 2091 | &NILFS_BMAP_I(btree)->i_btnode_cache, |
1992 | &path[level].bp_ctxt); | 2092 | &path[level].bp_ctxt); |
1993 | if (ret < 0) | 2093 | if (ret < 0) |
1994 | return ret; | 2094 | return ret; |
1995 | nilfs_btnode_commit_change_key( | 2095 | nilfs_btnode_commit_change_key( |
1996 | &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, | 2096 | &NILFS_BMAP_I(btree)->i_btnode_cache, |
1997 | &path[level].bp_ctxt); | 2097 | &path[level].bp_ctxt); |
1998 | *bh = path[level].bp_ctxt.bh; | 2098 | *bh = path[level].bp_ctxt.bh; |
1999 | } | 2099 | } |
2000 | 2100 | ||
2001 | nilfs_btree_node_set_ptr(btree, parent, | 2101 | nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, |
2002 | path[level + 1].bp_index, blocknr); | 2102 | ncmax); |
2003 | 2103 | ||
2004 | key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); | 2104 | key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); |
2005 | /* on-disk format */ | 2105 | /* on-disk format */ |
2006 | binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 2106 | binfo->bi_dat.bi_blkoff = cpu_to_le64(key); |
2007 | binfo->bi_dat.bi_level = level; | 2107 | binfo->bi_dat.bi_level = level; |
2008 | 2108 | ||
2009 | return 0; | 2109 | return 0; |
2010 | } | 2110 | } |
2011 | 2111 | ||
2012 | static int nilfs_btree_assign_v(struct nilfs_btree *btree, | 2112 | static int nilfs_btree_assign_v(struct nilfs_bmap *btree, |
2013 | struct nilfs_btree_path *path, | 2113 | struct nilfs_btree_path *path, |
2014 | int level, | 2114 | int level, |
2015 | struct buffer_head **bh, | 2115 | struct buffer_head **bh, |
@@ -2017,15 +2117,15 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, | |||
2017 | union nilfs_binfo *binfo) | 2117 | union nilfs_binfo *binfo) |
2018 | { | 2118 | { |
2019 | struct nilfs_btree_node *parent; | 2119 | struct nilfs_btree_node *parent; |
2020 | struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); | 2120 | struct inode *dat = nilfs_bmap_get_dat(btree); |
2021 | __u64 key; | 2121 | __u64 key; |
2022 | __u64 ptr; | 2122 | __u64 ptr; |
2023 | union nilfs_bmap_ptr_req req; | 2123 | union nilfs_bmap_ptr_req req; |
2024 | int ret; | 2124 | int ncmax, ret; |
2025 | 2125 | ||
2026 | parent = nilfs_btree_get_node(btree, path, level + 1); | 2126 | parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); |
2027 | ptr = nilfs_btree_node_get_ptr(btree, parent, | 2127 | ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, |
2028 | path[level + 1].bp_index); | 2128 | ncmax); |
2029 | req.bpr_ptr = ptr; | 2129 | req.bpr_ptr = ptr; |
2030 | ret = nilfs_dat_prepare_start(dat, &req.bpr_req); | 2130 | ret = nilfs_dat_prepare_start(dat, &req.bpr_req); |
2031 | if (ret < 0) | 2131 | if (ret < 0) |
@@ -2034,24 +2134,22 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, | |||
2034 | 2134 | ||
2035 | key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); | 2135 | key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); |
2036 | /* on-disk format */ | 2136 | /* on-disk format */ |
2037 | binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); | 2137 | binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); |
2038 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 2138 | binfo->bi_v.bi_blkoff = cpu_to_le64(key); |
2039 | 2139 | ||
2040 | return 0; | 2140 | return 0; |
2041 | } | 2141 | } |
2042 | 2142 | ||
2043 | static int nilfs_btree_assign(struct nilfs_bmap *bmap, | 2143 | static int nilfs_btree_assign(struct nilfs_bmap *btree, |
2044 | struct buffer_head **bh, | 2144 | struct buffer_head **bh, |
2045 | sector_t blocknr, | 2145 | sector_t blocknr, |
2046 | union nilfs_binfo *binfo) | 2146 | union nilfs_binfo *binfo) |
2047 | { | 2147 | { |
2048 | struct nilfs_btree *btree; | ||
2049 | struct nilfs_btree_path *path; | 2148 | struct nilfs_btree_path *path; |
2050 | struct nilfs_btree_node *node; | 2149 | struct nilfs_btree_node *node; |
2051 | __u64 key; | 2150 | __u64 key; |
2052 | int level, ret; | 2151 | int level, ret; |
2053 | 2152 | ||
2054 | btree = (struct nilfs_btree *)bmap; | ||
2055 | path = nilfs_btree_alloc_path(); | 2153 | path = nilfs_btree_alloc_path(); |
2056 | if (path == NULL) | 2154 | if (path == NULL) |
2057 | return -ENOMEM; | 2155 | return -ENOMEM; |
@@ -2061,17 +2159,17 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, | |||
2061 | key = nilfs_btree_node_get_key(node, 0); | 2159 | key = nilfs_btree_node_get_key(node, 0); |
2062 | level = nilfs_btree_node_get_level(node); | 2160 | level = nilfs_btree_node_get_level(node); |
2063 | } else { | 2161 | } else { |
2064 | key = nilfs_bmap_data_get_key(bmap, *bh); | 2162 | key = nilfs_bmap_data_get_key(btree, *bh); |
2065 | level = NILFS_BTREE_LEVEL_DATA; | 2163 | level = NILFS_BTREE_LEVEL_DATA; |
2066 | } | 2164 | } |
2067 | 2165 | ||
2068 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); | 2166 | ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); |
2069 | if (ret < 0) { | 2167 | if (ret < 0) { |
2070 | WARN_ON(ret == -ENOENT); | 2168 | WARN_ON(ret == -ENOENT); |
2071 | goto out; | 2169 | goto out; |
2072 | } | 2170 | } |
2073 | 2171 | ||
2074 | ret = NILFS_BMAP_USE_VBN(bmap) ? | 2172 | ret = NILFS_BMAP_USE_VBN(btree) ? |
2075 | nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : | 2173 | nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : |
2076 | nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); | 2174 | nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); |
2077 | 2175 | ||
@@ -2081,7 +2179,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, | |||
2081 | return ret; | 2179 | return ret; |
2082 | } | 2180 | } |
2083 | 2181 | ||
2084 | static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, | 2182 | static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, |
2085 | struct buffer_head **bh, | 2183 | struct buffer_head **bh, |
2086 | sector_t blocknr, | 2184 | sector_t blocknr, |
2087 | union nilfs_binfo *binfo) | 2185 | union nilfs_binfo *binfo) |
@@ -2090,7 +2188,7 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, | |||
2090 | __u64 key; | 2188 | __u64 key; |
2091 | int ret; | 2189 | int ret; |
2092 | 2190 | ||
2093 | ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr, | 2191 | ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, |
2094 | blocknr); | 2192 | blocknr); |
2095 | if (ret < 0) | 2193 | if (ret < 0) |
2096 | return ret; | 2194 | return ret; |
@@ -2099,29 +2197,27 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, | |||
2099 | node = (struct nilfs_btree_node *)(*bh)->b_data; | 2197 | node = (struct nilfs_btree_node *)(*bh)->b_data; |
2100 | key = nilfs_btree_node_get_key(node, 0); | 2198 | key = nilfs_btree_node_get_key(node, 0); |
2101 | } else | 2199 | } else |
2102 | key = nilfs_bmap_data_get_key(bmap, *bh); | 2200 | key = nilfs_bmap_data_get_key(btree, *bh); |
2103 | 2201 | ||
2104 | /* on-disk format */ | 2202 | /* on-disk format */ |
2105 | binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); | 2203 | binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); |
2106 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 2204 | binfo->bi_v.bi_blkoff = cpu_to_le64(key); |
2107 | 2205 | ||
2108 | return 0; | 2206 | return 0; |
2109 | } | 2207 | } |
2110 | 2208 | ||
2111 | static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) | 2209 | static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) |
2112 | { | 2210 | { |
2113 | struct buffer_head *bh; | 2211 | struct buffer_head *bh; |
2114 | struct nilfs_btree *btree; | ||
2115 | struct nilfs_btree_path *path; | 2212 | struct nilfs_btree_path *path; |
2116 | __u64 ptr; | 2213 | __u64 ptr; |
2117 | int ret; | 2214 | int ret; |
2118 | 2215 | ||
2119 | btree = (struct nilfs_btree *)bmap; | ||
2120 | path = nilfs_btree_alloc_path(); | 2216 | path = nilfs_btree_alloc_path(); |
2121 | if (path == NULL) | 2217 | if (path == NULL) |
2122 | return -ENOMEM; | 2218 | return -ENOMEM; |
2123 | 2219 | ||
2124 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); | 2220 | ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); |
2125 | if (ret < 0) { | 2221 | if (ret < 0) { |
2126 | WARN_ON(ret == -ENOENT); | 2222 | WARN_ON(ret == -ENOENT); |
2127 | goto out; | 2223 | goto out; |
@@ -2135,8 +2231,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) | |||
2135 | if (!buffer_dirty(bh)) | 2231 | if (!buffer_dirty(bh)) |
2136 | nilfs_btnode_mark_dirty(bh); | 2232 | nilfs_btnode_mark_dirty(bh); |
2137 | brelse(bh); | 2233 | brelse(bh); |
2138 | if (!nilfs_bmap_dirty(&btree->bt_bmap)) | 2234 | if (!nilfs_bmap_dirty(btree)) |
2139 | nilfs_bmap_set_dirty(&btree->bt_bmap); | 2235 | nilfs_bmap_set_dirty(btree); |
2140 | 2236 | ||
2141 | out: | 2237 | out: |
2142 | nilfs_btree_free_path(path); | 2238 | nilfs_btree_free_path(path); |
@@ -2186,10 +2282,14 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { | |||
2186 | int nilfs_btree_init(struct nilfs_bmap *bmap) | 2282 | int nilfs_btree_init(struct nilfs_bmap *bmap) |
2187 | { | 2283 | { |
2188 | bmap->b_ops = &nilfs_btree_ops; | 2284 | bmap->b_ops = &nilfs_btree_ops; |
2285 | bmap->b_nchildren_per_block = | ||
2286 | NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); | ||
2189 | return 0; | 2287 | return 0; |
2190 | } | 2288 | } |
2191 | 2289 | ||
2192 | void nilfs_btree_init_gc(struct nilfs_bmap *bmap) | 2290 | void nilfs_btree_init_gc(struct nilfs_bmap *bmap) |
2193 | { | 2291 | { |
2194 | bmap->b_ops = &nilfs_btree_ops_gc; | 2292 | bmap->b_ops = &nilfs_btree_ops_gc; |
2293 | bmap->b_nchildren_per_block = | ||
2294 | NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); | ||
2195 | } | 2295 | } |
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 43c8c5b541fd..22c02e35b6ef 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h | |||
@@ -31,14 +31,6 @@ | |||
31 | #include "bmap.h" | 31 | #include "bmap.h" |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * struct nilfs_btree - B-tree structure | ||
35 | * @bt_bmap: bmap base structure | ||
36 | */ | ||
37 | struct nilfs_btree { | ||
38 | struct nilfs_bmap bt_bmap; | ||
39 | }; | ||
40 | |||
41 | /** | ||
42 | * struct nilfs_btree_path - A path on which B-tree operations are executed | 34 | * struct nilfs_btree_path - A path on which B-tree operations are executed |
43 | * @bp_bh: buffer head of node block | 35 | * @bp_bh: buffer head of node block |
44 | * @bp_sib_bh: buffer head of sibling node block | 36 | * @bp_sib_bh: buffer head of sibling node block |
@@ -54,7 +46,7 @@ struct nilfs_btree_path { | |||
54 | union nilfs_bmap_ptr_req bp_oldreq; | 46 | union nilfs_bmap_ptr_req bp_oldreq; |
55 | union nilfs_bmap_ptr_req bp_newreq; | 47 | union nilfs_bmap_ptr_req bp_newreq; |
56 | struct nilfs_btnode_chkey_ctxt bp_ctxt; | 48 | struct nilfs_btnode_chkey_ctxt bp_ctxt; |
57 | void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *, | 49 | void (*bp_op)(struct nilfs_bmap *, struct nilfs_btree_path *, |
58 | int, __u64 *, __u64 *); | 50 | int, __u64 *, __u64 *); |
59 | }; | 51 | }; |
60 | 52 | ||
@@ -80,4 +72,6 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, | |||
80 | const __u64 *, const __u64 *, int); | 72 | const __u64 *, const __u64 *, int); |
81 | void nilfs_btree_init_gc(struct nilfs_bmap *); | 73 | void nilfs_btree_init_gc(struct nilfs_bmap *); |
82 | 74 | ||
75 | int nilfs_btree_broken_node_block(struct buffer_head *bh); | ||
76 | |||
83 | #endif /* _NILFS_BTREE_H */ | 77 | #endif /* _NILFS_BTREE_H */ |
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 85c89dfc71f0..cb003c8ee1f6 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c | |||
@@ -80,23 +80,10 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) | |||
80 | return last_byte; | 80 | return last_byte; |
81 | } | 81 | } |
82 | 82 | ||
83 | static int nilfs_prepare_chunk_uninterruptible(struct page *page, | 83 | static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) |
84 | struct address_space *mapping, | ||
85 | unsigned from, unsigned to) | ||
86 | { | 84 | { |
87 | loff_t pos = page_offset(page) + from; | 85 | loff_t pos = page_offset(page) + from; |
88 | return block_write_begin(NULL, mapping, pos, to - from, | 86 | return __block_write_begin(page, pos, to - from, nilfs_get_block); |
89 | AOP_FLAG_UNINTERRUPTIBLE, &page, | ||
90 | NULL, nilfs_get_block); | ||
91 | } | ||
92 | |||
93 | static int nilfs_prepare_chunk(struct page *page, | ||
94 | struct address_space *mapping, | ||
95 | unsigned from, unsigned to) | ||
96 | { | ||
97 | loff_t pos = page_offset(page) + from; | ||
98 | return block_write_begin(NULL, mapping, pos, to - from, 0, &page, | ||
99 | NULL, nilfs_get_block); | ||
100 | } | 87 | } |
101 | 88 | ||
102 | static void nilfs_commit_chunk(struct page *page, | 89 | static void nilfs_commit_chunk(struct page *page, |
@@ -141,7 +128,7 @@ static void nilfs_check_page(struct page *page) | |||
141 | } | 128 | } |
142 | for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { | 129 | for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { |
143 | p = (struct nilfs_dir_entry *)(kaddr + offs); | 130 | p = (struct nilfs_dir_entry *)(kaddr + offs); |
144 | rec_len = le16_to_cpu(p->rec_len); | 131 | rec_len = nilfs_rec_len_from_disk(p->rec_len); |
145 | 132 | ||
146 | if (rec_len < NILFS_DIR_REC_LEN(1)) | 133 | if (rec_len < NILFS_DIR_REC_LEN(1)) |
147 | goto Eshort; | 134 | goto Eshort; |
@@ -199,13 +186,10 @@ fail: | |||
199 | static struct page *nilfs_get_page(struct inode *dir, unsigned long n) | 186 | static struct page *nilfs_get_page(struct inode *dir, unsigned long n) |
200 | { | 187 | { |
201 | struct address_space *mapping = dir->i_mapping; | 188 | struct address_space *mapping = dir->i_mapping; |
202 | struct page *page = read_cache_page(mapping, n, | 189 | struct page *page = read_mapping_page(mapping, n, NULL); |
203 | (filler_t *)mapping->a_ops->readpage, NULL); | 190 | |
204 | if (!IS_ERR(page)) { | 191 | if (!IS_ERR(page)) { |
205 | wait_on_page_locked(page); | ||
206 | kmap(page); | 192 | kmap(page); |
207 | if (!PageUptodate(page)) | ||
208 | goto fail; | ||
209 | if (!PageChecked(page)) | 193 | if (!PageChecked(page)) |
210 | nilfs_check_page(page); | 194 | nilfs_check_page(page); |
211 | if (PageError(page)) | 195 | if (PageError(page)) |
@@ -238,7 +222,8 @@ nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de) | |||
238 | */ | 222 | */ |
239 | static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) | 223 | static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) |
240 | { | 224 | { |
241 | return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); | 225 | return (struct nilfs_dir_entry *)((char *)p + |
226 | nilfs_rec_len_from_disk(p->rec_len)); | ||
242 | } | 227 | } |
243 | 228 | ||
244 | static unsigned char | 229 | static unsigned char |
@@ -329,7 +314,7 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
329 | goto success; | 314 | goto success; |
330 | } | 315 | } |
331 | } | 316 | } |
332 | filp->f_pos += le16_to_cpu(de->rec_len); | 317 | filp->f_pos += nilfs_rec_len_from_disk(de->rec_len); |
333 | } | 318 | } |
334 | nilfs_put_page(page); | 319 | nilfs_put_page(page); |
335 | } | 320 | } |
@@ -444,12 +429,12 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, | |||
444 | struct page *page, struct inode *inode) | 429 | struct page *page, struct inode *inode) |
445 | { | 430 | { |
446 | unsigned from = (char *) de - (char *) page_address(page); | 431 | unsigned from = (char *) de - (char *) page_address(page); |
447 | unsigned to = from + le16_to_cpu(de->rec_len); | 432 | unsigned to = from + nilfs_rec_len_from_disk(de->rec_len); |
448 | struct address_space *mapping = page->mapping; | 433 | struct address_space *mapping = page->mapping; |
449 | int err; | 434 | int err; |
450 | 435 | ||
451 | lock_page(page); | 436 | lock_page(page); |
452 | err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); | 437 | err = nilfs_prepare_chunk(page, from, to); |
453 | BUG_ON(err); | 438 | BUG_ON(err); |
454 | de->inode = cpu_to_le64(inode->i_ino); | 439 | de->inode = cpu_to_le64(inode->i_ino); |
455 | nilfs_set_de_type(de, inode); | 440 | nilfs_set_de_type(de, inode); |
@@ -500,7 +485,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) | |||
500 | /* We hit i_size */ | 485 | /* We hit i_size */ |
501 | name_len = 0; | 486 | name_len = 0; |
502 | rec_len = chunk_size; | 487 | rec_len = chunk_size; |
503 | de->rec_len = cpu_to_le16(chunk_size); | 488 | de->rec_len = nilfs_rec_len_to_disk(chunk_size); |
504 | de->inode = 0; | 489 | de->inode = 0; |
505 | goto got_it; | 490 | goto got_it; |
506 | } | 491 | } |
@@ -514,7 +499,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) | |||
514 | if (nilfs_match(namelen, name, de)) | 499 | if (nilfs_match(namelen, name, de)) |
515 | goto out_unlock; | 500 | goto out_unlock; |
516 | name_len = NILFS_DIR_REC_LEN(de->name_len); | 501 | name_len = NILFS_DIR_REC_LEN(de->name_len); |
517 | rec_len = le16_to_cpu(de->rec_len); | 502 | rec_len = nilfs_rec_len_from_disk(de->rec_len); |
518 | if (!de->inode && rec_len >= reclen) | 503 | if (!de->inode && rec_len >= reclen) |
519 | goto got_it; | 504 | goto got_it; |
520 | if (rec_len >= name_len + reclen) | 505 | if (rec_len >= name_len + reclen) |
@@ -530,15 +515,15 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) | |||
530 | got_it: | 515 | got_it: |
531 | from = (char *)de - (char *)page_address(page); | 516 | from = (char *)de - (char *)page_address(page); |
532 | to = from + rec_len; | 517 | to = from + rec_len; |
533 | err = nilfs_prepare_chunk(page, page->mapping, from, to); | 518 | err = nilfs_prepare_chunk(page, from, to); |
534 | if (err) | 519 | if (err) |
535 | goto out_unlock; | 520 | goto out_unlock; |
536 | if (de->inode) { | 521 | if (de->inode) { |
537 | struct nilfs_dir_entry *de1; | 522 | struct nilfs_dir_entry *de1; |
538 | 523 | ||
539 | de1 = (struct nilfs_dir_entry *)((char *)de + name_len); | 524 | de1 = (struct nilfs_dir_entry *)((char *)de + name_len); |
540 | de1->rec_len = cpu_to_le16(rec_len - name_len); | 525 | de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len); |
541 | de->rec_len = cpu_to_le16(name_len); | 526 | de->rec_len = nilfs_rec_len_to_disk(name_len); |
542 | de = de1; | 527 | de = de1; |
543 | } | 528 | } |
544 | de->name_len = namelen; | 529 | de->name_len = namelen; |
@@ -569,7 +554,8 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) | |||
569 | struct inode *inode = mapping->host; | 554 | struct inode *inode = mapping->host; |
570 | char *kaddr = page_address(page); | 555 | char *kaddr = page_address(page); |
571 | unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); | 556 | unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); |
572 | unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); | 557 | unsigned to = ((char *)dir - kaddr) + |
558 | nilfs_rec_len_from_disk(dir->rec_len); | ||
573 | struct nilfs_dir_entry *pde = NULL; | 559 | struct nilfs_dir_entry *pde = NULL; |
574 | struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); | 560 | struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); |
575 | int err; | 561 | int err; |
@@ -587,10 +573,10 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) | |||
587 | if (pde) | 573 | if (pde) |
588 | from = (char *)pde - (char *)page_address(page); | 574 | from = (char *)pde - (char *)page_address(page); |
589 | lock_page(page); | 575 | lock_page(page); |
590 | err = nilfs_prepare_chunk(page, mapping, from, to); | 576 | err = nilfs_prepare_chunk(page, from, to); |
591 | BUG_ON(err); | 577 | BUG_ON(err); |
592 | if (pde) | 578 | if (pde) |
593 | pde->rec_len = cpu_to_le16(to - from); | 579 | pde->rec_len = nilfs_rec_len_to_disk(to - from); |
594 | dir->inode = 0; | 580 | dir->inode = 0; |
595 | nilfs_commit_chunk(page, mapping, from, to); | 581 | nilfs_commit_chunk(page, mapping, from, to); |
596 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | 582 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; |
@@ -615,7 +601,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) | |||
615 | if (!page) | 601 | if (!page) |
616 | return -ENOMEM; | 602 | return -ENOMEM; |
617 | 603 | ||
618 | err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); | 604 | err = nilfs_prepare_chunk(page, 0, chunk_size); |
619 | if (unlikely(err)) { | 605 | if (unlikely(err)) { |
620 | unlock_page(page); | 606 | unlock_page(page); |
621 | goto fail; | 607 | goto fail; |
@@ -624,14 +610,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) | |||
624 | memset(kaddr, 0, chunk_size); | 610 | memset(kaddr, 0, chunk_size); |
625 | de = (struct nilfs_dir_entry *)kaddr; | 611 | de = (struct nilfs_dir_entry *)kaddr; |
626 | de->name_len = 1; | 612 | de->name_len = 1; |
627 | de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1)); | 613 | de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); |
628 | memcpy(de->name, ".\0\0", 4); | 614 | memcpy(de->name, ".\0\0", 4); |
629 | de->inode = cpu_to_le64(inode->i_ino); | 615 | de->inode = cpu_to_le64(inode->i_ino); |
630 | nilfs_set_de_type(de, inode); | 616 | nilfs_set_de_type(de, inode); |
631 | 617 | ||
632 | de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); | 618 | de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); |
633 | de->name_len = 2; | 619 | de->name_len = 2; |
634 | de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1)); | 620 | de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); |
635 | de->inode = cpu_to_le64(parent->i_ino); | 621 | de->inode = cpu_to_le64(parent->i_ino); |
636 | memcpy(de->name, "..\0", 4); | 622 | memcpy(de->name, "..\0", 4); |
637 | nilfs_set_de_type(de, inode); | 623 | nilfs_set_de_type(de, inode); |
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 236753df5cdf..324d80c57518 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c | |||
@@ -27,47 +27,43 @@ | |||
27 | #include "alloc.h" | 27 | #include "alloc.h" |
28 | #include "dat.h" | 28 | #include "dat.h" |
29 | 29 | ||
30 | static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) | 30 | static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct) |
31 | { | 31 | { |
32 | return (__le64 *) | 32 | return (__le64 *) |
33 | ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1); | 33 | ((struct nilfs_direct_node *)direct->b_u.u_data + 1); |
34 | } | 34 | } |
35 | 35 | ||
36 | static inline __u64 | 36 | static inline __u64 |
37 | nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key) | 37 | nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key) |
38 | { | 38 | { |
39 | return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key)); | 39 | return le64_to_cpu(*(nilfs_direct_dptrs(direct) + key)); |
40 | } | 40 | } |
41 | 41 | ||
42 | static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct, | 42 | static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct, |
43 | __u64 key, __u64 ptr) | 43 | __u64 key, __u64 ptr) |
44 | { | 44 | { |
45 | *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr); | 45 | *(nilfs_direct_dptrs(direct) + key) = cpu_to_le64(ptr); |
46 | } | 46 | } |
47 | 47 | ||
48 | static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, | 48 | static int nilfs_direct_lookup(const struct nilfs_bmap *direct, |
49 | __u64 key, int level, __u64 *ptrp) | 49 | __u64 key, int level, __u64 *ptrp) |
50 | { | 50 | { |
51 | struct nilfs_direct *direct; | ||
52 | __u64 ptr; | 51 | __u64 ptr; |
53 | 52 | ||
54 | direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */ | ||
55 | if (key > NILFS_DIRECT_KEY_MAX || level != 1) | 53 | if (key > NILFS_DIRECT_KEY_MAX || level != 1) |
56 | return -ENOENT; | 54 | return -ENOENT; |
57 | ptr = nilfs_direct_get_ptr(direct, key); | 55 | ptr = nilfs_direct_get_ptr(direct, key); |
58 | if (ptr == NILFS_BMAP_INVALID_PTR) | 56 | if (ptr == NILFS_BMAP_INVALID_PTR) |
59 | return -ENOENT; | 57 | return -ENOENT; |
60 | 58 | ||
61 | if (ptrp != NULL) | 59 | *ptrp = ptr; |
62 | *ptrp = ptr; | ||
63 | return 0; | 60 | return 0; |
64 | } | 61 | } |
65 | 62 | ||
66 | static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, | 63 | static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, |
67 | __u64 key, __u64 *ptrp, | 64 | __u64 key, __u64 *ptrp, |
68 | unsigned maxblocks) | 65 | unsigned maxblocks) |
69 | { | 66 | { |
70 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; | ||
71 | struct inode *dat = NULL; | 67 | struct inode *dat = NULL; |
72 | __u64 ptr, ptr2; | 68 | __u64 ptr, ptr2; |
73 | sector_t blocknr; | 69 | sector_t blocknr; |
@@ -79,8 +75,8 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, | |||
79 | if (ptr == NILFS_BMAP_INVALID_PTR) | 75 | if (ptr == NILFS_BMAP_INVALID_PTR) |
80 | return -ENOENT; | 76 | return -ENOENT; |
81 | 77 | ||
82 | if (NILFS_BMAP_USE_VBN(bmap)) { | 78 | if (NILFS_BMAP_USE_VBN(direct)) { |
83 | dat = nilfs_bmap_get_dat(bmap); | 79 | dat = nilfs_bmap_get_dat(direct); |
84 | ret = nilfs_dat_translate(dat, ptr, &blocknr); | 80 | ret = nilfs_dat_translate(dat, ptr, &blocknr); |
85 | if (ret < 0) | 81 | if (ret < 0) |
86 | return ret; | 82 | return ret; |
@@ -106,29 +102,21 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, | |||
106 | } | 102 | } |
107 | 103 | ||
108 | static __u64 | 104 | static __u64 |
109 | nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) | 105 | nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key) |
110 | { | 106 | { |
111 | __u64 ptr; | 107 | __u64 ptr; |
112 | 108 | ||
113 | ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); | 109 | ptr = nilfs_bmap_find_target_seq(direct, key); |
114 | if (ptr != NILFS_BMAP_INVALID_PTR) | 110 | if (ptr != NILFS_BMAP_INVALID_PTR) |
115 | /* sequential access */ | 111 | /* sequential access */ |
116 | return ptr; | 112 | return ptr; |
117 | else | 113 | else |
118 | /* block group */ | 114 | /* block group */ |
119 | return nilfs_bmap_find_target_in_group(&direct->d_bmap); | 115 | return nilfs_bmap_find_target_in_group(direct); |
120 | } | ||
121 | |||
122 | static void nilfs_direct_set_target_v(struct nilfs_direct *direct, | ||
123 | __u64 key, __u64 ptr) | ||
124 | { | ||
125 | direct->d_bmap.b_last_allocated_key = key; | ||
126 | direct->d_bmap.b_last_allocated_ptr = ptr; | ||
127 | } | 116 | } |
128 | 117 | ||
129 | static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | 118 | static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) |
130 | { | 119 | { |
131 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; | ||
132 | union nilfs_bmap_ptr_req req; | 120 | union nilfs_bmap_ptr_req req; |
133 | struct inode *dat = NULL; | 121 | struct inode *dat = NULL; |
134 | struct buffer_head *bh; | 122 | struct buffer_head *bh; |
@@ -136,11 +124,11 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | |||
136 | 124 | ||
137 | if (key > NILFS_DIRECT_KEY_MAX) | 125 | if (key > NILFS_DIRECT_KEY_MAX) |
138 | return -ENOENT; | 126 | return -ENOENT; |
139 | if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) | 127 | if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR) |
140 | return -EEXIST; | 128 | return -EEXIST; |
141 | 129 | ||
142 | if (NILFS_BMAP_USE_VBN(bmap)) { | 130 | if (NILFS_BMAP_USE_VBN(bmap)) { |
143 | req.bpr_ptr = nilfs_direct_find_target_v(direct, key); | 131 | req.bpr_ptr = nilfs_direct_find_target_v(bmap, key); |
144 | dat = nilfs_bmap_get_dat(bmap); | 132 | dat = nilfs_bmap_get_dat(bmap); |
145 | } | 133 | } |
146 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); | 134 | ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); |
@@ -150,13 +138,13 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | |||
150 | set_buffer_nilfs_volatile(bh); | 138 | set_buffer_nilfs_volatile(bh); |
151 | 139 | ||
152 | nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); | 140 | nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); |
153 | nilfs_direct_set_ptr(direct, key, req.bpr_ptr); | 141 | nilfs_direct_set_ptr(bmap, key, req.bpr_ptr); |
154 | 142 | ||
155 | if (!nilfs_bmap_dirty(bmap)) | 143 | if (!nilfs_bmap_dirty(bmap)) |
156 | nilfs_bmap_set_dirty(bmap); | 144 | nilfs_bmap_set_dirty(bmap); |
157 | 145 | ||
158 | if (NILFS_BMAP_USE_VBN(bmap)) | 146 | if (NILFS_BMAP_USE_VBN(bmap)) |
159 | nilfs_direct_set_target_v(direct, key, req.bpr_ptr); | 147 | nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); |
160 | 148 | ||
161 | nilfs_bmap_add_blocks(bmap, 1); | 149 | nilfs_bmap_add_blocks(bmap, 1); |
162 | } | 150 | } |
@@ -165,33 +153,30 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) | |||
165 | 153 | ||
166 | static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) | 154 | static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) |
167 | { | 155 | { |
168 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; | ||
169 | union nilfs_bmap_ptr_req req; | 156 | union nilfs_bmap_ptr_req req; |
170 | struct inode *dat; | 157 | struct inode *dat; |
171 | int ret; | 158 | int ret; |
172 | 159 | ||
173 | if (key > NILFS_DIRECT_KEY_MAX || | 160 | if (key > NILFS_DIRECT_KEY_MAX || |
174 | nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) | 161 | nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR) |
175 | return -ENOENT; | 162 | return -ENOENT; |
176 | 163 | ||
177 | dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; | 164 | dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; |
178 | req.bpr_ptr = nilfs_direct_get_ptr(direct, key); | 165 | req.bpr_ptr = nilfs_direct_get_ptr(bmap, key); |
179 | 166 | ||
180 | ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); | 167 | ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); |
181 | if (!ret) { | 168 | if (!ret) { |
182 | nilfs_bmap_commit_end_ptr(bmap, &req, dat); | 169 | nilfs_bmap_commit_end_ptr(bmap, &req, dat); |
183 | nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); | 170 | nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); |
184 | nilfs_bmap_sub_blocks(bmap, 1); | 171 | nilfs_bmap_sub_blocks(bmap, 1); |
185 | } | 172 | } |
186 | return ret; | 173 | return ret; |
187 | } | 174 | } |
188 | 175 | ||
189 | static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) | 176 | static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) |
190 | { | 177 | { |
191 | struct nilfs_direct *direct; | ||
192 | __u64 key, lastkey; | 178 | __u64 key, lastkey; |
193 | 179 | ||
194 | direct = (struct nilfs_direct *)bmap; | ||
195 | lastkey = NILFS_DIRECT_KEY_MAX + 1; | 180 | lastkey = NILFS_DIRECT_KEY_MAX + 1; |
196 | for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) | 181 | for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) |
197 | if (nilfs_direct_get_ptr(direct, key) != | 182 | if (nilfs_direct_get_ptr(direct, key) != |
@@ -211,15 +196,13 @@ static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key) | |||
211 | return key > NILFS_DIRECT_KEY_MAX; | 196 | return key > NILFS_DIRECT_KEY_MAX; |
212 | } | 197 | } |
213 | 198 | ||
214 | static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, | 199 | static int nilfs_direct_gather_data(struct nilfs_bmap *direct, |
215 | __u64 *keys, __u64 *ptrs, int nitems) | 200 | __u64 *keys, __u64 *ptrs, int nitems) |
216 | { | 201 | { |
217 | struct nilfs_direct *direct; | ||
218 | __u64 key; | 202 | __u64 key; |
219 | __u64 ptr; | 203 | __u64 ptr; |
220 | int n; | 204 | int n; |
221 | 205 | ||
222 | direct = (struct nilfs_direct *)bmap; | ||
223 | if (nitems > NILFS_DIRECT_NBLOCKS) | 206 | if (nitems > NILFS_DIRECT_NBLOCKS) |
224 | nitems = NILFS_DIRECT_NBLOCKS; | 207 | nitems = NILFS_DIRECT_NBLOCKS; |
225 | n = 0; | 208 | n = 0; |
@@ -237,7 +220,6 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, | |||
237 | int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, | 220 | int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, |
238 | __u64 key, __u64 *keys, __u64 *ptrs, int n) | 221 | __u64 key, __u64 *keys, __u64 *ptrs, int n) |
239 | { | 222 | { |
240 | struct nilfs_direct *direct; | ||
241 | __le64 *dptrs; | 223 | __le64 *dptrs; |
242 | int ret, i, j; | 224 | int ret, i, j; |
243 | 225 | ||
@@ -253,12 +235,11 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, | |||
253 | bmap->b_ops->bop_clear(bmap); | 235 | bmap->b_ops->bop_clear(bmap); |
254 | 236 | ||
255 | /* convert */ | 237 | /* convert */ |
256 | direct = (struct nilfs_direct *)bmap; | 238 | dptrs = nilfs_direct_dptrs(bmap); |
257 | dptrs = nilfs_direct_dptrs(direct); | ||
258 | for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { | 239 | for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { |
259 | if ((j < n) && (i == keys[j])) { | 240 | if ((j < n) && (i == keys[j])) { |
260 | dptrs[i] = (i != key) ? | 241 | dptrs[i] = (i != key) ? |
261 | nilfs_bmap_ptr_to_dptr(ptrs[j]) : | 242 | cpu_to_le64(ptrs[j]) : |
262 | NILFS_BMAP_INVALID_PTR; | 243 | NILFS_BMAP_INVALID_PTR; |
263 | j++; | 244 | j++; |
264 | } else | 245 | } else |
@@ -269,10 +250,9 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, | |||
269 | return 0; | 250 | return 0; |
270 | } | 251 | } |
271 | 252 | ||
272 | static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, | 253 | static int nilfs_direct_propagate(struct nilfs_bmap *bmap, |
273 | struct buffer_head *bh) | 254 | struct buffer_head *bh) |
274 | { | 255 | { |
275 | struct nilfs_direct *direct = (struct nilfs_direct *)bmap; | ||
276 | struct nilfs_palloc_req oldreq, newreq; | 256 | struct nilfs_palloc_req oldreq, newreq; |
277 | struct inode *dat; | 257 | struct inode *dat; |
278 | __u64 key; | 258 | __u64 key; |
@@ -284,7 +264,7 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, | |||
284 | 264 | ||
285 | dat = nilfs_bmap_get_dat(bmap); | 265 | dat = nilfs_bmap_get_dat(bmap); |
286 | key = nilfs_bmap_data_get_key(bmap, bh); | 266 | key = nilfs_bmap_data_get_key(bmap, bh); |
287 | ptr = nilfs_direct_get_ptr(direct, key); | 267 | ptr = nilfs_direct_get_ptr(bmap, key); |
288 | if (!buffer_nilfs_volatile(bh)) { | 268 | if (!buffer_nilfs_volatile(bh)) { |
289 | oldreq.pr_entry_nr = ptr; | 269 | oldreq.pr_entry_nr = ptr; |
290 | newreq.pr_entry_nr = ptr; | 270 | newreq.pr_entry_nr = ptr; |
@@ -294,20 +274,20 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, | |||
294 | nilfs_dat_commit_update(dat, &oldreq, &newreq, | 274 | nilfs_dat_commit_update(dat, &oldreq, &newreq, |
295 | bmap->b_ptr_type == NILFS_BMAP_PTR_VS); | 275 | bmap->b_ptr_type == NILFS_BMAP_PTR_VS); |
296 | set_buffer_nilfs_volatile(bh); | 276 | set_buffer_nilfs_volatile(bh); |
297 | nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr); | 277 | nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr); |
298 | } else | 278 | } else |
299 | ret = nilfs_dat_mark_dirty(dat, ptr); | 279 | ret = nilfs_dat_mark_dirty(dat, ptr); |
300 | 280 | ||
301 | return ret; | 281 | return ret; |
302 | } | 282 | } |
303 | 283 | ||
304 | static int nilfs_direct_assign_v(struct nilfs_direct *direct, | 284 | static int nilfs_direct_assign_v(struct nilfs_bmap *direct, |
305 | __u64 key, __u64 ptr, | 285 | __u64 key, __u64 ptr, |
306 | struct buffer_head **bh, | 286 | struct buffer_head **bh, |
307 | sector_t blocknr, | 287 | sector_t blocknr, |
308 | union nilfs_binfo *binfo) | 288 | union nilfs_binfo *binfo) |
309 | { | 289 | { |
310 | struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap); | 290 | struct inode *dat = nilfs_bmap_get_dat(direct); |
311 | union nilfs_bmap_ptr_req req; | 291 | union nilfs_bmap_ptr_req req; |
312 | int ret; | 292 | int ret; |
313 | 293 | ||
@@ -315,13 +295,13 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct, | |||
315 | ret = nilfs_dat_prepare_start(dat, &req.bpr_req); | 295 | ret = nilfs_dat_prepare_start(dat, &req.bpr_req); |
316 | if (!ret) { | 296 | if (!ret) { |
317 | nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); | 297 | nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); |
318 | binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); | 298 | binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); |
319 | binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 299 | binfo->bi_v.bi_blkoff = cpu_to_le64(key); |
320 | } | 300 | } |
321 | return ret; | 301 | return ret; |
322 | } | 302 | } |
323 | 303 | ||
324 | static int nilfs_direct_assign_p(struct nilfs_direct *direct, | 304 | static int nilfs_direct_assign_p(struct nilfs_bmap *direct, |
325 | __u64 key, __u64 ptr, | 305 | __u64 key, __u64 ptr, |
326 | struct buffer_head **bh, | 306 | struct buffer_head **bh, |
327 | sector_t blocknr, | 307 | sector_t blocknr, |
@@ -329,7 +309,7 @@ static int nilfs_direct_assign_p(struct nilfs_direct *direct, | |||
329 | { | 309 | { |
330 | nilfs_direct_set_ptr(direct, key, blocknr); | 310 | nilfs_direct_set_ptr(direct, key, blocknr); |
331 | 311 | ||
332 | binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); | 312 | binfo->bi_dat.bi_blkoff = cpu_to_le64(key); |
333 | binfo->bi_dat.bi_level = 0; | 313 | binfo->bi_dat.bi_level = 0; |
334 | 314 | ||
335 | return 0; | 315 | return 0; |
@@ -340,18 +320,16 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, | |||
340 | sector_t blocknr, | 320 | sector_t blocknr, |
341 | union nilfs_binfo *binfo) | 321 | union nilfs_binfo *binfo) |
342 | { | 322 | { |
343 | struct nilfs_direct *direct; | ||
344 | __u64 key; | 323 | __u64 key; |
345 | __u64 ptr; | 324 | __u64 ptr; |
346 | 325 | ||
347 | direct = (struct nilfs_direct *)bmap; | ||
348 | key = nilfs_bmap_data_get_key(bmap, *bh); | 326 | key = nilfs_bmap_data_get_key(bmap, *bh); |
349 | if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { | 327 | if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { |
350 | printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, | 328 | printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, |
351 | (unsigned long long)key); | 329 | (unsigned long long)key); |
352 | return -EINVAL; | 330 | return -EINVAL; |
353 | } | 331 | } |
354 | ptr = nilfs_direct_get_ptr(direct, key); | 332 | ptr = nilfs_direct_get_ptr(bmap, key); |
355 | if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { | 333 | if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { |
356 | printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, | 334 | printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, |
357 | (unsigned long long)ptr); | 335 | (unsigned long long)ptr); |
@@ -359,8 +337,8 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, | |||
359 | } | 337 | } |
360 | 338 | ||
361 | return NILFS_BMAP_USE_VBN(bmap) ? | 339 | return NILFS_BMAP_USE_VBN(bmap) ? |
362 | nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) : | 340 | nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr, binfo) : |
363 | nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo); | 341 | nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo); |
364 | } | 342 | } |
365 | 343 | ||
366 | static const struct nilfs_bmap_operations nilfs_direct_ops = { | 344 | static const struct nilfs_bmap_operations nilfs_direct_ops = { |
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index a5ffd66e25d0..dc643de20a25 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h | |||
@@ -28,8 +28,6 @@ | |||
28 | #include "bmap.h" | 28 | #include "bmap.h" |
29 | 29 | ||
30 | 30 | ||
31 | struct nilfs_direct; | ||
32 | |||
33 | /** | 31 | /** |
34 | * struct nilfs_direct_node - direct node | 32 | * struct nilfs_direct_node - direct node |
35 | * @dn_flags: flags | 33 | * @dn_flags: flags |
@@ -40,15 +38,6 @@ struct nilfs_direct_node { | |||
40 | __u8 pad[7]; | 38 | __u8 pad[7]; |
41 | }; | 39 | }; |
42 | 40 | ||
43 | /** | ||
44 | * struct nilfs_direct - direct mapping | ||
45 | * @d_bmap: bmap structure | ||
46 | */ | ||
47 | struct nilfs_direct { | ||
48 | struct nilfs_bmap d_bmap; | ||
49 | }; | ||
50 | |||
51 | |||
52 | #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) | 41 | #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) |
53 | #define NILFS_DIRECT_KEY_MIN 0 | 42 | #define NILFS_DIRECT_KEY_MIN 0 |
54 | #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) | 43 | #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) |
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c index dd5f7e0a95f6..84a45d1d5464 100644 --- a/fs/nilfs2/gcdat.c +++ b/fs/nilfs2/gcdat.c | |||
@@ -78,7 +78,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) | |||
78 | struct inode *gcdat = nilfs->ns_gc_dat; | 78 | struct inode *gcdat = nilfs->ns_gc_dat; |
79 | struct nilfs_inode_info *gii = NILFS_I(gcdat); | 79 | struct nilfs_inode_info *gii = NILFS_I(gcdat); |
80 | 80 | ||
81 | gcdat->i_state = I_CLEAR; | 81 | gcdat->i_state = I_FREEING | I_CLEAR; |
82 | gii->i_flags = 0; | 82 | gii->i_flags = 0; |
83 | 83 | ||
84 | nilfs_palloc_clear_cache(gcdat); | 84 | nilfs_palloc_clear_cache(gcdat); |
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 145f03cd7d3e..bed3a783129b 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c | |||
@@ -48,6 +48,8 @@ | |||
48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
49 | #include <linux/swap.h> | 49 | #include <linux/swap.h> |
50 | #include "nilfs.h" | 50 | #include "nilfs.h" |
51 | #include "btree.h" | ||
52 | #include "btnode.h" | ||
51 | #include "page.h" | 53 | #include "page.h" |
52 | #include "mdt.h" | 54 | #include "mdt.h" |
53 | #include "dat.h" | 55 | #include "dat.h" |
@@ -149,8 +151,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, | |||
149 | int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, | 151 | int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, |
150 | __u64 vbn, struct buffer_head **out_bh) | 152 | __u64 vbn, struct buffer_head **out_bh) |
151 | { | 153 | { |
152 | int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, | 154 | int ret; |
153 | vbn ? : pbn, pbn, out_bh); | 155 | |
156 | ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, | ||
157 | vbn ? : pbn, pbn, READ, out_bh, &pbn); | ||
154 | if (ret == -EEXIST) /* internal code (cache hit) */ | 158 | if (ret == -EEXIST) /* internal code (cache hit) */ |
155 | ret = 0; | 159 | ret = 0; |
156 | return ret; | 160 | return ret; |
@@ -164,10 +168,15 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) | |||
164 | if (buffer_dirty(bh)) | 168 | if (buffer_dirty(bh)) |
165 | return -EEXIST; | 169 | return -EEXIST; |
166 | 170 | ||
167 | if (buffer_nilfs_node(bh)) | 171 | if (buffer_nilfs_node(bh)) { |
172 | if (nilfs_btree_broken_node_block(bh)) { | ||
173 | clear_buffer_uptodate(bh); | ||
174 | return -EIO; | ||
175 | } | ||
168 | nilfs_btnode_mark_dirty(bh); | 176 | nilfs_btnode_mark_dirty(bh); |
169 | else | 177 | } else { |
170 | nilfs_mdt_mark_buffer_dirty(bh); | 178 | nilfs_mdt_mark_buffer_dirty(bh); |
179 | } | ||
171 | return 0; | 180 | return 0; |
172 | } | 181 | } |
173 | 182 | ||
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 39e038ac8fcb..eccb2f2e2315 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/uio.h> | 28 | #include <linux/uio.h> |
29 | #include "nilfs.h" | 29 | #include "nilfs.h" |
30 | #include "btnode.h" | ||
30 | #include "segment.h" | 31 | #include "segment.h" |
31 | #include "page.h" | 32 | #include "page.h" |
32 | #include "mdt.h" | 33 | #include "mdt.h" |
@@ -197,11 +198,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, | |||
197 | if (unlikely(err)) | 198 | if (unlikely(err)) |
198 | return err; | 199 | return err; |
199 | 200 | ||
200 | *pagep = NULL; | 201 | err = block_write_begin(mapping, pos, len, flags, pagep, |
201 | err = block_write_begin(file, mapping, pos, len, flags, pagep, | 202 | nilfs_get_block); |
202 | fsdata, nilfs_get_block); | 203 | if (unlikely(err)) { |
203 | if (unlikely(err)) | 204 | loff_t isize = mapping->host->i_size; |
205 | if (pos + len > isize) | ||
206 | vmtruncate(mapping->host, isize); | ||
207 | |||
204 | nilfs_transaction_abort(inode->i_sb); | 208 | nilfs_transaction_abort(inode->i_sb); |
209 | } | ||
205 | return err; | 210 | return err; |
206 | } | 211 | } |
207 | 212 | ||
@@ -237,6 +242,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
237 | /* Needs synchronization with the cleaner */ | 242 | /* Needs synchronization with the cleaner */ |
238 | size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 243 | size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
239 | offset, nr_segs, nilfs_get_block, NULL); | 244 | offset, nr_segs, nilfs_get_block, NULL); |
245 | |||
246 | /* | ||
247 | * In case of error extending write may have instantiated a few | ||
248 | * blocks outside i_size. Trim these off again. | ||
249 | */ | ||
250 | if (unlikely((rw & WRITE) && size < 0)) { | ||
251 | loff_t isize = i_size_read(inode); | ||
252 | loff_t end = offset + iov_length(iov, nr_segs); | ||
253 | |||
254 | if (end > isize) | ||
255 | vmtruncate(inode, isize); | ||
256 | } | ||
257 | |||
240 | return size; | 258 | return size; |
241 | } | 259 | } |
242 | 260 | ||
@@ -337,7 +355,6 @@ void nilfs_free_inode(struct inode *inode) | |||
337 | struct super_block *sb = inode->i_sb; | 355 | struct super_block *sb = inode->i_sb; |
338 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 356 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
339 | 357 | ||
340 | clear_inode(inode); | ||
341 | /* XXX: check error code? Is there any thing I can do? */ | 358 | /* XXX: check error code? Is there any thing I can do? */ |
342 | (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); | 359 | (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); |
343 | atomic_dec(&sbi->s_inodes_count); | 360 | atomic_dec(&sbi->s_inodes_count); |
@@ -597,16 +614,34 @@ void nilfs_truncate(struct inode *inode) | |||
597 | But truncate has no return value. */ | 614 | But truncate has no return value. */ |
598 | } | 615 | } |
599 | 616 | ||
600 | void nilfs_delete_inode(struct inode *inode) | 617 | static void nilfs_clear_inode(struct inode *inode) |
618 | { | ||
619 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
620 | |||
621 | /* | ||
622 | * Free resources allocated in nilfs_read_inode(), here. | ||
623 | */ | ||
624 | BUG_ON(!list_empty(&ii->i_dirty)); | ||
625 | brelse(ii->i_bh); | ||
626 | ii->i_bh = NULL; | ||
627 | |||
628 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) | ||
629 | nilfs_bmap_clear(ii->i_bmap); | ||
630 | |||
631 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); | ||
632 | } | ||
633 | |||
634 | void nilfs_evict_inode(struct inode *inode) | ||
601 | { | 635 | { |
602 | struct nilfs_transaction_info ti; | 636 | struct nilfs_transaction_info ti; |
603 | struct super_block *sb = inode->i_sb; | 637 | struct super_block *sb = inode->i_sb; |
604 | struct nilfs_inode_info *ii = NILFS_I(inode); | 638 | struct nilfs_inode_info *ii = NILFS_I(inode); |
605 | 639 | ||
606 | if (unlikely(is_bad_inode(inode))) { | 640 | if (inode->i_nlink || unlikely(is_bad_inode(inode))) { |
607 | if (inode->i_data.nrpages) | 641 | if (inode->i_data.nrpages) |
608 | truncate_inode_pages(&inode->i_data, 0); | 642 | truncate_inode_pages(&inode->i_data, 0); |
609 | clear_inode(inode); | 643 | end_writeback(inode); |
644 | nilfs_clear_inode(inode); | ||
610 | return; | 645 | return; |
611 | } | 646 | } |
612 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ | 647 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ |
@@ -616,6 +651,8 @@ void nilfs_delete_inode(struct inode *inode) | |||
616 | 651 | ||
617 | nilfs_truncate_bmap(ii, 0); | 652 | nilfs_truncate_bmap(ii, 0); |
618 | nilfs_mark_inode_dirty(inode); | 653 | nilfs_mark_inode_dirty(inode); |
654 | end_writeback(inode); | ||
655 | nilfs_clear_inode(inode); | ||
619 | nilfs_free_inode(inode); | 656 | nilfs_free_inode(inode); |
620 | /* nilfs_free_inode() marks inode buffer dirty */ | 657 | /* nilfs_free_inode() marks inode buffer dirty */ |
621 | if (IS_SYNC(inode)) | 658 | if (IS_SYNC(inode)) |
@@ -639,14 +676,27 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
639 | err = nilfs_transaction_begin(sb, &ti, 0); | 676 | err = nilfs_transaction_begin(sb, &ti, 0); |
640 | if (unlikely(err)) | 677 | if (unlikely(err)) |
641 | return err; | 678 | return err; |
642 | err = inode_setattr(inode, iattr); | 679 | |
643 | if (!err && (iattr->ia_valid & ATTR_MODE)) | 680 | if ((iattr->ia_valid & ATTR_SIZE) && |
681 | iattr->ia_size != i_size_read(inode)) { | ||
682 | err = vmtruncate(inode, iattr->ia_size); | ||
683 | if (unlikely(err)) | ||
684 | goto out_err; | ||
685 | } | ||
686 | |||
687 | setattr_copy(inode, iattr); | ||
688 | mark_inode_dirty(inode); | ||
689 | |||
690 | if (iattr->ia_valid & ATTR_MODE) { | ||
644 | err = nilfs_acl_chmod(inode); | 691 | err = nilfs_acl_chmod(inode); |
645 | if (likely(!err)) | 692 | if (unlikely(err)) |
646 | err = nilfs_transaction_commit(sb); | 693 | goto out_err; |
647 | else | 694 | } |
648 | nilfs_transaction_abort(sb); | 695 | |
696 | return nilfs_transaction_commit(sb); | ||
649 | 697 | ||
698 | out_err: | ||
699 | nilfs_transaction_abort(sb); | ||
650 | return err; | 700 | return err; |
651 | } | 701 | } |
652 | 702 | ||
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 024be8c35bb6..d01aff4957d9 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/swap.h> | 28 | #include <linux/swap.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include "nilfs.h" | 30 | #include "nilfs.h" |
31 | #include "btnode.h" | ||
31 | #include "segment.h" | 32 | #include "segment.h" |
32 | #include "page.h" | 33 | #include "page.h" |
33 | #include "mdt.h" | 34 | #include "mdt.h" |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 47d6d7928122..d3d54046e5f8 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -32,7 +32,6 @@ | |||
32 | #include "the_nilfs.h" | 32 | #include "the_nilfs.h" |
33 | #include "sb.h" | 33 | #include "sb.h" |
34 | #include "bmap.h" | 34 | #include "bmap.h" |
35 | #include "bmap_union.h" | ||
36 | 35 | ||
37 | /* | 36 | /* |
38 | * nilfs inode data in memory | 37 | * nilfs inode data in memory |
@@ -41,7 +40,7 @@ struct nilfs_inode_info { | |||
41 | __u32 i_flags; | 40 | __u32 i_flags; |
42 | unsigned long i_state; /* Dynamic state flags */ | 41 | unsigned long i_state; /* Dynamic state flags */ |
43 | struct nilfs_bmap *i_bmap; | 42 | struct nilfs_bmap *i_bmap; |
44 | union nilfs_bmap_union i_bmap_union; | 43 | struct nilfs_bmap i_bmap_data; |
45 | __u64 i_xattr; /* sector_t ??? */ | 44 | __u64 i_xattr; /* sector_t ??? */ |
46 | __u32 i_dir_start_lookup; | 45 | __u32 i_dir_start_lookup; |
47 | __u64 i_cno; /* check point number for GC inode */ | 46 | __u64 i_cno; /* check point number for GC inode */ |
@@ -71,9 +70,7 @@ static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode) | |||
71 | static inline struct nilfs_inode_info * | 70 | static inline struct nilfs_inode_info * |
72 | NILFS_BMAP_I(const struct nilfs_bmap *bmap) | 71 | NILFS_BMAP_I(const struct nilfs_bmap *bmap) |
73 | { | 72 | { |
74 | return container_of((union nilfs_bmap_union *)bmap, | 73 | return container_of(bmap, struct nilfs_inode_info, i_bmap_data); |
75 | struct nilfs_inode_info, | ||
76 | i_bmap_union); | ||
77 | } | 74 | } |
78 | 75 | ||
79 | static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) | 76 | static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) |
@@ -107,6 +104,14 @@ enum { | |||
107 | }; | 104 | }; |
108 | 105 | ||
109 | /* | 106 | /* |
107 | * commit flags for nilfs_commit_super and nilfs_sync_super | ||
108 | */ | ||
109 | enum { | ||
110 | NILFS_SB_COMMIT = 0, /* Commit a super block alternately */ | ||
111 | NILFS_SB_COMMIT_ALL /* Commit both super blocks */ | ||
112 | }; | ||
113 | |||
114 | /* | ||
110 | * Macros to check inode numbers | 115 | * Macros to check inode numbers |
111 | */ | 116 | */ |
112 | #define NILFS_MDT_INO_BITS \ | 117 | #define NILFS_MDT_INO_BITS \ |
@@ -245,7 +250,7 @@ extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); | |||
245 | extern struct inode *nilfs_iget(struct super_block *, unsigned long); | 250 | extern struct inode *nilfs_iget(struct super_block *, unsigned long); |
246 | extern void nilfs_update_inode(struct inode *, struct buffer_head *); | 251 | extern void nilfs_update_inode(struct inode *, struct buffer_head *); |
247 | extern void nilfs_truncate(struct inode *); | 252 | extern void nilfs_truncate(struct inode *); |
248 | extern void nilfs_delete_inode(struct inode *); | 253 | extern void nilfs_evict_inode(struct inode *); |
249 | extern int nilfs_setattr(struct dentry *, struct iattr *); | 254 | extern int nilfs_setattr(struct dentry *, struct iattr *); |
250 | extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, | 255 | extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, |
251 | struct buffer_head **); | 256 | struct buffer_head **); |
@@ -270,7 +275,14 @@ extern struct nilfs_super_block * | |||
270 | nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); | 275 | nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); |
271 | extern int nilfs_store_magic_and_option(struct super_block *, | 276 | extern int nilfs_store_magic_and_option(struct super_block *, |
272 | struct nilfs_super_block *, char *); | 277 | struct nilfs_super_block *, char *); |
278 | extern int nilfs_check_feature_compatibility(struct super_block *, | ||
279 | struct nilfs_super_block *); | ||
280 | extern void nilfs_set_log_cursor(struct nilfs_super_block *, | ||
281 | struct the_nilfs *); | ||
282 | extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, | ||
283 | int flip); | ||
273 | extern int nilfs_commit_super(struct nilfs_sb_info *, int); | 284 | extern int nilfs_commit_super(struct nilfs_sb_info *, int); |
285 | extern int nilfs_cleanup_super(struct nilfs_sb_info *); | ||
274 | extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); | 286 | extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); |
275 | extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); | 287 | extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); |
276 | 288 | ||
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 8de3e1e48130..aab11db2cb08 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -37,7 +37,8 @@ | |||
37 | 37 | ||
38 | #define NILFS_BUFFER_INHERENT_BITS \ | 38 | #define NILFS_BUFFER_INHERENT_BITS \ |
39 | ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ | 39 | ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ |
40 | (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) | 40 | (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ |
41 | (1UL << BH_NILFS_Checked)) | ||
41 | 42 | ||
42 | static struct buffer_head * | 43 | static struct buffer_head * |
43 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | 44 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, |
@@ -129,6 +130,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) | |||
129 | 130 | ||
130 | lock_buffer(bh); | 131 | lock_buffer(bh); |
131 | clear_buffer_nilfs_volatile(bh); | 132 | clear_buffer_nilfs_volatile(bh); |
133 | clear_buffer_nilfs_checked(bh); | ||
132 | clear_buffer_dirty(bh); | 134 | clear_buffer_dirty(bh); |
133 | if (nilfs_page_buffers_clean(page)) | 135 | if (nilfs_page_buffers_clean(page)) |
134 | __nilfs_clear_page_dirty(page); | 136 | __nilfs_clear_page_dirty(page); |
@@ -480,6 +482,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) | |||
480 | lock_buffer(bh); | 482 | lock_buffer(bh); |
481 | clear_buffer_dirty(bh); | 483 | clear_buffer_dirty(bh); |
482 | clear_buffer_nilfs_volatile(bh); | 484 | clear_buffer_nilfs_volatile(bh); |
485 | clear_buffer_nilfs_checked(bh); | ||
483 | clear_buffer_uptodate(bh); | 486 | clear_buffer_uptodate(bh); |
484 | clear_buffer_mapped(bh); | 487 | clear_buffer_mapped(bh); |
485 | unlock_buffer(bh); | 488 | unlock_buffer(bh); |
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 8abca4d1c1f8..f53d8da41ed7 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h | |||
@@ -34,11 +34,13 @@ enum { | |||
34 | BH_NILFS_Allocated = BH_PrivateStart, | 34 | BH_NILFS_Allocated = BH_PrivateStart, |
35 | BH_NILFS_Node, | 35 | BH_NILFS_Node, |
36 | BH_NILFS_Volatile, | 36 | BH_NILFS_Volatile, |
37 | BH_NILFS_Checked, | ||
37 | }; | 38 | }; |
38 | 39 | ||
39 | BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ | 40 | BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ |
40 | BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ | 41 | BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ |
41 | BUFFER_FNS(NILFS_Volatile, nilfs_volatile) | 42 | BUFFER_FNS(NILFS_Volatile, nilfs_volatile) |
43 | BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ | ||
42 | 44 | ||
43 | 45 | ||
44 | void nilfs_mark_buffer_dirty(struct buffer_head *bh); | 46 | void nilfs_mark_buffer_dirty(struct buffer_head *bh); |
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index bae2a516b4ee..d0c35ef39f6a 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c | |||
@@ -91,27 +91,9 @@ static int nilfs_warn_segment_error(int err) | |||
91 | return -EINVAL; | 91 | return -EINVAL; |
92 | } | 92 | } |
93 | 93 | ||
94 | static void store_segsum_info(struct nilfs_segsum_info *ssi, | ||
95 | struct nilfs_segment_summary *sum, | ||
96 | unsigned int blocksize) | ||
97 | { | ||
98 | ssi->flags = le16_to_cpu(sum->ss_flags); | ||
99 | ssi->seg_seq = le64_to_cpu(sum->ss_seq); | ||
100 | ssi->ctime = le64_to_cpu(sum->ss_create); | ||
101 | ssi->next = le64_to_cpu(sum->ss_next); | ||
102 | ssi->nblocks = le32_to_cpu(sum->ss_nblocks); | ||
103 | ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo); | ||
104 | ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes); | ||
105 | |||
106 | ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); | ||
107 | ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); | ||
108 | |||
109 | /* need to verify ->ss_bytes field if read ->ss_cno */ | ||
110 | } | ||
111 | |||
112 | /** | 94 | /** |
113 | * calc_crc_cont - check CRC of blocks continuously | 95 | * nilfs_compute_checksum - compute checksum of blocks continuously |
114 | * @sbi: nilfs_sb_info | 96 | * @nilfs: nilfs object |
115 | * @bhs: buffer head of start block | 97 | * @bhs: buffer head of start block |
116 | * @sum: place to store result | 98 | * @sum: place to store result |
117 | * @offset: offset bytes in the first block | 99 | * @offset: offset bytes in the first block |
@@ -119,23 +101,25 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi, | |||
119 | * @start: DBN of start block | 101 | * @start: DBN of start block |
120 | * @nblock: number of blocks to be checked | 102 | * @nblock: number of blocks to be checked |
121 | */ | 103 | */ |
122 | static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, | 104 | static int nilfs_compute_checksum(struct the_nilfs *nilfs, |
123 | u32 *sum, unsigned long offset, u64 check_bytes, | 105 | struct buffer_head *bhs, u32 *sum, |
124 | sector_t start, unsigned long nblock) | 106 | unsigned long offset, u64 check_bytes, |
107 | sector_t start, unsigned long nblock) | ||
125 | { | 108 | { |
126 | unsigned long blocksize = sbi->s_super->s_blocksize; | 109 | unsigned int blocksize = nilfs->ns_blocksize; |
127 | unsigned long size; | 110 | unsigned long size; |
128 | u32 crc; | 111 | u32 crc; |
129 | 112 | ||
130 | BUG_ON(offset >= blocksize); | 113 | BUG_ON(offset >= blocksize); |
131 | check_bytes -= offset; | 114 | check_bytes -= offset; |
132 | size = min_t(u64, check_bytes, blocksize - offset); | 115 | size = min_t(u64, check_bytes, blocksize - offset); |
133 | crc = crc32_le(sbi->s_nilfs->ns_crc_seed, | 116 | crc = crc32_le(nilfs->ns_crc_seed, |
134 | (unsigned char *)bhs->b_data + offset, size); | 117 | (unsigned char *)bhs->b_data + offset, size); |
135 | if (--nblock > 0) { | 118 | if (--nblock > 0) { |
136 | do { | 119 | do { |
137 | struct buffer_head *bh | 120 | struct buffer_head *bh; |
138 | = sb_bread(sbi->s_super, ++start); | 121 | |
122 | bh = __bread(nilfs->ns_bdev, ++start, blocksize); | ||
139 | if (!bh) | 123 | if (!bh) |
140 | return -EIO; | 124 | return -EIO; |
141 | check_bytes -= size; | 125 | check_bytes -= size; |
@@ -150,12 +134,12 @@ static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, | |||
150 | 134 | ||
151 | /** | 135 | /** |
152 | * nilfs_read_super_root_block - read super root block | 136 | * nilfs_read_super_root_block - read super root block |
153 | * @sb: super_block | 137 | * @nilfs: nilfs object |
154 | * @sr_block: disk block number of the super root block | 138 | * @sr_block: disk block number of the super root block |
155 | * @pbh: address of a buffer_head pointer to return super root buffer | 139 | * @pbh: address of a buffer_head pointer to return super root buffer |
156 | * @check: CRC check flag | 140 | * @check: CRC check flag |
157 | */ | 141 | */ |
158 | int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, | 142 | int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, |
159 | struct buffer_head **pbh, int check) | 143 | struct buffer_head **pbh, int check) |
160 | { | 144 | { |
161 | struct buffer_head *bh_sr; | 145 | struct buffer_head *bh_sr; |
@@ -164,7 +148,7 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, | |||
164 | int ret; | 148 | int ret; |
165 | 149 | ||
166 | *pbh = NULL; | 150 | *pbh = NULL; |
167 | bh_sr = sb_bread(sb, sr_block); | 151 | bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize); |
168 | if (unlikely(!bh_sr)) { | 152 | if (unlikely(!bh_sr)) { |
169 | ret = NILFS_SEG_FAIL_IO; | 153 | ret = NILFS_SEG_FAIL_IO; |
170 | goto failed; | 154 | goto failed; |
@@ -174,12 +158,13 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, | |||
174 | if (check) { | 158 | if (check) { |
175 | unsigned bytes = le16_to_cpu(sr->sr_bytes); | 159 | unsigned bytes = le16_to_cpu(sr->sr_bytes); |
176 | 160 | ||
177 | if (bytes == 0 || bytes > sb->s_blocksize) { | 161 | if (bytes == 0 || bytes > nilfs->ns_blocksize) { |
178 | ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; | 162 | ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; |
179 | goto failed_bh; | 163 | goto failed_bh; |
180 | } | 164 | } |
181 | if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc, | 165 | if (nilfs_compute_checksum( |
182 | sizeof(sr->sr_sum), bytes, sr_block, 1)) { | 166 | nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes, |
167 | sr_block, 1)) { | ||
183 | ret = NILFS_SEG_FAIL_IO; | 168 | ret = NILFS_SEG_FAIL_IO; |
184 | goto failed_bh; | 169 | goto failed_bh; |
185 | } | 170 | } |
@@ -199,64 +184,76 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, | |||
199 | } | 184 | } |
200 | 185 | ||
201 | /** | 186 | /** |
202 | * load_segment_summary - read segment summary of the specified partial segment | 187 | * nilfs_read_log_header - read summary header of the specified log |
203 | * @sbi: nilfs_sb_info | 188 | * @nilfs: nilfs object |
204 | * @pseg_start: start disk block number of partial segment | 189 | * @start_blocknr: start block number of the log |
205 | * @seg_seq: sequence number requested | 190 | * @sum: pointer to return segment summary structure |
206 | * @ssi: pointer to nilfs_segsum_info struct to store information | ||
207 | */ | 191 | */ |
208 | static int | 192 | static struct buffer_head * |
209 | load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start, | 193 | nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, |
210 | u64 seg_seq, struct nilfs_segsum_info *ssi) | 194 | struct nilfs_segment_summary **sum) |
211 | { | 195 | { |
212 | struct buffer_head *bh_sum; | 196 | struct buffer_head *bh_sum; |
213 | struct nilfs_segment_summary *sum; | 197 | |
198 | bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize); | ||
199 | if (bh_sum) | ||
200 | *sum = (struct nilfs_segment_summary *)bh_sum->b_data; | ||
201 | return bh_sum; | ||
202 | } | ||
203 | |||
204 | /** | ||
205 | * nilfs_validate_log - verify consistency of log | ||
206 | * @nilfs: nilfs object | ||
207 | * @seg_seq: sequence number of segment | ||
208 | * @bh_sum: buffer head of summary block | ||
209 | * @sum: segment summary struct | ||
210 | */ | ||
211 | static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq, | ||
212 | struct buffer_head *bh_sum, | ||
213 | struct nilfs_segment_summary *sum) | ||
214 | { | ||
214 | unsigned long nblock; | 215 | unsigned long nblock; |
215 | u32 crc; | 216 | u32 crc; |
216 | int ret = NILFS_SEG_FAIL_IO; | 217 | int ret; |
217 | 218 | ||
218 | bh_sum = sb_bread(sbi->s_super, pseg_start); | 219 | ret = NILFS_SEG_FAIL_MAGIC; |
219 | if (!bh_sum) | 220 | if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) |
220 | goto out; | 221 | goto out; |
221 | 222 | ||
222 | sum = (struct nilfs_segment_summary *)bh_sum->b_data; | 223 | ret = NILFS_SEG_FAIL_SEQ; |
223 | 224 | if (le64_to_cpu(sum->ss_seq) != seg_seq) | |
224 | /* Check consistency of segment summary */ | 225 | goto out; |
225 | if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) { | ||
226 | ret = NILFS_SEG_FAIL_MAGIC; | ||
227 | goto failed; | ||
228 | } | ||
229 | store_segsum_info(ssi, sum, sbi->s_super->s_blocksize); | ||
230 | if (seg_seq != ssi->seg_seq) { | ||
231 | ret = NILFS_SEG_FAIL_SEQ; | ||
232 | goto failed; | ||
233 | } | ||
234 | 226 | ||
235 | nblock = ssi->nblocks; | 227 | nblock = le32_to_cpu(sum->ss_nblocks); |
236 | if (unlikely(nblock == 0 || | 228 | ret = NILFS_SEG_FAIL_CONSISTENCY; |
237 | nblock > sbi->s_nilfs->ns_blocks_per_segment)) { | 229 | if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment)) |
238 | /* This limits the number of blocks read in the CRC check */ | 230 | /* This limits the number of blocks read in the CRC check */ |
239 | ret = NILFS_SEG_FAIL_CONSISTENCY; | 231 | goto out; |
240 | goto failed; | 232 | |
241 | } | 233 | ret = NILFS_SEG_FAIL_IO; |
242 | if (calc_crc_cont(sbi, bh_sum, &crc, sizeof(sum->ss_datasum), | 234 | if (nilfs_compute_checksum(nilfs, bh_sum, &crc, sizeof(sum->ss_datasum), |
243 | ((u64)nblock << sbi->s_super->s_blocksize_bits), | 235 | ((u64)nblock << nilfs->ns_blocksize_bits), |
244 | pseg_start, nblock)) { | 236 | bh_sum->b_blocknr, nblock)) |
245 | ret = NILFS_SEG_FAIL_IO; | 237 | goto out; |
246 | goto failed; | 238 | |
247 | } | 239 | ret = NILFS_SEG_FAIL_CHECKSUM_FULL; |
248 | if (crc == le32_to_cpu(sum->ss_datasum)) | 240 | if (crc != le32_to_cpu(sum->ss_datasum)) |
249 | ret = 0; | 241 | goto out; |
250 | else | 242 | ret = 0; |
251 | ret = NILFS_SEG_FAIL_CHECKSUM_FULL; | 243 | out: |
252 | failed: | ||
253 | brelse(bh_sum); | ||
254 | out: | ||
255 | return ret; | 244 | return ret; |
256 | } | 245 | } |
257 | 246 | ||
258 | static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, | 247 | /** |
259 | unsigned int *offset, unsigned int bytes) | 248 | * nilfs_read_summary_info - read an item on summary blocks of a log |
249 | * @nilfs: nilfs object | ||
250 | * @pbh: the current buffer head on summary blocks [in, out] | ||
251 | * @offset: the current byte offset on summary blocks [in, out] | ||
252 | * @bytes: byte size of the item to be read | ||
253 | */ | ||
254 | static void *nilfs_read_summary_info(struct the_nilfs *nilfs, | ||
255 | struct buffer_head **pbh, | ||
256 | unsigned int *offset, unsigned int bytes) | ||
260 | { | 257 | { |
261 | void *ptr; | 258 | void *ptr; |
262 | sector_t blocknr; | 259 | sector_t blocknr; |
@@ -265,7 +262,8 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, | |||
265 | if (bytes > (*pbh)->b_size - *offset) { | 262 | if (bytes > (*pbh)->b_size - *offset) { |
266 | blocknr = (*pbh)->b_blocknr; | 263 | blocknr = (*pbh)->b_blocknr; |
267 | brelse(*pbh); | 264 | brelse(*pbh); |
268 | *pbh = sb_bread(sb, blocknr + 1); | 265 | *pbh = __bread(nilfs->ns_bdev, blocknr + 1, |
266 | nilfs->ns_blocksize); | ||
269 | if (unlikely(!*pbh)) | 267 | if (unlikely(!*pbh)) |
270 | return NULL; | 268 | return NULL; |
271 | *offset = 0; | 269 | *offset = 0; |
@@ -275,9 +273,18 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, | |||
275 | return ptr; | 273 | return ptr; |
276 | } | 274 | } |
277 | 275 | ||
278 | static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, | 276 | /** |
279 | unsigned int *offset, unsigned int bytes, | 277 | * nilfs_skip_summary_info - skip items on summary blocks of a log |
280 | unsigned long count) | 278 | * @nilfs: nilfs object |
279 | * @pbh: the current buffer head on summary blocks [in, out] | ||
280 | * @offset: the current byte offset on summary blocks [in, out] | ||
281 | * @bytes: byte size of the item to be skipped | ||
282 | * @count: number of items to be skipped | ||
283 | */ | ||
284 | static void nilfs_skip_summary_info(struct the_nilfs *nilfs, | ||
285 | struct buffer_head **pbh, | ||
286 | unsigned int *offset, unsigned int bytes, | ||
287 | unsigned long count) | ||
281 | { | 288 | { |
282 | unsigned int rest_item_in_current_block | 289 | unsigned int rest_item_in_current_block |
283 | = ((*pbh)->b_size - *offset) / bytes; | 290 | = ((*pbh)->b_size - *offset) / bytes; |
@@ -294,36 +301,46 @@ static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, | |||
294 | *offset = bytes * (count - (bcnt - 1) * nitem_per_block); | 301 | *offset = bytes * (count - (bcnt - 1) * nitem_per_block); |
295 | 302 | ||
296 | brelse(*pbh); | 303 | brelse(*pbh); |
297 | *pbh = sb_bread(sb, blocknr + bcnt); | 304 | *pbh = __bread(nilfs->ns_bdev, blocknr + bcnt, |
305 | nilfs->ns_blocksize); | ||
298 | } | 306 | } |
299 | } | 307 | } |
300 | 308 | ||
301 | static int | 309 | /** |
302 | collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, | 310 | * nilfs_scan_dsync_log - get block information of a log written for data sync |
303 | struct nilfs_segsum_info *ssi, | 311 | * @nilfs: nilfs object |
304 | struct list_head *head) | 312 | * @start_blocknr: start block number of the log |
313 | * @sum: log summary information | ||
314 | * @head: list head to add nilfs_recovery_block struct | ||
315 | */ | ||
316 | static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, | ||
317 | struct nilfs_segment_summary *sum, | ||
318 | struct list_head *head) | ||
305 | { | 319 | { |
306 | struct buffer_head *bh; | 320 | struct buffer_head *bh; |
307 | unsigned int offset; | 321 | unsigned int offset; |
308 | unsigned long nfinfo = ssi->nfinfo; | 322 | u32 nfinfo, sumbytes; |
309 | sector_t blocknr = sum_blocknr + ssi->nsumblk; | 323 | sector_t blocknr; |
310 | ino_t ino; | 324 | ino_t ino; |
311 | int err = -EIO; | 325 | int err = -EIO; |
312 | 326 | ||
327 | nfinfo = le32_to_cpu(sum->ss_nfinfo); | ||
313 | if (!nfinfo) | 328 | if (!nfinfo) |
314 | return 0; | 329 | return 0; |
315 | 330 | ||
316 | bh = sb_bread(sbi->s_super, sum_blocknr); | 331 | sumbytes = le32_to_cpu(sum->ss_sumbytes); |
332 | blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize); | ||
333 | bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize); | ||
317 | if (unlikely(!bh)) | 334 | if (unlikely(!bh)) |
318 | goto out; | 335 | goto out; |
319 | 336 | ||
320 | offset = le16_to_cpu( | 337 | offset = le16_to_cpu(sum->ss_bytes); |
321 | ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes); | ||
322 | for (;;) { | 338 | for (;;) { |
323 | unsigned long nblocks, ndatablk, nnodeblk; | 339 | unsigned long nblocks, ndatablk, nnodeblk; |
324 | struct nilfs_finfo *finfo; | 340 | struct nilfs_finfo *finfo; |
325 | 341 | ||
326 | finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo)); | 342 | finfo = nilfs_read_summary_info(nilfs, &bh, &offset, |
343 | sizeof(*finfo)); | ||
327 | if (unlikely(!finfo)) | 344 | if (unlikely(!finfo)) |
328 | goto out; | 345 | goto out; |
329 | 346 | ||
@@ -336,8 +353,8 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, | |||
336 | struct nilfs_recovery_block *rb; | 353 | struct nilfs_recovery_block *rb; |
337 | struct nilfs_binfo_v *binfo; | 354 | struct nilfs_binfo_v *binfo; |
338 | 355 | ||
339 | binfo = segsum_get(sbi->s_super, &bh, &offset, | 356 | binfo = nilfs_read_summary_info(nilfs, &bh, &offset, |
340 | sizeof(*binfo)); | 357 | sizeof(*binfo)); |
341 | if (unlikely(!binfo)) | 358 | if (unlikely(!binfo)) |
342 | goto out; | 359 | goto out; |
343 | 360 | ||
@@ -355,9 +372,9 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, | |||
355 | } | 372 | } |
356 | if (--nfinfo == 0) | 373 | if (--nfinfo == 0) |
357 | break; | 374 | break; |
358 | blocknr += nnodeblk; /* always 0 for the data sync segments */ | 375 | blocknr += nnodeblk; /* always 0 for data sync logs */ |
359 | segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64), | 376 | nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64), |
360 | nnodeblk); | 377 | nnodeblk); |
361 | if (unlikely(!bh)) | 378 | if (unlikely(!bh)) |
362 | goto out; | 379 | goto out; |
363 | } | 380 | } |
@@ -467,14 +484,14 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, | |||
467 | return err; | 484 | return err; |
468 | } | 485 | } |
469 | 486 | ||
470 | static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, | 487 | static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, |
471 | struct nilfs_recovery_block *rb, | 488 | struct nilfs_recovery_block *rb, |
472 | struct page *page) | 489 | struct page *page) |
473 | { | 490 | { |
474 | struct buffer_head *bh_org; | 491 | struct buffer_head *bh_org; |
475 | void *kaddr; | 492 | void *kaddr; |
476 | 493 | ||
477 | bh_org = sb_bread(sbi->s_super, rb->blocknr); | 494 | bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); |
478 | if (unlikely(!bh_org)) | 495 | if (unlikely(!bh_org)) |
479 | return -EIO; | 496 | return -EIO; |
480 | 497 | ||
@@ -485,13 +502,14 @@ static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, | |||
485 | return 0; | 502 | return 0; |
486 | } | 503 | } |
487 | 504 | ||
488 | static int recover_dsync_blocks(struct nilfs_sb_info *sbi, | 505 | static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, |
489 | struct list_head *head, | 506 | struct nilfs_sb_info *sbi, |
490 | unsigned long *nr_salvaged_blocks) | 507 | struct list_head *head, |
508 | unsigned long *nr_salvaged_blocks) | ||
491 | { | 509 | { |
492 | struct inode *inode; | 510 | struct inode *inode; |
493 | struct nilfs_recovery_block *rb, *n; | 511 | struct nilfs_recovery_block *rb, *n; |
494 | unsigned blocksize = sbi->s_super->s_blocksize; | 512 | unsigned blocksize = nilfs->ns_blocksize; |
495 | struct page *page; | 513 | struct page *page; |
496 | loff_t pos; | 514 | loff_t pos; |
497 | int err = 0, err2 = 0; | 515 | int err = 0, err2 = 0; |
@@ -505,13 +523,16 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, | |||
505 | } | 523 | } |
506 | 524 | ||
507 | pos = rb->blkoff << inode->i_blkbits; | 525 | pos = rb->blkoff << inode->i_blkbits; |
508 | page = NULL; | 526 | err = block_write_begin(inode->i_mapping, pos, blocksize, |
509 | err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, | 527 | 0, &page, nilfs_get_block); |
510 | 0, &page, NULL, nilfs_get_block); | 528 | if (unlikely(err)) { |
511 | if (unlikely(err)) | 529 | loff_t isize = inode->i_size; |
530 | if (pos + blocksize > isize) | ||
531 | vmtruncate(inode, isize); | ||
512 | goto failed_inode; | 532 | goto failed_inode; |
533 | } | ||
513 | 534 | ||
514 | err = nilfs_recovery_copy_block(sbi, rb, page); | 535 | err = nilfs_recovery_copy_block(nilfs, rb, page); |
515 | if (unlikely(err)) | 536 | if (unlikely(err)) |
516 | goto failed_page; | 537 | goto failed_page; |
517 | 538 | ||
@@ -551,18 +572,20 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, | |||
551 | /** | 572 | /** |
552 | * nilfs_do_roll_forward - salvage logical segments newer than the latest | 573 | * nilfs_do_roll_forward - salvage logical segments newer than the latest |
553 | * checkpoint | 574 | * checkpoint |
575 | * @nilfs: nilfs object | ||
554 | * @sbi: nilfs_sb_info | 576 | * @sbi: nilfs_sb_info |
555 | * @nilfs: the_nilfs | ||
556 | * @ri: pointer to a nilfs_recovery_info | 577 | * @ri: pointer to a nilfs_recovery_info |
557 | */ | 578 | */ |
558 | static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | 579 | static int nilfs_do_roll_forward(struct the_nilfs *nilfs, |
559 | struct nilfs_sb_info *sbi, | 580 | struct nilfs_sb_info *sbi, |
560 | struct nilfs_recovery_info *ri) | 581 | struct nilfs_recovery_info *ri) |
561 | { | 582 | { |
562 | struct nilfs_segsum_info ssi; | 583 | struct buffer_head *bh_sum = NULL; |
584 | struct nilfs_segment_summary *sum; | ||
563 | sector_t pseg_start; | 585 | sector_t pseg_start; |
564 | sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ | 586 | sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ |
565 | unsigned long nsalvaged_blocks = 0; | 587 | unsigned long nsalvaged_blocks = 0; |
588 | unsigned int flags; | ||
566 | u64 seg_seq; | 589 | u64 seg_seq; |
567 | __u64 segnum, nextnum = 0; | 590 | __u64 segnum, nextnum = 0; |
568 | int empty_seg = 0; | 591 | int empty_seg = 0; |
@@ -581,8 +604,14 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
581 | nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); | 604 | nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); |
582 | 605 | ||
583 | while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { | 606 | while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { |
607 | brelse(bh_sum); | ||
608 | bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum); | ||
609 | if (!bh_sum) { | ||
610 | err = -EIO; | ||
611 | goto failed; | ||
612 | } | ||
584 | 613 | ||
585 | ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi); | 614 | ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum); |
586 | if (ret) { | 615 | if (ret) { |
587 | if (ret == NILFS_SEG_FAIL_IO) { | 616 | if (ret == NILFS_SEG_FAIL_IO) { |
588 | err = -EIO; | 617 | err = -EIO; |
@@ -590,33 +619,38 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
590 | } | 619 | } |
591 | goto strayed; | 620 | goto strayed; |
592 | } | 621 | } |
593 | if (unlikely(NILFS_SEG_HAS_SR(&ssi))) | 622 | |
623 | flags = le16_to_cpu(sum->ss_flags); | ||
624 | if (flags & NILFS_SS_SR) | ||
594 | goto confused; | 625 | goto confused; |
595 | 626 | ||
596 | /* Found a valid partial segment; do recovery actions */ | 627 | /* Found a valid partial segment; do recovery actions */ |
597 | nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); | 628 | nextnum = nilfs_get_segnum_of_block(nilfs, |
629 | le64_to_cpu(sum->ss_next)); | ||
598 | empty_seg = 0; | 630 | empty_seg = 0; |
599 | nilfs->ns_ctime = ssi.ctime; | 631 | nilfs->ns_ctime = le64_to_cpu(sum->ss_create); |
600 | if (!(ssi.flags & NILFS_SS_GC)) | 632 | if (!(flags & NILFS_SS_GC)) |
601 | nilfs->ns_nongc_ctime = ssi.ctime; | 633 | nilfs->ns_nongc_ctime = nilfs->ns_ctime; |
602 | 634 | ||
603 | switch (state) { | 635 | switch (state) { |
604 | case RF_INIT_ST: | 636 | case RF_INIT_ST: |
605 | if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi)) | 637 | if (!(flags & NILFS_SS_LOGBGN) || |
638 | !(flags & NILFS_SS_SYNDT)) | ||
606 | goto try_next_pseg; | 639 | goto try_next_pseg; |
607 | state = RF_DSYNC_ST; | 640 | state = RF_DSYNC_ST; |
608 | /* Fall through */ | 641 | /* Fall through */ |
609 | case RF_DSYNC_ST: | 642 | case RF_DSYNC_ST: |
610 | if (!NILFS_SEG_DSYNC(&ssi)) | 643 | if (!(flags & NILFS_SS_SYNDT)) |
611 | goto confused; | 644 | goto confused; |
612 | 645 | ||
613 | err = collect_blocks_from_segsum( | 646 | err = nilfs_scan_dsync_log(nilfs, pseg_start, sum, |
614 | sbi, pseg_start, &ssi, &dsync_blocks); | 647 | &dsync_blocks); |
615 | if (unlikely(err)) | 648 | if (unlikely(err)) |
616 | goto failed; | 649 | goto failed; |
617 | if (NILFS_SEG_LOGEND(&ssi)) { | 650 | if (flags & NILFS_SS_LOGEND) { |
618 | err = recover_dsync_blocks( | 651 | err = nilfs_recover_dsync_blocks( |
619 | sbi, &dsync_blocks, &nsalvaged_blocks); | 652 | nilfs, sbi, &dsync_blocks, |
653 | &nsalvaged_blocks); | ||
620 | if (unlikely(err)) | 654 | if (unlikely(err)) |
621 | goto failed; | 655 | goto failed; |
622 | state = RF_INIT_ST; | 656 | state = RF_INIT_ST; |
@@ -627,7 +661,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
627 | try_next_pseg: | 661 | try_next_pseg: |
628 | if (pseg_start == ri->ri_lsegs_end) | 662 | if (pseg_start == ri->ri_lsegs_end) |
629 | break; | 663 | break; |
630 | pseg_start += ssi.nblocks; | 664 | pseg_start += le32_to_cpu(sum->ss_nblocks); |
631 | if (pseg_start < seg_end) | 665 | if (pseg_start < seg_end) |
632 | continue; | 666 | continue; |
633 | goto feed_segment; | 667 | goto feed_segment; |
@@ -652,8 +686,9 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
652 | ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; | 686 | ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; |
653 | } | 687 | } |
654 | out: | 688 | out: |
689 | brelse(bh_sum); | ||
655 | dispose_recovery_list(&dsync_blocks); | 690 | dispose_recovery_list(&dsync_blocks); |
656 | nilfs_detach_writer(sbi->s_nilfs, sbi); | 691 | nilfs_detach_writer(nilfs, sbi); |
657 | return err; | 692 | return err; |
658 | 693 | ||
659 | confused: | 694 | confused: |
@@ -667,7 +702,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
667 | } | 702 | } |
668 | 703 | ||
669 | static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, | 704 | static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, |
670 | struct nilfs_sb_info *sbi, | ||
671 | struct nilfs_recovery_info *ri) | 705 | struct nilfs_recovery_info *ri) |
672 | { | 706 | { |
673 | struct buffer_head *bh; | 707 | struct buffer_head *bh; |
@@ -677,7 +711,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, | |||
677 | nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) | 711 | nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) |
678 | return; | 712 | return; |
679 | 713 | ||
680 | bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start); | 714 | bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); |
681 | BUG_ON(!bh); | 715 | BUG_ON(!bh); |
682 | memset(bh->b_data, 0, bh->b_size); | 716 | memset(bh->b_data, 0, bh->b_size); |
683 | set_buffer_dirty(bh); | 717 | set_buffer_dirty(bh); |
@@ -690,9 +724,8 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, | |||
690 | } | 724 | } |
691 | 725 | ||
692 | /** | 726 | /** |
693 | * nilfs_recover_logical_segments - salvage logical segments written after | 727 | * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint |
694 | * the latest super root | 728 | * @nilfs: nilfs object |
695 | * @nilfs: the_nilfs | ||
696 | * @sbi: nilfs_sb_info | 729 | * @sbi: nilfs_sb_info |
697 | * @ri: pointer to a nilfs_recovery_info struct to store search results. | 730 | * @ri: pointer to a nilfs_recovery_info struct to store search results. |
698 | * | 731 | * |
@@ -709,9 +742,9 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, | |||
709 | * | 742 | * |
710 | * %-ENOMEM - Insufficient memory available. | 743 | * %-ENOMEM - Insufficient memory available. |
711 | */ | 744 | */ |
712 | int nilfs_recover_logical_segments(struct the_nilfs *nilfs, | 745 | int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, |
713 | struct nilfs_sb_info *sbi, | 746 | struct nilfs_sb_info *sbi, |
714 | struct nilfs_recovery_info *ri) | 747 | struct nilfs_recovery_info *ri) |
715 | { | 748 | { |
716 | int err; | 749 | int err; |
717 | 750 | ||
@@ -751,7 +784,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, | |||
751 | goto failed; | 784 | goto failed; |
752 | } | 785 | } |
753 | 786 | ||
754 | nilfs_finish_roll_forward(nilfs, sbi, ri); | 787 | nilfs_finish_roll_forward(nilfs, ri); |
755 | } | 788 | } |
756 | 789 | ||
757 | failed: | 790 | failed: |
@@ -762,7 +795,6 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, | |||
762 | /** | 795 | /** |
763 | * nilfs_search_super_root - search the latest valid super root | 796 | * nilfs_search_super_root - search the latest valid super root |
764 | * @nilfs: the_nilfs | 797 | * @nilfs: the_nilfs |
765 | * @sbi: nilfs_sb_info | ||
766 | * @ri: pointer to a nilfs_recovery_info struct to store search results. | 798 | * @ri: pointer to a nilfs_recovery_info struct to store search results. |
767 | * | 799 | * |
768 | * nilfs_search_super_root() looks for the latest super-root from a partial | 800 | * nilfs_search_super_root() looks for the latest super-root from a partial |
@@ -775,14 +807,19 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, | |||
775 | * %-EINVAL - No valid segment found | 807 | * %-EINVAL - No valid segment found |
776 | * | 808 | * |
777 | * %-EIO - I/O error | 809 | * %-EIO - I/O error |
810 | * | ||
811 | * %-ENOMEM - Insufficient memory available. | ||
778 | */ | 812 | */ |
779 | int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | 813 | int nilfs_search_super_root(struct the_nilfs *nilfs, |
780 | struct nilfs_recovery_info *ri) | 814 | struct nilfs_recovery_info *ri) |
781 | { | 815 | { |
782 | struct nilfs_segsum_info ssi; | 816 | struct buffer_head *bh_sum = NULL; |
817 | struct nilfs_segment_summary *sum; | ||
783 | sector_t pseg_start, pseg_end, sr_pseg_start = 0; | 818 | sector_t pseg_start, pseg_end, sr_pseg_start = 0; |
784 | sector_t seg_start, seg_end; /* range of full segment (block number) */ | 819 | sector_t seg_start, seg_end; /* range of full segment (block number) */ |
785 | sector_t b, end; | 820 | sector_t b, end; |
821 | unsigned long nblocks; | ||
822 | unsigned int flags; | ||
786 | u64 seg_seq; | 823 | u64 seg_seq; |
787 | __u64 segnum, nextnum = 0; | 824 | __u64 segnum, nextnum = 0; |
788 | __u64 cno; | 825 | __u64 cno; |
@@ -801,17 +838,24 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
801 | /* Read ahead segment */ | 838 | /* Read ahead segment */ |
802 | b = seg_start; | 839 | b = seg_start; |
803 | while (b <= seg_end) | 840 | while (b <= seg_end) |
804 | sb_breadahead(sbi->s_super, b++); | 841 | __breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize); |
805 | 842 | ||
806 | for (;;) { | 843 | for (;;) { |
807 | /* Load segment summary */ | 844 | brelse(bh_sum); |
808 | ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi); | 845 | ret = NILFS_SEG_FAIL_IO; |
846 | bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum); | ||
847 | if (!bh_sum) | ||
848 | goto failed; | ||
849 | |||
850 | ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum); | ||
809 | if (ret) { | 851 | if (ret) { |
810 | if (ret == NILFS_SEG_FAIL_IO) | 852 | if (ret == NILFS_SEG_FAIL_IO) |
811 | goto failed; | 853 | goto failed; |
812 | goto strayed; | 854 | goto strayed; |
813 | } | 855 | } |
814 | pseg_end = pseg_start + ssi.nblocks - 1; | 856 | |
857 | nblocks = le32_to_cpu(sum->ss_nblocks); | ||
858 | pseg_end = pseg_start + nblocks - 1; | ||
815 | if (unlikely(pseg_end > seg_end)) { | 859 | if (unlikely(pseg_end > seg_end)) { |
816 | ret = NILFS_SEG_FAIL_CONSISTENCY; | 860 | ret = NILFS_SEG_FAIL_CONSISTENCY; |
817 | goto strayed; | 861 | goto strayed; |
@@ -821,11 +865,13 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
821 | ri->ri_pseg_start = pseg_start; | 865 | ri->ri_pseg_start = pseg_start; |
822 | ri->ri_seq = seg_seq; | 866 | ri->ri_seq = seg_seq; |
823 | ri->ri_segnum = segnum; | 867 | ri->ri_segnum = segnum; |
824 | nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); | 868 | nextnum = nilfs_get_segnum_of_block(nilfs, |
869 | le64_to_cpu(sum->ss_next)); | ||
825 | ri->ri_nextnum = nextnum; | 870 | ri->ri_nextnum = nextnum; |
826 | empty_seg = 0; | 871 | empty_seg = 0; |
827 | 872 | ||
828 | if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) { | 873 | flags = le16_to_cpu(sum->ss_flags); |
874 | if (!(flags & NILFS_SS_SR) && !scan_newer) { | ||
829 | /* This will never happen because a superblock | 875 | /* This will never happen because a superblock |
830 | (last_segment) always points to a pseg | 876 | (last_segment) always points to a pseg |
831 | having a super root. */ | 877 | having a super root. */ |
@@ -836,14 +882,15 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
836 | if (pseg_start == seg_start) { | 882 | if (pseg_start == seg_start) { |
837 | nilfs_get_segment_range(nilfs, nextnum, &b, &end); | 883 | nilfs_get_segment_range(nilfs, nextnum, &b, &end); |
838 | while (b <= end) | 884 | while (b <= end) |
839 | sb_breadahead(sbi->s_super, b++); | 885 | __breadahead(nilfs->ns_bdev, b++, |
886 | nilfs->ns_blocksize); | ||
840 | } | 887 | } |
841 | if (!NILFS_SEG_HAS_SR(&ssi)) { | 888 | if (!(flags & NILFS_SS_SR)) { |
842 | if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { | 889 | if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) { |
843 | ri->ri_lsegs_start = pseg_start; | 890 | ri->ri_lsegs_start = pseg_start; |
844 | ri->ri_lsegs_start_seq = seg_seq; | 891 | ri->ri_lsegs_start_seq = seg_seq; |
845 | } | 892 | } |
846 | if (NILFS_SEG_LOGEND(&ssi)) | 893 | if (flags & NILFS_SS_LOGEND) |
847 | ri->ri_lsegs_end = pseg_start; | 894 | ri->ri_lsegs_end = pseg_start; |
848 | goto try_next_pseg; | 895 | goto try_next_pseg; |
849 | } | 896 | } |
@@ -854,12 +901,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
854 | ri->ri_lsegs_start = ri->ri_lsegs_end = 0; | 901 | ri->ri_lsegs_start = ri->ri_lsegs_end = 0; |
855 | 902 | ||
856 | nilfs_dispose_segment_list(&segments); | 903 | nilfs_dispose_segment_list(&segments); |
857 | nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start) | 904 | sr_pseg_start = pseg_start; |
858 | + ssi.nblocks - seg_start; | 905 | nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start; |
859 | nilfs->ns_seg_seq = seg_seq; | 906 | nilfs->ns_seg_seq = seg_seq; |
860 | nilfs->ns_segnum = segnum; | 907 | nilfs->ns_segnum = segnum; |
861 | nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ | 908 | nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ |
862 | nilfs->ns_ctime = ssi.ctime; | 909 | nilfs->ns_ctime = le64_to_cpu(sum->ss_create); |
863 | nilfs->ns_nextnum = nextnum; | 910 | nilfs->ns_nextnum = nextnum; |
864 | 911 | ||
865 | if (scan_newer) | 912 | if (scan_newer) |
@@ -870,15 +917,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
870 | scan_newer = 1; | 917 | scan_newer = 1; |
871 | } | 918 | } |
872 | 919 | ||
873 | /* reset region for roll-forward */ | ||
874 | pseg_start += ssi.nblocks; | ||
875 | if (pseg_start < seg_end) | ||
876 | continue; | ||
877 | goto feed_segment; | ||
878 | |||
879 | try_next_pseg: | 920 | try_next_pseg: |
880 | /* Standing on a course, or met an inconsistent state */ | 921 | /* Standing on a course, or met an inconsistent state */ |
881 | pseg_start += ssi.nblocks; | 922 | pseg_start += nblocks; |
882 | if (pseg_start < seg_end) | 923 | if (pseg_start < seg_end) |
883 | continue; | 924 | continue; |
884 | goto feed_segment; | 925 | goto feed_segment; |
@@ -909,6 +950,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
909 | 950 | ||
910 | super_root_found: | 951 | super_root_found: |
911 | /* Updating pointers relating to the latest checkpoint */ | 952 | /* Updating pointers relating to the latest checkpoint */ |
953 | brelse(bh_sum); | ||
912 | list_splice_tail(&segments, &ri->ri_used_segments); | 954 | list_splice_tail(&segments, &ri->ri_used_segments); |
913 | nilfs->ns_last_pseg = sr_pseg_start; | 955 | nilfs->ns_last_pseg = sr_pseg_start; |
914 | nilfs->ns_last_seq = nilfs->ns_seg_seq; | 956 | nilfs->ns_last_seq = nilfs->ns_seg_seq; |
@@ -916,6 +958,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, | |||
916 | return 0; | 958 | return 0; |
917 | 959 | ||
918 | failed: | 960 | failed: |
961 | brelse(bh_sum); | ||
919 | nilfs_dispose_segment_list(&segments); | 962 | nilfs_dispose_segment_list(&segments); |
920 | return (ret < 0) ? ret : nilfs_warn_segment_error(ret); | 963 | return (ret < 0) ? ret : nilfs_warn_segment_error(ret); |
921 | } | 964 | } |
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 2e6a2723b8fa..4588fb9e93df 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c | |||
@@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, | |||
508 | * Last BIO is always sent through the following | 508 | * Last BIO is always sent through the following |
509 | * submission. | 509 | * submission. |
510 | */ | 510 | */ |
511 | rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); | 511 | rw |= REQ_SYNC | REQ_UNPLUG; |
512 | res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); | 512 | res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); |
513 | } | 513 | } |
514 | 514 | ||
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 85fbb66455e2..b04f08cc2397 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h | |||
@@ -54,17 +54,6 @@ struct nilfs_segsum_info { | |||
54 | sector_t next; | 54 | sector_t next; |
55 | }; | 55 | }; |
56 | 56 | ||
57 | /* macro for the flags */ | ||
58 | #define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR) | ||
59 | #define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN) | ||
60 | #define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND) | ||
61 | #define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT) | ||
62 | #define NILFS_SEG_SIMPLEX(sum) \ | ||
63 | (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \ | ||
64 | (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) | ||
65 | |||
66 | #define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk) | ||
67 | |||
68 | /** | 57 | /** |
69 | * struct nilfs_segment_buffer - Segment buffer | 58 | * struct nilfs_segment_buffer - Segment buffer |
70 | * @sb_super: back pointer to a superblock struct | 59 | * @sb_super: back pointer to a superblock struct |
@@ -141,6 +130,19 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, | |||
141 | struct buffer_head **); | 130 | struct buffer_head **); |
142 | void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); | 131 | void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); |
143 | 132 | ||
133 | static inline int nilfs_segbuf_simplex(struct nilfs_segment_buffer *segbuf) | ||
134 | { | ||
135 | unsigned int flags = segbuf->sb_sum.flags; | ||
136 | |||
137 | return (flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == | ||
138 | (NILFS_SS_LOGBGN | NILFS_SS_LOGEND); | ||
139 | } | ||
140 | |||
141 | static inline int nilfs_segbuf_empty(struct nilfs_segment_buffer *segbuf) | ||
142 | { | ||
143 | return segbuf->sb_sum.nblocks == segbuf->sb_sum.nsumblk; | ||
144 | } | ||
145 | |||
144 | static inline void | 146 | static inline void |
145 | nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, | 147 | nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, |
146 | struct buffer_head *bh) | 148 | struct buffer_head *bh) |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index c9201649cc49..9fd051a33c4f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -1914,12 +1914,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) | |||
1914 | } | 1914 | } |
1915 | } | 1915 | } |
1916 | 1916 | ||
1917 | if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) { | 1917 | if (!nilfs_segbuf_simplex(segbuf)) { |
1918 | if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) { | 1918 | if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) { |
1919 | set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); | 1919 | set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); |
1920 | sci->sc_lseg_stime = jiffies; | 1920 | sci->sc_lseg_stime = jiffies; |
1921 | } | 1921 | } |
1922 | if (NILFS_SEG_LOGEND(&segbuf->sb_sum)) | 1922 | if (segbuf->sb_sum.flags & NILFS_SS_LOGEND) |
1923 | clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); | 1923 | clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); |
1924 | } | 1924 | } |
1925 | } | 1925 | } |
@@ -1951,7 +1951,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) | |||
1951 | if (update_sr) { | 1951 | if (update_sr) { |
1952 | nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, | 1952 | nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, |
1953 | segbuf->sb_sum.seg_seq, nilfs->ns_cno++); | 1953 | segbuf->sb_sum.seg_seq, nilfs->ns_cno++); |
1954 | set_nilfs_sb_dirty(nilfs); | ||
1955 | 1954 | ||
1956 | clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); | 1955 | clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); |
1957 | clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); | 1956 | clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); |
@@ -2082,7 +2081,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) | |||
2082 | 2081 | ||
2083 | /* Avoid empty segment */ | 2082 | /* Avoid empty segment */ |
2084 | if (sci->sc_stage.scnt == NILFS_ST_DONE && | 2083 | if (sci->sc_stage.scnt == NILFS_ST_DONE && |
2085 | NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { | 2084 | nilfs_segbuf_empty(sci->sc_curseg)) { |
2086 | nilfs_segctor_abort_construction(sci, nilfs, 1); | 2085 | nilfs_segctor_abort_construction(sci, nilfs, 1); |
2087 | goto out; | 2086 | goto out; |
2088 | } | 2087 | } |
@@ -2408,6 +2407,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) | |||
2408 | { | 2407 | { |
2409 | struct nilfs_sb_info *sbi = sci->sc_sbi; | 2408 | struct nilfs_sb_info *sbi = sci->sc_sbi; |
2410 | struct the_nilfs *nilfs = sbi->s_nilfs; | 2409 | struct the_nilfs *nilfs = sbi->s_nilfs; |
2410 | struct nilfs_super_block **sbp; | ||
2411 | int err = 0; | 2411 | int err = 0; |
2412 | 2412 | ||
2413 | nilfs_segctor_accept(sci); | 2413 | nilfs_segctor_accept(sci); |
@@ -2423,8 +2423,13 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) | |||
2423 | if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && | 2423 | if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && |
2424 | nilfs_discontinued(nilfs)) { | 2424 | nilfs_discontinued(nilfs)) { |
2425 | down_write(&nilfs->ns_sem); | 2425 | down_write(&nilfs->ns_sem); |
2426 | err = nilfs_commit_super( | 2426 | err = -EIO; |
2427 | sbi, nilfs_altsb_need_update(nilfs)); | 2427 | sbp = nilfs_prepare_super(sbi, |
2428 | nilfs_sb_will_flip(nilfs)); | ||
2429 | if (likely(sbp)) { | ||
2430 | nilfs_set_log_cursor(sbp[0], nilfs); | ||
2431 | err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); | ||
2432 | } | ||
2428 | up_write(&nilfs->ns_sem); | 2433 | up_write(&nilfs->ns_sem); |
2429 | } | 2434 | } |
2430 | } | 2435 | } |
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 01e20dbb217d..17c487bd8152 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h | |||
@@ -234,13 +234,13 @@ extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); | |||
234 | extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); | 234 | extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); |
235 | 235 | ||
236 | /* recovery.c */ | 236 | /* recovery.c */ |
237 | extern int nilfs_read_super_root_block(struct super_block *, sector_t, | 237 | extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, |
238 | struct buffer_head **, int); | 238 | struct buffer_head **, int); |
239 | extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, | 239 | extern int nilfs_search_super_root(struct the_nilfs *, |
240 | struct nilfs_recovery_info *); | 240 | struct nilfs_recovery_info *); |
241 | extern int nilfs_recover_logical_segments(struct the_nilfs *, | 241 | extern int nilfs_salvage_orphan_logs(struct the_nilfs *, |
242 | struct nilfs_sb_info *, | 242 | struct nilfs_sb_info *, |
243 | struct nilfs_recovery_info *); | 243 | struct nilfs_recovery_info *); |
244 | extern void nilfs_dispose_segment_list(struct list_head *); | 244 | extern void nilfs_dispose_segment_list(struct list_head *); |
245 | 245 | ||
246 | #endif /* _NILFS_SEGMENT_H */ | 246 | #endif /* _NILFS_SEGMENT_H */ |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 414ef68931cf..1fa86b9df73b 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -55,6 +55,8 @@ | |||
55 | #include "nilfs.h" | 55 | #include "nilfs.h" |
56 | #include "mdt.h" | 56 | #include "mdt.h" |
57 | #include "alloc.h" | 57 | #include "alloc.h" |
58 | #include "btree.h" | ||
59 | #include "btnode.h" | ||
58 | #include "page.h" | 60 | #include "page.h" |
59 | #include "cpfile.h" | 61 | #include "cpfile.h" |
60 | #include "ifile.h" | 62 | #include "ifile.h" |
@@ -74,6 +76,25 @@ struct kmem_cache *nilfs_btree_path_cache; | |||
74 | 76 | ||
75 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | 77 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); |
76 | 78 | ||
79 | static void nilfs_set_error(struct nilfs_sb_info *sbi) | ||
80 | { | ||
81 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
82 | struct nilfs_super_block **sbp; | ||
83 | |||
84 | down_write(&nilfs->ns_sem); | ||
85 | if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { | ||
86 | nilfs->ns_mount_state |= NILFS_ERROR_FS; | ||
87 | sbp = nilfs_prepare_super(sbi, 0); | ||
88 | if (likely(sbp)) { | ||
89 | sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); | ||
90 | if (sbp[1]) | ||
91 | sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); | ||
92 | nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); | ||
93 | } | ||
94 | } | ||
95 | up_write(&nilfs->ns_sem); | ||
96 | } | ||
97 | |||
77 | /** | 98 | /** |
78 | * nilfs_error() - report failure condition on a filesystem | 99 | * nilfs_error() - report failure condition on a filesystem |
79 | * | 100 | * |
@@ -99,16 +120,7 @@ void nilfs_error(struct super_block *sb, const char *function, | |||
99 | va_end(args); | 120 | va_end(args); |
100 | 121 | ||
101 | if (!(sb->s_flags & MS_RDONLY)) { | 122 | if (!(sb->s_flags & MS_RDONLY)) { |
102 | struct the_nilfs *nilfs = sbi->s_nilfs; | 123 | nilfs_set_error(sbi); |
103 | |||
104 | down_write(&nilfs->ns_sem); | ||
105 | if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { | ||
106 | nilfs->ns_mount_state |= NILFS_ERROR_FS; | ||
107 | nilfs->ns_sbp[0]->s_state |= | ||
108 | cpu_to_le16(NILFS_ERROR_FS); | ||
109 | nilfs_commit_super(sbi, 1); | ||
110 | } | ||
111 | up_write(&nilfs->ns_sem); | ||
112 | 124 | ||
113 | if (nilfs_test_opt(sbi, ERRORS_RO)) { | 125 | if (nilfs_test_opt(sbi, ERRORS_RO)) { |
114 | printk(KERN_CRIT "Remounting filesystem read-only\n"); | 126 | printk(KERN_CRIT "Remounting filesystem read-only\n"); |
@@ -159,24 +171,7 @@ void nilfs_destroy_inode(struct inode *inode) | |||
159 | kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); | 171 | kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); |
160 | } | 172 | } |
161 | 173 | ||
162 | static void nilfs_clear_inode(struct inode *inode) | 174 | static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) |
163 | { | ||
164 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
165 | |||
166 | /* | ||
167 | * Free resources allocated in nilfs_read_inode(), here. | ||
168 | */ | ||
169 | BUG_ON(!list_empty(&ii->i_dirty)); | ||
170 | brelse(ii->i_bh); | ||
171 | ii->i_bh = NULL; | ||
172 | |||
173 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) | ||
174 | nilfs_bmap_clear(ii->i_bmap); | ||
175 | |||
176 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); | ||
177 | } | ||
178 | |||
179 | static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) | ||
180 | { | 175 | { |
181 | struct the_nilfs *nilfs = sbi->s_nilfs; | 176 | struct the_nilfs *nilfs = sbi->s_nilfs; |
182 | int err; | 177 | int err; |
@@ -202,12 +197,20 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) | |||
202 | printk(KERN_ERR | 197 | printk(KERN_ERR |
203 | "NILFS: unable to write superblock (err=%d)\n", err); | 198 | "NILFS: unable to write superblock (err=%d)\n", err); |
204 | if (err == -EIO && nilfs->ns_sbh[1]) { | 199 | if (err == -EIO && nilfs->ns_sbh[1]) { |
200 | /* | ||
201 | * sbp[0] points to newer log than sbp[1], | ||
202 | * so copy sbp[0] to sbp[1] to take over sbp[0]. | ||
203 | */ | ||
204 | memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0], | ||
205 | nilfs->ns_sbsize); | ||
205 | nilfs_fall_back_super_block(nilfs); | 206 | nilfs_fall_back_super_block(nilfs); |
206 | goto retry; | 207 | goto retry; |
207 | } | 208 | } |
208 | } else { | 209 | } else { |
209 | struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; | 210 | struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; |
210 | 211 | ||
212 | nilfs->ns_sbwcount++; | ||
213 | |||
211 | /* | 214 | /* |
212 | * The latest segment becomes trailable from the position | 215 | * The latest segment becomes trailable from the position |
213 | * written in superblock. | 216 | * written in superblock. |
@@ -216,66 +219,122 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) | |||
216 | 219 | ||
217 | /* update GC protection for recent segments */ | 220 | /* update GC protection for recent segments */ |
218 | if (nilfs->ns_sbh[1]) { | 221 | if (nilfs->ns_sbh[1]) { |
219 | sbp = NULL; | 222 | if (flag == NILFS_SB_COMMIT_ALL) { |
220 | if (dupsb) { | ||
221 | set_buffer_dirty(nilfs->ns_sbh[1]); | 223 | set_buffer_dirty(nilfs->ns_sbh[1]); |
222 | if (!sync_dirty_buffer(nilfs->ns_sbh[1])) | 224 | if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0) |
223 | sbp = nilfs->ns_sbp[1]; | 225 | goto out; |
224 | } | 226 | } |
227 | if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) < | ||
228 | le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno)) | ||
229 | sbp = nilfs->ns_sbp[1]; | ||
225 | } | 230 | } |
226 | if (sbp) { | ||
227 | spin_lock(&nilfs->ns_last_segment_lock); | ||
228 | nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); | ||
229 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
230 | } | ||
231 | } | ||
232 | 231 | ||
232 | spin_lock(&nilfs->ns_last_segment_lock); | ||
233 | nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); | ||
234 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
235 | } | ||
236 | out: | ||
233 | return err; | 237 | return err; |
234 | } | 238 | } |
235 | 239 | ||
236 | int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) | 240 | void nilfs_set_log_cursor(struct nilfs_super_block *sbp, |
241 | struct the_nilfs *nilfs) | ||
242 | { | ||
243 | sector_t nfreeblocks; | ||
244 | |||
245 | /* nilfs->ns_sem must be locked by the caller. */ | ||
246 | nilfs_count_free_blocks(nilfs, &nfreeblocks); | ||
247 | sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); | ||
248 | |||
249 | spin_lock(&nilfs->ns_last_segment_lock); | ||
250 | sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); | ||
251 | sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); | ||
252 | sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); | ||
253 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
254 | } | ||
255 | |||
256 | struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, | ||
257 | int flip) | ||
237 | { | 258 | { |
238 | struct the_nilfs *nilfs = sbi->s_nilfs; | 259 | struct the_nilfs *nilfs = sbi->s_nilfs; |
239 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | 260 | struct nilfs_super_block **sbp = nilfs->ns_sbp; |
240 | sector_t nfreeblocks; | ||
241 | time_t t; | ||
242 | int err; | ||
243 | 261 | ||
244 | /* nilfs->sem must be locked by the caller. */ | 262 | /* nilfs->ns_sem must be locked by the caller. */ |
245 | if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { | 263 | if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { |
246 | if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) | 264 | if (sbp[1] && |
247 | nilfs_swap_super_block(nilfs); | 265 | sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { |
248 | else { | 266 | memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); |
267 | } else { | ||
249 | printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", | 268 | printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", |
250 | sbi->s_super->s_id); | 269 | sbi->s_super->s_id); |
251 | return -EIO; | 270 | return NULL; |
252 | } | 271 | } |
272 | } else if (sbp[1] && | ||
273 | sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { | ||
274 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | ||
253 | } | 275 | } |
254 | err = nilfs_count_free_blocks(nilfs, &nfreeblocks); | ||
255 | if (unlikely(err)) { | ||
256 | printk(KERN_ERR "NILFS: failed to count free blocks\n"); | ||
257 | return err; | ||
258 | } | ||
259 | spin_lock(&nilfs->ns_last_segment_lock); | ||
260 | sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); | ||
261 | sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); | ||
262 | sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); | ||
263 | spin_unlock(&nilfs->ns_last_segment_lock); | ||
264 | 276 | ||
277 | if (flip && sbp[1]) | ||
278 | nilfs_swap_super_block(nilfs); | ||
279 | |||
280 | return sbp; | ||
281 | } | ||
282 | |||
283 | int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) | ||
284 | { | ||
285 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
286 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | ||
287 | time_t t; | ||
288 | |||
289 | /* nilfs->ns_sem must be locked by the caller. */ | ||
265 | t = get_seconds(); | 290 | t = get_seconds(); |
266 | nilfs->ns_sbwtime[0] = t; | 291 | nilfs->ns_sbwtime = t; |
267 | sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); | ||
268 | sbp[0]->s_wtime = cpu_to_le64(t); | 292 | sbp[0]->s_wtime = cpu_to_le64(t); |
269 | sbp[0]->s_sum = 0; | 293 | sbp[0]->s_sum = 0; |
270 | sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, | 294 | sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, |
271 | (unsigned char *)sbp[0], | 295 | (unsigned char *)sbp[0], |
272 | nilfs->ns_sbsize)); | 296 | nilfs->ns_sbsize)); |
273 | if (dupsb && sbp[1]) { | 297 | if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) { |
274 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | 298 | sbp[1]->s_wtime = sbp[0]->s_wtime; |
275 | nilfs->ns_sbwtime[1] = t; | 299 | sbp[1]->s_sum = 0; |
300 | sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, | ||
301 | (unsigned char *)sbp[1], | ||
302 | nilfs->ns_sbsize)); | ||
276 | } | 303 | } |
277 | clear_nilfs_sb_dirty(nilfs); | 304 | clear_nilfs_sb_dirty(nilfs); |
278 | return nilfs_sync_super(sbi, dupsb); | 305 | return nilfs_sync_super(sbi, flag); |
306 | } | ||
307 | |||
308 | /** | ||
309 | * nilfs_cleanup_super() - write filesystem state for cleanup | ||
310 | * @sbi: nilfs_sb_info to be unmounted or degraded to read-only | ||
311 | * | ||
312 | * This function restores state flags in the on-disk super block. | ||
313 | * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the | ||
314 | * filesystem was not clean previously. | ||
315 | */ | ||
316 | int nilfs_cleanup_super(struct nilfs_sb_info *sbi) | ||
317 | { | ||
318 | struct nilfs_super_block **sbp; | ||
319 | int flag = NILFS_SB_COMMIT; | ||
320 | int ret = -EIO; | ||
321 | |||
322 | sbp = nilfs_prepare_super(sbi, 0); | ||
323 | if (sbp) { | ||
324 | sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state); | ||
325 | nilfs_set_log_cursor(sbp[0], sbi->s_nilfs); | ||
326 | if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { | ||
327 | /* | ||
328 | * make the "clean" flag also to the opposite | ||
329 | * super block if both super blocks point to | ||
330 | * the same checkpoint. | ||
331 | */ | ||
332 | sbp[1]->s_state = sbp[0]->s_state; | ||
333 | flag = NILFS_SB_COMMIT_ALL; | ||
334 | } | ||
335 | ret = nilfs_commit_super(sbi, flag); | ||
336 | } | ||
337 | return ret; | ||
279 | } | 338 | } |
280 | 339 | ||
281 | static void nilfs_put_super(struct super_block *sb) | 340 | static void nilfs_put_super(struct super_block *sb) |
@@ -289,8 +348,7 @@ static void nilfs_put_super(struct super_block *sb) | |||
289 | 348 | ||
290 | if (!(sb->s_flags & MS_RDONLY)) { | 349 | if (!(sb->s_flags & MS_RDONLY)) { |
291 | down_write(&nilfs->ns_sem); | 350 | down_write(&nilfs->ns_sem); |
292 | nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); | 351 | nilfs_cleanup_super(sbi); |
293 | nilfs_commit_super(sbi, 1); | ||
294 | up_write(&nilfs->ns_sem); | 352 | up_write(&nilfs->ns_sem); |
295 | } | 353 | } |
296 | down_write(&nilfs->ns_super_sem); | 354 | down_write(&nilfs->ns_super_sem); |
@@ -311,6 +369,7 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) | |||
311 | { | 369 | { |
312 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 370 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
313 | struct the_nilfs *nilfs = sbi->s_nilfs; | 371 | struct the_nilfs *nilfs = sbi->s_nilfs; |
372 | struct nilfs_super_block **sbp; | ||
314 | int err = 0; | 373 | int err = 0; |
315 | 374 | ||
316 | /* This function is called when super block should be written back */ | 375 | /* This function is called when super block should be written back */ |
@@ -318,8 +377,13 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) | |||
318 | err = nilfs_construct_segment(sb); | 377 | err = nilfs_construct_segment(sb); |
319 | 378 | ||
320 | down_write(&nilfs->ns_sem); | 379 | down_write(&nilfs->ns_sem); |
321 | if (nilfs_sb_dirty(nilfs)) | 380 | if (nilfs_sb_dirty(nilfs)) { |
322 | nilfs_commit_super(sbi, 1); | 381 | sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs)); |
382 | if (likely(sbp)) { | ||
383 | nilfs_set_log_cursor(sbp[0], nilfs); | ||
384 | nilfs_commit_super(sbi, NILFS_SB_COMMIT); | ||
385 | } | ||
386 | } | ||
323 | up_write(&nilfs->ns_sem); | 387 | up_write(&nilfs->ns_sem); |
324 | 388 | ||
325 | return err; | 389 | return err; |
@@ -442,20 +506,20 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
442 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 506 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
443 | 507 | ||
444 | if (!nilfs_test_opt(sbi, BARRIER)) | 508 | if (!nilfs_test_opt(sbi, BARRIER)) |
445 | seq_printf(seq, ",nobarrier"); | 509 | seq_puts(seq, ",nobarrier"); |
446 | if (nilfs_test_opt(sbi, SNAPSHOT)) | 510 | if (nilfs_test_opt(sbi, SNAPSHOT)) |
447 | seq_printf(seq, ",cp=%llu", | 511 | seq_printf(seq, ",cp=%llu", |
448 | (unsigned long long int)sbi->s_snapshot_cno); | 512 | (unsigned long long int)sbi->s_snapshot_cno); |
449 | if (nilfs_test_opt(sbi, ERRORS_PANIC)) | 513 | if (nilfs_test_opt(sbi, ERRORS_PANIC)) |
450 | seq_printf(seq, ",errors=panic"); | 514 | seq_puts(seq, ",errors=panic"); |
451 | if (nilfs_test_opt(sbi, ERRORS_CONT)) | 515 | if (nilfs_test_opt(sbi, ERRORS_CONT)) |
452 | seq_printf(seq, ",errors=continue"); | 516 | seq_puts(seq, ",errors=continue"); |
453 | if (nilfs_test_opt(sbi, STRICT_ORDER)) | 517 | if (nilfs_test_opt(sbi, STRICT_ORDER)) |
454 | seq_printf(seq, ",order=strict"); | 518 | seq_puts(seq, ",order=strict"); |
455 | if (nilfs_test_opt(sbi, NORECOVERY)) | 519 | if (nilfs_test_opt(sbi, NORECOVERY)) |
456 | seq_printf(seq, ",norecovery"); | 520 | seq_puts(seq, ",norecovery"); |
457 | if (nilfs_test_opt(sbi, DISCARD)) | 521 | if (nilfs_test_opt(sbi, DISCARD)) |
458 | seq_printf(seq, ",discard"); | 522 | seq_puts(seq, ",discard"); |
459 | 523 | ||
460 | return 0; | 524 | return 0; |
461 | } | 525 | } |
@@ -467,7 +531,7 @@ static const struct super_operations nilfs_sops = { | |||
467 | /* .write_inode = nilfs_write_inode, */ | 531 | /* .write_inode = nilfs_write_inode, */ |
468 | /* .put_inode = nilfs_put_inode, */ | 532 | /* .put_inode = nilfs_put_inode, */ |
469 | /* .drop_inode = nilfs_drop_inode, */ | 533 | /* .drop_inode = nilfs_drop_inode, */ |
470 | .delete_inode = nilfs_delete_inode, | 534 | .evict_inode = nilfs_evict_inode, |
471 | .put_super = nilfs_put_super, | 535 | .put_super = nilfs_put_super, |
472 | /* .write_super = nilfs_write_super, */ | 536 | /* .write_super = nilfs_write_super, */ |
473 | .sync_fs = nilfs_sync_fs, | 537 | .sync_fs = nilfs_sync_fs, |
@@ -475,7 +539,6 @@ static const struct super_operations nilfs_sops = { | |||
475 | /* .unlockfs */ | 539 | /* .unlockfs */ |
476 | .statfs = nilfs_statfs, | 540 | .statfs = nilfs_statfs, |
477 | .remount_fs = nilfs_remount, | 541 | .remount_fs = nilfs_remount, |
478 | .clear_inode = nilfs_clear_inode, | ||
479 | /* .umount_begin */ | 542 | /* .umount_begin */ |
480 | .show_options = nilfs_show_options | 543 | .show_options = nilfs_show_options |
481 | }; | 544 | }; |
@@ -524,23 +587,25 @@ static const struct export_operations nilfs_export_ops = { | |||
524 | 587 | ||
525 | enum { | 588 | enum { |
526 | Opt_err_cont, Opt_err_panic, Opt_err_ro, | 589 | Opt_err_cont, Opt_err_panic, Opt_err_ro, |
527 | Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, | 590 | Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, |
528 | Opt_discard, Opt_err, | 591 | Opt_discard, Opt_nodiscard, Opt_err, |
529 | }; | 592 | }; |
530 | 593 | ||
531 | static match_table_t tokens = { | 594 | static match_table_t tokens = { |
532 | {Opt_err_cont, "errors=continue"}, | 595 | {Opt_err_cont, "errors=continue"}, |
533 | {Opt_err_panic, "errors=panic"}, | 596 | {Opt_err_panic, "errors=panic"}, |
534 | {Opt_err_ro, "errors=remount-ro"}, | 597 | {Opt_err_ro, "errors=remount-ro"}, |
598 | {Opt_barrier, "barrier"}, | ||
535 | {Opt_nobarrier, "nobarrier"}, | 599 | {Opt_nobarrier, "nobarrier"}, |
536 | {Opt_snapshot, "cp=%u"}, | 600 | {Opt_snapshot, "cp=%u"}, |
537 | {Opt_order, "order=%s"}, | 601 | {Opt_order, "order=%s"}, |
538 | {Opt_norecovery, "norecovery"}, | 602 | {Opt_norecovery, "norecovery"}, |
539 | {Opt_discard, "discard"}, | 603 | {Opt_discard, "discard"}, |
604 | {Opt_nodiscard, "nodiscard"}, | ||
540 | {Opt_err, NULL} | 605 | {Opt_err, NULL} |
541 | }; | 606 | }; |
542 | 607 | ||
543 | static int parse_options(char *options, struct super_block *sb) | 608 | static int parse_options(char *options, struct super_block *sb, int is_remount) |
544 | { | 609 | { |
545 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 610 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
546 | char *p; | 611 | char *p; |
@@ -557,6 +622,9 @@ static int parse_options(char *options, struct super_block *sb) | |||
557 | 622 | ||
558 | token = match_token(p, tokens, args); | 623 | token = match_token(p, tokens, args); |
559 | switch (token) { | 624 | switch (token) { |
625 | case Opt_barrier: | ||
626 | nilfs_set_opt(sbi, BARRIER); | ||
627 | break; | ||
560 | case Opt_nobarrier: | 628 | case Opt_nobarrier: |
561 | nilfs_clear_opt(sbi, BARRIER); | 629 | nilfs_clear_opt(sbi, BARRIER); |
562 | break; | 630 | break; |
@@ -582,8 +650,26 @@ static int parse_options(char *options, struct super_block *sb) | |||
582 | case Opt_snapshot: | 650 | case Opt_snapshot: |
583 | if (match_int(&args[0], &option) || option <= 0) | 651 | if (match_int(&args[0], &option) || option <= 0) |
584 | return 0; | 652 | return 0; |
585 | if (!(sb->s_flags & MS_RDONLY)) | 653 | if (is_remount) { |
654 | if (!nilfs_test_opt(sbi, SNAPSHOT)) { | ||
655 | printk(KERN_ERR | ||
656 | "NILFS: cannot change regular " | ||
657 | "mount to snapshot.\n"); | ||
658 | return 0; | ||
659 | } else if (option != sbi->s_snapshot_cno) { | ||
660 | printk(KERN_ERR | ||
661 | "NILFS: cannot remount to a " | ||
662 | "different snapshot.\n"); | ||
663 | return 0; | ||
664 | } | ||
665 | break; | ||
666 | } | ||
667 | if (!(sb->s_flags & MS_RDONLY)) { | ||
668 | printk(KERN_ERR "NILFS: cannot mount snapshot " | ||
669 | "read/write. A read-only option is " | ||
670 | "required.\n"); | ||
586 | return 0; | 671 | return 0; |
672 | } | ||
587 | sbi->s_snapshot_cno = option; | 673 | sbi->s_snapshot_cno = option; |
588 | nilfs_set_opt(sbi, SNAPSHOT); | 674 | nilfs_set_opt(sbi, SNAPSHOT); |
589 | break; | 675 | break; |
@@ -593,6 +679,9 @@ static int parse_options(char *options, struct super_block *sb) | |||
593 | case Opt_discard: | 679 | case Opt_discard: |
594 | nilfs_set_opt(sbi, DISCARD); | 680 | nilfs_set_opt(sbi, DISCARD); |
595 | break; | 681 | break; |
682 | case Opt_nodiscard: | ||
683 | nilfs_clear_opt(sbi, DISCARD); | ||
684 | break; | ||
596 | default: | 685 | default: |
597 | printk(KERN_ERR | 686 | printk(KERN_ERR |
598 | "NILFS: Unrecognized mount option \"%s\"\n", p); | 687 | "NILFS: Unrecognized mount option \"%s\"\n", p); |
@@ -613,11 +702,18 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi, | |||
613 | static int nilfs_setup_super(struct nilfs_sb_info *sbi) | 702 | static int nilfs_setup_super(struct nilfs_sb_info *sbi) |
614 | { | 703 | { |
615 | struct the_nilfs *nilfs = sbi->s_nilfs; | 704 | struct the_nilfs *nilfs = sbi->s_nilfs; |
616 | struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; | 705 | struct nilfs_super_block **sbp; |
617 | int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); | 706 | int max_mnt_count; |
618 | int mnt_count = le16_to_cpu(sbp->s_mnt_count); | 707 | int mnt_count; |
708 | |||
709 | /* nilfs->ns_sem must be locked by the caller. */ | ||
710 | sbp = nilfs_prepare_super(sbi, 0); | ||
711 | if (!sbp) | ||
712 | return -EIO; | ||
713 | |||
714 | max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); | ||
715 | mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); | ||
619 | 716 | ||
620 | /* nilfs->sem must be locked by the caller. */ | ||
621 | if (nilfs->ns_mount_state & NILFS_ERROR_FS) { | 717 | if (nilfs->ns_mount_state & NILFS_ERROR_FS) { |
622 | printk(KERN_WARNING | 718 | printk(KERN_WARNING |
623 | "NILFS warning: mounting fs with errors\n"); | 719 | "NILFS warning: mounting fs with errors\n"); |
@@ -628,12 +724,15 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) | |||
628 | #endif | 724 | #endif |
629 | } | 725 | } |
630 | if (!max_mnt_count) | 726 | if (!max_mnt_count) |
631 | sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); | 727 | sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); |
632 | 728 | ||
633 | sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); | 729 | sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); |
634 | sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); | 730 | sbp[0]->s_state = |
635 | sbp->s_mtime = cpu_to_le64(get_seconds()); | 731 | cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); |
636 | return nilfs_commit_super(sbi, 1); | 732 | sbp[0]->s_mtime = cpu_to_le64(get_seconds()); |
733 | /* synchronize sbp[1] with sbp[0] */ | ||
734 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | ||
735 | return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); | ||
637 | } | 736 | } |
638 | 737 | ||
639 | struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, | 738 | struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, |
@@ -670,7 +769,31 @@ int nilfs_store_magic_and_option(struct super_block *sb, | |||
670 | sbi->s_interval = le32_to_cpu(sbp->s_c_interval); | 769 | sbi->s_interval = le32_to_cpu(sbp->s_c_interval); |
671 | sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); | 770 | sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); |
672 | 771 | ||
673 | return !parse_options(data, sb) ? -EINVAL : 0 ; | 772 | return !parse_options(data, sb, 0) ? -EINVAL : 0 ; |
773 | } | ||
774 | |||
775 | int nilfs_check_feature_compatibility(struct super_block *sb, | ||
776 | struct nilfs_super_block *sbp) | ||
777 | { | ||
778 | __u64 features; | ||
779 | |||
780 | features = le64_to_cpu(sbp->s_feature_incompat) & | ||
781 | ~NILFS_FEATURE_INCOMPAT_SUPP; | ||
782 | if (features) { | ||
783 | printk(KERN_ERR "NILFS: couldn't mount because of unsupported " | ||
784 | "optional features (%llx)\n", | ||
785 | (unsigned long long)features); | ||
786 | return -EINVAL; | ||
787 | } | ||
788 | features = le64_to_cpu(sbp->s_feature_compat_ro) & | ||
789 | ~NILFS_FEATURE_COMPAT_RO_SUPP; | ||
790 | if (!(sb->s_flags & MS_RDONLY) && features) { | ||
791 | printk(KERN_ERR "NILFS: couldn't mount RDWR because of " | ||
792 | "unsupported optional features (%llx)\n", | ||
793 | (unsigned long long)features); | ||
794 | return -EINVAL; | ||
795 | } | ||
796 | return 0; | ||
674 | } | 797 | } |
675 | 798 | ||
676 | /** | 799 | /** |
@@ -819,7 +942,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
819 | static int nilfs_remount(struct super_block *sb, int *flags, char *data) | 942 | static int nilfs_remount(struct super_block *sb, int *flags, char *data) |
820 | { | 943 | { |
821 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 944 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
822 | struct nilfs_super_block *sbp; | ||
823 | struct the_nilfs *nilfs = sbi->s_nilfs; | 945 | struct the_nilfs *nilfs = sbi->s_nilfs; |
824 | unsigned long old_sb_flags; | 946 | unsigned long old_sb_flags; |
825 | struct nilfs_mount_options old_opts; | 947 | struct nilfs_mount_options old_opts; |
@@ -833,32 +955,17 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
833 | old_opts.snapshot_cno = sbi->s_snapshot_cno; | 955 | old_opts.snapshot_cno = sbi->s_snapshot_cno; |
834 | was_snapshot = nilfs_test_opt(sbi, SNAPSHOT); | 956 | was_snapshot = nilfs_test_opt(sbi, SNAPSHOT); |
835 | 957 | ||
836 | if (!parse_options(data, sb)) { | 958 | if (!parse_options(data, sb, 1)) { |
837 | err = -EINVAL; | 959 | err = -EINVAL; |
838 | goto restore_opts; | 960 | goto restore_opts; |
839 | } | 961 | } |
840 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL); | 962 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL); |
841 | 963 | ||
842 | err = -EINVAL; | 964 | err = -EINVAL; |
843 | if (was_snapshot) { | 965 | if (was_snapshot && !(*flags & MS_RDONLY)) { |
844 | if (!(*flags & MS_RDONLY)) { | 966 | printk(KERN_ERR "NILFS (device %s): cannot remount snapshot " |
845 | printk(KERN_ERR "NILFS (device %s): cannot remount " | 967 | "read/write.\n", sb->s_id); |
846 | "snapshot read/write.\n", | 968 | goto restore_opts; |
847 | sb->s_id); | ||
848 | goto restore_opts; | ||
849 | } else if (sbi->s_snapshot_cno != old_opts.snapshot_cno) { | ||
850 | printk(KERN_ERR "NILFS (device %s): cannot " | ||
851 | "remount to a different snapshot.\n", | ||
852 | sb->s_id); | ||
853 | goto restore_opts; | ||
854 | } | ||
855 | } else { | ||
856 | if (nilfs_test_opt(sbi, SNAPSHOT)) { | ||
857 | printk(KERN_ERR "NILFS (device %s): cannot change " | ||
858 | "a regular mount to a snapshot.\n", | ||
859 | sb->s_id); | ||
860 | goto restore_opts; | ||
861 | } | ||
862 | } | 969 | } |
863 | 970 | ||
864 | if (!nilfs_valid_fs(nilfs)) { | 971 | if (!nilfs_valid_fs(nilfs)) { |
@@ -880,19 +987,29 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
880 | * the RDONLY flag and then mark the partition as valid again. | 987 | * the RDONLY flag and then mark the partition as valid again. |
881 | */ | 988 | */ |
882 | down_write(&nilfs->ns_sem); | 989 | down_write(&nilfs->ns_sem); |
883 | sbp = nilfs->ns_sbp[0]; | 990 | nilfs_cleanup_super(sbi); |
884 | if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && | ||
885 | (nilfs->ns_mount_state & NILFS_VALID_FS)) | ||
886 | sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); | ||
887 | sbp->s_mtime = cpu_to_le64(get_seconds()); | ||
888 | nilfs_commit_super(sbi, 1); | ||
889 | up_write(&nilfs->ns_sem); | 991 | up_write(&nilfs->ns_sem); |
890 | } else { | 992 | } else { |
993 | __u64 features; | ||
994 | |||
891 | /* | 995 | /* |
892 | * Mounting a RDONLY partition read-write, so reread and | 996 | * Mounting a RDONLY partition read-write, so reread and |
893 | * store the current valid flag. (It may have been changed | 997 | * store the current valid flag. (It may have been changed |
894 | * by fsck since we originally mounted the partition.) | 998 | * by fsck since we originally mounted the partition.) |
895 | */ | 999 | */ |
1000 | down_read(&nilfs->ns_sem); | ||
1001 | features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & | ||
1002 | ~NILFS_FEATURE_COMPAT_RO_SUPP; | ||
1003 | up_read(&nilfs->ns_sem); | ||
1004 | if (features) { | ||
1005 | printk(KERN_WARNING "NILFS (device %s): couldn't " | ||
1006 | "remount RDWR because of unsupported optional " | ||
1007 | "features (%llx)\n", | ||
1008 | sb->s_id, (unsigned long long)features); | ||
1009 | err = -EROFS; | ||
1010 | goto restore_opts; | ||
1011 | } | ||
1012 | |||
896 | sb->s_flags &= ~MS_RDONLY; | 1013 | sb->s_flags &= ~MS_RDONLY; |
897 | 1014 | ||
898 | err = nilfs_attach_segment_constructor(sbi); | 1015 | err = nilfs_attach_segment_constructor(sbi); |
@@ -1119,7 +1236,7 @@ static void nilfs_inode_init_once(void *obj) | |||
1119 | init_rwsem(&ii->xattr_sem); | 1236 | init_rwsem(&ii->xattr_sem); |
1120 | #endif | 1237 | #endif |
1121 | nilfs_btnode_cache_init_once(&ii->i_btnode_cache); | 1238 | nilfs_btnode_cache_init_once(&ii->i_btnode_cache); |
1122 | ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; | 1239 | ii->i_bmap = &ii->i_bmap_data; |
1123 | inode_init_once(&ii->vfs_inode); | 1240 | inode_init_once(&ii->vfs_inode); |
1124 | } | 1241 | } |
1125 | 1242 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 8c1097327abc..37de1f062d81 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -38,6 +38,8 @@ | |||
38 | static LIST_HEAD(nilfs_objects); | 38 | static LIST_HEAD(nilfs_objects); |
39 | static DEFINE_SPINLOCK(nilfs_lock); | 39 | static DEFINE_SPINLOCK(nilfs_lock); |
40 | 40 | ||
41 | static int nilfs_valid_sb(struct nilfs_super_block *sbp); | ||
42 | |||
41 | void nilfs_set_last_segment(struct the_nilfs *nilfs, | 43 | void nilfs_set_last_segment(struct the_nilfs *nilfs, |
42 | sector_t start_blocknr, u64 seq, __u64 cno) | 44 | sector_t start_blocknr, u64 seq, __u64 cno) |
43 | { | 45 | { |
@@ -45,6 +47,16 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, | |||
45 | nilfs->ns_last_pseg = start_blocknr; | 47 | nilfs->ns_last_pseg = start_blocknr; |
46 | nilfs->ns_last_seq = seq; | 48 | nilfs->ns_last_seq = seq; |
47 | nilfs->ns_last_cno = cno; | 49 | nilfs->ns_last_cno = cno; |
50 | |||
51 | if (!nilfs_sb_dirty(nilfs)) { | ||
52 | if (nilfs->ns_prev_seq == nilfs->ns_last_seq) | ||
53 | goto stay_cursor; | ||
54 | |||
55 | set_nilfs_sb_dirty(nilfs); | ||
56 | } | ||
57 | nilfs->ns_prev_seq = nilfs->ns_last_seq; | ||
58 | |||
59 | stay_cursor: | ||
48 | spin_unlock(&nilfs->ns_last_segment_lock); | 60 | spin_unlock(&nilfs->ns_last_segment_lock); |
49 | } | 61 | } |
50 | 62 | ||
@@ -159,8 +171,7 @@ void put_nilfs(struct the_nilfs *nilfs) | |||
159 | kfree(nilfs); | 171 | kfree(nilfs); |
160 | } | 172 | } |
161 | 173 | ||
162 | static int nilfs_load_super_root(struct the_nilfs *nilfs, | 174 | static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) |
163 | struct nilfs_sb_info *sbi, sector_t sr_block) | ||
164 | { | 175 | { |
165 | struct buffer_head *bh_sr; | 176 | struct buffer_head *bh_sr; |
166 | struct nilfs_super_root *raw_sr; | 177 | struct nilfs_super_root *raw_sr; |
@@ -169,7 +180,7 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, | |||
169 | unsigned inode_size; | 180 | unsigned inode_size; |
170 | int err; | 181 | int err; |
171 | 182 | ||
172 | err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1); | 183 | err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1); |
173 | if (unlikely(err)) | 184 | if (unlikely(err)) |
174 | return err; | 185 | return err; |
175 | 186 | ||
@@ -248,6 +259,37 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) | |||
248 | } | 259 | } |
249 | 260 | ||
250 | /** | 261 | /** |
262 | * nilfs_store_log_cursor - load log cursor from a super block | ||
263 | * @nilfs: nilfs object | ||
264 | * @sbp: buffer storing super block to be read | ||
265 | * | ||
266 | * nilfs_store_log_cursor() reads the last position of the log | ||
267 | * containing a super root from a given super block, and initializes | ||
268 | * relevant information on the nilfs object preparatory for log | ||
269 | * scanning and recovery. | ||
270 | */ | ||
271 | static int nilfs_store_log_cursor(struct the_nilfs *nilfs, | ||
272 | struct nilfs_super_block *sbp) | ||
273 | { | ||
274 | int ret = 0; | ||
275 | |||
276 | nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); | ||
277 | nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); | ||
278 | nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); | ||
279 | |||
280 | nilfs->ns_prev_seq = nilfs->ns_last_seq; | ||
281 | nilfs->ns_seg_seq = nilfs->ns_last_seq; | ||
282 | nilfs->ns_segnum = | ||
283 | nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); | ||
284 | nilfs->ns_cno = nilfs->ns_last_cno + 1; | ||
285 | if (nilfs->ns_segnum >= nilfs->ns_nsegments) { | ||
286 | printk(KERN_ERR "NILFS invalid last segment number.\n"); | ||
287 | ret = -EINVAL; | ||
288 | } | ||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | /** | ||
251 | * load_nilfs - load and recover the nilfs | 293 | * load_nilfs - load and recover the nilfs |
252 | * @nilfs: the_nilfs structure to be released | 294 | * @nilfs: the_nilfs structure to be released |
253 | * @sbi: nilfs_sb_info used to recover past segment | 295 | * @sbi: nilfs_sb_info used to recover past segment |
@@ -285,13 +327,55 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
285 | 327 | ||
286 | nilfs_init_recovery_info(&ri); | 328 | nilfs_init_recovery_info(&ri); |
287 | 329 | ||
288 | err = nilfs_search_super_root(nilfs, sbi, &ri); | 330 | err = nilfs_search_super_root(nilfs, &ri); |
289 | if (unlikely(err)) { | 331 | if (unlikely(err)) { |
290 | printk(KERN_ERR "NILFS: error searching super root.\n"); | 332 | struct nilfs_super_block **sbp = nilfs->ns_sbp; |
291 | goto failed; | 333 | int blocksize; |
334 | |||
335 | if (err != -EINVAL) | ||
336 | goto scan_error; | ||
337 | |||
338 | if (!nilfs_valid_sb(sbp[1])) { | ||
339 | printk(KERN_WARNING | ||
340 | "NILFS warning: unable to fall back to spare" | ||
341 | "super block\n"); | ||
342 | goto scan_error; | ||
343 | } | ||
344 | printk(KERN_INFO | ||
345 | "NILFS: try rollback from an earlier position\n"); | ||
346 | |||
347 | /* | ||
348 | * restore super block with its spare and reconfigure | ||
349 | * relevant states of the nilfs object. | ||
350 | */ | ||
351 | memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); | ||
352 | nilfs->ns_crc_seed = le32_to_cpu(sbp[0]->s_crc_seed); | ||
353 | nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); | ||
354 | |||
355 | /* verify consistency between two super blocks */ | ||
356 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); | ||
357 | if (blocksize != nilfs->ns_blocksize) { | ||
358 | printk(KERN_WARNING | ||
359 | "NILFS warning: blocksize differs between " | ||
360 | "two super blocks (%d != %d)\n", | ||
361 | blocksize, nilfs->ns_blocksize); | ||
362 | goto scan_error; | ||
363 | } | ||
364 | |||
365 | err = nilfs_store_log_cursor(nilfs, sbp[0]); | ||
366 | if (err) | ||
367 | goto scan_error; | ||
368 | |||
369 | /* drop clean flag to allow roll-forward and recovery */ | ||
370 | nilfs->ns_mount_state &= ~NILFS_VALID_FS; | ||
371 | valid_fs = 0; | ||
372 | |||
373 | err = nilfs_search_super_root(nilfs, &ri); | ||
374 | if (err) | ||
375 | goto scan_error; | ||
292 | } | 376 | } |
293 | 377 | ||
294 | err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); | 378 | err = nilfs_load_super_root(nilfs, ri.ri_super_root); |
295 | if (unlikely(err)) { | 379 | if (unlikely(err)) { |
296 | printk(KERN_ERR "NILFS: error loading super root.\n"); | 380 | printk(KERN_ERR "NILFS: error loading super root.\n"); |
297 | goto failed; | 381 | goto failed; |
@@ -301,11 +385,23 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
301 | goto skip_recovery; | 385 | goto skip_recovery; |
302 | 386 | ||
303 | if (s_flags & MS_RDONLY) { | 387 | if (s_flags & MS_RDONLY) { |
388 | __u64 features; | ||
389 | |||
304 | if (nilfs_test_opt(sbi, NORECOVERY)) { | 390 | if (nilfs_test_opt(sbi, NORECOVERY)) { |
305 | printk(KERN_INFO "NILFS: norecovery option specified. " | 391 | printk(KERN_INFO "NILFS: norecovery option specified. " |
306 | "skipping roll-forward recovery\n"); | 392 | "skipping roll-forward recovery\n"); |
307 | goto skip_recovery; | 393 | goto skip_recovery; |
308 | } | 394 | } |
395 | features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & | ||
396 | ~NILFS_FEATURE_COMPAT_RO_SUPP; | ||
397 | if (features) { | ||
398 | printk(KERN_ERR "NILFS: couldn't proceed with " | ||
399 | "recovery because of unsupported optional " | ||
400 | "features (%llx)\n", | ||
401 | (unsigned long long)features); | ||
402 | err = -EROFS; | ||
403 | goto failed_unload; | ||
404 | } | ||
309 | if (really_read_only) { | 405 | if (really_read_only) { |
310 | printk(KERN_ERR "NILFS: write access " | 406 | printk(KERN_ERR "NILFS: write access " |
311 | "unavailable, cannot proceed.\n"); | 407 | "unavailable, cannot proceed.\n"); |
@@ -320,14 +416,13 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
320 | goto failed_unload; | 416 | goto failed_unload; |
321 | } | 417 | } |
322 | 418 | ||
323 | err = nilfs_recover_logical_segments(nilfs, sbi, &ri); | 419 | err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri); |
324 | if (err) | 420 | if (err) |
325 | goto failed_unload; | 421 | goto failed_unload; |
326 | 422 | ||
327 | down_write(&nilfs->ns_sem); | 423 | down_write(&nilfs->ns_sem); |
328 | nilfs->ns_mount_state |= NILFS_VALID_FS; | 424 | nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ |
329 | nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); | 425 | err = nilfs_cleanup_super(sbi); |
330 | err = nilfs_commit_super(sbi, 1); | ||
331 | up_write(&nilfs->ns_sem); | 426 | up_write(&nilfs->ns_sem); |
332 | 427 | ||
333 | if (err) { | 428 | if (err) { |
@@ -343,6 +438,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
343 | sbi->s_super->s_flags = s_flags; | 438 | sbi->s_super->s_flags = s_flags; |
344 | return 0; | 439 | return 0; |
345 | 440 | ||
441 | scan_error: | ||
442 | printk(KERN_ERR "NILFS: error searching super root.\n"); | ||
443 | goto failed; | ||
444 | |||
346 | failed_unload: | 445 | failed_unload: |
347 | nilfs_mdt_destroy(nilfs->ns_cpfile); | 446 | nilfs_mdt_destroy(nilfs->ns_cpfile); |
348 | nilfs_mdt_destroy(nilfs->ns_sufile); | 447 | nilfs_mdt_destroy(nilfs->ns_sufile); |
@@ -515,8 +614,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, | |||
515 | nilfs_swap_super_block(nilfs); | 614 | nilfs_swap_super_block(nilfs); |
516 | } | 615 | } |
517 | 616 | ||
518 | nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime); | 617 | nilfs->ns_sbwcount = 0; |
519 | nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0; | 618 | nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); |
520 | nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); | 619 | nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); |
521 | *sbpp = sbp[0]; | 620 | *sbpp = sbp[0]; |
522 | return 0; | 621 | return 0; |
@@ -557,6 +656,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
557 | if (err) | 656 | if (err) |
558 | goto out; | 657 | goto out; |
559 | 658 | ||
659 | err = nilfs_check_feature_compatibility(sb, sbp); | ||
660 | if (err) | ||
661 | goto out; | ||
662 | |||
560 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | 663 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); |
561 | if (sb->s_blocksize != blocksize && | 664 | if (sb->s_blocksize != blocksize && |
562 | !sb_set_blocksize(sb, blocksize)) { | 665 | !sb_set_blocksize(sb, blocksize)) { |
@@ -568,7 +671,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
568 | goto out; | 671 | goto out; |
569 | } | 672 | } |
570 | 673 | ||
571 | blocksize = sb_min_blocksize(sb, BLOCK_SIZE); | 674 | blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); |
572 | if (!blocksize) { | 675 | if (!blocksize) { |
573 | printk(KERN_ERR "NILFS: unable to set blocksize\n"); | 676 | printk(KERN_ERR "NILFS: unable to set blocksize\n"); |
574 | err = -EINVAL; | 677 | err = -EINVAL; |
@@ -582,7 +685,18 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
582 | if (err) | 685 | if (err) |
583 | goto failed_sbh; | 686 | goto failed_sbh; |
584 | 687 | ||
688 | err = nilfs_check_feature_compatibility(sb, sbp); | ||
689 | if (err) | ||
690 | goto failed_sbh; | ||
691 | |||
585 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | 692 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); |
693 | if (blocksize < NILFS_MIN_BLOCK_SIZE || | ||
694 | blocksize > NILFS_MAX_BLOCK_SIZE) { | ||
695 | printk(KERN_ERR "NILFS: couldn't mount because of unsupported " | ||
696 | "filesystem blocksize %d\n", blocksize); | ||
697 | err = -EINVAL; | ||
698 | goto failed_sbh; | ||
699 | } | ||
586 | if (sb->s_blocksize != blocksize) { | 700 | if (sb->s_blocksize != blocksize) { |
587 | int hw_blocksize = bdev_logical_block_size(sb->s_bdev); | 701 | int hw_blocksize = bdev_logical_block_size(sb->s_bdev); |
588 | 702 | ||
@@ -604,6 +718,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
604 | when reloading fails. */ | 718 | when reloading fails. */ |
605 | } | 719 | } |
606 | nilfs->ns_blocksize_bits = sb->s_blocksize_bits; | 720 | nilfs->ns_blocksize_bits = sb->s_blocksize_bits; |
721 | nilfs->ns_blocksize = blocksize; | ||
607 | 722 | ||
608 | err = nilfs_store_disk_layout(nilfs, sbp); | 723 | err = nilfs_store_disk_layout(nilfs, sbp); |
609 | if (err) | 724 | if (err) |
@@ -616,23 +731,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
616 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; | 731 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; |
617 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; | 732 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; |
618 | 733 | ||
619 | /* Finding last segment */ | 734 | err = nilfs_store_log_cursor(nilfs, sbp); |
620 | nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); | 735 | if (err) |
621 | nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); | ||
622 | nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); | ||
623 | |||
624 | nilfs->ns_seg_seq = nilfs->ns_last_seq; | ||
625 | nilfs->ns_segnum = | ||
626 | nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); | ||
627 | nilfs->ns_cno = nilfs->ns_last_cno + 1; | ||
628 | if (nilfs->ns_segnum >= nilfs->ns_nsegments) { | ||
629 | printk(KERN_ERR "NILFS invalid last segment number.\n"); | ||
630 | err = -EINVAL; | ||
631 | goto failed_sbh; | 736 | goto failed_sbh; |
632 | } | ||
633 | /* Dummy values */ | ||
634 | nilfs->ns_free_segments_count = | ||
635 | nilfs->ns_nsegments - (nilfs->ns_segnum + 1); | ||
636 | 737 | ||
637 | /* Initialize gcinode cache */ | 738 | /* Initialize gcinode cache */ |
638 | err = nilfs_init_gccache(nilfs); | 739 | err = nilfs_init_gccache(nilfs); |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 1ab974533697..f785a7b0ab99 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -57,7 +57,8 @@ enum { | |||
57 | * @ns_current: back pointer to current mount | 57 | * @ns_current: back pointer to current mount |
58 | * @ns_sbh: buffer heads of on-disk super blocks | 58 | * @ns_sbh: buffer heads of on-disk super blocks |
59 | * @ns_sbp: pointers to super block data | 59 | * @ns_sbp: pointers to super block data |
60 | * @ns_sbwtime: previous write time of super blocks | 60 | * @ns_sbwtime: previous write time of super block |
61 | * @ns_sbwcount: write count of super block | ||
61 | * @ns_sbsize: size of valid data in super block | 62 | * @ns_sbsize: size of valid data in super block |
62 | * @ns_supers: list of nilfs super block structs | 63 | * @ns_supers: list of nilfs super block structs |
63 | * @ns_seg_seq: segment sequence counter | 64 | * @ns_seg_seq: segment sequence counter |
@@ -73,7 +74,7 @@ enum { | |||
73 | * @ns_last_seq: sequence value of the latest segment | 74 | * @ns_last_seq: sequence value of the latest segment |
74 | * @ns_last_cno: checkpoint number of the latest segment | 75 | * @ns_last_cno: checkpoint number of the latest segment |
75 | * @ns_prot_seq: least sequence number of segments which must not be reclaimed | 76 | * @ns_prot_seq: least sequence number of segments which must not be reclaimed |
76 | * @ns_free_segments_count: counter of free segments | 77 | * @ns_prev_seq: base sequence number used to decide if advance log cursor |
77 | * @ns_segctor_sem: segment constructor semaphore | 78 | * @ns_segctor_sem: segment constructor semaphore |
78 | * @ns_dat: DAT file inode | 79 | * @ns_dat: DAT file inode |
79 | * @ns_cpfile: checkpoint file inode | 80 | * @ns_cpfile: checkpoint file inode |
@@ -82,6 +83,7 @@ enum { | |||
82 | * @ns_gc_inodes: dummy inodes to keep live blocks | 83 | * @ns_gc_inodes: dummy inodes to keep live blocks |
83 | * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks | 84 | * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks |
84 | * @ns_blocksize_bits: bit length of block size | 85 | * @ns_blocksize_bits: bit length of block size |
86 | * @ns_blocksize: block size | ||
85 | * @ns_nsegments: number of segments in filesystem | 87 | * @ns_nsegments: number of segments in filesystem |
86 | * @ns_blocks_per_segment: number of blocks per segment | 88 | * @ns_blocks_per_segment: number of blocks per segment |
87 | * @ns_r_segments_percentage: reserved segments percentage | 89 | * @ns_r_segments_percentage: reserved segments percentage |
@@ -119,7 +121,8 @@ struct the_nilfs { | |||
119 | */ | 121 | */ |
120 | struct buffer_head *ns_sbh[2]; | 122 | struct buffer_head *ns_sbh[2]; |
121 | struct nilfs_super_block *ns_sbp[2]; | 123 | struct nilfs_super_block *ns_sbp[2]; |
122 | time_t ns_sbwtime[2]; | 124 | time_t ns_sbwtime; |
125 | unsigned ns_sbwcount; | ||
123 | unsigned ns_sbsize; | 126 | unsigned ns_sbsize; |
124 | unsigned ns_mount_state; | 127 | unsigned ns_mount_state; |
125 | 128 | ||
@@ -149,7 +152,7 @@ struct the_nilfs { | |||
149 | u64 ns_last_seq; | 152 | u64 ns_last_seq; |
150 | __u64 ns_last_cno; | 153 | __u64 ns_last_cno; |
151 | u64 ns_prot_seq; | 154 | u64 ns_prot_seq; |
152 | unsigned long ns_free_segments_count; | 155 | u64 ns_prev_seq; |
153 | 156 | ||
154 | struct rw_semaphore ns_segctor_sem; | 157 | struct rw_semaphore ns_segctor_sem; |
155 | 158 | ||
@@ -168,6 +171,7 @@ struct the_nilfs { | |||
168 | 171 | ||
169 | /* Disk layout information (static) */ | 172 | /* Disk layout information (static) */ |
170 | unsigned int ns_blocksize_bits; | 173 | unsigned int ns_blocksize_bits; |
174 | unsigned int ns_blocksize; | ||
171 | unsigned long ns_nsegments; | 175 | unsigned long ns_nsegments; |
172 | unsigned long ns_blocks_per_segment; | 176 | unsigned long ns_blocks_per_segment; |
173 | unsigned long ns_r_segments_percentage; | 177 | unsigned long ns_r_segments_percentage; |
@@ -203,20 +207,17 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty) | |||
203 | 207 | ||
204 | /* Minimum interval of periodical update of superblocks (in seconds) */ | 208 | /* Minimum interval of periodical update of superblocks (in seconds) */ |
205 | #define NILFS_SB_FREQ 10 | 209 | #define NILFS_SB_FREQ 10 |
206 | #define NILFS_ALTSB_FREQ 60 /* spare superblock */ | ||
207 | 210 | ||
208 | static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) | 211 | static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) |
209 | { | 212 | { |
210 | u64 t = get_seconds(); | 213 | u64 t = get_seconds(); |
211 | return t < nilfs->ns_sbwtime[0] || | 214 | return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ; |
212 | t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ; | ||
213 | } | 215 | } |
214 | 216 | ||
215 | static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs) | 217 | static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) |
216 | { | 218 | { |
217 | u64 t = get_seconds(); | 219 | int flip_bits = nilfs->ns_sbwcount & 0x0FL; |
218 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | 220 | return (flip_bits != 0x08 && flip_bits != 0x0F); |
219 | return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; | ||
220 | } | 221 | } |
221 | 222 | ||
222 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); | 223 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); |
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index dffbb0911d02..22c629eedd82 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig | |||
@@ -3,3 +3,4 @@ config FSNOTIFY | |||
3 | 3 | ||
4 | source "fs/notify/dnotify/Kconfig" | 4 | source "fs/notify/dnotify/Kconfig" |
5 | source "fs/notify/inotify/Kconfig" | 5 | source "fs/notify/inotify/Kconfig" |
6 | source "fs/notify/fanotify/Kconfig" | ||
diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 0922cc826c46..ae5f33a6d868 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile | |||
@@ -1,4 +1,6 @@ | |||
1 | obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o | 1 | obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \ |
2 | mark.o vfsmount_mark.o | ||
2 | 3 | ||
3 | obj-y += dnotify/ | 4 | obj-y += dnotify/ |
4 | obj-y += inotify/ | 5 | obj-y += inotify/ |
6 | obj-y += fanotify/ | ||
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 7e54e52964dd..3344bdd5506e 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c | |||
@@ -29,17 +29,17 @@ | |||
29 | int dir_notify_enable __read_mostly = 1; | 29 | int dir_notify_enable __read_mostly = 1; |
30 | 30 | ||
31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; | 31 | static struct kmem_cache *dnotify_struct_cache __read_mostly; |
32 | static struct kmem_cache *dnotify_mark_entry_cache __read_mostly; | 32 | static struct kmem_cache *dnotify_mark_cache __read_mostly; |
33 | static struct fsnotify_group *dnotify_group __read_mostly; | 33 | static struct fsnotify_group *dnotify_group __read_mostly; |
34 | static DEFINE_MUTEX(dnotify_mark_mutex); | 34 | static DEFINE_MUTEX(dnotify_mark_mutex); |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which | 37 | * dnotify will attach one of these to each inode (i_fsnotify_marks) which |
38 | * is being watched by dnotify. If multiple userspace applications are watching | 38 | * is being watched by dnotify. If multiple userspace applications are watching |
39 | * the same directory with dnotify their information is chained in dn | 39 | * the same directory with dnotify their information is chained in dn |
40 | */ | 40 | */ |
41 | struct dnotify_mark_entry { | 41 | struct dnotify_mark { |
42 | struct fsnotify_mark_entry fsn_entry; | 42 | struct fsnotify_mark fsn_mark; |
43 | struct dnotify_struct *dn; | 43 | struct dnotify_struct *dn; |
44 | }; | 44 | }; |
45 | 45 | ||
@@ -51,27 +51,27 @@ struct dnotify_mark_entry { | |||
51 | * it calls the fsnotify function so it can update the set of all events relevant | 51 | * it calls the fsnotify function so it can update the set of all events relevant |
52 | * to this inode. | 52 | * to this inode. |
53 | */ | 53 | */ |
54 | static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry) | 54 | static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) |
55 | { | 55 | { |
56 | __u32 new_mask, old_mask; | 56 | __u32 new_mask, old_mask; |
57 | struct dnotify_struct *dn; | 57 | struct dnotify_struct *dn; |
58 | struct dnotify_mark_entry *dnentry = container_of(entry, | 58 | struct dnotify_mark *dn_mark = container_of(fsn_mark, |
59 | struct dnotify_mark_entry, | 59 | struct dnotify_mark, |
60 | fsn_entry); | 60 | fsn_mark); |
61 | 61 | ||
62 | assert_spin_locked(&entry->lock); | 62 | assert_spin_locked(&fsn_mark->lock); |
63 | 63 | ||
64 | old_mask = entry->mask; | 64 | old_mask = fsn_mark->mask; |
65 | new_mask = 0; | 65 | new_mask = 0; |
66 | for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next) | 66 | for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) |
67 | new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); | 67 | new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); |
68 | entry->mask = new_mask; | 68 | fsnotify_set_mark_mask_locked(fsn_mark, new_mask); |
69 | 69 | ||
70 | if (old_mask == new_mask) | 70 | if (old_mask == new_mask) |
71 | return; | 71 | return; |
72 | 72 | ||
73 | if (entry->inode) | 73 | if (fsn_mark->i.inode) |
74 | fsnotify_recalc_inode_mask(entry->inode); | 74 | fsnotify_recalc_inode_mask(fsn_mark->i.inode); |
75 | } | 75 | } |
76 | 76 | ||
77 | /* | 77 | /* |
@@ -83,29 +83,25 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry) | |||
83 | * events. | 83 | * events. |
84 | */ | 84 | */ |
85 | static int dnotify_handle_event(struct fsnotify_group *group, | 85 | static int dnotify_handle_event(struct fsnotify_group *group, |
86 | struct fsnotify_mark *inode_mark, | ||
87 | struct fsnotify_mark *vfsmount_mark, | ||
86 | struct fsnotify_event *event) | 88 | struct fsnotify_event *event) |
87 | { | 89 | { |
88 | struct fsnotify_mark_entry *entry = NULL; | 90 | struct dnotify_mark *dn_mark; |
89 | struct dnotify_mark_entry *dnentry; | ||
90 | struct inode *to_tell; | 91 | struct inode *to_tell; |
91 | struct dnotify_struct *dn; | 92 | struct dnotify_struct *dn; |
92 | struct dnotify_struct **prev; | 93 | struct dnotify_struct **prev; |
93 | struct fown_struct *fown; | 94 | struct fown_struct *fown; |
94 | __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD; | 95 | __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD; |
95 | 96 | ||
96 | to_tell = event->to_tell; | 97 | BUG_ON(vfsmount_mark); |
97 | 98 | ||
98 | spin_lock(&to_tell->i_lock); | 99 | to_tell = event->to_tell; |
99 | entry = fsnotify_find_mark_entry(group, to_tell); | ||
100 | spin_unlock(&to_tell->i_lock); | ||
101 | 100 | ||
102 | /* unlikely since we alreay passed dnotify_should_send_event() */ | 101 | dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); |
103 | if (unlikely(!entry)) | ||
104 | return 0; | ||
105 | dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); | ||
106 | 102 | ||
107 | spin_lock(&entry->lock); | 103 | spin_lock(&inode_mark->lock); |
108 | prev = &dnentry->dn; | 104 | prev = &dn_mark->dn; |
109 | while ((dn = *prev) != NULL) { | 105 | while ((dn = *prev) != NULL) { |
110 | if ((dn->dn_mask & test_mask) == 0) { | 106 | if ((dn->dn_mask & test_mask) == 0) { |
111 | prev = &dn->dn_next; | 107 | prev = &dn->dn_next; |
@@ -118,12 +114,11 @@ static int dnotify_handle_event(struct fsnotify_group *group, | |||
118 | else { | 114 | else { |
119 | *prev = dn->dn_next; | 115 | *prev = dn->dn_next; |
120 | kmem_cache_free(dnotify_struct_cache, dn); | 116 | kmem_cache_free(dnotify_struct_cache, dn); |
121 | dnotify_recalc_inode_mask(entry); | 117 | dnotify_recalc_inode_mask(inode_mark); |
122 | } | 118 | } |
123 | } | 119 | } |
124 | 120 | ||
125 | spin_unlock(&entry->lock); | 121 | spin_unlock(&inode_mark->lock); |
126 | fsnotify_put_mark(entry); | ||
127 | 122 | ||
128 | return 0; | 123 | return 0; |
129 | } | 124 | } |
@@ -133,44 +128,27 @@ static int dnotify_handle_event(struct fsnotify_group *group, | |||
133 | * userspace notification for that pair. | 128 | * userspace notification for that pair. |
134 | */ | 129 | */ |
135 | static bool dnotify_should_send_event(struct fsnotify_group *group, | 130 | static bool dnotify_should_send_event(struct fsnotify_group *group, |
136 | struct inode *inode, __u32 mask) | 131 | struct inode *inode, |
132 | struct fsnotify_mark *inode_mark, | ||
133 | struct fsnotify_mark *vfsmount_mark, | ||
134 | __u32 mask, void *data, int data_type) | ||
137 | { | 135 | { |
138 | struct fsnotify_mark_entry *entry; | ||
139 | bool send; | ||
140 | |||
141 | /* !dir_notify_enable should never get here, don't waste time checking | ||
142 | if (!dir_notify_enable) | ||
143 | return 0; */ | ||
144 | |||
145 | /* not a dir, dnotify doesn't care */ | 136 | /* not a dir, dnotify doesn't care */ |
146 | if (!S_ISDIR(inode->i_mode)) | 137 | if (!S_ISDIR(inode->i_mode)) |
147 | return false; | 138 | return false; |
148 | 139 | ||
149 | spin_lock(&inode->i_lock); | 140 | return true; |
150 | entry = fsnotify_find_mark_entry(group, inode); | ||
151 | spin_unlock(&inode->i_lock); | ||
152 | |||
153 | /* no mark means no dnotify watch */ | ||
154 | if (!entry) | ||
155 | return false; | ||
156 | |||
157 | mask = (mask & ~FS_EVENT_ON_CHILD); | ||
158 | send = (mask & entry->mask); | ||
159 | |||
160 | fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */ | ||
161 | |||
162 | return send; | ||
163 | } | 141 | } |
164 | 142 | ||
165 | static void dnotify_free_mark(struct fsnotify_mark_entry *entry) | 143 | static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) |
166 | { | 144 | { |
167 | struct dnotify_mark_entry *dnentry = container_of(entry, | 145 | struct dnotify_mark *dn_mark = container_of(fsn_mark, |
168 | struct dnotify_mark_entry, | 146 | struct dnotify_mark, |
169 | fsn_entry); | 147 | fsn_mark); |
170 | 148 | ||
171 | BUG_ON(dnentry->dn); | 149 | BUG_ON(dn_mark->dn); |
172 | 150 | ||
173 | kmem_cache_free(dnotify_mark_entry_cache, dnentry); | 151 | kmem_cache_free(dnotify_mark_cache, dn_mark); |
174 | } | 152 | } |
175 | 153 | ||
176 | static struct fsnotify_ops dnotify_fsnotify_ops = { | 154 | static struct fsnotify_ops dnotify_fsnotify_ops = { |
@@ -183,15 +161,15 @@ static struct fsnotify_ops dnotify_fsnotify_ops = { | |||
183 | 161 | ||
184 | /* | 162 | /* |
185 | * Called every time a file is closed. Looks first for a dnotify mark on the | 163 | * Called every time a file is closed. Looks first for a dnotify mark on the |
186 | * inode. If one is found run all of the ->dn entries attached to that | 164 | * inode. If one is found run all of the ->dn structures attached to that |
187 | * mark for one relevant to this process closing the file and remove that | 165 | * mark for one relevant to this process closing the file and remove that |
188 | * dnotify_struct. If that was the last dnotify_struct also remove the | 166 | * dnotify_struct. If that was the last dnotify_struct also remove the |
189 | * fsnotify_mark_entry. | 167 | * fsnotify_mark. |
190 | */ | 168 | */ |
191 | void dnotify_flush(struct file *filp, fl_owner_t id) | 169 | void dnotify_flush(struct file *filp, fl_owner_t id) |
192 | { | 170 | { |
193 | struct fsnotify_mark_entry *entry; | 171 | struct fsnotify_mark *fsn_mark; |
194 | struct dnotify_mark_entry *dnentry; | 172 | struct dnotify_mark *dn_mark; |
195 | struct dnotify_struct *dn; | 173 | struct dnotify_struct *dn; |
196 | struct dnotify_struct **prev; | 174 | struct dnotify_struct **prev; |
197 | struct inode *inode; | 175 | struct inode *inode; |
@@ -200,38 +178,34 @@ void dnotify_flush(struct file *filp, fl_owner_t id) | |||
200 | if (!S_ISDIR(inode->i_mode)) | 178 | if (!S_ISDIR(inode->i_mode)) |
201 | return; | 179 | return; |
202 | 180 | ||
203 | spin_lock(&inode->i_lock); | 181 | fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); |
204 | entry = fsnotify_find_mark_entry(dnotify_group, inode); | 182 | if (!fsn_mark) |
205 | spin_unlock(&inode->i_lock); | ||
206 | if (!entry) | ||
207 | return; | 183 | return; |
208 | dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); | 184 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); |
209 | 185 | ||
210 | mutex_lock(&dnotify_mark_mutex); | 186 | mutex_lock(&dnotify_mark_mutex); |
211 | 187 | ||
212 | spin_lock(&entry->lock); | 188 | spin_lock(&fsn_mark->lock); |
213 | prev = &dnentry->dn; | 189 | prev = &dn_mark->dn; |
214 | while ((dn = *prev) != NULL) { | 190 | while ((dn = *prev) != NULL) { |
215 | if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { | 191 | if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { |
216 | *prev = dn->dn_next; | 192 | *prev = dn->dn_next; |
217 | kmem_cache_free(dnotify_struct_cache, dn); | 193 | kmem_cache_free(dnotify_struct_cache, dn); |
218 | dnotify_recalc_inode_mask(entry); | 194 | dnotify_recalc_inode_mask(fsn_mark); |
219 | break; | 195 | break; |
220 | } | 196 | } |
221 | prev = &dn->dn_next; | 197 | prev = &dn->dn_next; |
222 | } | 198 | } |
223 | 199 | ||
224 | spin_unlock(&entry->lock); | 200 | spin_unlock(&fsn_mark->lock); |
225 | 201 | ||
226 | /* nothing else could have found us thanks to the dnotify_mark_mutex */ | 202 | /* nothing else could have found us thanks to the dnotify_mark_mutex */ |
227 | if (dnentry->dn == NULL) | 203 | if (dn_mark->dn == NULL) |
228 | fsnotify_destroy_mark_by_entry(entry); | 204 | fsnotify_destroy_mark(fsn_mark); |
229 | |||
230 | fsnotify_recalc_group_mask(dnotify_group); | ||
231 | 205 | ||
232 | mutex_unlock(&dnotify_mark_mutex); | 206 | mutex_unlock(&dnotify_mark_mutex); |
233 | 207 | ||
234 | fsnotify_put_mark(entry); | 208 | fsnotify_put_mark(fsn_mark); |
235 | } | 209 | } |
236 | 210 | ||
237 | /* this conversion is done only at watch creation */ | 211 | /* this conversion is done only at watch creation */ |
@@ -259,16 +233,16 @@ static __u32 convert_arg(unsigned long arg) | |||
259 | 233 | ||
260 | /* | 234 | /* |
261 | * If multiple processes watch the same inode with dnotify there is only one | 235 | * If multiple processes watch the same inode with dnotify there is only one |
262 | * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct | 236 | * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct |
263 | * onto that mark. This function either attaches the new dnotify_struct onto | 237 | * onto that mark. This function either attaches the new dnotify_struct onto |
264 | * that list, or it |= the mask onto an existing dnofiy_struct. | 238 | * that list, or it |= the mask onto an existing dnofiy_struct. |
265 | */ | 239 | */ |
266 | static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry, | 240 | static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark, |
267 | fl_owner_t id, int fd, struct file *filp, __u32 mask) | 241 | fl_owner_t id, int fd, struct file *filp, __u32 mask) |
268 | { | 242 | { |
269 | struct dnotify_struct *odn; | 243 | struct dnotify_struct *odn; |
270 | 244 | ||
271 | odn = dnentry->dn; | 245 | odn = dn_mark->dn; |
272 | while (odn != NULL) { | 246 | while (odn != NULL) { |
273 | /* adding more events to existing dnofiy_struct? */ | 247 | /* adding more events to existing dnofiy_struct? */ |
274 | if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { | 248 | if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { |
@@ -283,8 +257,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent | |||
283 | dn->dn_fd = fd; | 257 | dn->dn_fd = fd; |
284 | dn->dn_filp = filp; | 258 | dn->dn_filp = filp; |
285 | dn->dn_owner = id; | 259 | dn->dn_owner = id; |
286 | dn->dn_next = dnentry->dn; | 260 | dn->dn_next = dn_mark->dn; |
287 | dnentry->dn = dn; | 261 | dn_mark->dn = dn; |
288 | 262 | ||
289 | return 0; | 263 | return 0; |
290 | } | 264 | } |
@@ -296,8 +270,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent | |||
296 | */ | 270 | */ |
297 | int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | 271 | int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) |
298 | { | 272 | { |
299 | struct dnotify_mark_entry *new_dnentry, *dnentry; | 273 | struct dnotify_mark *new_dn_mark, *dn_mark; |
300 | struct fsnotify_mark_entry *new_entry, *entry; | 274 | struct fsnotify_mark *new_fsn_mark, *fsn_mark; |
301 | struct dnotify_struct *dn; | 275 | struct dnotify_struct *dn; |
302 | struct inode *inode; | 276 | struct inode *inode; |
303 | fl_owner_t id = current->files; | 277 | fl_owner_t id = current->files; |
@@ -306,7 +280,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
306 | __u32 mask; | 280 | __u32 mask; |
307 | 281 | ||
308 | /* we use these to tell if we need to kfree */ | 282 | /* we use these to tell if we need to kfree */ |
309 | new_entry = NULL; | 283 | new_fsn_mark = NULL; |
310 | dn = NULL; | 284 | dn = NULL; |
311 | 285 | ||
312 | if (!dir_notify_enable) { | 286 | if (!dir_notify_enable) { |
@@ -336,8 +310,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
336 | } | 310 | } |
337 | 311 | ||
338 | /* new fsnotify mark, we expect most fcntl calls to add a new mark */ | 312 | /* new fsnotify mark, we expect most fcntl calls to add a new mark */ |
339 | new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL); | 313 | new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL); |
340 | if (!new_dnentry) { | 314 | if (!new_dn_mark) { |
341 | error = -ENOMEM; | 315 | error = -ENOMEM; |
342 | goto out_err; | 316 | goto out_err; |
343 | } | 317 | } |
@@ -345,29 +319,27 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
345 | /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */ | 319 | /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */ |
346 | mask = convert_arg(arg); | 320 | mask = convert_arg(arg); |
347 | 321 | ||
348 | /* set up the new_entry and new_dnentry */ | 322 | /* set up the new_fsn_mark and new_dn_mark */ |
349 | new_entry = &new_dnentry->fsn_entry; | 323 | new_fsn_mark = &new_dn_mark->fsn_mark; |
350 | fsnotify_init_mark(new_entry, dnotify_free_mark); | 324 | fsnotify_init_mark(new_fsn_mark, dnotify_free_mark); |
351 | new_entry->mask = mask; | 325 | new_fsn_mark->mask = mask; |
352 | new_dnentry->dn = NULL; | 326 | new_dn_mark->dn = NULL; |
353 | 327 | ||
354 | /* this is needed to prevent the fcntl/close race described below */ | 328 | /* this is needed to prevent the fcntl/close race described below */ |
355 | mutex_lock(&dnotify_mark_mutex); | 329 | mutex_lock(&dnotify_mark_mutex); |
356 | 330 | ||
357 | /* add the new_entry or find an old one. */ | 331 | /* add the new_fsn_mark or find an old one. */ |
358 | spin_lock(&inode->i_lock); | 332 | fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); |
359 | entry = fsnotify_find_mark_entry(dnotify_group, inode); | 333 | if (fsn_mark) { |
360 | spin_unlock(&inode->i_lock); | 334 | dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); |
361 | if (entry) { | 335 | spin_lock(&fsn_mark->lock); |
362 | dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); | ||
363 | spin_lock(&entry->lock); | ||
364 | } else { | 336 | } else { |
365 | fsnotify_add_mark(new_entry, dnotify_group, inode); | 337 | fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0); |
366 | spin_lock(&new_entry->lock); | 338 | spin_lock(&new_fsn_mark->lock); |
367 | entry = new_entry; | 339 | fsn_mark = new_fsn_mark; |
368 | dnentry = new_dnentry; | 340 | dn_mark = new_dn_mark; |
369 | /* we used new_entry, so don't free it */ | 341 | /* we used new_fsn_mark, so don't free it */ |
370 | new_entry = NULL; | 342 | new_fsn_mark = NULL; |
371 | } | 343 | } |
372 | 344 | ||
373 | rcu_read_lock(); | 345 | rcu_read_lock(); |
@@ -376,17 +348,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
376 | 348 | ||
377 | /* if (f != filp) means that we lost a race and another task/thread | 349 | /* if (f != filp) means that we lost a race and another task/thread |
378 | * actually closed the fd we are still playing with before we grabbed | 350 | * actually closed the fd we are still playing with before we grabbed |
379 | * the dnotify_mark_mutex and entry->lock. Since closing the fd is the | 351 | * the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the |
380 | * only time we clean up the mark entries we need to get our mark off | 352 | * only time we clean up the marks we need to get our mark off |
381 | * the list. */ | 353 | * the list. */ |
382 | if (f != filp) { | 354 | if (f != filp) { |
383 | /* if we added ourselves, shoot ourselves, it's possible that | 355 | /* if we added ourselves, shoot ourselves, it's possible that |
384 | * the flush actually did shoot this entry. That's fine too | 356 | * the flush actually did shoot this fsn_mark. That's fine too |
385 | * since multiple calls to destroy_mark is perfectly safe, if | 357 | * since multiple calls to destroy_mark is perfectly safe, if |
386 | * we found a dnentry already attached to the inode, just sod | 358 | * we found a dn_mark already attached to the inode, just sod |
387 | * off silently as the flush at close time dealt with it. | 359 | * off silently as the flush at close time dealt with it. |
388 | */ | 360 | */ |
389 | if (dnentry == new_dnentry) | 361 | if (dn_mark == new_dn_mark) |
390 | destroy = 1; | 362 | destroy = 1; |
391 | goto out; | 363 | goto out; |
392 | } | 364 | } |
@@ -394,13 +366,13 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
394 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 366 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
395 | if (error) { | 367 | if (error) { |
396 | /* if we added, we must shoot */ | 368 | /* if we added, we must shoot */ |
397 | if (dnentry == new_dnentry) | 369 | if (dn_mark == new_dn_mark) |
398 | destroy = 1; | 370 | destroy = 1; |
399 | goto out; | 371 | goto out; |
400 | } | 372 | } |
401 | 373 | ||
402 | error = attach_dn(dn, dnentry, id, fd, filp, mask); | 374 | error = attach_dn(dn, dn_mark, id, fd, filp, mask); |
403 | /* !error means that we attached the dn to the dnentry, so don't free it */ | 375 | /* !error means that we attached the dn to the dn_mark, so don't free it */ |
404 | if (!error) | 376 | if (!error) |
405 | dn = NULL; | 377 | dn = NULL; |
406 | /* -EEXIST means that we didn't add this new dn and used an old one. | 378 | /* -EEXIST means that we didn't add this new dn and used an old one. |
@@ -408,20 +380,18 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) | |||
408 | else if (error == -EEXIST) | 380 | else if (error == -EEXIST) |
409 | error = 0; | 381 | error = 0; |
410 | 382 | ||
411 | dnotify_recalc_inode_mask(entry); | 383 | dnotify_recalc_inode_mask(fsn_mark); |
412 | out: | 384 | out: |
413 | spin_unlock(&entry->lock); | 385 | spin_unlock(&fsn_mark->lock); |
414 | 386 | ||
415 | if (destroy) | 387 | if (destroy) |
416 | fsnotify_destroy_mark_by_entry(entry); | 388 | fsnotify_destroy_mark(fsn_mark); |
417 | |||
418 | fsnotify_recalc_group_mask(dnotify_group); | ||
419 | 389 | ||
420 | mutex_unlock(&dnotify_mark_mutex); | 390 | mutex_unlock(&dnotify_mark_mutex); |
421 | fsnotify_put_mark(entry); | 391 | fsnotify_put_mark(fsn_mark); |
422 | out_err: | 392 | out_err: |
423 | if (new_entry) | 393 | if (new_fsn_mark) |
424 | fsnotify_put_mark(new_entry); | 394 | fsnotify_put_mark(new_fsn_mark); |
425 | if (dn) | 395 | if (dn) |
426 | kmem_cache_free(dnotify_struct_cache, dn); | 396 | kmem_cache_free(dnotify_struct_cache, dn); |
427 | return error; | 397 | return error; |
@@ -430,10 +400,9 @@ out_err: | |||
430 | static int __init dnotify_init(void) | 400 | static int __init dnotify_init(void) |
431 | { | 401 | { |
432 | dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); | 402 | dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); |
433 | dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC); | 403 | dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC); |
434 | 404 | ||
435 | dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM, | 405 | dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops); |
436 | 0, &dnotify_fsnotify_ops); | ||
437 | if (IS_ERR(dnotify_group)) | 406 | if (IS_ERR(dnotify_group)) |
438 | panic("unable to allocate fsnotify group for dnotify\n"); | 407 | panic("unable to allocate fsnotify group for dnotify\n"); |
439 | return 0; | 408 | return 0; |
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig new file mode 100644 index 000000000000..3ac36b7bf6b9 --- /dev/null +++ b/fs/notify/fanotify/Kconfig | |||
@@ -0,0 +1,26 @@ | |||
1 | config FANOTIFY | ||
2 | bool "Filesystem wide access notification" | ||
3 | select FSNOTIFY | ||
4 | select ANON_INODES | ||
5 | default n | ||
6 | ---help--- | ||
7 | Say Y here to enable fanotify support. fanotify is a file access | ||
8 | notification system which differs from inotify in that it sends | ||
9 | an open file descriptor to the userspace listener along with | ||
10 | the event. | ||
11 | |||
12 | If unsure, say Y. | ||
13 | |||
14 | config FANOTIFY_ACCESS_PERMISSIONS | ||
15 | bool "fanotify permissions checking" | ||
16 | depends on FANOTIFY | ||
17 | depends on SECURITY | ||
18 | default n | ||
19 | ---help--- | ||
20 | Say Y here if you want fanotify listeners to be able to make permissions | ||
21 | decisions concerning filesystem events. This is used by some fanotify | ||
22 | listeners which need to scan files before allowing the system access to | ||
23 | use those files. This is used by some anti-malware vendors and by some | ||
24 | hierarchical storage management systems. | ||
25 | |||
26 | If unsure, say N. | ||
diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile new file mode 100644 index 000000000000..0999213e7e6e --- /dev/null +++ b/fs/notify/fanotify/Makefile | |||
@@ -0,0 +1 @@ | |||
obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o | |||
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c new file mode 100644 index 000000000000..eb8f73c9c131 --- /dev/null +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -0,0 +1,212 @@ | |||
1 | #include <linux/fanotify.h> | ||
2 | #include <linux/fdtable.h> | ||
3 | #include <linux/fsnotify_backend.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/jiffies.h> | ||
6 | #include <linux/kernel.h> /* UINT_MAX */ | ||
7 | #include <linux/mount.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/wait.h> | ||
11 | |||
12 | static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) | ||
13 | { | ||
14 | pr_debug("%s: old=%p new=%p\n", __func__, old, new); | ||
15 | |||
16 | if (old->to_tell == new->to_tell && | ||
17 | old->data_type == new->data_type && | ||
18 | old->tgid == new->tgid) { | ||
19 | switch (old->data_type) { | ||
20 | case (FSNOTIFY_EVENT_FILE): | ||
21 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | ||
22 | (old->file->f_path.dentry == new->file->f_path.dentry)) | ||
23 | return true; | ||
24 | case (FSNOTIFY_EVENT_NONE): | ||
25 | return true; | ||
26 | default: | ||
27 | BUG(); | ||
28 | }; | ||
29 | } | ||
30 | return false; | ||
31 | } | ||
32 | |||
33 | /* and the list better be locked by something too! */ | ||
34 | static struct fsnotify_event *fanotify_merge(struct list_head *list, | ||
35 | struct fsnotify_event *event) | ||
36 | { | ||
37 | struct fsnotify_event_holder *test_holder; | ||
38 | struct fsnotify_event *test_event = NULL; | ||
39 | struct fsnotify_event *new_event; | ||
40 | |||
41 | pr_debug("%s: list=%p event=%p\n", __func__, list, event); | ||
42 | |||
43 | |||
44 | list_for_each_entry_reverse(test_holder, list, event_list) { | ||
45 | if (should_merge(test_holder->event, event)) { | ||
46 | test_event = test_holder->event; | ||
47 | break; | ||
48 | } | ||
49 | } | ||
50 | |||
51 | if (!test_event) | ||
52 | return NULL; | ||
53 | |||
54 | fsnotify_get_event(test_event); | ||
55 | |||
56 | /* if they are exactly the same we are done */ | ||
57 | if (test_event->mask == event->mask) | ||
58 | return test_event; | ||
59 | |||
60 | /* | ||
61 | * if the refcnt == 2 this is the only queue | ||
62 | * for this event and so we can update the mask | ||
63 | * in place. | ||
64 | */ | ||
65 | if (atomic_read(&test_event->refcnt) == 2) { | ||
66 | test_event->mask |= event->mask; | ||
67 | return test_event; | ||
68 | } | ||
69 | |||
70 | new_event = fsnotify_clone_event(test_event); | ||
71 | |||
72 | /* done with test_event */ | ||
73 | fsnotify_put_event(test_event); | ||
74 | |||
75 | /* couldn't allocate memory, merge was not possible */ | ||
76 | if (unlikely(!new_event)) | ||
77 | return ERR_PTR(-ENOMEM); | ||
78 | |||
79 | /* build new event and replace it on the list */ | ||
80 | new_event->mask = (test_event->mask | event->mask); | ||
81 | fsnotify_replace_event(test_holder, new_event); | ||
82 | |||
83 | /* we hold a reference on new_event from clone_event */ | ||
84 | return new_event; | ||
85 | } | ||
86 | |||
87 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
88 | static int fanotify_get_response_from_access(struct fsnotify_group *group, | ||
89 | struct fsnotify_event *event) | ||
90 | { | ||
91 | int ret; | ||
92 | |||
93 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
94 | |||
95 | wait_event(group->fanotify_data.access_waitq, event->response); | ||
96 | |||
97 | /* userspace responded, convert to something usable */ | ||
98 | spin_lock(&event->lock); | ||
99 | switch (event->response) { | ||
100 | case FAN_ALLOW: | ||
101 | ret = 0; | ||
102 | break; | ||
103 | case FAN_DENY: | ||
104 | default: | ||
105 | ret = -EPERM; | ||
106 | } | ||
107 | event->response = 0; | ||
108 | spin_unlock(&event->lock); | ||
109 | |||
110 | pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, | ||
111 | group, event, ret); | ||
112 | |||
113 | return ret; | ||
114 | } | ||
115 | #endif | ||
116 | |||
117 | static int fanotify_handle_event(struct fsnotify_group *group, | ||
118 | struct fsnotify_mark *inode_mark, | ||
119 | struct fsnotify_mark *fanotify_mark, | ||
120 | struct fsnotify_event *event) | ||
121 | { | ||
122 | int ret = 0; | ||
123 | struct fsnotify_event *notify_event = NULL; | ||
124 | |||
125 | BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); | ||
126 | BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); | ||
127 | BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); | ||
128 | BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); | ||
129 | BUILD_BUG_ON(FAN_OPEN != FS_OPEN); | ||
130 | BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); | ||
131 | BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); | ||
132 | BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); | ||
133 | BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); | ||
134 | |||
135 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
136 | |||
137 | notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge); | ||
138 | if (IS_ERR(notify_event)) | ||
139 | return PTR_ERR(notify_event); | ||
140 | |||
141 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
142 | if (event->mask & FAN_ALL_PERM_EVENTS) { | ||
143 | /* if we merged we need to wait on the new event */ | ||
144 | if (notify_event) | ||
145 | event = notify_event; | ||
146 | ret = fanotify_get_response_from_access(group, event); | ||
147 | } | ||
148 | #endif | ||
149 | |||
150 | if (notify_event) | ||
151 | fsnotify_put_event(notify_event); | ||
152 | |||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | static bool fanotify_should_send_event(struct fsnotify_group *group, | ||
157 | struct inode *to_tell, | ||
158 | struct fsnotify_mark *inode_mark, | ||
159 | struct fsnotify_mark *vfsmnt_mark, | ||
160 | __u32 event_mask, void *data, int data_type) | ||
161 | { | ||
162 | __u32 marks_mask, marks_ignored_mask; | ||
163 | |||
164 | pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p " | ||
165 | "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, | ||
166 | inode_mark, vfsmnt_mark, event_mask, data, data_type); | ||
167 | |||
168 | pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n", | ||
169 | __func__, group, vfsmnt_mark, inode_mark, event_mask); | ||
170 | |||
171 | /* sorry, fanotify only gives a damn about files and dirs */ | ||
172 | if (!S_ISREG(to_tell->i_mode) && | ||
173 | !S_ISDIR(to_tell->i_mode)) | ||
174 | return false; | ||
175 | |||
176 | /* if we don't have enough info to send an event to userspace say no */ | ||
177 | if (data_type != FSNOTIFY_EVENT_FILE) | ||
178 | return false; | ||
179 | |||
180 | if (inode_mark && vfsmnt_mark) { | ||
181 | marks_mask = (vfsmnt_mark->mask | inode_mark->mask); | ||
182 | marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); | ||
183 | } else if (inode_mark) { | ||
184 | /* | ||
185 | * if the event is for a child and this inode doesn't care about | ||
186 | * events on the child, don't send it! | ||
187 | */ | ||
188 | if ((event_mask & FS_EVENT_ON_CHILD) && | ||
189 | !(inode_mark->mask & FS_EVENT_ON_CHILD)) | ||
190 | return false; | ||
191 | marks_mask = inode_mark->mask; | ||
192 | marks_ignored_mask = inode_mark->ignored_mask; | ||
193 | } else if (vfsmnt_mark) { | ||
194 | marks_mask = vfsmnt_mark->mask; | ||
195 | marks_ignored_mask = vfsmnt_mark->ignored_mask; | ||
196 | } else { | ||
197 | BUG(); | ||
198 | } | ||
199 | |||
200 | if (event_mask & marks_mask & ~marks_ignored_mask) | ||
201 | return true; | ||
202 | |||
203 | return false; | ||
204 | } | ||
205 | |||
206 | const struct fsnotify_ops fanotify_fsnotify_ops = { | ||
207 | .handle_event = fanotify_handle_event, | ||
208 | .should_send_event = fanotify_should_send_event, | ||
209 | .free_group_priv = NULL, | ||
210 | .free_event_priv = NULL, | ||
211 | .freeing_mark = NULL, | ||
212 | }; | ||
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c new file mode 100644 index 000000000000..25a3b4dfcf61 --- /dev/null +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -0,0 +1,760 @@ | |||
1 | #include <linux/fanotify.h> | ||
2 | #include <linux/fcntl.h> | ||
3 | #include <linux/file.h> | ||
4 | #include <linux/fs.h> | ||
5 | #include <linux/anon_inodes.h> | ||
6 | #include <linux/fsnotify_backend.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/mount.h> | ||
9 | #include <linux/namei.h> | ||
10 | #include <linux/poll.h> | ||
11 | #include <linux/security.h> | ||
12 | #include <linux/syscalls.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/uaccess.h> | ||
16 | |||
17 | #include <asm/ioctls.h> | ||
18 | |||
19 | extern const struct fsnotify_ops fanotify_fsnotify_ops; | ||
20 | |||
21 | static struct kmem_cache *fanotify_mark_cache __read_mostly; | ||
22 | static struct kmem_cache *fanotify_response_event_cache __read_mostly; | ||
23 | |||
24 | struct fanotify_response_event { | ||
25 | struct list_head list; | ||
26 | __s32 fd; | ||
27 | struct fsnotify_event *event; | ||
28 | }; | ||
29 | |||
30 | /* | ||
31 | * Get an fsnotify notification event if one exists and is small | ||
32 | * enough to fit in "count". Return an error pointer if the count | ||
33 | * is not large enough. | ||
34 | * | ||
35 | * Called with the group->notification_mutex held. | ||
36 | */ | ||
37 | static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | ||
38 | size_t count) | ||
39 | { | ||
40 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | ||
41 | |||
42 | pr_debug("%s: group=%p count=%zd\n", __func__, group, count); | ||
43 | |||
44 | if (fsnotify_notify_queue_is_empty(group)) | ||
45 | return NULL; | ||
46 | |||
47 | if (FAN_EVENT_METADATA_LEN > count) | ||
48 | return ERR_PTR(-EINVAL); | ||
49 | |||
50 | /* held the notification_mutex the whole time, so this is the | ||
51 | * same event we peeked above */ | ||
52 | return fsnotify_remove_notify_event(group); | ||
53 | } | ||
54 | |||
55 | static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | ||
56 | { | ||
57 | int client_fd; | ||
58 | struct dentry *dentry; | ||
59 | struct vfsmount *mnt; | ||
60 | struct file *new_file; | ||
61 | |||
62 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
63 | |||
64 | client_fd = get_unused_fd(); | ||
65 | if (client_fd < 0) | ||
66 | return client_fd; | ||
67 | |||
68 | if (event->data_type != FSNOTIFY_EVENT_FILE) { | ||
69 | WARN_ON(1); | ||
70 | put_unused_fd(client_fd); | ||
71 | return -EINVAL; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * we need a new file handle for the userspace program so it can read even if it was | ||
76 | * originally opened O_WRONLY. | ||
77 | */ | ||
78 | dentry = dget(event->file->f_path.dentry); | ||
79 | mnt = mntget(event->file->f_path.mnt); | ||
80 | /* it's possible this event was an overflow event. in that case dentry and mnt | ||
81 | * are NULL; That's fine, just don't call dentry open */ | ||
82 | if (dentry && mnt) | ||
83 | new_file = dentry_open(dentry, mnt, | ||
84 | group->fanotify_data.f_flags | FMODE_NONOTIFY, | ||
85 | current_cred()); | ||
86 | else | ||
87 | new_file = ERR_PTR(-EOVERFLOW); | ||
88 | if (IS_ERR(new_file)) { | ||
89 | /* | ||
90 | * we still send an event even if we can't open the file. this | ||
91 | * can happen when say tasks are gone and we try to open their | ||
92 | * /proc files or we try to open a WRONLY file like in sysfs | ||
93 | * we just send the errno to userspace since there isn't much | ||
94 | * else we can do. | ||
95 | */ | ||
96 | put_unused_fd(client_fd); | ||
97 | client_fd = PTR_ERR(new_file); | ||
98 | } else { | ||
99 | fd_install(client_fd, new_file); | ||
100 | } | ||
101 | |||
102 | return client_fd; | ||
103 | } | ||
104 | |||
105 | static ssize_t fill_event_metadata(struct fsnotify_group *group, | ||
106 | struct fanotify_event_metadata *metadata, | ||
107 | struct fsnotify_event *event) | ||
108 | { | ||
109 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, | ||
110 | group, metadata, event); | ||
111 | |||
112 | metadata->event_len = FAN_EVENT_METADATA_LEN; | ||
113 | metadata->vers = FANOTIFY_METADATA_VERSION; | ||
114 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; | ||
115 | metadata->pid = pid_vnr(event->tgid); | ||
116 | metadata->fd = create_fd(group, event); | ||
117 | |||
118 | return metadata->fd; | ||
119 | } | ||
120 | |||
121 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
122 | static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group, | ||
123 | __s32 fd) | ||
124 | { | ||
125 | struct fanotify_response_event *re, *return_re = NULL; | ||
126 | |||
127 | mutex_lock(&group->fanotify_data.access_mutex); | ||
128 | list_for_each_entry(re, &group->fanotify_data.access_list, list) { | ||
129 | if (re->fd != fd) | ||
130 | continue; | ||
131 | |||
132 | list_del_init(&re->list); | ||
133 | return_re = re; | ||
134 | break; | ||
135 | } | ||
136 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
137 | |||
138 | pr_debug("%s: found return_re=%p\n", __func__, return_re); | ||
139 | |||
140 | return return_re; | ||
141 | } | ||
142 | |||
143 | static int process_access_response(struct fsnotify_group *group, | ||
144 | struct fanotify_response *response_struct) | ||
145 | { | ||
146 | struct fanotify_response_event *re; | ||
147 | __s32 fd = response_struct->fd; | ||
148 | __u32 response = response_struct->response; | ||
149 | |||
150 | pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, | ||
151 | fd, response); | ||
152 | /* | ||
153 | * make sure the response is valid, if invalid we do nothing and either | ||
154 | * userspace can send a valid response or we will clean it up after the | ||
155 | * timeout | ||
156 | */ | ||
157 | switch (response) { | ||
158 | case FAN_ALLOW: | ||
159 | case FAN_DENY: | ||
160 | break; | ||
161 | default: | ||
162 | return -EINVAL; | ||
163 | } | ||
164 | |||
165 | if (fd < 0) | ||
166 | return -EINVAL; | ||
167 | |||
168 | re = dequeue_re(group, fd); | ||
169 | if (!re) | ||
170 | return -ENOENT; | ||
171 | |||
172 | re->event->response = response; | ||
173 | |||
174 | wake_up(&group->fanotify_data.access_waitq); | ||
175 | |||
176 | kmem_cache_free(fanotify_response_event_cache, re); | ||
177 | |||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int prepare_for_access_response(struct fsnotify_group *group, | ||
182 | struct fsnotify_event *event, | ||
183 | __s32 fd) | ||
184 | { | ||
185 | struct fanotify_response_event *re; | ||
186 | |||
187 | if (!(event->mask & FAN_ALL_PERM_EVENTS)) | ||
188 | return 0; | ||
189 | |||
190 | re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL); | ||
191 | if (!re) | ||
192 | return -ENOMEM; | ||
193 | |||
194 | re->event = event; | ||
195 | re->fd = fd; | ||
196 | |||
197 | mutex_lock(&group->fanotify_data.access_mutex); | ||
198 | list_add_tail(&re->list, &group->fanotify_data.access_list); | ||
199 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
200 | |||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | static void remove_access_response(struct fsnotify_group *group, | ||
205 | struct fsnotify_event *event, | ||
206 | __s32 fd) | ||
207 | { | ||
208 | struct fanotify_response_event *re; | ||
209 | |||
210 | if (!(event->mask & FAN_ALL_PERM_EVENTS)) | ||
211 | return; | ||
212 | |||
213 | re = dequeue_re(group, fd); | ||
214 | if (!re) | ||
215 | return; | ||
216 | |||
217 | BUG_ON(re->event != event); | ||
218 | |||
219 | kmem_cache_free(fanotify_response_event_cache, re); | ||
220 | |||
221 | return; | ||
222 | } | ||
223 | #else | ||
224 | static int prepare_for_access_response(struct fsnotify_group *group, | ||
225 | struct fsnotify_event *event, | ||
226 | __s32 fd) | ||
227 | { | ||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | static void remove_access_response(struct fsnotify_group *group, | ||
232 | struct fsnotify_event *event, | ||
233 | __s32 fd) | ||
234 | { | ||
235 | return; | ||
236 | } | ||
237 | #endif | ||
238 | |||
239 | static ssize_t copy_event_to_user(struct fsnotify_group *group, | ||
240 | struct fsnotify_event *event, | ||
241 | char __user *buf) | ||
242 | { | ||
243 | struct fanotify_event_metadata fanotify_event_metadata; | ||
244 | int fd, ret; | ||
245 | |||
246 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
247 | |||
248 | fd = fill_event_metadata(group, &fanotify_event_metadata, event); | ||
249 | if (fd < 0) | ||
250 | return fd; | ||
251 | |||
252 | ret = prepare_for_access_response(group, event, fd); | ||
253 | if (ret) | ||
254 | goto out_close_fd; | ||
255 | |||
256 | ret = -EFAULT; | ||
257 | if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) | ||
258 | goto out_kill_access_response; | ||
259 | |||
260 | return FAN_EVENT_METADATA_LEN; | ||
261 | |||
262 | out_kill_access_response: | ||
263 | remove_access_response(group, event, fd); | ||
264 | out_close_fd: | ||
265 | sys_close(fd); | ||
266 | return ret; | ||
267 | } | ||
268 | |||
269 | /* fanotify userspace file descriptor functions */ | ||
270 | static unsigned int fanotify_poll(struct file *file, poll_table *wait) | ||
271 | { | ||
272 | struct fsnotify_group *group = file->private_data; | ||
273 | int ret = 0; | ||
274 | |||
275 | poll_wait(file, &group->notification_waitq, wait); | ||
276 | mutex_lock(&group->notification_mutex); | ||
277 | if (!fsnotify_notify_queue_is_empty(group)) | ||
278 | ret = POLLIN | POLLRDNORM; | ||
279 | mutex_unlock(&group->notification_mutex); | ||
280 | |||
281 | return ret; | ||
282 | } | ||
283 | |||
284 | static ssize_t fanotify_read(struct file *file, char __user *buf, | ||
285 | size_t count, loff_t *pos) | ||
286 | { | ||
287 | struct fsnotify_group *group; | ||
288 | struct fsnotify_event *kevent; | ||
289 | char __user *start; | ||
290 | int ret; | ||
291 | DEFINE_WAIT(wait); | ||
292 | |||
293 | start = buf; | ||
294 | group = file->private_data; | ||
295 | |||
296 | pr_debug("%s: group=%p\n", __func__, group); | ||
297 | |||
298 | while (1) { | ||
299 | prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); | ||
300 | |||
301 | mutex_lock(&group->notification_mutex); | ||
302 | kevent = get_one_event(group, count); | ||
303 | mutex_unlock(&group->notification_mutex); | ||
304 | |||
305 | if (kevent) { | ||
306 | ret = PTR_ERR(kevent); | ||
307 | if (IS_ERR(kevent)) | ||
308 | break; | ||
309 | ret = copy_event_to_user(group, kevent, buf); | ||
310 | fsnotify_put_event(kevent); | ||
311 | if (ret < 0) | ||
312 | break; | ||
313 | buf += ret; | ||
314 | count -= ret; | ||
315 | continue; | ||
316 | } | ||
317 | |||
318 | ret = -EAGAIN; | ||
319 | if (file->f_flags & O_NONBLOCK) | ||
320 | break; | ||
321 | ret = -EINTR; | ||
322 | if (signal_pending(current)) | ||
323 | break; | ||
324 | |||
325 | if (start != buf) | ||
326 | break; | ||
327 | |||
328 | schedule(); | ||
329 | } | ||
330 | |||
331 | finish_wait(&group->notification_waitq, &wait); | ||
332 | if (start != buf && ret != -EFAULT) | ||
333 | ret = buf - start; | ||
334 | return ret; | ||
335 | } | ||
336 | |||
337 | static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) | ||
338 | { | ||
339 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
340 | struct fanotify_response response = { .fd = -1, .response = -1 }; | ||
341 | struct fsnotify_group *group; | ||
342 | int ret; | ||
343 | |||
344 | group = file->private_data; | ||
345 | |||
346 | if (count > sizeof(response)) | ||
347 | count = sizeof(response); | ||
348 | |||
349 | pr_debug("%s: group=%p count=%zu\n", __func__, group, count); | ||
350 | |||
351 | if (copy_from_user(&response, buf, count)) | ||
352 | return -EFAULT; | ||
353 | |||
354 | ret = process_access_response(group, &response); | ||
355 | if (ret < 0) | ||
356 | count = ret; | ||
357 | |||
358 | return count; | ||
359 | #else | ||
360 | return -EINVAL; | ||
361 | #endif | ||
362 | } | ||
363 | |||
364 | static int fanotify_release(struct inode *ignored, struct file *file) | ||
365 | { | ||
366 | struct fsnotify_group *group = file->private_data; | ||
367 | |||
368 | pr_debug("%s: file=%p group=%p\n", __func__, file, group); | ||
369 | |||
370 | /* matches the fanotify_init->fsnotify_alloc_group */ | ||
371 | fsnotify_put_group(group); | ||
372 | |||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
377 | { | ||
378 | struct fsnotify_group *group; | ||
379 | struct fsnotify_event_holder *holder; | ||
380 | void __user *p; | ||
381 | int ret = -ENOTTY; | ||
382 | size_t send_len = 0; | ||
383 | |||
384 | group = file->private_data; | ||
385 | |||
386 | p = (void __user *) arg; | ||
387 | |||
388 | switch (cmd) { | ||
389 | case FIONREAD: | ||
390 | mutex_lock(&group->notification_mutex); | ||
391 | list_for_each_entry(holder, &group->notification_list, event_list) | ||
392 | send_len += FAN_EVENT_METADATA_LEN; | ||
393 | mutex_unlock(&group->notification_mutex); | ||
394 | ret = put_user(send_len, (int __user *) p); | ||
395 | break; | ||
396 | } | ||
397 | |||
398 | return ret; | ||
399 | } | ||
400 | |||
401 | static const struct file_operations fanotify_fops = { | ||
402 | .poll = fanotify_poll, | ||
403 | .read = fanotify_read, | ||
404 | .write = fanotify_write, | ||
405 | .fasync = NULL, | ||
406 | .release = fanotify_release, | ||
407 | .unlocked_ioctl = fanotify_ioctl, | ||
408 | .compat_ioctl = fanotify_ioctl, | ||
409 | }; | ||
410 | |||
411 | static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) | ||
412 | { | ||
413 | kmem_cache_free(fanotify_mark_cache, fsn_mark); | ||
414 | } | ||
415 | |||
416 | static int fanotify_find_path(int dfd, const char __user *filename, | ||
417 | struct path *path, unsigned int flags) | ||
418 | { | ||
419 | int ret; | ||
420 | |||
421 | pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__, | ||
422 | dfd, filename, flags); | ||
423 | |||
424 | if (filename == NULL) { | ||
425 | struct file *file; | ||
426 | int fput_needed; | ||
427 | |||
428 | ret = -EBADF; | ||
429 | file = fget_light(dfd, &fput_needed); | ||
430 | if (!file) | ||
431 | goto out; | ||
432 | |||
433 | ret = -ENOTDIR; | ||
434 | if ((flags & FAN_MARK_ONLYDIR) && | ||
435 | !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) { | ||
436 | fput_light(file, fput_needed); | ||
437 | goto out; | ||
438 | } | ||
439 | |||
440 | *path = file->f_path; | ||
441 | path_get(path); | ||
442 | fput_light(file, fput_needed); | ||
443 | } else { | ||
444 | unsigned int lookup_flags = 0; | ||
445 | |||
446 | if (!(flags & FAN_MARK_DONT_FOLLOW)) | ||
447 | lookup_flags |= LOOKUP_FOLLOW; | ||
448 | if (flags & FAN_MARK_ONLYDIR) | ||
449 | lookup_flags |= LOOKUP_DIRECTORY; | ||
450 | |||
451 | ret = user_path_at(dfd, filename, lookup_flags, path); | ||
452 | if (ret) | ||
453 | goto out; | ||
454 | } | ||
455 | |||
456 | /* you can only watch an inode if you have read permissions on it */ | ||
457 | ret = inode_permission(path->dentry->d_inode, MAY_READ); | ||
458 | if (ret) | ||
459 | path_put(path); | ||
460 | out: | ||
461 | return ret; | ||
462 | } | ||
463 | |||
464 | static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, | ||
465 | __u32 mask, | ||
466 | unsigned int flags) | ||
467 | { | ||
468 | __u32 oldmask; | ||
469 | |||
470 | spin_lock(&fsn_mark->lock); | ||
471 | if (!(flags & FAN_MARK_IGNORED_MASK)) { | ||
472 | oldmask = fsn_mark->mask; | ||
473 | fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); | ||
474 | } else { | ||
475 | oldmask = fsn_mark->ignored_mask; | ||
476 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask)); | ||
477 | } | ||
478 | spin_unlock(&fsn_mark->lock); | ||
479 | |||
480 | if (!(oldmask & ~mask)) | ||
481 | fsnotify_destroy_mark(fsn_mark); | ||
482 | |||
483 | return mask & oldmask; | ||
484 | } | ||
485 | |||
486 | static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, | ||
487 | struct vfsmount *mnt, __u32 mask, | ||
488 | unsigned int flags) | ||
489 | { | ||
490 | struct fsnotify_mark *fsn_mark = NULL; | ||
491 | __u32 removed; | ||
492 | |||
493 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | ||
494 | if (!fsn_mark) | ||
495 | return -ENOENT; | ||
496 | |||
497 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); | ||
498 | fsnotify_put_mark(fsn_mark); | ||
499 | if (removed & mnt->mnt_fsnotify_mask) | ||
500 | fsnotify_recalc_vfsmount_mask(mnt); | ||
501 | |||
502 | return 0; | ||
503 | } | ||
504 | |||
505 | static int fanotify_remove_inode_mark(struct fsnotify_group *group, | ||
506 | struct inode *inode, __u32 mask, | ||
507 | unsigned int flags) | ||
508 | { | ||
509 | struct fsnotify_mark *fsn_mark = NULL; | ||
510 | __u32 removed; | ||
511 | |||
512 | fsn_mark = fsnotify_find_inode_mark(group, inode); | ||
513 | if (!fsn_mark) | ||
514 | return -ENOENT; | ||
515 | |||
516 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); | ||
517 | /* matches the fsnotify_find_inode_mark() */ | ||
518 | fsnotify_put_mark(fsn_mark); | ||
519 | if (removed & inode->i_fsnotify_mask) | ||
520 | fsnotify_recalc_inode_mask(inode); | ||
521 | |||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, | ||
526 | __u32 mask, | ||
527 | unsigned int flags) | ||
528 | { | ||
529 | __u32 oldmask; | ||
530 | |||
531 | spin_lock(&fsn_mark->lock); | ||
532 | if (!(flags & FAN_MARK_IGNORED_MASK)) { | ||
533 | oldmask = fsn_mark->mask; | ||
534 | fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); | ||
535 | } else { | ||
536 | oldmask = fsn_mark->ignored_mask; | ||
537 | fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask)); | ||
538 | if (flags & FAN_MARK_IGNORED_SURV_MODIFY) | ||
539 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; | ||
540 | } | ||
541 | spin_unlock(&fsn_mark->lock); | ||
542 | |||
543 | return mask & ~oldmask; | ||
544 | } | ||
545 | |||
546 | static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, | ||
547 | struct vfsmount *mnt, __u32 mask, | ||
548 | unsigned int flags) | ||
549 | { | ||
550 | struct fsnotify_mark *fsn_mark; | ||
551 | __u32 added; | ||
552 | |||
553 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | ||
554 | if (!fsn_mark) { | ||
555 | int ret; | ||
556 | |||
557 | fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | ||
558 | if (!fsn_mark) | ||
559 | return -ENOMEM; | ||
560 | |||
561 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | ||
562 | ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); | ||
563 | if (ret) { | ||
564 | fanotify_free_mark(fsn_mark); | ||
565 | return ret; | ||
566 | } | ||
567 | } | ||
568 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | ||
569 | fsnotify_put_mark(fsn_mark); | ||
570 | if (added & ~mnt->mnt_fsnotify_mask) | ||
571 | fsnotify_recalc_vfsmount_mask(mnt); | ||
572 | |||
573 | return 0; | ||
574 | } | ||
575 | |||
576 | static int fanotify_add_inode_mark(struct fsnotify_group *group, | ||
577 | struct inode *inode, __u32 mask, | ||
578 | unsigned int flags) | ||
579 | { | ||
580 | struct fsnotify_mark *fsn_mark; | ||
581 | __u32 added; | ||
582 | |||
583 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); | ||
584 | |||
585 | fsn_mark = fsnotify_find_inode_mark(group, inode); | ||
586 | if (!fsn_mark) { | ||
587 | int ret; | ||
588 | |||
589 | fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); | ||
590 | if (!fsn_mark) | ||
591 | return -ENOMEM; | ||
592 | |||
593 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | ||
594 | ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); | ||
595 | if (ret) { | ||
596 | fanotify_free_mark(fsn_mark); | ||
597 | return ret; | ||
598 | } | ||
599 | } | ||
600 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | ||
601 | fsnotify_put_mark(fsn_mark); | ||
602 | if (added & ~inode->i_fsnotify_mask) | ||
603 | fsnotify_recalc_inode_mask(inode); | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | /* fanotify syscalls */ | ||
608 | SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | ||
609 | { | ||
610 | struct fsnotify_group *group; | ||
611 | int f_flags, fd; | ||
612 | |||
613 | pr_debug("%s: flags=%d event_f_flags=%d\n", | ||
614 | __func__, flags, event_f_flags); | ||
615 | |||
616 | if (!capable(CAP_SYS_ADMIN)) | ||
617 | return -EACCES; | ||
618 | |||
619 | if (flags & ~FAN_ALL_INIT_FLAGS) | ||
620 | return -EINVAL; | ||
621 | |||
622 | f_flags = O_RDWR | FMODE_NONOTIFY; | ||
623 | if (flags & FAN_CLOEXEC) | ||
624 | f_flags |= O_CLOEXEC; | ||
625 | if (flags & FAN_NONBLOCK) | ||
626 | f_flags |= O_NONBLOCK; | ||
627 | |||
628 | /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ | ||
629 | group = fsnotify_alloc_group(&fanotify_fsnotify_ops); | ||
630 | if (IS_ERR(group)) | ||
631 | return PTR_ERR(group); | ||
632 | |||
633 | group->fanotify_data.f_flags = event_f_flags; | ||
634 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
635 | mutex_init(&group->fanotify_data.access_mutex); | ||
636 | init_waitqueue_head(&group->fanotify_data.access_waitq); | ||
637 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | ||
638 | #endif | ||
639 | |||
640 | fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); | ||
641 | if (fd < 0) | ||
642 | goto out_put_group; | ||
643 | |||
644 | return fd; | ||
645 | |||
646 | out_put_group: | ||
647 | fsnotify_put_group(group); | ||
648 | return fd; | ||
649 | } | ||
650 | |||
651 | SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | ||
652 | __u64 mask, int dfd, | ||
653 | const char __user * pathname) | ||
654 | { | ||
655 | struct inode *inode = NULL; | ||
656 | struct vfsmount *mnt = NULL; | ||
657 | struct fsnotify_group *group; | ||
658 | struct file *filp; | ||
659 | struct path path; | ||
660 | int ret, fput_needed; | ||
661 | |||
662 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", | ||
663 | __func__, fanotify_fd, flags, dfd, pathname, mask); | ||
664 | |||
665 | /* we only use the lower 32 bits as of right now. */ | ||
666 | if (mask & ((__u64)0xffffffff << 32)) | ||
667 | return -EINVAL; | ||
668 | |||
669 | if (flags & ~FAN_ALL_MARK_FLAGS) | ||
670 | return -EINVAL; | ||
671 | switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { | ||
672 | case FAN_MARK_ADD: | ||
673 | case FAN_MARK_REMOVE: | ||
674 | case FAN_MARK_FLUSH: | ||
675 | break; | ||
676 | default: | ||
677 | return -EINVAL; | ||
678 | } | ||
679 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
680 | if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD)) | ||
681 | #else | ||
682 | if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD)) | ||
683 | #endif | ||
684 | return -EINVAL; | ||
685 | |||
686 | filp = fget_light(fanotify_fd, &fput_needed); | ||
687 | if (unlikely(!filp)) | ||
688 | return -EBADF; | ||
689 | |||
690 | /* verify that this is indeed an fanotify instance */ | ||
691 | ret = -EINVAL; | ||
692 | if (unlikely(filp->f_op != &fanotify_fops)) | ||
693 | goto fput_and_out; | ||
694 | |||
695 | ret = fanotify_find_path(dfd, pathname, &path, flags); | ||
696 | if (ret) | ||
697 | goto fput_and_out; | ||
698 | |||
699 | /* inode held in place by reference to path; group by fget on fd */ | ||
700 | if (!(flags & FAN_MARK_MOUNT)) | ||
701 | inode = path.dentry->d_inode; | ||
702 | else | ||
703 | mnt = path.mnt; | ||
704 | group = filp->private_data; | ||
705 | |||
706 | /* create/update an inode mark */ | ||
707 | switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { | ||
708 | case FAN_MARK_ADD: | ||
709 | if (flags & FAN_MARK_MOUNT) | ||
710 | ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags); | ||
711 | else | ||
712 | ret = fanotify_add_inode_mark(group, inode, mask, flags); | ||
713 | break; | ||
714 | case FAN_MARK_REMOVE: | ||
715 | if (flags & FAN_MARK_MOUNT) | ||
716 | ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags); | ||
717 | else | ||
718 | ret = fanotify_remove_inode_mark(group, inode, mask, flags); | ||
719 | break; | ||
720 | case FAN_MARK_FLUSH: | ||
721 | if (flags & FAN_MARK_MOUNT) | ||
722 | fsnotify_clear_vfsmount_marks_by_group(group); | ||
723 | else | ||
724 | fsnotify_clear_inode_marks_by_group(group); | ||
725 | break; | ||
726 | default: | ||
727 | ret = -EINVAL; | ||
728 | } | ||
729 | |||
730 | path_put(&path); | ||
731 | fput_and_out: | ||
732 | fput_light(filp, fput_needed); | ||
733 | return ret; | ||
734 | } | ||
735 | |||
736 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS | ||
737 | asmlinkage long SyS_fanotify_mark(long fanotify_fd, long flags, __u64 mask, | ||
738 | long dfd, long pathname) | ||
739 | { | ||
740 | return SYSC_fanotify_mark((int) fanotify_fd, (unsigned int) flags, | ||
741 | mask, (int) dfd, | ||
742 | (const char __user *) pathname); | ||
743 | } | ||
744 | SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark); | ||
745 | #endif | ||
746 | |||
747 | /* | ||
748 | * fanotify_user_setup - Our initialization function. Note that we cannnot return | ||
749 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here | ||
750 | * must result in panic(). | ||
751 | */ | ||
752 | static int __init fanotify_user_setup(void) | ||
753 | { | ||
754 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); | ||
755 | fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, | ||
756 | SLAB_PANIC); | ||
757 | |||
758 | return 0; | ||
759 | } | ||
760 | device_initcall(fanotify_user_setup); | ||
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index fcc2f064af83..4d2a82c1ceb1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/mount.h> | ||
24 | #include <linux/srcu.h> | 25 | #include <linux/srcu.h> |
25 | 26 | ||
26 | #include <linux/fsnotify_backend.h> | 27 | #include <linux/fsnotify_backend.h> |
@@ -35,6 +36,11 @@ void __fsnotify_inode_delete(struct inode *inode) | |||
35 | } | 36 | } |
36 | EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); | 37 | EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); |
37 | 38 | ||
39 | void __fsnotify_vfsmount_delete(struct vfsmount *mnt) | ||
40 | { | ||
41 | fsnotify_clear_marks_by_mount(mnt); | ||
42 | } | ||
43 | |||
38 | /* | 44 | /* |
39 | * Given an inode, first check if we care what happens to our children. Inotify | 45 | * Given an inode, first check if we care what happens to our children. Inotify |
40 | * and dnotify both tell their parents about events. If we care about any event | 46 | * and dnotify both tell their parents about events. If we care about any event |
@@ -78,13 +84,16 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) | |||
78 | } | 84 | } |
79 | 85 | ||
80 | /* Notify this dentry's parent about a child's events. */ | 86 | /* Notify this dentry's parent about a child's events. */ |
81 | void __fsnotify_parent(struct dentry *dentry, __u32 mask) | 87 | void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask) |
82 | { | 88 | { |
83 | struct dentry *parent; | 89 | struct dentry *parent; |
84 | struct inode *p_inode; | 90 | struct inode *p_inode; |
85 | bool send = false; | 91 | bool send = false; |
86 | bool should_update_children = false; | 92 | bool should_update_children = false; |
87 | 93 | ||
94 | if (!dentry) | ||
95 | dentry = file->f_path.dentry; | ||
96 | |||
88 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) | 97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) |
89 | return; | 98 | return; |
90 | 99 | ||
@@ -115,8 +124,12 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask) | |||
115 | * specifies these are events which came from a child. */ | 124 | * specifies these are events which came from a child. */ |
116 | mask |= FS_EVENT_ON_CHILD; | 125 | mask |= FS_EVENT_ON_CHILD; |
117 | 126 | ||
118 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | 127 | if (file) |
119 | dentry->d_name.name, 0); | 128 | fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE, |
129 | dentry->d_name.name, 0); | ||
130 | else | ||
131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | ||
132 | dentry->d_name.name, 0); | ||
120 | dput(parent); | 133 | dput(parent); |
121 | } | 134 | } |
122 | 135 | ||
@@ -127,63 +140,181 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask) | |||
127 | } | 140 | } |
128 | EXPORT_SYMBOL_GPL(__fsnotify_parent); | 141 | EXPORT_SYMBOL_GPL(__fsnotify_parent); |
129 | 142 | ||
143 | static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, | ||
144 | struct fsnotify_mark *inode_mark, | ||
145 | struct fsnotify_mark *vfsmount_mark, | ||
146 | __u32 mask, void *data, | ||
147 | int data_is, u32 cookie, | ||
148 | const unsigned char *file_name, | ||
149 | struct fsnotify_event **event) | ||
150 | { | ||
151 | struct fsnotify_group *group = inode_mark->group; | ||
152 | __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
153 | __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); | ||
154 | |||
155 | pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" | ||
156 | " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, | ||
157 | mnt, inode_mark, mask, data, data_is, cookie, *event); | ||
158 | |||
159 | /* clear ignored on inode modification */ | ||
160 | if (mask & FS_MODIFY) { | ||
161 | if (inode_mark && | ||
162 | !(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) | ||
163 | inode_mark->ignored_mask = 0; | ||
164 | if (vfsmount_mark && | ||
165 | !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) | ||
166 | vfsmount_mark->ignored_mask = 0; | ||
167 | } | ||
168 | |||
169 | /* does the inode mark tell us to do something? */ | ||
170 | if (inode_mark) { | ||
171 | inode_test_mask &= inode_mark->mask; | ||
172 | inode_test_mask &= ~inode_mark->ignored_mask; | ||
173 | } | ||
174 | |||
175 | /* does the vfsmount_mark tell us to do something? */ | ||
176 | if (vfsmount_mark) { | ||
177 | vfsmount_test_mask &= vfsmount_mark->mask; | ||
178 | vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; | ||
179 | if (inode_mark) | ||
180 | vfsmount_test_mask &= ~inode_mark->ignored_mask; | ||
181 | } | ||
182 | |||
183 | if (!inode_test_mask && !vfsmount_test_mask) | ||
184 | return 0; | ||
185 | |||
186 | if (group->ops->should_send_event(group, to_tell, inode_mark, | ||
187 | vfsmount_mark, mask, data, | ||
188 | data_is) == false) | ||
189 | return 0; | ||
190 | |||
191 | if (!*event) { | ||
192 | *event = fsnotify_create_event(to_tell, mask, data, | ||
193 | data_is, file_name, | ||
194 | cookie, GFP_KERNEL); | ||
195 | if (!*event) | ||
196 | return -ENOMEM; | ||
197 | } | ||
198 | return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event); | ||
199 | } | ||
200 | |||
130 | /* | 201 | /* |
131 | * This is the main call to fsnotify. The VFS calls into hook specific functions | 202 | * This is the main call to fsnotify. The VFS calls into hook specific functions |
132 | * in linux/fsnotify.h. Those functions then in turn call here. Here will call | 203 | * in linux/fsnotify.h. Those functions then in turn call here. Here will call |
133 | * out to all of the registered fsnotify_group. Those groups can then use the | 204 | * out to all of the registered fsnotify_group. Those groups can then use the |
134 | * notification event in whatever means they feel necessary. | 205 | * notification event in whatever means they feel necessary. |
135 | */ | 206 | */ |
136 | void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie) | 207 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, |
208 | const unsigned char *file_name, u32 cookie) | ||
137 | { | 209 | { |
138 | struct fsnotify_group *group; | 210 | struct hlist_node *inode_node, *vfsmount_node; |
211 | struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; | ||
212 | struct fsnotify_group *inode_group, *vfsmount_group; | ||
139 | struct fsnotify_event *event = NULL; | 213 | struct fsnotify_event *event = NULL; |
140 | int idx; | 214 | struct vfsmount *mnt; |
215 | int idx, ret = 0; | ||
216 | bool used_inode = false, used_vfsmount = false; | ||
141 | /* global tests shouldn't care about events on child only the specific event */ | 217 | /* global tests shouldn't care about events on child only the specific event */ |
142 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); | 218 | __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); |
143 | 219 | ||
144 | if (list_empty(&fsnotify_groups)) | 220 | if (data_is == FSNOTIFY_EVENT_FILE) |
145 | return; | 221 | mnt = ((struct file *)data)->f_path.mnt; |
222 | else | ||
223 | mnt = NULL; | ||
146 | 224 | ||
147 | if (!(test_mask & fsnotify_mask)) | ||
148 | return; | ||
149 | |||
150 | if (!(test_mask & to_tell->i_fsnotify_mask)) | ||
151 | return; | ||
152 | /* | 225 | /* |
153 | * SRCU!! the groups list is very very much read only and the path is | 226 | * if this is a modify event we may need to clear the ignored masks |
154 | * very hot. The VAST majority of events are not going to need to do | 227 | * otherwise return if neither the inode nor the vfsmount care about |
155 | * anything other than walk the list so it's crazy to pre-allocate. | 228 | * this type of event. |
156 | */ | 229 | */ |
157 | idx = srcu_read_lock(&fsnotify_grp_srcu); | 230 | if (!(mask & FS_MODIFY) && |
158 | list_for_each_entry_rcu(group, &fsnotify_groups, group_list) { | 231 | !(test_mask & to_tell->i_fsnotify_mask) && |
159 | if (test_mask & group->mask) { | 232 | !(mnt && test_mask & mnt->mnt_fsnotify_mask)) |
160 | if (!group->ops->should_send_event(group, to_tell, mask)) | 233 | return 0; |
161 | continue; | 234 | |
162 | if (!event) { | 235 | idx = srcu_read_lock(&fsnotify_mark_srcu); |
163 | event = fsnotify_create_event(to_tell, mask, data, | 236 | |
164 | data_is, file_name, cookie, | 237 | if ((mask & FS_MODIFY) || |
165 | GFP_KERNEL); | 238 | (test_mask & to_tell->i_fsnotify_mask)) |
166 | /* shit, we OOM'd and now we can't tell, maybe | 239 | inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, |
167 | * someday someone else will want to do something | 240 | &fsnotify_mark_srcu); |
168 | * here */ | 241 | else |
169 | if (!event) | 242 | inode_node = NULL; |
170 | break; | 243 | |
171 | } | 244 | if (mnt) { |
172 | group->ops->handle_event(group, event); | 245 | if ((mask & FS_MODIFY) || |
246 | (test_mask & mnt->mnt_fsnotify_mask)) | ||
247 | vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, | ||
248 | &fsnotify_mark_srcu); | ||
249 | else | ||
250 | vfsmount_node = NULL; | ||
251 | } else { | ||
252 | mnt = NULL; | ||
253 | vfsmount_node = NULL; | ||
254 | } | ||
255 | |||
256 | while (inode_node || vfsmount_node) { | ||
257 | if (inode_node) { | ||
258 | inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), | ||
259 | struct fsnotify_mark, i.i_list); | ||
260 | inode_group = inode_mark->group; | ||
261 | } else | ||
262 | inode_group = (void *)-1; | ||
263 | |||
264 | if (vfsmount_node) { | ||
265 | vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), | ||
266 | struct fsnotify_mark, m.m_list); | ||
267 | vfsmount_group = vfsmount_mark->group; | ||
268 | } else | ||
269 | vfsmount_group = (void *)-1; | ||
270 | |||
271 | if (inode_group < vfsmount_group) { | ||
272 | /* handle inode */ | ||
273 | send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, | ||
274 | data_is, cookie, file_name, &event); | ||
275 | used_inode = true; | ||
276 | } else if (vfsmount_group < inode_group) { | ||
277 | send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, | ||
278 | data_is, cookie, file_name, &event); | ||
279 | used_vfsmount = true; | ||
280 | } else { | ||
281 | send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, | ||
282 | mask, data, data_is, cookie, file_name, | ||
283 | &event); | ||
284 | used_vfsmount = true; | ||
285 | used_inode = true; | ||
173 | } | 286 | } |
287 | |||
288 | if (used_inode) | ||
289 | inode_node = srcu_dereference(inode_node->next, | ||
290 | &fsnotify_mark_srcu); | ||
291 | if (used_vfsmount) | ||
292 | vfsmount_node = srcu_dereference(vfsmount_node->next, | ||
293 | &fsnotify_mark_srcu); | ||
174 | } | 294 | } |
175 | srcu_read_unlock(&fsnotify_grp_srcu, idx); | 295 | |
296 | srcu_read_unlock(&fsnotify_mark_srcu, idx); | ||
176 | /* | 297 | /* |
177 | * fsnotify_create_event() took a reference so the event can't be cleaned | 298 | * fsnotify_create_event() took a reference so the event can't be cleaned |
178 | * up while we are still trying to add it to lists, drop that one. | 299 | * up while we are still trying to add it to lists, drop that one. |
179 | */ | 300 | */ |
180 | if (event) | 301 | if (event) |
181 | fsnotify_put_event(event); | 302 | fsnotify_put_event(event); |
303 | |||
304 | return ret; | ||
182 | } | 305 | } |
183 | EXPORT_SYMBOL_GPL(fsnotify); | 306 | EXPORT_SYMBOL_GPL(fsnotify); |
184 | 307 | ||
185 | static __init int fsnotify_init(void) | 308 | static __init int fsnotify_init(void) |
186 | { | 309 | { |
187 | return init_srcu_struct(&fsnotify_grp_srcu); | 310 | int ret; |
311 | |||
312 | BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); | ||
313 | |||
314 | ret = init_srcu_struct(&fsnotify_mark_srcu); | ||
315 | if (ret) | ||
316 | panic("initializing fsnotify_mark_srcu"); | ||
317 | |||
318 | return 0; | ||
188 | } | 319 | } |
189 | subsys_initcall(fsnotify_init); | 320 | core_initcall(fsnotify_init); |
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 4dc240824b2d..85e7d2b431d9 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h | |||
@@ -6,21 +6,34 @@ | |||
6 | #include <linux/srcu.h> | 6 | #include <linux/srcu.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | 8 | ||
9 | /* protects reads of fsnotify_groups */ | ||
10 | extern struct srcu_struct fsnotify_grp_srcu; | ||
11 | /* all groups which receive fsnotify events */ | ||
12 | extern struct list_head fsnotify_groups; | ||
13 | /* all bitwise OR of all event types (FS_*) for all fsnotify_groups */ | ||
14 | extern __u32 fsnotify_mask; | ||
15 | |||
16 | /* destroy all events sitting in this groups notification queue */ | 9 | /* destroy all events sitting in this groups notification queue */ |
17 | extern void fsnotify_flush_notify(struct fsnotify_group *group); | 10 | extern void fsnotify_flush_notify(struct fsnotify_group *group); |
18 | 11 | ||
12 | /* protects reads of inode and vfsmount marks list */ | ||
13 | extern struct srcu_struct fsnotify_mark_srcu; | ||
14 | |||
15 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, | ||
16 | __u32 mask); | ||
17 | /* add a mark to an inode */ | ||
18 | extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark, | ||
19 | struct fsnotify_group *group, struct inode *inode, | ||
20 | int allow_dups); | ||
21 | /* add a mark to a vfsmount */ | ||
22 | extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | ||
23 | struct fsnotify_group *group, struct vfsmount *mnt, | ||
24 | int allow_dups); | ||
25 | |||
19 | /* final kfree of a group */ | 26 | /* final kfree of a group */ |
20 | extern void fsnotify_final_destroy_group(struct fsnotify_group *group); | 27 | extern void fsnotify_final_destroy_group(struct fsnotify_group *group); |
21 | 28 | ||
29 | /* vfsmount specific destruction of a mark */ | ||
30 | extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); | ||
31 | /* inode specific destruction of a mark */ | ||
32 | extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); | ||
22 | /* run the list of all marks associated with inode and flag them to be freed */ | 33 | /* run the list of all marks associated with inode and flag them to be freed */ |
23 | extern void fsnotify_clear_marks_by_inode(struct inode *inode); | 34 | extern void fsnotify_clear_marks_by_inode(struct inode *inode); |
35 | /* run the list of all marks associated with vfsmount and flag them to be freed */ | ||
36 | extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt); | ||
24 | /* | 37 | /* |
25 | * update the dentry->d_flags of all of inode's children to indicate if inode cares | 38 | * update the dentry->d_flags of all of inode's children to indicate if inode cares |
26 | * about events that happen to its children. | 39 | * about events that happen to its children. |
diff --git a/fs/notify/group.c b/fs/notify/group.c index 0e1677144bc5..d309f38449cb 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c | |||
@@ -28,64 +28,6 @@ | |||
28 | 28 | ||
29 | #include <asm/atomic.h> | 29 | #include <asm/atomic.h> |
30 | 30 | ||
31 | /* protects writes to fsnotify_groups and fsnotify_mask */ | ||
32 | static DEFINE_MUTEX(fsnotify_grp_mutex); | ||
33 | /* protects reads while running the fsnotify_groups list */ | ||
34 | struct srcu_struct fsnotify_grp_srcu; | ||
35 | /* all groups registered to receive filesystem notifications */ | ||
36 | LIST_HEAD(fsnotify_groups); | ||
37 | /* bitwise OR of all events (FS_*) interesting to some group on this system */ | ||
38 | __u32 fsnotify_mask; | ||
39 | |||
40 | /* | ||
41 | * When a new group registers or changes it's set of interesting events | ||
42 | * this function updates the fsnotify_mask to contain all interesting events | ||
43 | */ | ||
44 | void fsnotify_recalc_global_mask(void) | ||
45 | { | ||
46 | struct fsnotify_group *group; | ||
47 | __u32 mask = 0; | ||
48 | int idx; | ||
49 | |||
50 | idx = srcu_read_lock(&fsnotify_grp_srcu); | ||
51 | list_for_each_entry_rcu(group, &fsnotify_groups, group_list) | ||
52 | mask |= group->mask; | ||
53 | srcu_read_unlock(&fsnotify_grp_srcu, idx); | ||
54 | fsnotify_mask = mask; | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * Update the group->mask by running all of the marks associated with this | ||
59 | * group and finding the bitwise | of all of the mark->mask. If we change | ||
60 | * the group->mask we need to update the global mask of events interesting | ||
61 | * to the system. | ||
62 | */ | ||
63 | void fsnotify_recalc_group_mask(struct fsnotify_group *group) | ||
64 | { | ||
65 | __u32 mask = 0; | ||
66 | __u32 old_mask = group->mask; | ||
67 | struct fsnotify_mark_entry *entry; | ||
68 | |||
69 | spin_lock(&group->mark_lock); | ||
70 | list_for_each_entry(entry, &group->mark_entries, g_list) | ||
71 | mask |= entry->mask; | ||
72 | spin_unlock(&group->mark_lock); | ||
73 | |||
74 | group->mask = mask; | ||
75 | |||
76 | if (old_mask != mask) | ||
77 | fsnotify_recalc_global_mask(); | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Take a reference to a group so things found under the fsnotify_grp_mutex | ||
82 | * can't get freed under us | ||
83 | */ | ||
84 | static void fsnotify_get_group(struct fsnotify_group *group) | ||
85 | { | ||
86 | atomic_inc(&group->refcnt); | ||
87 | } | ||
88 | |||
89 | /* | 31 | /* |
90 | * Final freeing of a group | 32 | * Final freeing of a group |
91 | */ | 33 | */ |
@@ -110,145 +52,53 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group) | |||
110 | */ | 52 | */ |
111 | static void fsnotify_destroy_group(struct fsnotify_group *group) | 53 | static void fsnotify_destroy_group(struct fsnotify_group *group) |
112 | { | 54 | { |
113 | /* clear all inode mark entries for this group */ | 55 | /* clear all inode marks for this group */ |
114 | fsnotify_clear_marks_by_group(group); | 56 | fsnotify_clear_marks_by_group(group); |
115 | 57 | ||
58 | synchronize_srcu(&fsnotify_mark_srcu); | ||
59 | |||
116 | /* past the point of no return, matches the initial value of 1 */ | 60 | /* past the point of no return, matches the initial value of 1 */ |
117 | if (atomic_dec_and_test(&group->num_marks)) | 61 | if (atomic_dec_and_test(&group->num_marks)) |
118 | fsnotify_final_destroy_group(group); | 62 | fsnotify_final_destroy_group(group); |
119 | } | 63 | } |
120 | 64 | ||
121 | /* | 65 | /* |
122 | * Remove this group from the global list of groups that will get events | ||
123 | * this can be done even if there are still references and things still using | ||
124 | * this group. This just stops the group from getting new events. | ||
125 | */ | ||
126 | static void __fsnotify_evict_group(struct fsnotify_group *group) | ||
127 | { | ||
128 | BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); | ||
129 | |||
130 | if (group->on_group_list) | ||
131 | list_del_rcu(&group->group_list); | ||
132 | group->on_group_list = 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Called when a group is no longer interested in getting events. This can be | ||
137 | * used if a group is misbehaving or if for some reason a group should no longer | ||
138 | * get any filesystem events. | ||
139 | */ | ||
140 | void fsnotify_evict_group(struct fsnotify_group *group) | ||
141 | { | ||
142 | mutex_lock(&fsnotify_grp_mutex); | ||
143 | __fsnotify_evict_group(group); | ||
144 | mutex_unlock(&fsnotify_grp_mutex); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * Drop a reference to a group. Free it if it's through. | 66 | * Drop a reference to a group. Free it if it's through. |
149 | */ | 67 | */ |
150 | void fsnotify_put_group(struct fsnotify_group *group) | 68 | void fsnotify_put_group(struct fsnotify_group *group) |
151 | { | 69 | { |
152 | if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex)) | 70 | if (atomic_dec_and_test(&group->refcnt)) |
153 | return; | 71 | fsnotify_destroy_group(group); |
154 | |||
155 | /* | ||
156 | * OK, now we know that there's no other users *and* we hold mutex, | ||
157 | * so no new references will appear | ||
158 | */ | ||
159 | __fsnotify_evict_group(group); | ||
160 | |||
161 | /* | ||
162 | * now it's off the list, so the only thing we might care about is | ||
163 | * srcu access.... | ||
164 | */ | ||
165 | mutex_unlock(&fsnotify_grp_mutex); | ||
166 | synchronize_srcu(&fsnotify_grp_srcu); | ||
167 | |||
168 | /* and now it is really dead. _Nothing_ could be seeing it */ | ||
169 | fsnotify_recalc_global_mask(); | ||
170 | fsnotify_destroy_group(group); | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Simply run the fsnotify_groups list and find a group which matches | ||
175 | * the given parameters. If a group is found we take a reference to that | ||
176 | * group. | ||
177 | */ | ||
178 | static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask, | ||
179 | const struct fsnotify_ops *ops) | ||
180 | { | ||
181 | struct fsnotify_group *group_iter; | ||
182 | struct fsnotify_group *group = NULL; | ||
183 | |||
184 | BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); | ||
185 | |||
186 | list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) { | ||
187 | if (group_iter->group_num == group_num) { | ||
188 | if ((group_iter->mask == mask) && | ||
189 | (group_iter->ops == ops)) { | ||
190 | fsnotify_get_group(group_iter); | ||
191 | group = group_iter; | ||
192 | } else | ||
193 | group = ERR_PTR(-EEXIST); | ||
194 | } | ||
195 | } | ||
196 | return group; | ||
197 | } | 72 | } |
198 | 73 | ||
199 | /* | 74 | /* |
200 | * Either finds an existing group which matches the group_num, mask, and ops or | 75 | * Create a new fsnotify_group and hold a reference for the group returned. |
201 | * creates a new group and adds it to the global group list. In either case we | ||
202 | * take a reference for the group returned. | ||
203 | */ | 76 | */ |
204 | struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, | 77 | struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) |
205 | const struct fsnotify_ops *ops) | ||
206 | { | 78 | { |
207 | struct fsnotify_group *group, *tgroup; | 79 | struct fsnotify_group *group; |
208 | 80 | ||
209 | /* very low use, simpler locking if we just always alloc */ | 81 | group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL); |
210 | group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL); | ||
211 | if (!group) | 82 | if (!group) |
212 | return ERR_PTR(-ENOMEM); | 83 | return ERR_PTR(-ENOMEM); |
213 | 84 | ||
85 | /* set to 0 when there a no external references to this group */ | ||
214 | atomic_set(&group->refcnt, 1); | 86 | atomic_set(&group->refcnt, 1); |
215 | 87 | /* | |
216 | group->on_group_list = 0; | 88 | * hits 0 when there are no external references AND no marks for |
217 | group->group_num = group_num; | 89 | * this group |
218 | group->mask = mask; | 90 | */ |
91 | atomic_set(&group->num_marks, 1); | ||
219 | 92 | ||
220 | mutex_init(&group->notification_mutex); | 93 | mutex_init(&group->notification_mutex); |
221 | INIT_LIST_HEAD(&group->notification_list); | 94 | INIT_LIST_HEAD(&group->notification_list); |
222 | init_waitqueue_head(&group->notification_waitq); | 95 | init_waitqueue_head(&group->notification_waitq); |
223 | group->q_len = 0; | ||
224 | group->max_events = UINT_MAX; | 96 | group->max_events = UINT_MAX; |
225 | 97 | ||
226 | spin_lock_init(&group->mark_lock); | 98 | spin_lock_init(&group->mark_lock); |
227 | atomic_set(&group->num_marks, 0); | 99 | INIT_LIST_HEAD(&group->marks_list); |
228 | INIT_LIST_HEAD(&group->mark_entries); | ||
229 | 100 | ||
230 | group->ops = ops; | 101 | group->ops = ops; |
231 | 102 | ||
232 | mutex_lock(&fsnotify_grp_mutex); | ||
233 | tgroup = fsnotify_find_group(group_num, mask, ops); | ||
234 | if (tgroup) { | ||
235 | /* group already exists */ | ||
236 | mutex_unlock(&fsnotify_grp_mutex); | ||
237 | /* destroy the new one we made */ | ||
238 | fsnotify_put_group(group); | ||
239 | return tgroup; | ||
240 | } | ||
241 | |||
242 | /* group not found, add a new one */ | ||
243 | list_add_rcu(&group->group_list, &fsnotify_groups); | ||
244 | group->on_group_list = 1; | ||
245 | /* being on the fsnotify_groups list holds one num_marks */ | ||
246 | atomic_inc(&group->num_marks); | ||
247 | |||
248 | mutex_unlock(&fsnotify_grp_mutex); | ||
249 | |||
250 | if (mask) | ||
251 | fsnotify_recalc_global_mask(); | ||
252 | |||
253 | return group; | 103 | return group; |
254 | } | 104 | } |
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 0399bcbe09c8..33297c005060 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -16,72 +16,6 @@ | |||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | /* | ||
20 | * fsnotify inode mark locking/lifetime/and refcnting | ||
21 | * | ||
22 | * REFCNT: | ||
23 | * The mark->refcnt tells how many "things" in the kernel currently are | ||
24 | * referencing this object. The object typically will live inside the kernel | ||
25 | * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task | ||
26 | * which can find this object holding the appropriete locks, can take a reference | ||
27 | * and the object itself is guarenteed to survive until the reference is dropped. | ||
28 | * | ||
29 | * LOCKING: | ||
30 | * There are 3 spinlocks involved with fsnotify inode marks and they MUST | ||
31 | * be taken in order as follows: | ||
32 | * | ||
33 | * entry->lock | ||
34 | * group->mark_lock | ||
35 | * inode->i_lock | ||
36 | * | ||
37 | * entry->lock protects 2 things, entry->group and entry->inode. You must hold | ||
38 | * that lock to dereference either of these things (they could be NULL even with | ||
39 | * the lock) | ||
40 | * | ||
41 | * group->mark_lock protects the mark_entries list anchored inside a given group | ||
42 | * and each entry is hooked via the g_list. It also sorta protects the | ||
43 | * free_g_list, which when used is anchored by a private list on the stack of the | ||
44 | * task which held the group->mark_lock. | ||
45 | * | ||
46 | * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a | ||
47 | * given inode and each entry is hooked via the i_list. (and sorta the | ||
48 | * free_i_list) | ||
49 | * | ||
50 | * | ||
51 | * LIFETIME: | ||
52 | * Inode marks survive between when they are added to an inode and when their | ||
53 | * refcnt==0. | ||
54 | * | ||
55 | * The inode mark can be cleared for a number of different reasons including: | ||
56 | * - The inode is unlinked for the last time. (fsnotify_inode_remove) | ||
57 | * - The inode is being evicted from cache. (fsnotify_inode_delete) | ||
58 | * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes) | ||
59 | * - Something explicitly requests that it be removed. (fsnotify_destroy_mark_by_entry) | ||
60 | * - The fsnotify_group associated with the mark is going away and all such marks | ||
61 | * need to be cleaned up. (fsnotify_clear_marks_by_group) | ||
62 | * | ||
63 | * Worst case we are given an inode and need to clean up all the marks on that | ||
64 | * inode. We take i_lock and walk the i_fsnotify_mark_entries safely. For each | ||
65 | * mark on the list we take a reference (so the mark can't disappear under us). | ||
66 | * We remove that mark form the inode's list of marks and we add this mark to a | ||
67 | * private list anchored on the stack using i_free_list; At this point we no | ||
68 | * longer fear anything finding the mark using the inode's list of marks. | ||
69 | * | ||
70 | * We can safely and locklessly run the private list on the stack of everything | ||
71 | * we just unattached from the original inode. For each mark on the private list | ||
72 | * we grab the mark-> and can thus dereference mark->group and mark->inode. If | ||
73 | * we see the group and inode are not NULL we take those locks. Now holding all | ||
74 | * 3 locks we can completely remove the mark from other tasks finding it in the | ||
75 | * future. Remember, 10 things might already be referencing this mark, but they | ||
76 | * better be holding a ref. We drop our reference we took before we unhooked it | ||
77 | * from the inode. When the ref hits 0 we can free the mark. | ||
78 | * | ||
79 | * Very similarly for freeing by group, except we use free_g_list. | ||
80 | * | ||
81 | * This has the very interesting property of being able to run concurrently with | ||
82 | * any (or all) other directions. | ||
83 | */ | ||
84 | |||
85 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
86 | #include <linux/init.h> | 20 | #include <linux/init.h> |
87 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
@@ -95,30 +29,19 @@ | |||
95 | #include <linux/fsnotify_backend.h> | 29 | #include <linux/fsnotify_backend.h> |
96 | #include "fsnotify.h" | 30 | #include "fsnotify.h" |
97 | 31 | ||
98 | void fsnotify_get_mark(struct fsnotify_mark_entry *entry) | ||
99 | { | ||
100 | atomic_inc(&entry->refcnt); | ||
101 | } | ||
102 | |||
103 | void fsnotify_put_mark(struct fsnotify_mark_entry *entry) | ||
104 | { | ||
105 | if (atomic_dec_and_test(&entry->refcnt)) | ||
106 | entry->free_mark(entry); | ||
107 | } | ||
108 | |||
109 | /* | 32 | /* |
110 | * Recalculate the mask of events relevant to a given inode locked. | 33 | * Recalculate the mask of events relevant to a given inode locked. |
111 | */ | 34 | */ |
112 | static void fsnotify_recalc_inode_mask_locked(struct inode *inode) | 35 | static void fsnotify_recalc_inode_mask_locked(struct inode *inode) |
113 | { | 36 | { |
114 | struct fsnotify_mark_entry *entry; | 37 | struct fsnotify_mark *mark; |
115 | struct hlist_node *pos; | 38 | struct hlist_node *pos; |
116 | __u32 new_mask = 0; | 39 | __u32 new_mask = 0; |
117 | 40 | ||
118 | assert_spin_locked(&inode->i_lock); | 41 | assert_spin_locked(&inode->i_lock); |
119 | 42 | ||
120 | hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) | 43 | hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) |
121 | new_mask |= entry->mask; | 44 | new_mask |= mark->mask; |
122 | inode->i_fsnotify_mask = new_mask; | 45 | inode->i_fsnotify_mask = new_mask; |
123 | } | 46 | } |
124 | 47 | ||
@@ -135,107 +58,26 @@ void fsnotify_recalc_inode_mask(struct inode *inode) | |||
135 | __fsnotify_update_child_dentry_flags(inode); | 58 | __fsnotify_update_child_dentry_flags(inode); |
136 | } | 59 | } |
137 | 60 | ||
138 | /* | 61 | void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) |
139 | * Any time a mark is getting freed we end up here. | ||
140 | * The caller had better be holding a reference to this mark so we don't actually | ||
141 | * do the final put under the entry->lock | ||
142 | */ | ||
143 | void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry) | ||
144 | { | 62 | { |
145 | struct fsnotify_group *group; | 63 | struct inode *inode = mark->i.inode; |
146 | struct inode *inode; | ||
147 | 64 | ||
148 | spin_lock(&entry->lock); | 65 | assert_spin_locked(&mark->lock); |
66 | assert_spin_locked(&mark->group->mark_lock); | ||
149 | 67 | ||
150 | group = entry->group; | ||
151 | inode = entry->inode; | ||
152 | |||
153 | BUG_ON(group && !inode); | ||
154 | BUG_ON(!group && inode); | ||
155 | |||
156 | /* if !group something else already marked this to die */ | ||
157 | if (!group) { | ||
158 | spin_unlock(&entry->lock); | ||
159 | return; | ||
160 | } | ||
161 | |||
162 | /* 1 from caller and 1 for being on i_list/g_list */ | ||
163 | BUG_ON(atomic_read(&entry->refcnt) < 2); | ||
164 | |||
165 | spin_lock(&group->mark_lock); | ||
166 | spin_lock(&inode->i_lock); | 68 | spin_lock(&inode->i_lock); |
167 | 69 | ||
168 | hlist_del_init(&entry->i_list); | 70 | hlist_del_init_rcu(&mark->i.i_list); |
169 | entry->inode = NULL; | 71 | mark->i.inode = NULL; |
170 | |||
171 | list_del_init(&entry->g_list); | ||
172 | entry->group = NULL; | ||
173 | |||
174 | fsnotify_put_mark(entry); /* for i_list and g_list */ | ||
175 | 72 | ||
176 | /* | 73 | /* |
177 | * this mark is now off the inode->i_fsnotify_mark_entries list and we | 74 | * this mark is now off the inode->i_fsnotify_marks list and we |
178 | * hold the inode->i_lock, so this is the perfect time to update the | 75 | * hold the inode->i_lock, so this is the perfect time to update the |
179 | * inode->i_fsnotify_mask | 76 | * inode->i_fsnotify_mask |
180 | */ | 77 | */ |
181 | fsnotify_recalc_inode_mask_locked(inode); | 78 | fsnotify_recalc_inode_mask_locked(inode); |
182 | 79 | ||
183 | spin_unlock(&inode->i_lock); | 80 | spin_unlock(&inode->i_lock); |
184 | spin_unlock(&group->mark_lock); | ||
185 | spin_unlock(&entry->lock); | ||
186 | |||
187 | /* | ||
188 | * Some groups like to know that marks are being freed. This is a | ||
189 | * callback to the group function to let it know that this entry | ||
190 | * is being freed. | ||
191 | */ | ||
192 | if (group->ops->freeing_mark) | ||
193 | group->ops->freeing_mark(entry, group); | ||
194 | |||
195 | /* | ||
196 | * __fsnotify_update_child_dentry_flags(inode); | ||
197 | * | ||
198 | * I really want to call that, but we can't, we have no idea if the inode | ||
199 | * still exists the second we drop the entry->lock. | ||
200 | * | ||
201 | * The next time an event arrive to this inode from one of it's children | ||
202 | * __fsnotify_parent will see that the inode doesn't care about it's | ||
203 | * children and will update all of these flags then. So really this | ||
204 | * is just a lazy update (and could be a perf win...) | ||
205 | */ | ||
206 | |||
207 | |||
208 | iput(inode); | ||
209 | |||
210 | /* | ||
211 | * it's possible that this group tried to destroy itself, but this | ||
212 | * this mark was simultaneously being freed by inode. If that's the | ||
213 | * case, we finish freeing the group here. | ||
214 | */ | ||
215 | if (unlikely(atomic_dec_and_test(&group->num_marks))) | ||
216 | fsnotify_final_destroy_group(group); | ||
217 | } | ||
218 | |||
219 | /* | ||
220 | * Given a group, destroy all of the marks associated with that group. | ||
221 | */ | ||
222 | void fsnotify_clear_marks_by_group(struct fsnotify_group *group) | ||
223 | { | ||
224 | struct fsnotify_mark_entry *lentry, *entry; | ||
225 | LIST_HEAD(free_list); | ||
226 | |||
227 | spin_lock(&group->mark_lock); | ||
228 | list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) { | ||
229 | list_add(&entry->free_g_list, &free_list); | ||
230 | list_del_init(&entry->g_list); | ||
231 | fsnotify_get_mark(entry); | ||
232 | } | ||
233 | spin_unlock(&group->mark_lock); | ||
234 | |||
235 | list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) { | ||
236 | fsnotify_destroy_mark_by_entry(entry); | ||
237 | fsnotify_put_mark(entry); | ||
238 | } | ||
239 | } | 81 | } |
240 | 82 | ||
241 | /* | 83 | /* |
@@ -243,112 +85,145 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group) | |||
243 | */ | 85 | */ |
244 | void fsnotify_clear_marks_by_inode(struct inode *inode) | 86 | void fsnotify_clear_marks_by_inode(struct inode *inode) |
245 | { | 87 | { |
246 | struct fsnotify_mark_entry *entry, *lentry; | 88 | struct fsnotify_mark *mark, *lmark; |
247 | struct hlist_node *pos, *n; | 89 | struct hlist_node *pos, *n; |
248 | LIST_HEAD(free_list); | 90 | LIST_HEAD(free_list); |
249 | 91 | ||
250 | spin_lock(&inode->i_lock); | 92 | spin_lock(&inode->i_lock); |
251 | hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) { | 93 | hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) { |
252 | list_add(&entry->free_i_list, &free_list); | 94 | list_add(&mark->i.free_i_list, &free_list); |
253 | hlist_del_init(&entry->i_list); | 95 | hlist_del_init_rcu(&mark->i.i_list); |
254 | fsnotify_get_mark(entry); | 96 | fsnotify_get_mark(mark); |
255 | } | 97 | } |
256 | spin_unlock(&inode->i_lock); | 98 | spin_unlock(&inode->i_lock); |
257 | 99 | ||
258 | list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) { | 100 | list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) { |
259 | fsnotify_destroy_mark_by_entry(entry); | 101 | fsnotify_destroy_mark(mark); |
260 | fsnotify_put_mark(entry); | 102 | fsnotify_put_mark(mark); |
261 | } | 103 | } |
262 | } | 104 | } |
263 | 105 | ||
264 | /* | 106 | /* |
107 | * Given a group clear all of the inode marks associated with that group. | ||
108 | */ | ||
109 | void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) | ||
110 | { | ||
111 | fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); | ||
112 | } | ||
113 | |||
114 | /* | ||
265 | * given a group and inode, find the mark associated with that combination. | 115 | * given a group and inode, find the mark associated with that combination. |
266 | * if found take a reference to that mark and return it, else return NULL | 116 | * if found take a reference to that mark and return it, else return NULL |
267 | */ | 117 | */ |
268 | struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, | 118 | struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group, |
269 | struct inode *inode) | 119 | struct inode *inode) |
270 | { | 120 | { |
271 | struct fsnotify_mark_entry *entry; | 121 | struct fsnotify_mark *mark; |
272 | struct hlist_node *pos; | 122 | struct hlist_node *pos; |
273 | 123 | ||
274 | assert_spin_locked(&inode->i_lock); | 124 | assert_spin_locked(&inode->i_lock); |
275 | 125 | ||
276 | hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) { | 126 | hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) { |
277 | if (entry->group == group) { | 127 | if (mark->group == group) { |
278 | fsnotify_get_mark(entry); | 128 | fsnotify_get_mark(mark); |
279 | return entry; | 129 | return mark; |
280 | } | 130 | } |
281 | } | 131 | } |
282 | return NULL; | 132 | return NULL; |
283 | } | 133 | } |
284 | 134 | ||
285 | /* | 135 | /* |
286 | * Nothing fancy, just initialize lists and locks and counters. | 136 | * given a group and inode, find the mark associated with that combination. |
137 | * if found take a reference to that mark and return it, else return NULL | ||
287 | */ | 138 | */ |
288 | void fsnotify_init_mark(struct fsnotify_mark_entry *entry, | 139 | struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, |
289 | void (*free_mark)(struct fsnotify_mark_entry *entry)) | 140 | struct inode *inode) |
141 | { | ||
142 | struct fsnotify_mark *mark; | ||
143 | |||
144 | spin_lock(&inode->i_lock); | ||
145 | mark = fsnotify_find_inode_mark_locked(group, inode); | ||
146 | spin_unlock(&inode->i_lock); | ||
290 | 147 | ||
148 | return mark; | ||
149 | } | ||
150 | |||
151 | /* | ||
152 | * If we are setting a mark mask on an inode mark we should pin the inode | ||
153 | * in memory. | ||
154 | */ | ||
155 | void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, | ||
156 | __u32 mask) | ||
291 | { | 157 | { |
292 | spin_lock_init(&entry->lock); | 158 | struct inode *inode; |
293 | atomic_set(&entry->refcnt, 1); | 159 | |
294 | INIT_HLIST_NODE(&entry->i_list); | 160 | assert_spin_locked(&mark->lock); |
295 | entry->group = NULL; | 161 | |
296 | entry->mask = 0; | 162 | if (mask && |
297 | entry->inode = NULL; | 163 | mark->i.inode && |
298 | entry->free_mark = free_mark; | 164 | !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) { |
165 | mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED; | ||
166 | inode = igrab(mark->i.inode); | ||
167 | /* | ||
168 | * we shouldn't be able to get here if the inode wasn't | ||
169 | * already safely held in memory. But bug in case it | ||
170 | * ever is wrong. | ||
171 | */ | ||
172 | BUG_ON(!inode); | ||
173 | } | ||
299 | } | 174 | } |
300 | 175 | ||
301 | /* | 176 | /* |
302 | * Attach an initialized mark entry to a given group and inode. | 177 | * Attach an initialized mark to a given inode. |
303 | * These marks may be used for the fsnotify backend to determine which | 178 | * These marks may be used for the fsnotify backend to determine which |
304 | * event types should be delivered to which group and for which inodes. | 179 | * event types should be delivered to which group and for which inodes. These |
180 | * marks are ordered according to the group's location in memory. | ||
305 | */ | 181 | */ |
306 | int fsnotify_add_mark(struct fsnotify_mark_entry *entry, | 182 | int fsnotify_add_inode_mark(struct fsnotify_mark *mark, |
307 | struct fsnotify_group *group, struct inode *inode) | 183 | struct fsnotify_group *group, struct inode *inode, |
184 | int allow_dups) | ||
308 | { | 185 | { |
309 | struct fsnotify_mark_entry *lentry; | 186 | struct fsnotify_mark *lmark; |
187 | struct hlist_node *node, *last = NULL; | ||
310 | int ret = 0; | 188 | int ret = 0; |
311 | 189 | ||
312 | inode = igrab(inode); | 190 | mark->flags |= FSNOTIFY_MARK_FLAG_INODE; |
313 | if (unlikely(!inode)) | 191 | |
314 | return -EINVAL; | 192 | assert_spin_locked(&mark->lock); |
193 | assert_spin_locked(&group->mark_lock); | ||
315 | 194 | ||
316 | /* | ||
317 | * LOCKING ORDER!!!! | ||
318 | * entry->lock | ||
319 | * group->mark_lock | ||
320 | * inode->i_lock | ||
321 | */ | ||
322 | spin_lock(&entry->lock); | ||
323 | spin_lock(&group->mark_lock); | ||
324 | spin_lock(&inode->i_lock); | 195 | spin_lock(&inode->i_lock); |
325 | 196 | ||
326 | lentry = fsnotify_find_mark_entry(group, inode); | 197 | mark->i.inode = inode; |
327 | if (!lentry) { | ||
328 | entry->group = group; | ||
329 | entry->inode = inode; | ||
330 | 198 | ||
331 | hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries); | 199 | /* is mark the first mark? */ |
332 | list_add(&entry->g_list, &group->mark_entries); | 200 | if (hlist_empty(&inode->i_fsnotify_marks)) { |
201 | hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks); | ||
202 | goto out; | ||
203 | } | ||
333 | 204 | ||
334 | fsnotify_get_mark(entry); /* for i_list and g_list */ | 205 | /* should mark be in the middle of the current list? */ |
206 | hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) { | ||
207 | last = node; | ||
208 | |||
209 | if ((lmark->group == group) && !allow_dups) { | ||
210 | ret = -EEXIST; | ||
211 | goto out; | ||
212 | } | ||
335 | 213 | ||
336 | atomic_inc(&group->num_marks); | 214 | if (mark->group < lmark->group) |
215 | continue; | ||
337 | 216 | ||
338 | fsnotify_recalc_inode_mask_locked(inode); | 217 | hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); |
218 | goto out; | ||
339 | } | 219 | } |
340 | 220 | ||
221 | BUG_ON(last == NULL); | ||
222 | /* mark should be the last entry. last is the current last entry */ | ||
223 | hlist_add_after_rcu(last, &mark->i.i_list); | ||
224 | out: | ||
225 | fsnotify_recalc_inode_mask_locked(inode); | ||
341 | spin_unlock(&inode->i_lock); | 226 | spin_unlock(&inode->i_lock); |
342 | spin_unlock(&group->mark_lock); | ||
343 | spin_unlock(&entry->lock); | ||
344 | |||
345 | if (lentry) { | ||
346 | ret = -EEXIST; | ||
347 | iput(inode); | ||
348 | fsnotify_put_mark(lentry); | ||
349 | } else { | ||
350 | __fsnotify_update_child_dentry_flags(inode); | ||
351 | } | ||
352 | 227 | ||
353 | return ret; | 228 | return ret; |
354 | } | 229 | } |
@@ -369,11 +244,11 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
369 | struct inode *need_iput_tmp; | 244 | struct inode *need_iput_tmp; |
370 | 245 | ||
371 | /* | 246 | /* |
372 | * We cannot __iget() an inode in state I_CLEAR, I_FREEING, | 247 | * We cannot __iget() an inode in state I_FREEING, |
373 | * I_WILL_FREE, or I_NEW which is fine because by that point | 248 | * I_WILL_FREE, or I_NEW which is fine because by that point |
374 | * the inode cannot have any associated watches. | 249 | * the inode cannot have any associated watches. |
375 | */ | 250 | */ |
376 | if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) | 251 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
377 | continue; | 252 | continue; |
378 | 253 | ||
379 | /* | 254 | /* |
@@ -397,7 +272,7 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
397 | /* In case the dropping of a reference would nuke next_i. */ | 272 | /* In case the dropping of a reference would nuke next_i. */ |
398 | if ((&next_i->i_sb_list != list) && | 273 | if ((&next_i->i_sb_list != list) && |
399 | atomic_read(&next_i->i_count) && | 274 | atomic_read(&next_i->i_count) && |
400 | !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { | 275 | !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { |
401 | __iget(next_i); | 276 | __iget(next_i); |
402 | need_iput = next_i; | 277 | need_iput = next_i; |
403 | } | 278 | } |
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig index b3a159b21cfd..b981fc0c8379 100644 --- a/fs/notify/inotify/Kconfig +++ b/fs/notify/inotify/Kconfig | |||
@@ -1,18 +1,3 @@ | |||
1 | config INOTIFY | ||
2 | bool "Inotify file change notification support" | ||
3 | default n | ||
4 | ---help--- | ||
5 | Say Y here to enable legacy in kernel inotify support. Inotify is a | ||
6 | file change notification system. It is a replacement for dnotify. | ||
7 | This option only provides the legacy inotify in kernel API. There | ||
8 | are no in tree kernel users of this interface since it is deprecated. | ||
9 | You only need this if you are loading an out of tree kernel module | ||
10 | that uses inotify. | ||
11 | |||
12 | For more information, see <file:Documentation/filesystems/inotify.txt> | ||
13 | |||
14 | If unsure, say N. | ||
15 | |||
16 | config INOTIFY_USER | 1 | config INOTIFY_USER |
17 | bool "Inotify support for userspace" | 2 | bool "Inotify support for userspace" |
18 | select ANON_INODES | 3 | select ANON_INODES |
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile index 943828171362..a380dabe09de 100644 --- a/fs/notify/inotify/Makefile +++ b/fs/notify/inotify/Makefile | |||
@@ -1,2 +1 @@ | |||
1 | obj-$(CONFIG_INOTIFY) += inotify.o | ||
2 | obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o | obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o | |
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c deleted file mode 100644 index 27b75ebc7460..000000000000 --- a/fs/notify/inotify/inotify.c +++ /dev/null | |||
@@ -1,873 +0,0 @@ | |||
1 | /* | ||
2 | * fs/inotify.c - inode-based file event notifications | ||
3 | * | ||
4 | * Authors: | ||
5 | * John McCutchan <ttb@tentacle.dhs.org> | ||
6 | * Robert Love <rml@novell.com> | ||
7 | * | ||
8 | * Kernel API added by: Amy Griffis <amy.griffis@hp.com> | ||
9 | * | ||
10 | * Copyright (C) 2005 John McCutchan | ||
11 | * Copyright 2006 Hewlett-Packard Development Company, L.P. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify it | ||
14 | * under the terms of the GNU General Public License as published by the | ||
15 | * Free Software Foundation; either version 2, or (at your option) any | ||
16 | * later version. | ||
17 | * | ||
18 | * This program is distributed in the hope that it will be useful, but | ||
19 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
21 | * General Public License for more details. | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/spinlock.h> | ||
27 | #include <linux/idr.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/fs.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <linux/list.h> | ||
33 | #include <linux/writeback.h> | ||
34 | #include <linux/inotify.h> | ||
35 | #include <linux/fsnotify_backend.h> | ||
36 | |||
37 | static atomic_t inotify_cookie; | ||
38 | |||
39 | /* | ||
40 | * Lock ordering: | ||
41 | * | ||
42 | * dentry->d_lock (used to keep d_move() away from dentry->d_parent) | ||
43 | * iprune_mutex (synchronize shrink_icache_memory()) | ||
44 | * inode_lock (protects the super_block->s_inodes list) | ||
45 | * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list) | ||
46 | * inotify_handle->mutex (protects inotify_handle and watches->h_list) | ||
47 | * | ||
48 | * The inode->inotify_mutex and inotify_handle->mutex and held during execution | ||
49 | * of a caller's event handler. Thus, the caller must not hold any locks | ||
50 | * taken in their event handler while calling any of the published inotify | ||
51 | * interfaces. | ||
52 | */ | ||
53 | |||
54 | /* | ||
55 | * Lifetimes of the three main data structures--inotify_handle, inode, and | ||
56 | * inotify_watch--are managed by reference count. | ||
57 | * | ||
58 | * inotify_handle: Lifetime is from inotify_init() to inotify_destroy(). | ||
59 | * Additional references can bump the count via get_inotify_handle() and drop | ||
60 | * the count via put_inotify_handle(). | ||
61 | * | ||
62 | * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch() | ||
63 | * to remove_watch_no_event(). Additional references can bump the count via | ||
64 | * get_inotify_watch() and drop the count via put_inotify_watch(). The caller | ||
65 | * is reponsible for the final put after receiving IN_IGNORED, or when using | ||
66 | * IN_ONESHOT after receiving the first event. Inotify does the final put if | ||
67 | * inotify_destroy() is called. | ||
68 | * | ||
69 | * inode: Pinned so long as the inode is associated with a watch, from | ||
70 | * inotify_add_watch() to the final put_inotify_watch(). | ||
71 | */ | ||
72 | |||
73 | /* | ||
74 | * struct inotify_handle - represents an inotify instance | ||
75 | * | ||
76 | * This structure is protected by the mutex 'mutex'. | ||
77 | */ | ||
78 | struct inotify_handle { | ||
79 | struct idr idr; /* idr mapping wd -> watch */ | ||
80 | struct mutex mutex; /* protects this bad boy */ | ||
81 | struct list_head watches; /* list of watches */ | ||
82 | atomic_t count; /* reference count */ | ||
83 | u32 last_wd; /* the last wd allocated */ | ||
84 | const struct inotify_operations *in_ops; /* inotify caller operations */ | ||
85 | }; | ||
86 | |||
87 | static inline void get_inotify_handle(struct inotify_handle *ih) | ||
88 | { | ||
89 | atomic_inc(&ih->count); | ||
90 | } | ||
91 | |||
92 | static inline void put_inotify_handle(struct inotify_handle *ih) | ||
93 | { | ||
94 | if (atomic_dec_and_test(&ih->count)) { | ||
95 | idr_destroy(&ih->idr); | ||
96 | kfree(ih); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /** | ||
101 | * get_inotify_watch - grab a reference to an inotify_watch | ||
102 | * @watch: watch to grab | ||
103 | */ | ||
104 | void get_inotify_watch(struct inotify_watch *watch) | ||
105 | { | ||
106 | atomic_inc(&watch->count); | ||
107 | } | ||
108 | EXPORT_SYMBOL_GPL(get_inotify_watch); | ||
109 | |||
110 | int pin_inotify_watch(struct inotify_watch *watch) | ||
111 | { | ||
112 | struct super_block *sb = watch->inode->i_sb; | ||
113 | if (atomic_inc_not_zero(&sb->s_active)) { | ||
114 | atomic_inc(&watch->count); | ||
115 | return 1; | ||
116 | } | ||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | /** | ||
121 | * put_inotify_watch - decrements the ref count on a given watch. cleans up | ||
122 | * watch references if the count reaches zero. inotify_watch is freed by | ||
123 | * inotify callers via the destroy_watch() op. | ||
124 | * @watch: watch to release | ||
125 | */ | ||
126 | void put_inotify_watch(struct inotify_watch *watch) | ||
127 | { | ||
128 | if (atomic_dec_and_test(&watch->count)) { | ||
129 | struct inotify_handle *ih = watch->ih; | ||
130 | |||
131 | iput(watch->inode); | ||
132 | ih->in_ops->destroy_watch(watch); | ||
133 | put_inotify_handle(ih); | ||
134 | } | ||
135 | } | ||
136 | EXPORT_SYMBOL_GPL(put_inotify_watch); | ||
137 | |||
138 | void unpin_inotify_watch(struct inotify_watch *watch) | ||
139 | { | ||
140 | struct super_block *sb = watch->inode->i_sb; | ||
141 | put_inotify_watch(watch); | ||
142 | deactivate_super(sb); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * inotify_handle_get_wd - returns the next WD for use by the given handle | ||
147 | * | ||
148 | * Callers must hold ih->mutex. This function can sleep. | ||
149 | */ | ||
150 | static int inotify_handle_get_wd(struct inotify_handle *ih, | ||
151 | struct inotify_watch *watch) | ||
152 | { | ||
153 | int ret; | ||
154 | |||
155 | do { | ||
156 | if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS))) | ||
157 | return -ENOSPC; | ||
158 | ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); | ||
159 | } while (ret == -EAGAIN); | ||
160 | |||
161 | if (likely(!ret)) | ||
162 | ih->last_wd = watch->wd; | ||
163 | |||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * inotify_inode_watched - returns nonzero if there are watches on this inode | ||
169 | * and zero otherwise. We call this lockless, we do not care if we race. | ||
170 | */ | ||
171 | static inline int inotify_inode_watched(struct inode *inode) | ||
172 | { | ||
173 | return !list_empty(&inode->inotify_watches); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Get child dentry flag into synch with parent inode. | ||
178 | * Flag should always be clear for negative dentrys. | ||
179 | */ | ||
180 | static void set_dentry_child_flags(struct inode *inode, int watched) | ||
181 | { | ||
182 | struct dentry *alias; | ||
183 | |||
184 | spin_lock(&dcache_lock); | ||
185 | list_for_each_entry(alias, &inode->i_dentry, d_alias) { | ||
186 | struct dentry *child; | ||
187 | |||
188 | list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { | ||
189 | if (!child->d_inode) | ||
190 | continue; | ||
191 | |||
192 | spin_lock(&child->d_lock); | ||
193 | if (watched) | ||
194 | child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | ||
195 | else | ||
196 | child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; | ||
197 | spin_unlock(&child->d_lock); | ||
198 | } | ||
199 | } | ||
200 | spin_unlock(&dcache_lock); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * inotify_find_handle - find the watch associated with the given inode and | ||
205 | * handle | ||
206 | * | ||
207 | * Callers must hold inode->inotify_mutex. | ||
208 | */ | ||
209 | static struct inotify_watch *inode_find_handle(struct inode *inode, | ||
210 | struct inotify_handle *ih) | ||
211 | { | ||
212 | struct inotify_watch *watch; | ||
213 | |||
214 | list_for_each_entry(watch, &inode->inotify_watches, i_list) { | ||
215 | if (watch->ih == ih) | ||
216 | return watch; | ||
217 | } | ||
218 | |||
219 | return NULL; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * remove_watch_no_event - remove watch without the IN_IGNORED event. | ||
224 | * | ||
225 | * Callers must hold both inode->inotify_mutex and ih->mutex. | ||
226 | */ | ||
227 | static void remove_watch_no_event(struct inotify_watch *watch, | ||
228 | struct inotify_handle *ih) | ||
229 | { | ||
230 | list_del(&watch->i_list); | ||
231 | list_del(&watch->h_list); | ||
232 | |||
233 | if (!inotify_inode_watched(watch->inode)) | ||
234 | set_dentry_child_flags(watch->inode, 0); | ||
235 | |||
236 | idr_remove(&ih->idr, watch->wd); | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * inotify_remove_watch_locked - Remove a watch from both the handle and the | ||
241 | * inode. Sends the IN_IGNORED event signifying that the inode is no longer | ||
242 | * watched. May be invoked from a caller's event handler. | ||
243 | * @ih: inotify handle associated with watch | ||
244 | * @watch: watch to remove | ||
245 | * | ||
246 | * Callers must hold both inode->inotify_mutex and ih->mutex. | ||
247 | */ | ||
248 | void inotify_remove_watch_locked(struct inotify_handle *ih, | ||
249 | struct inotify_watch *watch) | ||
250 | { | ||
251 | remove_watch_no_event(watch, ih); | ||
252 | ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL); | ||
253 | } | ||
254 | EXPORT_SYMBOL_GPL(inotify_remove_watch_locked); | ||
255 | |||
256 | /* Kernel API for producing events */ | ||
257 | |||
258 | /* | ||
259 | * inotify_d_instantiate - instantiate dcache entry for inode | ||
260 | */ | ||
261 | void inotify_d_instantiate(struct dentry *entry, struct inode *inode) | ||
262 | { | ||
263 | struct dentry *parent; | ||
264 | |||
265 | if (!inode) | ||
266 | return; | ||
267 | |||
268 | spin_lock(&entry->d_lock); | ||
269 | parent = entry->d_parent; | ||
270 | if (parent->d_inode && inotify_inode_watched(parent->d_inode)) | ||
271 | entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | ||
272 | spin_unlock(&entry->d_lock); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * inotify_d_move - dcache entry has been moved | ||
277 | */ | ||
278 | void inotify_d_move(struct dentry *entry) | ||
279 | { | ||
280 | struct dentry *parent; | ||
281 | |||
282 | parent = entry->d_parent; | ||
283 | if (inotify_inode_watched(parent->d_inode)) | ||
284 | entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | ||
285 | else | ||
286 | entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; | ||
287 | } | ||
288 | |||
289 | /** | ||
290 | * inotify_inode_queue_event - queue an event to all watches on this inode | ||
291 | * @inode: inode event is originating from | ||
292 | * @mask: event mask describing this event | ||
293 | * @cookie: cookie for synchronization, or zero | ||
294 | * @name: filename, if any | ||
295 | * @n_inode: inode associated with name | ||
296 | */ | ||
297 | void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie, | ||
298 | const char *name, struct inode *n_inode) | ||
299 | { | ||
300 | struct inotify_watch *watch, *next; | ||
301 | |||
302 | if (!inotify_inode_watched(inode)) | ||
303 | return; | ||
304 | |||
305 | mutex_lock(&inode->inotify_mutex); | ||
306 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { | ||
307 | u32 watch_mask = watch->mask; | ||
308 | if (watch_mask & mask) { | ||
309 | struct inotify_handle *ih= watch->ih; | ||
310 | mutex_lock(&ih->mutex); | ||
311 | if (watch_mask & IN_ONESHOT) | ||
312 | remove_watch_no_event(watch, ih); | ||
313 | ih->in_ops->handle_event(watch, watch->wd, mask, cookie, | ||
314 | name, n_inode); | ||
315 | mutex_unlock(&ih->mutex); | ||
316 | } | ||
317 | } | ||
318 | mutex_unlock(&inode->inotify_mutex); | ||
319 | } | ||
320 | EXPORT_SYMBOL_GPL(inotify_inode_queue_event); | ||
321 | |||
322 | /** | ||
323 | * inotify_dentry_parent_queue_event - queue an event to a dentry's parent | ||
324 | * @dentry: the dentry in question, we queue against this dentry's parent | ||
325 | * @mask: event mask describing this event | ||
326 | * @cookie: cookie for synchronization, or zero | ||
327 | * @name: filename, if any | ||
328 | */ | ||
329 | void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, | ||
330 | u32 cookie, const char *name) | ||
331 | { | ||
332 | struct dentry *parent; | ||
333 | struct inode *inode; | ||
334 | |||
335 | if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED)) | ||
336 | return; | ||
337 | |||
338 | spin_lock(&dentry->d_lock); | ||
339 | parent = dentry->d_parent; | ||
340 | inode = parent->d_inode; | ||
341 | |||
342 | if (inotify_inode_watched(inode)) { | ||
343 | dget(parent); | ||
344 | spin_unlock(&dentry->d_lock); | ||
345 | inotify_inode_queue_event(inode, mask, cookie, name, | ||
346 | dentry->d_inode); | ||
347 | dput(parent); | ||
348 | } else | ||
349 | spin_unlock(&dentry->d_lock); | ||
350 | } | ||
351 | EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event); | ||
352 | |||
353 | /** | ||
354 | * inotify_get_cookie - return a unique cookie for use in synchronizing events. | ||
355 | */ | ||
356 | u32 inotify_get_cookie(void) | ||
357 | { | ||
358 | return atomic_inc_return(&inotify_cookie); | ||
359 | } | ||
360 | EXPORT_SYMBOL_GPL(inotify_get_cookie); | ||
361 | |||
362 | /** | ||
363 | * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes. | ||
364 | * @list: list of inodes being unmounted (sb->s_inodes) | ||
365 | * | ||
366 | * Called with inode_lock held, protecting the unmounting super block's list | ||
367 | * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. | ||
368 | * We temporarily drop inode_lock, however, and CAN block. | ||
369 | */ | ||
370 | void inotify_unmount_inodes(struct list_head *list) | ||
371 | { | ||
372 | struct inode *inode, *next_i, *need_iput = NULL; | ||
373 | |||
374 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { | ||
375 | struct inotify_watch *watch, *next_w; | ||
376 | struct inode *need_iput_tmp; | ||
377 | struct list_head *watches; | ||
378 | |||
379 | /* | ||
380 | * We cannot __iget() an inode in state I_CLEAR, I_FREEING, | ||
381 | * I_WILL_FREE, or I_NEW which is fine because by that point | ||
382 | * the inode cannot have any associated watches. | ||
383 | */ | ||
384 | if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) | ||
385 | continue; | ||
386 | |||
387 | /* | ||
388 | * If i_count is zero, the inode cannot have any watches and | ||
389 | * doing an __iget/iput with MS_ACTIVE clear would actually | ||
390 | * evict all inodes with zero i_count from icache which is | ||
391 | * unnecessarily violent and may in fact be illegal to do. | ||
392 | */ | ||
393 | if (!atomic_read(&inode->i_count)) | ||
394 | continue; | ||
395 | |||
396 | need_iput_tmp = need_iput; | ||
397 | need_iput = NULL; | ||
398 | /* In case inotify_remove_watch_locked() drops a reference. */ | ||
399 | if (inode != need_iput_tmp) | ||
400 | __iget(inode); | ||
401 | else | ||
402 | need_iput_tmp = NULL; | ||
403 | /* In case the dropping of a reference would nuke next_i. */ | ||
404 | if ((&next_i->i_sb_list != list) && | ||
405 | atomic_read(&next_i->i_count) && | ||
406 | !(next_i->i_state & (I_CLEAR | I_FREEING | | ||
407 | I_WILL_FREE))) { | ||
408 | __iget(next_i); | ||
409 | need_iput = next_i; | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * We can safely drop inode_lock here because we hold | ||
414 | * references on both inode and next_i. Also no new inodes | ||
415 | * will be added since the umount has begun. Finally, | ||
416 | * iprune_mutex keeps shrink_icache_memory() away. | ||
417 | */ | ||
418 | spin_unlock(&inode_lock); | ||
419 | |||
420 | if (need_iput_tmp) | ||
421 | iput(need_iput_tmp); | ||
422 | |||
423 | /* for each watch, send IN_UNMOUNT and then remove it */ | ||
424 | mutex_lock(&inode->inotify_mutex); | ||
425 | watches = &inode->inotify_watches; | ||
426 | list_for_each_entry_safe(watch, next_w, watches, i_list) { | ||
427 | struct inotify_handle *ih= watch->ih; | ||
428 | get_inotify_watch(watch); | ||
429 | mutex_lock(&ih->mutex); | ||
430 | ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, | ||
431 | NULL, NULL); | ||
432 | inotify_remove_watch_locked(ih, watch); | ||
433 | mutex_unlock(&ih->mutex); | ||
434 | put_inotify_watch(watch); | ||
435 | } | ||
436 | mutex_unlock(&inode->inotify_mutex); | ||
437 | iput(inode); | ||
438 | |||
439 | spin_lock(&inode_lock); | ||
440 | } | ||
441 | } | ||
442 | EXPORT_SYMBOL_GPL(inotify_unmount_inodes); | ||
443 | |||
444 | /** | ||
445 | * inotify_inode_is_dead - an inode has been deleted, cleanup any watches | ||
446 | * @inode: inode that is about to be removed | ||
447 | */ | ||
448 | void inotify_inode_is_dead(struct inode *inode) | ||
449 | { | ||
450 | struct inotify_watch *watch, *next; | ||
451 | |||
452 | mutex_lock(&inode->inotify_mutex); | ||
453 | list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) { | ||
454 | struct inotify_handle *ih = watch->ih; | ||
455 | mutex_lock(&ih->mutex); | ||
456 | inotify_remove_watch_locked(ih, watch); | ||
457 | mutex_unlock(&ih->mutex); | ||
458 | } | ||
459 | mutex_unlock(&inode->inotify_mutex); | ||
460 | } | ||
461 | EXPORT_SYMBOL_GPL(inotify_inode_is_dead); | ||
462 | |||
463 | /* Kernel Consumer API */ | ||
464 | |||
465 | /** | ||
466 | * inotify_init - allocate and initialize an inotify instance | ||
467 | * @ops: caller's inotify operations | ||
468 | */ | ||
469 | struct inotify_handle *inotify_init(const struct inotify_operations *ops) | ||
470 | { | ||
471 | struct inotify_handle *ih; | ||
472 | |||
473 | ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL); | ||
474 | if (unlikely(!ih)) | ||
475 | return ERR_PTR(-ENOMEM); | ||
476 | |||
477 | idr_init(&ih->idr); | ||
478 | INIT_LIST_HEAD(&ih->watches); | ||
479 | mutex_init(&ih->mutex); | ||
480 | ih->last_wd = 0; | ||
481 | ih->in_ops = ops; | ||
482 | atomic_set(&ih->count, 0); | ||
483 | get_inotify_handle(ih); | ||
484 | |||
485 | return ih; | ||
486 | } | ||
487 | EXPORT_SYMBOL_GPL(inotify_init); | ||
488 | |||
489 | /** | ||
490 | * inotify_init_watch - initialize an inotify watch | ||
491 | * @watch: watch to initialize | ||
492 | */ | ||
493 | void inotify_init_watch(struct inotify_watch *watch) | ||
494 | { | ||
495 | INIT_LIST_HEAD(&watch->h_list); | ||
496 | INIT_LIST_HEAD(&watch->i_list); | ||
497 | atomic_set(&watch->count, 0); | ||
498 | get_inotify_watch(watch); /* initial get */ | ||
499 | } | ||
500 | EXPORT_SYMBOL_GPL(inotify_init_watch); | ||
501 | |||
502 | /* | ||
503 | * Watch removals suck violently. To kick the watch out we need (in this | ||
504 | * order) inode->inotify_mutex and ih->mutex. That's fine if we have | ||
505 | * a hold on inode; however, for all other cases we need to make damn sure | ||
506 | * we don't race with umount. We can *NOT* just grab a reference to a | ||
507 | * watch - inotify_unmount_inodes() will happily sail past it and we'll end | ||
508 | * with reference to inode potentially outliving its superblock. Ideally | ||
509 | * we just want to grab an active reference to superblock if we can; that | ||
510 | * will make sure we won't go into inotify_umount_inodes() until we are | ||
511 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | ||
512 | * case - what if we *are* racing with umount() and active references to | ||
513 | * superblock can't be acquired anymore? We can bump ->s_count, grab | ||
514 | * ->s_umount, which will wait until the superblock is shut down and the | ||
515 | * watch in question is pining for fjords. | ||
516 | * | ||
517 | * And yes, this is far beyond mere "not very pretty"; so's the entire | ||
518 | * concept of inotify to start with. | ||
519 | */ | ||
520 | |||
521 | /** | ||
522 | * pin_to_kill - pin the watch down for removal | ||
523 | * @ih: inotify handle | ||
524 | * @watch: watch to kill | ||
525 | * | ||
526 | * Called with ih->mutex held, drops it. Possible return values: | ||
527 | * 0 - nothing to do, it has died | ||
528 | * 1 - remove it, drop the reference and deactivate_super() | ||
529 | */ | ||
530 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | ||
531 | { | ||
532 | struct super_block *sb = watch->inode->i_sb; | ||
533 | |||
534 | if (atomic_inc_not_zero(&sb->s_active)) { | ||
535 | get_inotify_watch(watch); | ||
536 | mutex_unlock(&ih->mutex); | ||
537 | return 1; /* the best outcome */ | ||
538 | } | ||
539 | spin_lock(&sb_lock); | ||
540 | sb->s_count++; | ||
541 | spin_unlock(&sb_lock); | ||
542 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | ||
543 | down_read(&sb->s_umount); | ||
544 | /* fs is already shut down; the watch is dead */ | ||
545 | drop_super(sb); | ||
546 | return 0; | ||
547 | } | ||
548 | |||
549 | static void unpin_and_kill(struct inotify_watch *watch) | ||
550 | { | ||
551 | struct super_block *sb = watch->inode->i_sb; | ||
552 | put_inotify_watch(watch); | ||
553 | deactivate_super(sb); | ||
554 | } | ||
555 | |||
556 | /** | ||
557 | * inotify_destroy - clean up and destroy an inotify instance | ||
558 | * @ih: inotify handle | ||
559 | */ | ||
560 | void inotify_destroy(struct inotify_handle *ih) | ||
561 | { | ||
562 | /* | ||
563 | * Destroy all of the watches for this handle. Unfortunately, not very | ||
564 | * pretty. We cannot do a simple iteration over the list, because we | ||
565 | * do not know the inode until we iterate to the watch. But we need to | ||
566 | * hold inode->inotify_mutex before ih->mutex. The following works. | ||
567 | * | ||
568 | * AV: it had to become even uglier to start working ;-/ | ||
569 | */ | ||
570 | while (1) { | ||
571 | struct inotify_watch *watch; | ||
572 | struct list_head *watches; | ||
573 | struct super_block *sb; | ||
574 | struct inode *inode; | ||
575 | |||
576 | mutex_lock(&ih->mutex); | ||
577 | watches = &ih->watches; | ||
578 | if (list_empty(watches)) { | ||
579 | mutex_unlock(&ih->mutex); | ||
580 | break; | ||
581 | } | ||
582 | watch = list_first_entry(watches, struct inotify_watch, h_list); | ||
583 | sb = watch->inode->i_sb; | ||
584 | if (!pin_to_kill(ih, watch)) | ||
585 | continue; | ||
586 | |||
587 | inode = watch->inode; | ||
588 | mutex_lock(&inode->inotify_mutex); | ||
589 | mutex_lock(&ih->mutex); | ||
590 | |||
591 | /* make sure we didn't race with another list removal */ | ||
592 | if (likely(idr_find(&ih->idr, watch->wd))) { | ||
593 | remove_watch_no_event(watch, ih); | ||
594 | put_inotify_watch(watch); | ||
595 | } | ||
596 | |||
597 | mutex_unlock(&ih->mutex); | ||
598 | mutex_unlock(&inode->inotify_mutex); | ||
599 | unpin_and_kill(watch); | ||
600 | } | ||
601 | |||
602 | /* free this handle: the put matching the get in inotify_init() */ | ||
603 | put_inotify_handle(ih); | ||
604 | } | ||
605 | EXPORT_SYMBOL_GPL(inotify_destroy); | ||
606 | |||
607 | /** | ||
608 | * inotify_find_watch - find an existing watch for an (ih,inode) pair | ||
609 | * @ih: inotify handle | ||
610 | * @inode: inode to watch | ||
611 | * @watchp: pointer to existing inotify_watch | ||
612 | * | ||
613 | * Caller must pin given inode (via nameidata). | ||
614 | */ | ||
615 | s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode, | ||
616 | struct inotify_watch **watchp) | ||
617 | { | ||
618 | struct inotify_watch *old; | ||
619 | int ret = -ENOENT; | ||
620 | |||
621 | mutex_lock(&inode->inotify_mutex); | ||
622 | mutex_lock(&ih->mutex); | ||
623 | |||
624 | old = inode_find_handle(inode, ih); | ||
625 | if (unlikely(old)) { | ||
626 | get_inotify_watch(old); /* caller must put watch */ | ||
627 | *watchp = old; | ||
628 | ret = old->wd; | ||
629 | } | ||
630 | |||
631 | mutex_unlock(&ih->mutex); | ||
632 | mutex_unlock(&inode->inotify_mutex); | ||
633 | |||
634 | return ret; | ||
635 | } | ||
636 | EXPORT_SYMBOL_GPL(inotify_find_watch); | ||
637 | |||
638 | /** | ||
639 | * inotify_find_update_watch - find and update the mask of an existing watch | ||
640 | * @ih: inotify handle | ||
641 | * @inode: inode's watch to update | ||
642 | * @mask: mask of events to watch | ||
643 | * | ||
644 | * Caller must pin given inode (via nameidata). | ||
645 | */ | ||
646 | s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode, | ||
647 | u32 mask) | ||
648 | { | ||
649 | struct inotify_watch *old; | ||
650 | int mask_add = 0; | ||
651 | int ret; | ||
652 | |||
653 | if (mask & IN_MASK_ADD) | ||
654 | mask_add = 1; | ||
655 | |||
656 | /* don't allow invalid bits: we don't want flags set */ | ||
657 | mask &= IN_ALL_EVENTS | IN_ONESHOT; | ||
658 | if (unlikely(!mask)) | ||
659 | return -EINVAL; | ||
660 | |||
661 | mutex_lock(&inode->inotify_mutex); | ||
662 | mutex_lock(&ih->mutex); | ||
663 | |||
664 | /* | ||
665 | * Handle the case of re-adding a watch on an (inode,ih) pair that we | ||
666 | * are already watching. We just update the mask and return its wd. | ||
667 | */ | ||
668 | old = inode_find_handle(inode, ih); | ||
669 | if (unlikely(!old)) { | ||
670 | ret = -ENOENT; | ||
671 | goto out; | ||
672 | } | ||
673 | |||
674 | if (mask_add) | ||
675 | old->mask |= mask; | ||
676 | else | ||
677 | old->mask = mask; | ||
678 | ret = old->wd; | ||
679 | out: | ||
680 | mutex_unlock(&ih->mutex); | ||
681 | mutex_unlock(&inode->inotify_mutex); | ||
682 | return ret; | ||
683 | } | ||
684 | EXPORT_SYMBOL_GPL(inotify_find_update_watch); | ||
685 | |||
686 | /** | ||
687 | * inotify_add_watch - add a watch to an inotify instance | ||
688 | * @ih: inotify handle | ||
689 | * @watch: caller allocated watch structure | ||
690 | * @inode: inode to watch | ||
691 | * @mask: mask of events to watch | ||
692 | * | ||
693 | * Caller must pin given inode (via nameidata). | ||
694 | * Caller must ensure it only calls inotify_add_watch() once per watch. | ||
695 | * Calls inotify_handle_get_wd() so may sleep. | ||
696 | */ | ||
697 | s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, | ||
698 | struct inode *inode, u32 mask) | ||
699 | { | ||
700 | int ret = 0; | ||
701 | int newly_watched; | ||
702 | |||
703 | /* don't allow invalid bits: we don't want flags set */ | ||
704 | mask &= IN_ALL_EVENTS | IN_ONESHOT; | ||
705 | if (unlikely(!mask)) | ||
706 | return -EINVAL; | ||
707 | watch->mask = mask; | ||
708 | |||
709 | mutex_lock(&inode->inotify_mutex); | ||
710 | mutex_lock(&ih->mutex); | ||
711 | |||
712 | /* Initialize a new watch */ | ||
713 | ret = inotify_handle_get_wd(ih, watch); | ||
714 | if (unlikely(ret)) | ||
715 | goto out; | ||
716 | ret = watch->wd; | ||
717 | |||
718 | /* save a reference to handle and bump the count to make it official */ | ||
719 | get_inotify_handle(ih); | ||
720 | watch->ih = ih; | ||
721 | |||
722 | /* | ||
723 | * Save a reference to the inode and bump the ref count to make it | ||
724 | * official. We hold a reference to nameidata, which makes this safe. | ||
725 | */ | ||
726 | watch->inode = igrab(inode); | ||
727 | |||
728 | /* Add the watch to the handle's and the inode's list */ | ||
729 | newly_watched = !inotify_inode_watched(inode); | ||
730 | list_add(&watch->h_list, &ih->watches); | ||
731 | list_add(&watch->i_list, &inode->inotify_watches); | ||
732 | /* | ||
733 | * Set child flags _after_ adding the watch, so there is no race | ||
734 | * windows where newly instantiated children could miss their parent's | ||
735 | * watched flag. | ||
736 | */ | ||
737 | if (newly_watched) | ||
738 | set_dentry_child_flags(inode, 1); | ||
739 | |||
740 | out: | ||
741 | mutex_unlock(&ih->mutex); | ||
742 | mutex_unlock(&inode->inotify_mutex); | ||
743 | return ret; | ||
744 | } | ||
745 | EXPORT_SYMBOL_GPL(inotify_add_watch); | ||
746 | |||
747 | /** | ||
748 | * inotify_clone_watch - put the watch next to existing one | ||
749 | * @old: already installed watch | ||
750 | * @new: new watch | ||
751 | * | ||
752 | * Caller must hold the inotify_mutex of inode we are dealing with; | ||
753 | * it is expected to remove the old watch before unlocking the inode. | ||
754 | */ | ||
755 | s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new) | ||
756 | { | ||
757 | struct inotify_handle *ih = old->ih; | ||
758 | int ret = 0; | ||
759 | |||
760 | new->mask = old->mask; | ||
761 | new->ih = ih; | ||
762 | |||
763 | mutex_lock(&ih->mutex); | ||
764 | |||
765 | /* Initialize a new watch */ | ||
766 | ret = inotify_handle_get_wd(ih, new); | ||
767 | if (unlikely(ret)) | ||
768 | goto out; | ||
769 | ret = new->wd; | ||
770 | |||
771 | get_inotify_handle(ih); | ||
772 | |||
773 | new->inode = igrab(old->inode); | ||
774 | |||
775 | list_add(&new->h_list, &ih->watches); | ||
776 | list_add(&new->i_list, &old->inode->inotify_watches); | ||
777 | out: | ||
778 | mutex_unlock(&ih->mutex); | ||
779 | return ret; | ||
780 | } | ||
781 | |||
782 | void inotify_evict_watch(struct inotify_watch *watch) | ||
783 | { | ||
784 | get_inotify_watch(watch); | ||
785 | mutex_lock(&watch->ih->mutex); | ||
786 | inotify_remove_watch_locked(watch->ih, watch); | ||
787 | mutex_unlock(&watch->ih->mutex); | ||
788 | } | ||
789 | |||
790 | /** | ||
791 | * inotify_rm_wd - remove a watch from an inotify instance | ||
792 | * @ih: inotify handle | ||
793 | * @wd: watch descriptor to remove | ||
794 | * | ||
795 | * Can sleep. | ||
796 | */ | ||
797 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | ||
798 | { | ||
799 | struct inotify_watch *watch; | ||
800 | struct super_block *sb; | ||
801 | struct inode *inode; | ||
802 | |||
803 | mutex_lock(&ih->mutex); | ||
804 | watch = idr_find(&ih->idr, wd); | ||
805 | if (unlikely(!watch)) { | ||
806 | mutex_unlock(&ih->mutex); | ||
807 | return -EINVAL; | ||
808 | } | ||
809 | sb = watch->inode->i_sb; | ||
810 | if (!pin_to_kill(ih, watch)) | ||
811 | return 0; | ||
812 | |||
813 | inode = watch->inode; | ||
814 | |||
815 | mutex_lock(&inode->inotify_mutex); | ||
816 | mutex_lock(&ih->mutex); | ||
817 | |||
818 | /* make sure that we did not race */ | ||
819 | if (likely(idr_find(&ih->idr, wd) == watch)) | ||
820 | inotify_remove_watch_locked(ih, watch); | ||
821 | |||
822 | mutex_unlock(&ih->mutex); | ||
823 | mutex_unlock(&inode->inotify_mutex); | ||
824 | unpin_and_kill(watch); | ||
825 | |||
826 | return 0; | ||
827 | } | ||
828 | EXPORT_SYMBOL_GPL(inotify_rm_wd); | ||
829 | |||
830 | /** | ||
831 | * inotify_rm_watch - remove a watch from an inotify instance | ||
832 | * @ih: inotify handle | ||
833 | * @watch: watch to remove | ||
834 | * | ||
835 | * Can sleep. | ||
836 | */ | ||
837 | int inotify_rm_watch(struct inotify_handle *ih, | ||
838 | struct inotify_watch *watch) | ||
839 | { | ||
840 | return inotify_rm_wd(ih, watch->wd); | ||
841 | } | ||
842 | EXPORT_SYMBOL_GPL(inotify_rm_watch); | ||
843 | |||
844 | /* | ||
845 | * inotify_setup - core initialization function | ||
846 | */ | ||
847 | static int __init inotify_setup(void) | ||
848 | { | ||
849 | BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); | ||
850 | BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); | ||
851 | BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); | ||
852 | BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); | ||
853 | BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); | ||
854 | BUILD_BUG_ON(IN_OPEN != FS_OPEN); | ||
855 | BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); | ||
856 | BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); | ||
857 | BUILD_BUG_ON(IN_CREATE != FS_CREATE); | ||
858 | BUILD_BUG_ON(IN_DELETE != FS_DELETE); | ||
859 | BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); | ||
860 | BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); | ||
861 | BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); | ||
862 | |||
863 | BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); | ||
864 | BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); | ||
865 | BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); | ||
866 | BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); | ||
867 | |||
868 | atomic_set(&inotify_cookie, 0); | ||
869 | |||
870 | return 0; | ||
871 | } | ||
872 | |||
873 | module_init(inotify_setup); | ||
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index f234f3a4c8ca..b6642e4de4bf 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h | |||
@@ -9,13 +9,12 @@ struct inotify_event_private_data { | |||
9 | int wd; | 9 | int wd; |
10 | }; | 10 | }; |
11 | 11 | ||
12 | struct inotify_inode_mark_entry { | 12 | struct inotify_inode_mark { |
13 | /* fsnotify_mark_entry MUST be the first thing */ | 13 | struct fsnotify_mark fsn_mark; |
14 | struct fsnotify_mark_entry fsn_entry; | ||
15 | int wd; | 14 | int wd; |
16 | }; | 15 | }; |
17 | 16 | ||
18 | extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | 17 | extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, |
19 | struct fsnotify_group *group); | 18 | struct fsnotify_group *group); |
20 | extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); | 19 | extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); |
21 | 20 | ||
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index e27960cd76ab..5e73eeb2c697 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -22,6 +22,7 @@ | |||
22 | * General Public License for more details. | 22 | * General Public License for more details. |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/dcache.h> /* d_unlinked */ | ||
25 | #include <linux/fs.h> /* struct inode */ | 26 | #include <linux/fs.h> /* struct inode */ |
26 | #include <linux/fsnotify_backend.h> | 27 | #include <linux/fsnotify_backend.h> |
27 | #include <linux/inotify.h> | 28 | #include <linux/inotify.h> |
@@ -32,26 +33,84 @@ | |||
32 | 33 | ||
33 | #include "inotify.h" | 34 | #include "inotify.h" |
34 | 35 | ||
35 | static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event) | 36 | /* |
37 | * Check if 2 events contain the same information. We do not compare private data | ||
38 | * but at this moment that isn't a problem for any know fsnotify listeners. | ||
39 | */ | ||
40 | static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) | ||
41 | { | ||
42 | if ((old->mask == new->mask) && | ||
43 | (old->to_tell == new->to_tell) && | ||
44 | (old->data_type == new->data_type) && | ||
45 | (old->name_len == new->name_len)) { | ||
46 | switch (old->data_type) { | ||
47 | case (FSNOTIFY_EVENT_INODE): | ||
48 | /* remember, after old was put on the wait_q we aren't | ||
49 | * allowed to look at the inode any more, only thing | ||
50 | * left to check was if the file_name is the same */ | ||
51 | if (!old->name_len || | ||
52 | !strcmp(old->file_name, new->file_name)) | ||
53 | return true; | ||
54 | break; | ||
55 | case (FSNOTIFY_EVENT_FILE): | ||
56 | if ((old->file->f_path.mnt == new->file->f_path.mnt) && | ||
57 | (old->file->f_path.dentry == new->file->f_path.dentry)) | ||
58 | return true; | ||
59 | break; | ||
60 | case (FSNOTIFY_EVENT_NONE): | ||
61 | if (old->mask & FS_Q_OVERFLOW) | ||
62 | return true; | ||
63 | else if (old->mask & FS_IN_IGNORED) | ||
64 | return false; | ||
65 | return true; | ||
66 | }; | ||
67 | } | ||
68 | return false; | ||
69 | } | ||
70 | |||
71 | static struct fsnotify_event *inotify_merge(struct list_head *list, | ||
72 | struct fsnotify_event *event) | ||
36 | { | 73 | { |
37 | struct fsnotify_mark_entry *entry; | 74 | struct fsnotify_event_holder *last_holder; |
38 | struct inotify_inode_mark_entry *ientry; | 75 | struct fsnotify_event *last_event; |
76 | |||
77 | /* and the list better be locked by something too */ | ||
78 | spin_lock(&event->lock); | ||
79 | |||
80 | last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); | ||
81 | last_event = last_holder->event; | ||
82 | if (event_compare(last_event, event)) | ||
83 | fsnotify_get_event(last_event); | ||
84 | else | ||
85 | last_event = NULL; | ||
86 | |||
87 | spin_unlock(&event->lock); | ||
88 | |||
89 | return last_event; | ||
90 | } | ||
91 | |||
92 | static int inotify_handle_event(struct fsnotify_group *group, | ||
93 | struct fsnotify_mark *inode_mark, | ||
94 | struct fsnotify_mark *vfsmount_mark, | ||
95 | struct fsnotify_event *event) | ||
96 | { | ||
97 | struct inotify_inode_mark *i_mark; | ||
39 | struct inode *to_tell; | 98 | struct inode *to_tell; |
40 | struct inotify_event_private_data *event_priv; | 99 | struct inotify_event_private_data *event_priv; |
41 | struct fsnotify_event_private_data *fsn_event_priv; | 100 | struct fsnotify_event_private_data *fsn_event_priv; |
42 | int wd, ret; | 101 | struct fsnotify_event *added_event; |
102 | int wd, ret = 0; | ||
103 | |||
104 | BUG_ON(vfsmount_mark); | ||
105 | |||
106 | pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group, | ||
107 | event, event->to_tell, event->mask); | ||
43 | 108 | ||
44 | to_tell = event->to_tell; | 109 | to_tell = event->to_tell; |
45 | 110 | ||
46 | spin_lock(&to_tell->i_lock); | 111 | i_mark = container_of(inode_mark, struct inotify_inode_mark, |
47 | entry = fsnotify_find_mark_entry(group, to_tell); | 112 | fsn_mark); |
48 | spin_unlock(&to_tell->i_lock); | 113 | wd = i_mark->wd; |
49 | /* race with watch removal? We already passes should_send */ | ||
50 | if (unlikely(!entry)) | ||
51 | return 0; | ||
52 | ientry = container_of(entry, struct inotify_inode_mark_entry, | ||
53 | fsn_entry); | ||
54 | wd = ientry->wd; | ||
55 | 114 | ||
56 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); | 115 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); |
57 | if (unlikely(!event_priv)) | 116 | if (unlikely(!event_priv)) |
@@ -62,48 +121,40 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev | |||
62 | fsn_event_priv->group = group; | 121 | fsn_event_priv->group = group; |
63 | event_priv->wd = wd; | 122 | event_priv->wd = wd; |
64 | 123 | ||
65 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); | 124 | added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge); |
66 | if (ret) { | 125 | if (added_event) { |
67 | inotify_free_event_priv(fsn_event_priv); | 126 | inotify_free_event_priv(fsn_event_priv); |
68 | /* EEXIST says we tail matched, EOVERFLOW isn't something | 127 | if (!IS_ERR(added_event)) |
69 | * to report up the stack. */ | 128 | fsnotify_put_event(added_event); |
70 | if ((ret == -EEXIST) || | 129 | else |
71 | (ret == -EOVERFLOW)) | 130 | ret = PTR_ERR(added_event); |
72 | ret = 0; | ||
73 | } | 131 | } |
74 | 132 | ||
75 | /* | 133 | if (inode_mark->mask & IN_ONESHOT) |
76 | * If we hold the entry until after the event is on the queue | 134 | fsnotify_destroy_mark(inode_mark); |
77 | * IN_IGNORED won't be able to pass this event in the queue | ||
78 | */ | ||
79 | fsnotify_put_mark(entry); | ||
80 | 135 | ||
81 | return ret; | 136 | return ret; |
82 | } | 137 | } |
83 | 138 | ||
84 | static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) | 139 | static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) |
85 | { | 140 | { |
86 | inotify_ignored_and_remove_idr(entry, group); | 141 | inotify_ignored_and_remove_idr(fsn_mark, group); |
87 | } | 142 | } |
88 | 143 | ||
89 | static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) | 144 | static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, |
145 | struct fsnotify_mark *inode_mark, | ||
146 | struct fsnotify_mark *vfsmount_mark, | ||
147 | __u32 mask, void *data, int data_type) | ||
90 | { | 148 | { |
91 | struct fsnotify_mark_entry *entry; | 149 | if ((inode_mark->mask & FS_EXCL_UNLINK) && |
92 | bool send; | 150 | (data_type == FSNOTIFY_EVENT_FILE)) { |
93 | 151 | struct file *file = data; | |
94 | spin_lock(&inode->i_lock); | ||
95 | entry = fsnotify_find_mark_entry(group, inode); | ||
96 | spin_unlock(&inode->i_lock); | ||
97 | if (!entry) | ||
98 | return false; | ||
99 | 152 | ||
100 | mask = (mask & ~FS_EVENT_ON_CHILD); | 153 | if (d_unlinked(file->f_path.dentry)) |
101 | send = (entry->mask & mask); | 154 | return false; |
102 | 155 | } | |
103 | /* find took a reference */ | ||
104 | fsnotify_put_mark(entry); | ||
105 | 156 | ||
106 | return send; | 157 | return true; |
107 | } | 158 | } |
108 | 159 | ||
109 | /* | 160 | /* |
@@ -115,18 +166,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
115 | */ | 166 | */ |
116 | static int idr_callback(int id, void *p, void *data) | 167 | static int idr_callback(int id, void *p, void *data) |
117 | { | 168 | { |
118 | struct fsnotify_mark_entry *entry; | 169 | struct fsnotify_mark *fsn_mark; |
119 | struct inotify_inode_mark_entry *ientry; | 170 | struct inotify_inode_mark *i_mark; |
120 | static bool warned = false; | 171 | static bool warned = false; |
121 | 172 | ||
122 | if (warned) | 173 | if (warned) |
123 | return 0; | 174 | return 0; |
124 | 175 | ||
125 | warned = true; | 176 | warned = true; |
126 | entry = p; | 177 | fsn_mark = p; |
127 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 178 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); |
128 | 179 | ||
129 | WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in " | 180 | WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in " |
130 | "idr. Probably leaking memory\n", id, p, data); | 181 | "idr. Probably leaking memory\n", id, p, data); |
131 | 182 | ||
132 | /* | 183 | /* |
@@ -135,9 +186,9 @@ static int idr_callback(int id, void *p, void *data) | |||
135 | * out why we got here and the panic is no worse than the original | 186 | * out why we got here and the panic is no worse than the original |
136 | * BUG() that was here. | 187 | * BUG() that was here. |
137 | */ | 188 | */ |
138 | if (entry) | 189 | if (fsn_mark) |
139 | printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n", | 190 | printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n", |
140 | entry->group, entry->inode, ientry->wd); | 191 | fsn_mark->group, fsn_mark->i.inode, i_mark->wd); |
141 | return 0; | 192 | return 0; |
142 | } | 193 | } |
143 | 194 | ||
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index e46ca685b9be..bf7f6d776c31 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -46,17 +46,11 @@ | |||
46 | /* these are configurable via /proc/sys/fs/inotify/ */ | 46 | /* these are configurable via /proc/sys/fs/inotify/ */ |
47 | static int inotify_max_user_instances __read_mostly; | 47 | static int inotify_max_user_instances __read_mostly; |
48 | static int inotify_max_queued_events __read_mostly; | 48 | static int inotify_max_queued_events __read_mostly; |
49 | int inotify_max_user_watches __read_mostly; | 49 | static int inotify_max_user_watches __read_mostly; |
50 | 50 | ||
51 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; | 51 | static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; |
52 | struct kmem_cache *event_priv_cachep __read_mostly; | 52 | struct kmem_cache *event_priv_cachep __read_mostly; |
53 | 53 | ||
54 | /* | ||
55 | * When inotify registers a new group it increments this and uses that | ||
56 | * value as an offset to set the fsnotify group "name" and priority. | ||
57 | */ | ||
58 | static atomic_t inotify_grp_num; | ||
59 | |||
60 | #ifdef CONFIG_SYSCTL | 54 | #ifdef CONFIG_SYSCTL |
61 | 55 | ||
62 | #include <linux/sysctl.h> | 56 | #include <linux/sysctl.h> |
@@ -96,11 +90,14 @@ static inline __u32 inotify_arg_to_mask(u32 arg) | |||
96 | { | 90 | { |
97 | __u32 mask; | 91 | __u32 mask; |
98 | 92 | ||
99 | /* everything should accept their own ignored and cares about children */ | 93 | /* |
100 | mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); | 94 | * everything should accept their own ignored, cares about children, |
95 | * and should receive events when the inode is unmounted | ||
96 | */ | ||
97 | mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT); | ||
101 | 98 | ||
102 | /* mask off the flags used to open the fd */ | 99 | /* mask off the flags used to open the fd */ |
103 | mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); | 100 | mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); |
104 | 101 | ||
105 | return mask; | 102 | return mask; |
106 | } | 103 | } |
@@ -144,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
144 | 141 | ||
145 | event = fsnotify_peek_notify_event(group); | 142 | event = fsnotify_peek_notify_event(group); |
146 | 143 | ||
144 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
145 | |||
147 | if (event->name_len) | 146 | if (event->name_len) |
148 | event_size += roundup(event->name_len + 1, event_size); | 147 | event_size += roundup(event->name_len + 1, event_size); |
149 | 148 | ||
@@ -173,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
173 | size_t event_size = sizeof(struct inotify_event); | 172 | size_t event_size = sizeof(struct inotify_event); |
174 | size_t name_len = 0; | 173 | size_t name_len = 0; |
175 | 174 | ||
175 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | ||
176 | |||
176 | /* we get the inotify watch descriptor from the event private data */ | 177 | /* we get the inotify watch descriptor from the event private data */ |
177 | spin_lock(&event->lock); | 178 | spin_lock(&event->lock); |
178 | fsn_priv = fsnotify_remove_priv_from_event(group, event); | 179 | fsn_priv = fsnotify_remove_priv_from_event(group, event); |
@@ -245,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
245 | kevent = get_one_event(group, count); | 246 | kevent = get_one_event(group, count); |
246 | mutex_unlock(&group->notification_mutex); | 247 | mutex_unlock(&group->notification_mutex); |
247 | 248 | ||
249 | pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent); | ||
250 | |||
248 | if (kevent) { | 251 | if (kevent) { |
249 | ret = PTR_ERR(kevent); | 252 | ret = PTR_ERR(kevent); |
250 | if (IS_ERR(kevent)) | 253 | if (IS_ERR(kevent)) |
@@ -289,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file) | |||
289 | struct fsnotify_group *group = file->private_data; | 292 | struct fsnotify_group *group = file->private_data; |
290 | struct user_struct *user = group->inotify_data.user; | 293 | struct user_struct *user = group->inotify_data.user; |
291 | 294 | ||
295 | pr_debug("%s: group=%p\n", __func__, group); | ||
296 | |||
292 | fsnotify_clear_marks_by_group(group); | 297 | fsnotify_clear_marks_by_group(group); |
293 | 298 | ||
294 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ | 299 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ |
@@ -312,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, | |||
312 | group = file->private_data; | 317 | group = file->private_data; |
313 | p = (void __user *) arg; | 318 | p = (void __user *) arg; |
314 | 319 | ||
320 | pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd); | ||
321 | |||
315 | switch (cmd) { | 322 | switch (cmd) { |
316 | case FIONREAD: | 323 | case FIONREAD: |
317 | mutex_lock(&group->notification_mutex); | 324 | mutex_lock(&group->notification_mutex); |
@@ -357,59 +364,159 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns | |||
357 | return error; | 364 | return error; |
358 | } | 365 | } |
359 | 366 | ||
367 | static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, | ||
368 | int *last_wd, | ||
369 | struct inotify_inode_mark *i_mark) | ||
370 | { | ||
371 | int ret; | ||
372 | |||
373 | do { | ||
374 | if (unlikely(!idr_pre_get(idr, GFP_KERNEL))) | ||
375 | return -ENOMEM; | ||
376 | |||
377 | spin_lock(idr_lock); | ||
378 | ret = idr_get_new_above(idr, i_mark, *last_wd + 1, | ||
379 | &i_mark->wd); | ||
380 | /* we added the mark to the idr, take a reference */ | ||
381 | if (!ret) { | ||
382 | *last_wd = i_mark->wd; | ||
383 | fsnotify_get_mark(&i_mark->fsn_mark); | ||
384 | } | ||
385 | spin_unlock(idr_lock); | ||
386 | } while (ret == -EAGAIN); | ||
387 | |||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, | ||
392 | int wd) | ||
393 | { | ||
394 | struct idr *idr = &group->inotify_data.idr; | ||
395 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; | ||
396 | struct inotify_inode_mark *i_mark; | ||
397 | |||
398 | assert_spin_locked(idr_lock); | ||
399 | |||
400 | i_mark = idr_find(idr, wd); | ||
401 | if (i_mark) { | ||
402 | struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark; | ||
403 | |||
404 | fsnotify_get_mark(fsn_mark); | ||
405 | /* One ref for being in the idr, one ref we just took */ | ||
406 | BUG_ON(atomic_read(&fsn_mark->refcnt) < 2); | ||
407 | } | ||
408 | |||
409 | return i_mark; | ||
410 | } | ||
411 | |||
412 | static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, | ||
413 | int wd) | ||
414 | { | ||
415 | struct inotify_inode_mark *i_mark; | ||
416 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; | ||
417 | |||
418 | spin_lock(idr_lock); | ||
419 | i_mark = inotify_idr_find_locked(group, wd); | ||
420 | spin_unlock(idr_lock); | ||
421 | |||
422 | return i_mark; | ||
423 | } | ||
424 | |||
425 | static void do_inotify_remove_from_idr(struct fsnotify_group *group, | ||
426 | struct inotify_inode_mark *i_mark) | ||
427 | { | ||
428 | struct idr *idr = &group->inotify_data.idr; | ||
429 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; | ||
430 | int wd = i_mark->wd; | ||
431 | |||
432 | assert_spin_locked(idr_lock); | ||
433 | |||
434 | idr_remove(idr, wd); | ||
435 | |||
436 | /* removed from the idr, drop that ref */ | ||
437 | fsnotify_put_mark(&i_mark->fsn_mark); | ||
438 | } | ||
439 | |||
360 | /* | 440 | /* |
361 | * Remove the mark from the idr (if present) and drop the reference | 441 | * Remove the mark from the idr (if present) and drop the reference |
362 | * on the mark because it was in the idr. | 442 | * on the mark because it was in the idr. |
363 | */ | 443 | */ |
364 | static void inotify_remove_from_idr(struct fsnotify_group *group, | 444 | static void inotify_remove_from_idr(struct fsnotify_group *group, |
365 | struct inotify_inode_mark_entry *ientry) | 445 | struct inotify_inode_mark *i_mark) |
366 | { | 446 | { |
367 | struct idr *idr; | 447 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; |
368 | struct fsnotify_mark_entry *entry; | 448 | struct inotify_inode_mark *found_i_mark = NULL; |
369 | struct inotify_inode_mark_entry *found_ientry; | ||
370 | int wd; | 449 | int wd; |
371 | 450 | ||
372 | spin_lock(&group->inotify_data.idr_lock); | 451 | spin_lock(idr_lock); |
373 | idr = &group->inotify_data.idr; | 452 | wd = i_mark->wd; |
374 | wd = ientry->wd; | ||
375 | 453 | ||
376 | if (wd == -1) | 454 | /* |
455 | * does this i_mark think it is in the idr? we shouldn't get called | ||
456 | * if it wasn't.... | ||
457 | */ | ||
458 | if (wd == -1) { | ||
459 | WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" | ||
460 | " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, | ||
461 | i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); | ||
377 | goto out; | 462 | goto out; |
463 | } | ||
378 | 464 | ||
379 | entry = idr_find(&group->inotify_data.idr, wd); | 465 | /* Lets look in the idr to see if we find it */ |
380 | if (unlikely(!entry)) | 466 | found_i_mark = inotify_idr_find_locked(group, wd); |
467 | if (unlikely(!found_i_mark)) { | ||
468 | WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" | ||
469 | " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, | ||
470 | i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); | ||
381 | goto out; | 471 | goto out; |
472 | } | ||
382 | 473 | ||
383 | found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 474 | /* |
384 | if (unlikely(found_ientry != ientry)) { | 475 | * We found an mark in the idr at the right wd, but it's |
385 | /* We found an entry in the idr with the right wd, but it's | 476 | * not the mark we were told to remove. eparis seriously |
386 | * not the entry we were told to remove. eparis seriously | 477 | * fucked up somewhere. |
387 | * fucked up somewhere. */ | 478 | */ |
388 | WARN_ON(1); | 479 | if (unlikely(found_i_mark != i_mark)) { |
389 | ientry->wd = -1; | 480 | WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " |
481 | "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " | ||
482 | "found_i_mark->group=%p found_i_mark->inode=%p\n", | ||
483 | __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, | ||
484 | i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd, | ||
485 | found_i_mark->fsn_mark.group, | ||
486 | found_i_mark->fsn_mark.i.inode); | ||
390 | goto out; | 487 | goto out; |
391 | } | 488 | } |
392 | 489 | ||
393 | /* One ref for being in the idr, one ref held by the caller */ | 490 | /* |
394 | BUG_ON(atomic_read(&entry->refcnt) < 2); | 491 | * One ref for being in the idr |
395 | 492 | * one ref held by the caller trying to kill us | |
396 | idr_remove(idr, wd); | 493 | * one ref grabbed by inotify_idr_find |
397 | ientry->wd = -1; | 494 | */ |
495 | if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { | ||
496 | printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" | ||
497 | " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, | ||
498 | i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); | ||
499 | /* we can't really recover with bad ref cnting.. */ | ||
500 | BUG(); | ||
501 | } | ||
398 | 502 | ||
399 | /* removed from the idr, drop that ref */ | 503 | do_inotify_remove_from_idr(group, i_mark); |
400 | fsnotify_put_mark(entry); | ||
401 | out: | 504 | out: |
402 | spin_unlock(&group->inotify_data.idr_lock); | 505 | /* match the ref taken by inotify_idr_find_locked() */ |
506 | if (found_i_mark) | ||
507 | fsnotify_put_mark(&found_i_mark->fsn_mark); | ||
508 | i_mark->wd = -1; | ||
509 | spin_unlock(idr_lock); | ||
403 | } | 510 | } |
404 | 511 | ||
405 | /* | 512 | /* |
406 | * Send IN_IGNORED for this wd, remove this wd from the idr. | 513 | * Send IN_IGNORED for this wd, remove this wd from the idr. |
407 | */ | 514 | */ |
408 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | 515 | void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, |
409 | struct fsnotify_group *group) | 516 | struct fsnotify_group *group) |
410 | { | 517 | { |
411 | struct inotify_inode_mark_entry *ientry; | 518 | struct inotify_inode_mark *i_mark; |
412 | struct fsnotify_event *ignored_event; | 519 | struct fsnotify_event *ignored_event, *notify_event; |
413 | struct inotify_event_private_data *event_priv; | 520 | struct inotify_event_private_data *event_priv; |
414 | struct fsnotify_event_private_data *fsn_event_priv; | 521 | struct fsnotify_event_private_data *fsn_event_priv; |
415 | int ret; | 522 | int ret; |
@@ -420,7 +527,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
420 | if (!ignored_event) | 527 | if (!ignored_event) |
421 | return; | 528 | return; |
422 | 529 | ||
423 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 530 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); |
424 | 531 | ||
425 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); | 532 | event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); |
426 | if (unlikely(!event_priv)) | 533 | if (unlikely(!event_priv)) |
@@ -429,37 +536,44 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
429 | fsn_event_priv = &event_priv->fsnotify_event_priv_data; | 536 | fsn_event_priv = &event_priv->fsnotify_event_priv_data; |
430 | 537 | ||
431 | fsn_event_priv->group = group; | 538 | fsn_event_priv->group = group; |
432 | event_priv->wd = ientry->wd; | 539 | event_priv->wd = i_mark->wd; |
433 | 540 | ||
434 | ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); | 541 | notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL); |
435 | if (ret) | 542 | if (notify_event) { |
543 | if (IS_ERR(notify_event)) | ||
544 | ret = PTR_ERR(notify_event); | ||
545 | else | ||
546 | fsnotify_put_event(notify_event); | ||
436 | inotify_free_event_priv(fsn_event_priv); | 547 | inotify_free_event_priv(fsn_event_priv); |
548 | } | ||
437 | 549 | ||
438 | skip_send_ignore: | 550 | skip_send_ignore: |
439 | 551 | ||
440 | /* matches the reference taken when the event was created */ | 552 | /* matches the reference taken when the event was created */ |
441 | fsnotify_put_event(ignored_event); | 553 | fsnotify_put_event(ignored_event); |
442 | 554 | ||
443 | /* remove this entry from the idr */ | 555 | /* remove this mark from the idr */ |
444 | inotify_remove_from_idr(group, ientry); | 556 | inotify_remove_from_idr(group, i_mark); |
445 | 557 | ||
446 | atomic_dec(&group->inotify_data.user->inotify_watches); | 558 | atomic_dec(&group->inotify_data.user->inotify_watches); |
447 | } | 559 | } |
448 | 560 | ||
449 | /* ding dong the mark is dead */ | 561 | /* ding dong the mark is dead */ |
450 | static void inotify_free_mark(struct fsnotify_mark_entry *entry) | 562 | static void inotify_free_mark(struct fsnotify_mark *fsn_mark) |
451 | { | 563 | { |
452 | struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry; | 564 | struct inotify_inode_mark *i_mark; |
565 | |||
566 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); | ||
453 | 567 | ||
454 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | 568 | kmem_cache_free(inotify_inode_mark_cachep, i_mark); |
455 | } | 569 | } |
456 | 570 | ||
457 | static int inotify_update_existing_watch(struct fsnotify_group *group, | 571 | static int inotify_update_existing_watch(struct fsnotify_group *group, |
458 | struct inode *inode, | 572 | struct inode *inode, |
459 | u32 arg) | 573 | u32 arg) |
460 | { | 574 | { |
461 | struct fsnotify_mark_entry *entry; | 575 | struct fsnotify_mark *fsn_mark; |
462 | struct inotify_inode_mark_entry *ientry; | 576 | struct inotify_inode_mark *i_mark; |
463 | __u32 old_mask, new_mask; | 577 | __u32 old_mask, new_mask; |
464 | __u32 mask; | 578 | __u32 mask; |
465 | int add = (arg & IN_MASK_ADD); | 579 | int add = (arg & IN_MASK_ADD); |
@@ -467,52 +581,43 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, | |||
467 | 581 | ||
468 | /* don't allow invalid bits: we don't want flags set */ | 582 | /* don't allow invalid bits: we don't want flags set */ |
469 | mask = inotify_arg_to_mask(arg); | 583 | mask = inotify_arg_to_mask(arg); |
470 | if (unlikely(!mask)) | 584 | if (unlikely(!(mask & IN_ALL_EVENTS))) |
471 | return -EINVAL; | 585 | return -EINVAL; |
472 | 586 | ||
473 | spin_lock(&inode->i_lock); | 587 | fsn_mark = fsnotify_find_inode_mark(group, inode); |
474 | entry = fsnotify_find_mark_entry(group, inode); | 588 | if (!fsn_mark) |
475 | spin_unlock(&inode->i_lock); | ||
476 | if (!entry) | ||
477 | return -ENOENT; | 589 | return -ENOENT; |
478 | 590 | ||
479 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 591 | i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); |
480 | 592 | ||
481 | spin_lock(&entry->lock); | 593 | spin_lock(&fsn_mark->lock); |
482 | 594 | ||
483 | old_mask = entry->mask; | 595 | old_mask = fsn_mark->mask; |
484 | if (add) { | 596 | if (add) |
485 | entry->mask |= mask; | 597 | fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask)); |
486 | new_mask = entry->mask; | 598 | else |
487 | } else { | 599 | fsnotify_set_mark_mask_locked(fsn_mark, mask); |
488 | entry->mask = mask; | 600 | new_mask = fsn_mark->mask; |
489 | new_mask = entry->mask; | ||
490 | } | ||
491 | 601 | ||
492 | spin_unlock(&entry->lock); | 602 | spin_unlock(&fsn_mark->lock); |
493 | 603 | ||
494 | if (old_mask != new_mask) { | 604 | if (old_mask != new_mask) { |
495 | /* more bits in old than in new? */ | 605 | /* more bits in old than in new? */ |
496 | int dropped = (old_mask & ~new_mask); | 606 | int dropped = (old_mask & ~new_mask); |
497 | /* more bits in this entry than the inode's mask? */ | 607 | /* more bits in this fsn_mark than the inode's mask? */ |
498 | int do_inode = (new_mask & ~inode->i_fsnotify_mask); | 608 | int do_inode = (new_mask & ~inode->i_fsnotify_mask); |
499 | /* more bits in this entry than the group? */ | ||
500 | int do_group = (new_mask & ~group->mask); | ||
501 | 609 | ||
502 | /* update the inode with this new entry */ | 610 | /* update the inode with this new fsn_mark */ |
503 | if (dropped || do_inode) | 611 | if (dropped || do_inode) |
504 | fsnotify_recalc_inode_mask(inode); | 612 | fsnotify_recalc_inode_mask(inode); |
505 | 613 | ||
506 | /* update the group mask with the new mask */ | ||
507 | if (dropped || do_group) | ||
508 | fsnotify_recalc_group_mask(group); | ||
509 | } | 614 | } |
510 | 615 | ||
511 | /* return the wd */ | 616 | /* return the wd */ |
512 | ret = ientry->wd; | 617 | ret = i_mark->wd; |
513 | 618 | ||
514 | /* match the get from fsnotify_find_mark_entry() */ | 619 | /* match the get from fsnotify_find_mark() */ |
515 | fsnotify_put_mark(entry); | 620 | fsnotify_put_mark(fsn_mark); |
516 | 621 | ||
517 | return ret; | 622 | return ret; |
518 | } | 623 | } |
@@ -521,73 +626,51 @@ static int inotify_new_watch(struct fsnotify_group *group, | |||
521 | struct inode *inode, | 626 | struct inode *inode, |
522 | u32 arg) | 627 | u32 arg) |
523 | { | 628 | { |
524 | struct inotify_inode_mark_entry *tmp_ientry; | 629 | struct inotify_inode_mark *tmp_i_mark; |
525 | __u32 mask; | 630 | __u32 mask; |
526 | int ret; | 631 | int ret; |
632 | struct idr *idr = &group->inotify_data.idr; | ||
633 | spinlock_t *idr_lock = &group->inotify_data.idr_lock; | ||
527 | 634 | ||
528 | /* don't allow invalid bits: we don't want flags set */ | 635 | /* don't allow invalid bits: we don't want flags set */ |
529 | mask = inotify_arg_to_mask(arg); | 636 | mask = inotify_arg_to_mask(arg); |
530 | if (unlikely(!mask)) | 637 | if (unlikely(!(mask & IN_ALL_EVENTS))) |
531 | return -EINVAL; | 638 | return -EINVAL; |
532 | 639 | ||
533 | tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | 640 | tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); |
534 | if (unlikely(!tmp_ientry)) | 641 | if (unlikely(!tmp_i_mark)) |
535 | return -ENOMEM; | 642 | return -ENOMEM; |
536 | 643 | ||
537 | fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); | 644 | fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark); |
538 | tmp_ientry->fsn_entry.mask = mask; | 645 | tmp_i_mark->fsn_mark.mask = mask; |
539 | tmp_ientry->wd = -1; | 646 | tmp_i_mark->wd = -1; |
540 | 647 | ||
541 | ret = -ENOSPC; | 648 | ret = -ENOSPC; |
542 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) | 649 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) |
543 | goto out_err; | 650 | goto out_err; |
544 | retry: | ||
545 | ret = -ENOMEM; | ||
546 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
547 | goto out_err; | ||
548 | 651 | ||
549 | /* we are putting the mark on the idr, take a reference */ | 652 | ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd, |
550 | fsnotify_get_mark(&tmp_ientry->fsn_entry); | 653 | tmp_i_mark); |
551 | 654 | if (ret) | |
552 | spin_lock(&group->inotify_data.idr_lock); | ||
553 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | ||
554 | group->inotify_data.last_wd+1, | ||
555 | &tmp_ientry->wd); | ||
556 | spin_unlock(&group->inotify_data.idr_lock); | ||
557 | if (ret) { | ||
558 | /* we didn't get on the idr, drop the idr reference */ | ||
559 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | ||
560 | |||
561 | /* idr was out of memory allocate and try again */ | ||
562 | if (ret == -EAGAIN) | ||
563 | goto retry; | ||
564 | goto out_err; | 655 | goto out_err; |
565 | } | ||
566 | 656 | ||
567 | /* we are on the idr, now get on the inode */ | 657 | /* we are on the idr, now get on the inode */ |
568 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | 658 | ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); |
569 | if (ret) { | 659 | if (ret) { |
570 | /* we failed to get on the inode, get off the idr */ | 660 | /* we failed to get on the inode, get off the idr */ |
571 | inotify_remove_from_idr(group, tmp_ientry); | 661 | inotify_remove_from_idr(group, tmp_i_mark); |
572 | goto out_err; | 662 | goto out_err; |
573 | } | 663 | } |
574 | 664 | ||
575 | /* update the idr hint, who cares about races, it's just a hint */ | ||
576 | group->inotify_data.last_wd = tmp_ientry->wd; | ||
577 | |||
578 | /* increment the number of watches the user has */ | 665 | /* increment the number of watches the user has */ |
579 | atomic_inc(&group->inotify_data.user->inotify_watches); | 666 | atomic_inc(&group->inotify_data.user->inotify_watches); |
580 | 667 | ||
581 | /* return the watch descriptor for this new entry */ | 668 | /* return the watch descriptor for this new mark */ |
582 | ret = tmp_ientry->wd; | 669 | ret = tmp_i_mark->wd; |
583 | |||
584 | /* if this mark added a new event update the group mask */ | ||
585 | if (mask & ~group->mask) | ||
586 | fsnotify_recalc_group_mask(group); | ||
587 | 670 | ||
588 | out_err: | 671 | out_err: |
589 | /* match the ref from fsnotify_init_markentry() */ | 672 | /* match the ref from fsnotify_init_mark() */ |
590 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | 673 | fsnotify_put_mark(&tmp_i_mark->fsn_mark); |
591 | 674 | ||
592 | return ret; | 675 | return ret; |
593 | } | 676 | } |
@@ -616,11 +699,8 @@ retry: | |||
616 | static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) | 699 | static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) |
617 | { | 700 | { |
618 | struct fsnotify_group *group; | 701 | struct fsnotify_group *group; |
619 | unsigned int grp_num; | ||
620 | 702 | ||
621 | /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ | 703 | group = fsnotify_alloc_group(&inotify_fsnotify_ops); |
622 | grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num)); | ||
623 | group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops); | ||
624 | if (IS_ERR(group)) | 704 | if (IS_ERR(group)) |
625 | return group; | 705 | return group; |
626 | 706 | ||
@@ -726,7 +806,7 @@ fput_and_out: | |||
726 | SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | 806 | SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) |
727 | { | 807 | { |
728 | struct fsnotify_group *group; | 808 | struct fsnotify_group *group; |
729 | struct fsnotify_mark_entry *entry; | 809 | struct inotify_inode_mark *i_mark; |
730 | struct file *filp; | 810 | struct file *filp; |
731 | int ret = 0, fput_needed; | 811 | int ret = 0, fput_needed; |
732 | 812 | ||
@@ -735,25 +815,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) | |||
735 | return -EBADF; | 815 | return -EBADF; |
736 | 816 | ||
737 | /* verify that this is indeed an inotify instance */ | 817 | /* verify that this is indeed an inotify instance */ |
738 | if (unlikely(filp->f_op != &inotify_fops)) { | 818 | ret = -EINVAL; |
739 | ret = -EINVAL; | 819 | if (unlikely(filp->f_op != &inotify_fops)) |
740 | goto out; | 820 | goto out; |
741 | } | ||
742 | 821 | ||
743 | group = filp->private_data; | 822 | group = filp->private_data; |
744 | 823 | ||
745 | spin_lock(&group->inotify_data.idr_lock); | 824 | ret = -EINVAL; |
746 | entry = idr_find(&group->inotify_data.idr, wd); | 825 | i_mark = inotify_idr_find(group, wd); |
747 | if (unlikely(!entry)) { | 826 | if (unlikely(!i_mark)) |
748 | spin_unlock(&group->inotify_data.idr_lock); | ||
749 | ret = -EINVAL; | ||
750 | goto out; | 827 | goto out; |
751 | } | ||
752 | fsnotify_get_mark(entry); | ||
753 | spin_unlock(&group->inotify_data.idr_lock); | ||
754 | 828 | ||
755 | fsnotify_destroy_mark_by_entry(entry); | 829 | ret = 0; |
756 | fsnotify_put_mark(entry); | 830 | |
831 | fsnotify_destroy_mark(&i_mark->fsn_mark); | ||
832 | |||
833 | /* match ref taken by inotify_idr_find */ | ||
834 | fsnotify_put_mark(&i_mark->fsn_mark); | ||
757 | 835 | ||
758 | out: | 836 | out: |
759 | fput_light(filp, fput_needed); | 837 | fput_light(filp, fput_needed); |
@@ -767,7 +845,28 @@ out: | |||
767 | */ | 845 | */ |
768 | static int __init inotify_user_setup(void) | 846 | static int __init inotify_user_setup(void) |
769 | { | 847 | { |
770 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); | 848 | BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); |
849 | BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); | ||
850 | BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); | ||
851 | BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); | ||
852 | BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); | ||
853 | BUILD_BUG_ON(IN_OPEN != FS_OPEN); | ||
854 | BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); | ||
855 | BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); | ||
856 | BUILD_BUG_ON(IN_CREATE != FS_CREATE); | ||
857 | BUILD_BUG_ON(IN_DELETE != FS_DELETE); | ||
858 | BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); | ||
859 | BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); | ||
860 | BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); | ||
861 | BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); | ||
862 | BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); | ||
863 | BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); | ||
864 | BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); | ||
865 | BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); | ||
866 | |||
867 | BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); | ||
868 | |||
869 | inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); | ||
771 | event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); | 870 | event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); |
772 | 871 | ||
773 | inotify_max_queued_events = 16384; | 872 | inotify_max_queued_events = 16384; |
diff --git a/fs/notify/mark.c b/fs/notify/mark.c new file mode 100644 index 000000000000..325185e514bb --- /dev/null +++ b/fs/notify/mark.c | |||
@@ -0,0 +1,371 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; see the file COPYING. If not, write to | ||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
17 | */ | ||
18 | |||
19 | /* | ||
20 | * fsnotify inode mark locking/lifetime/and refcnting | ||
21 | * | ||
22 | * REFCNT: | ||
23 | * The mark->refcnt tells how many "things" in the kernel currently are | ||
24 | * referencing this object. The object typically will live inside the kernel | ||
25 | * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task | ||
26 | * which can find this object holding the appropriete locks, can take a reference | ||
27 | * and the object itself is guarenteed to survive until the reference is dropped. | ||
28 | * | ||
29 | * LOCKING: | ||
30 | * There are 3 spinlocks involved with fsnotify inode marks and they MUST | ||
31 | * be taken in order as follows: | ||
32 | * | ||
33 | * mark->lock | ||
34 | * group->mark_lock | ||
35 | * inode->i_lock | ||
36 | * | ||
37 | * mark->lock protects 2 things, mark->group and mark->inode. You must hold | ||
38 | * that lock to dereference either of these things (they could be NULL even with | ||
39 | * the lock) | ||
40 | * | ||
41 | * group->mark_lock protects the marks_list anchored inside a given group | ||
42 | * and each mark is hooked via the g_list. It also sorta protects the | ||
43 | * free_g_list, which when used is anchored by a private list on the stack of the | ||
44 | * task which held the group->mark_lock. | ||
45 | * | ||
46 | * inode->i_lock protects the i_fsnotify_marks list anchored inside a | ||
47 | * given inode and each mark is hooked via the i_list. (and sorta the | ||
48 | * free_i_list) | ||
49 | * | ||
50 | * | ||
51 | * LIFETIME: | ||
52 | * Inode marks survive between when they are added to an inode and when their | ||
53 | * refcnt==0. | ||
54 | * | ||
55 | * The inode mark can be cleared for a number of different reasons including: | ||
56 | * - The inode is unlinked for the last time. (fsnotify_inode_remove) | ||
57 | * - The inode is being evicted from cache. (fsnotify_inode_delete) | ||
58 | * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes) | ||
59 | * - Something explicitly requests that it be removed. (fsnotify_destroy_mark) | ||
60 | * - The fsnotify_group associated with the mark is going away and all such marks | ||
61 | * need to be cleaned up. (fsnotify_clear_marks_by_group) | ||
62 | * | ||
63 | * Worst case we are given an inode and need to clean up all the marks on that | ||
64 | * inode. We take i_lock and walk the i_fsnotify_marks safely. For each | ||
65 | * mark on the list we take a reference (so the mark can't disappear under us). | ||
66 | * We remove that mark form the inode's list of marks and we add this mark to a | ||
67 | * private list anchored on the stack using i_free_list; At this point we no | ||
68 | * longer fear anything finding the mark using the inode's list of marks. | ||
69 | * | ||
70 | * We can safely and locklessly run the private list on the stack of everything | ||
71 | * we just unattached from the original inode. For each mark on the private list | ||
72 | * we grab the mark-> and can thus dereference mark->group and mark->inode. If | ||
73 | * we see the group and inode are not NULL we take those locks. Now holding all | ||
74 | * 3 locks we can completely remove the mark from other tasks finding it in the | ||
75 | * future. Remember, 10 things might already be referencing this mark, but they | ||
76 | * better be holding a ref. We drop our reference we took before we unhooked it | ||
77 | * from the inode. When the ref hits 0 we can free the mark. | ||
78 | * | ||
79 | * Very similarly for freeing by group, except we use free_g_list. | ||
80 | * | ||
81 | * This has the very interesting property of being able to run concurrently with | ||
82 | * any (or all) other directions. | ||
83 | */ | ||
84 | |||
85 | #include <linux/fs.h> | ||
86 | #include <linux/init.h> | ||
87 | #include <linux/kernel.h> | ||
88 | #include <linux/kthread.h> | ||
89 | #include <linux/module.h> | ||
90 | #include <linux/mutex.h> | ||
91 | #include <linux/slab.h> | ||
92 | #include <linux/spinlock.h> | ||
93 | #include <linux/srcu.h> | ||
94 | #include <linux/writeback.h> /* for inode_lock */ | ||
95 | |||
96 | #include <asm/atomic.h> | ||
97 | |||
98 | #include <linux/fsnotify_backend.h> | ||
99 | #include "fsnotify.h" | ||
100 | |||
101 | struct srcu_struct fsnotify_mark_srcu; | ||
102 | static DEFINE_SPINLOCK(destroy_lock); | ||
103 | static LIST_HEAD(destroy_list); | ||
104 | static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); | ||
105 | |||
106 | void fsnotify_get_mark(struct fsnotify_mark *mark) | ||
107 | { | ||
108 | atomic_inc(&mark->refcnt); | ||
109 | } | ||
110 | |||
111 | void fsnotify_put_mark(struct fsnotify_mark *mark) | ||
112 | { | ||
113 | if (atomic_dec_and_test(&mark->refcnt)) | ||
114 | mark->free_mark(mark); | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * Any time a mark is getting freed we end up here. | ||
119 | * The caller had better be holding a reference to this mark so we don't actually | ||
120 | * do the final put under the mark->lock | ||
121 | */ | ||
122 | void fsnotify_destroy_mark(struct fsnotify_mark *mark) | ||
123 | { | ||
124 | struct fsnotify_group *group; | ||
125 | struct inode *inode = NULL; | ||
126 | |||
127 | spin_lock(&mark->lock); | ||
128 | |||
129 | group = mark->group; | ||
130 | |||
131 | /* something else already called this function on this mark */ | ||
132 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { | ||
133 | spin_unlock(&mark->lock); | ||
134 | return; | ||
135 | } | ||
136 | |||
137 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; | ||
138 | |||
139 | /* 1 from caller and 1 for being on i_list/g_list */ | ||
140 | BUG_ON(atomic_read(&mark->refcnt) < 2); | ||
141 | |||
142 | spin_lock(&group->mark_lock); | ||
143 | |||
144 | if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { | ||
145 | inode = mark->i.inode; | ||
146 | fsnotify_destroy_inode_mark(mark); | ||
147 | } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) | ||
148 | fsnotify_destroy_vfsmount_mark(mark); | ||
149 | else | ||
150 | BUG(); | ||
151 | |||
152 | list_del_init(&mark->g_list); | ||
153 | |||
154 | spin_unlock(&group->mark_lock); | ||
155 | spin_unlock(&mark->lock); | ||
156 | |||
157 | spin_lock(&destroy_lock); | ||
158 | list_add(&mark->destroy_list, &destroy_list); | ||
159 | spin_unlock(&destroy_lock); | ||
160 | wake_up(&destroy_waitq); | ||
161 | |||
162 | /* | ||
163 | * Some groups like to know that marks are being freed. This is a | ||
164 | * callback to the group function to let it know that this mark | ||
165 | * is being freed. | ||
166 | */ | ||
167 | if (group->ops->freeing_mark) | ||
168 | group->ops->freeing_mark(mark, group); | ||
169 | |||
170 | /* | ||
171 | * __fsnotify_update_child_dentry_flags(inode); | ||
172 | * | ||
173 | * I really want to call that, but we can't, we have no idea if the inode | ||
174 | * still exists the second we drop the mark->lock. | ||
175 | * | ||
176 | * The next time an event arrive to this inode from one of it's children | ||
177 | * __fsnotify_parent will see that the inode doesn't care about it's | ||
178 | * children and will update all of these flags then. So really this | ||
179 | * is just a lazy update (and could be a perf win...) | ||
180 | */ | ||
181 | |||
182 | if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) | ||
183 | iput(inode); | ||
184 | |||
185 | /* | ||
186 | * it's possible that this group tried to destroy itself, but this | ||
187 | * this mark was simultaneously being freed by inode. If that's the | ||
188 | * case, we finish freeing the group here. | ||
189 | */ | ||
190 | if (unlikely(atomic_dec_and_test(&group->num_marks))) | ||
191 | fsnotify_final_destroy_group(group); | ||
192 | } | ||
193 | |||
194 | void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) | ||
195 | { | ||
196 | assert_spin_locked(&mark->lock); | ||
197 | |||
198 | mark->mask = mask; | ||
199 | |||
200 | if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) | ||
201 | fsnotify_set_inode_mark_mask_locked(mark, mask); | ||
202 | } | ||
203 | |||
204 | void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask) | ||
205 | { | ||
206 | assert_spin_locked(&mark->lock); | ||
207 | |||
208 | mark->ignored_mask = mask; | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Attach an initialized mark to a given group and fs object. | ||
213 | * These marks may be used for the fsnotify backend to determine which | ||
214 | * event types should be delivered to which group. | ||
215 | */ | ||
216 | int fsnotify_add_mark(struct fsnotify_mark *mark, | ||
217 | struct fsnotify_group *group, struct inode *inode, | ||
218 | struct vfsmount *mnt, int allow_dups) | ||
219 | { | ||
220 | int ret = 0; | ||
221 | |||
222 | BUG_ON(inode && mnt); | ||
223 | BUG_ON(!inode && !mnt); | ||
224 | |||
225 | /* | ||
226 | * LOCKING ORDER!!!! | ||
227 | * mark->lock | ||
228 | * group->mark_lock | ||
229 | * inode->i_lock | ||
230 | */ | ||
231 | spin_lock(&mark->lock); | ||
232 | spin_lock(&group->mark_lock); | ||
233 | |||
234 | mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; | ||
235 | |||
236 | mark->group = group; | ||
237 | list_add(&mark->g_list, &group->marks_list); | ||
238 | atomic_inc(&group->num_marks); | ||
239 | fsnotify_get_mark(mark); /* for i_list and g_list */ | ||
240 | |||
241 | if (inode) { | ||
242 | ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups); | ||
243 | if (ret) | ||
244 | goto err; | ||
245 | } else if (mnt) { | ||
246 | ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups); | ||
247 | if (ret) | ||
248 | goto err; | ||
249 | } else { | ||
250 | BUG(); | ||
251 | } | ||
252 | |||
253 | spin_unlock(&group->mark_lock); | ||
254 | |||
255 | /* this will pin the object if appropriate */ | ||
256 | fsnotify_set_mark_mask_locked(mark, mark->mask); | ||
257 | |||
258 | spin_unlock(&mark->lock); | ||
259 | |||
260 | if (inode) | ||
261 | __fsnotify_update_child_dentry_flags(inode); | ||
262 | |||
263 | return ret; | ||
264 | err: | ||
265 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; | ||
266 | list_del_init(&mark->g_list); | ||
267 | mark->group = NULL; | ||
268 | atomic_dec(&group->num_marks); | ||
269 | |||
270 | spin_unlock(&group->mark_lock); | ||
271 | spin_unlock(&mark->lock); | ||
272 | |||
273 | spin_lock(&destroy_lock); | ||
274 | list_add(&mark->destroy_list, &destroy_list); | ||
275 | spin_unlock(&destroy_lock); | ||
276 | wake_up(&destroy_waitq); | ||
277 | |||
278 | return ret; | ||
279 | } | ||
280 | |||
281 | /* | ||
282 | * clear any marks in a group in which mark->flags & flags is true | ||
283 | */ | ||
284 | void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, | ||
285 | unsigned int flags) | ||
286 | { | ||
287 | struct fsnotify_mark *lmark, *mark; | ||
288 | LIST_HEAD(free_list); | ||
289 | |||
290 | spin_lock(&group->mark_lock); | ||
291 | list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { | ||
292 | if (mark->flags & flags) { | ||
293 | list_add(&mark->free_g_list, &free_list); | ||
294 | list_del_init(&mark->g_list); | ||
295 | fsnotify_get_mark(mark); | ||
296 | } | ||
297 | } | ||
298 | spin_unlock(&group->mark_lock); | ||
299 | |||
300 | list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) { | ||
301 | fsnotify_destroy_mark(mark); | ||
302 | fsnotify_put_mark(mark); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Given a group, destroy all of the marks associated with that group. | ||
308 | */ | ||
309 | void fsnotify_clear_marks_by_group(struct fsnotify_group *group) | ||
310 | { | ||
311 | fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1); | ||
312 | } | ||
313 | |||
314 | void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) | ||
315 | { | ||
316 | assert_spin_locked(&old->lock); | ||
317 | new->i.inode = old->i.inode; | ||
318 | new->m.mnt = old->m.mnt; | ||
319 | new->group = old->group; | ||
320 | new->mask = old->mask; | ||
321 | new->free_mark = old->free_mark; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Nothing fancy, just initialize lists and locks and counters. | ||
326 | */ | ||
327 | void fsnotify_init_mark(struct fsnotify_mark *mark, | ||
328 | void (*free_mark)(struct fsnotify_mark *mark)) | ||
329 | { | ||
330 | memset(mark, 0, sizeof(*mark)); | ||
331 | spin_lock_init(&mark->lock); | ||
332 | atomic_set(&mark->refcnt, 1); | ||
333 | mark->free_mark = free_mark; | ||
334 | } | ||
335 | |||
336 | static int fsnotify_mark_destroy(void *ignored) | ||
337 | { | ||
338 | struct fsnotify_mark *mark, *next; | ||
339 | LIST_HEAD(private_destroy_list); | ||
340 | |||
341 | for (;;) { | ||
342 | spin_lock(&destroy_lock); | ||
343 | /* exchange the list head */ | ||
344 | list_replace_init(&destroy_list, &private_destroy_list); | ||
345 | spin_unlock(&destroy_lock); | ||
346 | |||
347 | synchronize_srcu(&fsnotify_mark_srcu); | ||
348 | |||
349 | list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) { | ||
350 | list_del_init(&mark->destroy_list); | ||
351 | fsnotify_put_mark(mark); | ||
352 | } | ||
353 | |||
354 | wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); | ||
355 | } | ||
356 | |||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | static int __init fsnotify_mark_init(void) | ||
361 | { | ||
362 | struct task_struct *thread; | ||
363 | |||
364 | thread = kthread_run(fsnotify_mark_destroy, NULL, | ||
365 | "fsnotify_mark"); | ||
366 | if (IS_ERR(thread)) | ||
367 | panic("unable to start fsnotify mark destruction thread."); | ||
368 | |||
369 | return 0; | ||
370 | } | ||
371 | device_initcall(fsnotify_mark_init); | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index b8bf53b4c108..d6c435adc7a2 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -31,6 +31,7 @@ | |||
31 | * allocated and used. | 31 | * allocated and used. |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/file.h> | ||
34 | #include <linux/fs.h> | 35 | #include <linux/fs.h> |
35 | #include <linux/init.h> | 36 | #include <linux/init.h> |
36 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
@@ -56,7 +57,7 @@ static struct kmem_cache *fsnotify_event_holder_cachep; | |||
56 | * it is needed. It's refcnt is set 1 at kernel init time and will never | 57 | * it is needed. It's refcnt is set 1 at kernel init time and will never |
57 | * get set to 0 so it will never get 'freed' | 58 | * get set to 0 so it will never get 'freed' |
58 | */ | 59 | */ |
59 | static struct fsnotify_event q_overflow_event; | 60 | static struct fsnotify_event *q_overflow_event; |
60 | static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); | 61 | static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); |
61 | 62 | ||
62 | /** | 63 | /** |
@@ -87,12 +88,15 @@ void fsnotify_put_event(struct fsnotify_event *event) | |||
87 | return; | 88 | return; |
88 | 89 | ||
89 | if (atomic_dec_and_test(&event->refcnt)) { | 90 | if (atomic_dec_and_test(&event->refcnt)) { |
90 | if (event->data_type == FSNOTIFY_EVENT_PATH) | 91 | pr_debug("%s: event=%p\n", __func__, event); |
91 | path_put(&event->path); | 92 | |
93 | if (event->data_type == FSNOTIFY_EVENT_FILE) | ||
94 | fput(event->file); | ||
92 | 95 | ||
93 | BUG_ON(!list_empty(&event->private_data_list)); | 96 | BUG_ON(!list_empty(&event->private_data_list)); |
94 | 97 | ||
95 | kfree(event->file_name); | 98 | kfree(event->file_name); |
99 | put_pid(event->tgid); | ||
96 | kmem_cache_free(fsnotify_event_cachep, event); | 100 | kmem_cache_free(fsnotify_event_cachep, event); |
97 | } | 101 | } |
98 | } | 102 | } |
@@ -104,7 +108,8 @@ struct fsnotify_event_holder *fsnotify_alloc_event_holder(void) | |||
104 | 108 | ||
105 | void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) | 109 | void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) |
106 | { | 110 | { |
107 | kmem_cache_free(fsnotify_event_holder_cachep, holder); | 111 | if (holder) |
112 | kmem_cache_free(fsnotify_event_holder_cachep, holder); | ||
108 | } | 113 | } |
109 | 114 | ||
110 | /* | 115 | /* |
@@ -129,53 +134,20 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot | |||
129 | } | 134 | } |
130 | 135 | ||
131 | /* | 136 | /* |
132 | * Check if 2 events contain the same information. We do not compare private data | ||
133 | * but at this moment that isn't a problem for any know fsnotify listeners. | ||
134 | */ | ||
135 | static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) | ||
136 | { | ||
137 | if ((old->mask == new->mask) && | ||
138 | (old->to_tell == new->to_tell) && | ||
139 | (old->data_type == new->data_type) && | ||
140 | (old->name_len == new->name_len)) { | ||
141 | switch (old->data_type) { | ||
142 | case (FSNOTIFY_EVENT_INODE): | ||
143 | /* remember, after old was put on the wait_q we aren't | ||
144 | * allowed to look at the inode any more, only thing | ||
145 | * left to check was if the file_name is the same */ | ||
146 | if (!old->name_len || | ||
147 | !strcmp(old->file_name, new->file_name)) | ||
148 | return true; | ||
149 | break; | ||
150 | case (FSNOTIFY_EVENT_PATH): | ||
151 | if ((old->path.mnt == new->path.mnt) && | ||
152 | (old->path.dentry == new->path.dentry)) | ||
153 | return true; | ||
154 | break; | ||
155 | case (FSNOTIFY_EVENT_NONE): | ||
156 | if (old->mask & FS_Q_OVERFLOW) | ||
157 | return true; | ||
158 | else if (old->mask & FS_IN_IGNORED) | ||
159 | return false; | ||
160 | return false; | ||
161 | }; | ||
162 | } | ||
163 | return false; | ||
164 | } | ||
165 | |||
166 | /* | ||
167 | * Add an event to the group notification queue. The group can later pull this | 137 | * Add an event to the group notification queue. The group can later pull this |
168 | * event off the queue to deal with. If the event is successfully added to the | 138 | * event off the queue to deal with. If the event is successfully added to the |
169 | * group's notification queue, a reference is taken on event. | 139 | * group's notification queue, a reference is taken on event. |
170 | */ | 140 | */ |
171 | int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, | 141 | struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, |
172 | struct fsnotify_event_private_data *priv) | 142 | struct fsnotify_event_private_data *priv, |
143 | struct fsnotify_event *(*merge)(struct list_head *, | ||
144 | struct fsnotify_event *)) | ||
173 | { | 145 | { |
146 | struct fsnotify_event *return_event = NULL; | ||
174 | struct fsnotify_event_holder *holder = NULL; | 147 | struct fsnotify_event_holder *holder = NULL; |
175 | struct list_head *list = &group->notification_list; | 148 | struct list_head *list = &group->notification_list; |
176 | struct fsnotify_event_holder *last_holder; | 149 | |
177 | struct fsnotify_event *last_event; | 150 | pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv); |
178 | int ret = 0; | ||
179 | 151 | ||
180 | /* | 152 | /* |
181 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. | 153 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. |
@@ -189,18 +161,40 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even | |||
189 | alloc_holder: | 161 | alloc_holder: |
190 | holder = fsnotify_alloc_event_holder(); | 162 | holder = fsnotify_alloc_event_holder(); |
191 | if (!holder) | 163 | if (!holder) |
192 | return -ENOMEM; | 164 | return ERR_PTR(-ENOMEM); |
193 | } | 165 | } |
194 | 166 | ||
195 | mutex_lock(&group->notification_mutex); | 167 | mutex_lock(&group->notification_mutex); |
196 | 168 | ||
197 | if (group->q_len >= group->max_events) { | 169 | if (group->q_len >= group->max_events) { |
198 | event = &q_overflow_event; | 170 | event = q_overflow_event; |
199 | ret = -EOVERFLOW; | 171 | |
172 | /* | ||
173 | * we need to return the overflow event | ||
174 | * which means we need a ref | ||
175 | */ | ||
176 | fsnotify_get_event(event); | ||
177 | return_event = event; | ||
178 | |||
200 | /* sorry, no private data on the overflow event */ | 179 | /* sorry, no private data on the overflow event */ |
201 | priv = NULL; | 180 | priv = NULL; |
202 | } | 181 | } |
203 | 182 | ||
183 | if (!list_empty(list) && merge) { | ||
184 | struct fsnotify_event *tmp; | ||
185 | |||
186 | tmp = merge(list, event); | ||
187 | if (tmp) { | ||
188 | mutex_unlock(&group->notification_mutex); | ||
189 | |||
190 | if (return_event) | ||
191 | fsnotify_put_event(return_event); | ||
192 | if (holder != &event->holder) | ||
193 | fsnotify_destroy_event_holder(holder); | ||
194 | return tmp; | ||
195 | } | ||
196 | } | ||
197 | |||
204 | spin_lock(&event->lock); | 198 | spin_lock(&event->lock); |
205 | 199 | ||
206 | if (list_empty(&event->holder.event_list)) { | 200 | if (list_empty(&event->holder.event_list)) { |
@@ -212,19 +206,13 @@ alloc_holder: | |||
212 | * event holder was used, go back and get a new one */ | 206 | * event holder was used, go back and get a new one */ |
213 | spin_unlock(&event->lock); | 207 | spin_unlock(&event->lock); |
214 | mutex_unlock(&group->notification_mutex); | 208 | mutex_unlock(&group->notification_mutex); |
215 | goto alloc_holder; | ||
216 | } | ||
217 | 209 | ||
218 | if (!list_empty(list)) { | 210 | if (return_event) { |
219 | last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); | 211 | fsnotify_put_event(return_event); |
220 | last_event = last_holder->event; | 212 | return_event = NULL; |
221 | if (event_compare(last_event, event)) { | ||
222 | spin_unlock(&event->lock); | ||
223 | mutex_unlock(&group->notification_mutex); | ||
224 | if (holder != &event->holder) | ||
225 | fsnotify_destroy_event_holder(holder); | ||
226 | return -EEXIST; | ||
227 | } | 213 | } |
214 | |||
215 | goto alloc_holder; | ||
228 | } | 216 | } |
229 | 217 | ||
230 | group->q_len++; | 218 | group->q_len++; |
@@ -238,7 +226,7 @@ alloc_holder: | |||
238 | mutex_unlock(&group->notification_mutex); | 226 | mutex_unlock(&group->notification_mutex); |
239 | 227 | ||
240 | wake_up(&group->notification_waitq); | 228 | wake_up(&group->notification_waitq); |
241 | return ret; | 229 | return return_event; |
242 | } | 230 | } |
243 | 231 | ||
244 | /* | 232 | /* |
@@ -253,6 +241,8 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group | |||
253 | 241 | ||
254 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); | 242 | BUG_ON(!mutex_is_locked(&group->notification_mutex)); |
255 | 243 | ||
244 | pr_debug("%s: group=%p\n", __func__, group); | ||
245 | |||
256 | holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); | 246 | holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); |
257 | 247 | ||
258 | event = holder->event; | 248 | event = holder->event; |
@@ -314,25 +304,82 @@ void fsnotify_flush_notify(struct fsnotify_group *group) | |||
314 | 304 | ||
315 | static void initialize_event(struct fsnotify_event *event) | 305 | static void initialize_event(struct fsnotify_event *event) |
316 | { | 306 | { |
317 | event->holder.event = NULL; | ||
318 | INIT_LIST_HEAD(&event->holder.event_list); | 307 | INIT_LIST_HEAD(&event->holder.event_list); |
319 | atomic_set(&event->refcnt, 1); | 308 | atomic_set(&event->refcnt, 1); |
320 | 309 | ||
321 | spin_lock_init(&event->lock); | 310 | spin_lock_init(&event->lock); |
322 | 311 | ||
323 | event->path.dentry = NULL; | ||
324 | event->path.mnt = NULL; | ||
325 | event->inode = NULL; | ||
326 | event->data_type = FSNOTIFY_EVENT_NONE; | ||
327 | |||
328 | INIT_LIST_HEAD(&event->private_data_list); | 312 | INIT_LIST_HEAD(&event->private_data_list); |
313 | } | ||
329 | 314 | ||
330 | event->to_tell = NULL; | 315 | /* |
316 | * Caller damn well better be holding whatever mutex is protecting the | ||
317 | * old_holder->event_list and the new_event must be a clean event which | ||
318 | * cannot be found anywhere else in the kernel. | ||
319 | */ | ||
320 | int fsnotify_replace_event(struct fsnotify_event_holder *old_holder, | ||
321 | struct fsnotify_event *new_event) | ||
322 | { | ||
323 | struct fsnotify_event *old_event = old_holder->event; | ||
324 | struct fsnotify_event_holder *new_holder = &new_event->holder; | ||
331 | 325 | ||
332 | event->file_name = NULL; | 326 | enum event_spinlock_class { |
333 | event->name_len = 0; | 327 | SPINLOCK_OLD, |
328 | SPINLOCK_NEW, | ||
329 | }; | ||
334 | 330 | ||
335 | event->sync_cookie = 0; | 331 | pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event); |
332 | |||
333 | /* | ||
334 | * if the new_event's embedded holder is in use someone | ||
335 | * screwed up and didn't give us a clean new event. | ||
336 | */ | ||
337 | BUG_ON(!list_empty(&new_holder->event_list)); | ||
338 | |||
339 | spin_lock_nested(&old_event->lock, SPINLOCK_OLD); | ||
340 | spin_lock_nested(&new_event->lock, SPINLOCK_NEW); | ||
341 | |||
342 | new_holder->event = new_event; | ||
343 | list_replace_init(&old_holder->event_list, &new_holder->event_list); | ||
344 | |||
345 | spin_unlock(&new_event->lock); | ||
346 | spin_unlock(&old_event->lock); | ||
347 | |||
348 | /* event == holder means we are referenced through the in event holder */ | ||
349 | if (old_holder != &old_event->holder) | ||
350 | fsnotify_destroy_event_holder(old_holder); | ||
351 | |||
352 | fsnotify_get_event(new_event); /* on the list take reference */ | ||
353 | fsnotify_put_event(old_event); /* off the list, drop reference */ | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event) | ||
359 | { | ||
360 | struct fsnotify_event *event; | ||
361 | |||
362 | event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); | ||
363 | if (!event) | ||
364 | return NULL; | ||
365 | |||
366 | pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event); | ||
367 | |||
368 | memcpy(event, old_event, sizeof(*event)); | ||
369 | initialize_event(event); | ||
370 | |||
371 | if (event->name_len) { | ||
372 | event->file_name = kstrdup(old_event->file_name, GFP_KERNEL); | ||
373 | if (!event->file_name) { | ||
374 | kmem_cache_free(fsnotify_event_cachep, event); | ||
375 | return NULL; | ||
376 | } | ||
377 | } | ||
378 | event->tgid = get_pid(old_event->tgid); | ||
379 | if (event->data_type == FSNOTIFY_EVENT_FILE) | ||
380 | get_file(event->file); | ||
381 | |||
382 | return event; | ||
336 | } | 383 | } |
337 | 384 | ||
338 | /* | 385 | /* |
@@ -348,15 +395,18 @@ static void initialize_event(struct fsnotify_event *event) | |||
348 | * @name the filename, if available | 395 | * @name the filename, if available |
349 | */ | 396 | */ |
350 | struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, | 397 | struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, |
351 | int data_type, const char *name, u32 cookie, | 398 | int data_type, const unsigned char *name, |
352 | gfp_t gfp) | 399 | u32 cookie, gfp_t gfp) |
353 | { | 400 | { |
354 | struct fsnotify_event *event; | 401 | struct fsnotify_event *event; |
355 | 402 | ||
356 | event = kmem_cache_alloc(fsnotify_event_cachep, gfp); | 403 | event = kmem_cache_zalloc(fsnotify_event_cachep, gfp); |
357 | if (!event) | 404 | if (!event) |
358 | return NULL; | 405 | return NULL; |
359 | 406 | ||
407 | pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n", | ||
408 | __func__, event, to_tell, mask, data, data_type); | ||
409 | |||
360 | initialize_event(event); | 410 | initialize_event(event); |
361 | 411 | ||
362 | if (name) { | 412 | if (name) { |
@@ -368,35 +418,36 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, | |||
368 | event->name_len = strlen(event->file_name); | 418 | event->name_len = strlen(event->file_name); |
369 | } | 419 | } |
370 | 420 | ||
421 | event->tgid = get_pid(task_tgid(current)); | ||
371 | event->sync_cookie = cookie; | 422 | event->sync_cookie = cookie; |
372 | event->to_tell = to_tell; | 423 | event->to_tell = to_tell; |
424 | event->data_type = data_type; | ||
373 | 425 | ||
374 | switch (data_type) { | 426 | switch (data_type) { |
375 | case FSNOTIFY_EVENT_FILE: { | 427 | case FSNOTIFY_EVENT_FILE: { |
376 | struct file *file = data; | 428 | event->file = data; |
377 | struct path *path = &file->f_path; | 429 | /* |
378 | event->path.dentry = path->dentry; | 430 | * if this file is about to disappear hold an extra reference |
379 | event->path.mnt = path->mnt; | 431 | * until we return to __fput so we don't have to worry about |
380 | path_get(&event->path); | 432 | * future get/put destroying the file under us or generating |
381 | event->data_type = FSNOTIFY_EVENT_PATH; | 433 | * additional events. Notice that we change f_mode without |
382 | break; | 434 | * holding f_lock. This is safe since this is the only possible |
383 | } | 435 | * reference to this object in the kernel (it was about to be |
384 | case FSNOTIFY_EVENT_PATH: { | 436 | * freed, remember?) |
385 | struct path *path = data; | 437 | */ |
386 | event->path.dentry = path->dentry; | 438 | if (!atomic_long_read(&event->file->f_count)) { |
387 | event->path.mnt = path->mnt; | 439 | event->file->f_mode |= FMODE_NONOTIFY; |
388 | path_get(&event->path); | 440 | get_file(event->file); |
389 | event->data_type = FSNOTIFY_EVENT_PATH; | 441 | } |
442 | get_file(event->file); | ||
390 | break; | 443 | break; |
391 | } | 444 | } |
392 | case FSNOTIFY_EVENT_INODE: | 445 | case FSNOTIFY_EVENT_INODE: |
393 | event->inode = data; | 446 | event->inode = data; |
394 | event->data_type = FSNOTIFY_EVENT_INODE; | ||
395 | break; | 447 | break; |
396 | case FSNOTIFY_EVENT_NONE: | 448 | case FSNOTIFY_EVENT_NONE: |
397 | event->inode = NULL; | 449 | event->inode = NULL; |
398 | event->path.dentry = NULL; | 450 | event->file = NULL; |
399 | event->path.mnt = NULL; | ||
400 | break; | 451 | break; |
401 | default: | 452 | default: |
402 | BUG(); | 453 | BUG(); |
@@ -412,8 +463,11 @@ __init int fsnotify_notification_init(void) | |||
412 | fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); | 463 | fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); |
413 | fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); | 464 | fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); |
414 | 465 | ||
415 | initialize_event(&q_overflow_event); | 466 | q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL, |
416 | q_overflow_event.mask = FS_Q_OVERFLOW; | 467 | FSNOTIFY_EVENT_NONE, NULL, 0, |
468 | GFP_KERNEL); | ||
469 | if (!q_overflow_event) | ||
470 | panic("unable to allocate fsnotify q_overflow_event\n"); | ||
417 | 471 | ||
418 | return 0; | 472 | return 0; |
419 | } | 473 | } |
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c new file mode 100644 index 000000000000..56772b578fbd --- /dev/null +++ b/fs/notify/vfsmount_mark.c | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2, or (at your option) | ||
7 | * any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; see the file COPYING. If not, write to | ||
16 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/fs.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/mount.h> | ||
24 | #include <linux/mutex.h> | ||
25 | #include <linux/spinlock.h> | ||
26 | #include <linux/writeback.h> /* for inode_lock */ | ||
27 | |||
28 | #include <asm/atomic.h> | ||
29 | |||
30 | #include <linux/fsnotify_backend.h> | ||
31 | #include "fsnotify.h" | ||
32 | |||
33 | void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) | ||
34 | { | ||
35 | struct fsnotify_mark *mark, *lmark; | ||
36 | struct hlist_node *pos, *n; | ||
37 | LIST_HEAD(free_list); | ||
38 | |||
39 | spin_lock(&mnt->mnt_root->d_lock); | ||
40 | hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) { | ||
41 | list_add(&mark->m.free_m_list, &free_list); | ||
42 | hlist_del_init_rcu(&mark->m.m_list); | ||
43 | fsnotify_get_mark(mark); | ||
44 | } | ||
45 | spin_unlock(&mnt->mnt_root->d_lock); | ||
46 | |||
47 | list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) { | ||
48 | fsnotify_destroy_mark(mark); | ||
49 | fsnotify_put_mark(mark); | ||
50 | } | ||
51 | } | ||
52 | |||
53 | void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) | ||
54 | { | ||
55 | fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); | ||
56 | } | ||
57 | |||
58 | /* | ||
59 | * Recalculate the mask of events relevant to a given vfsmount locked. | ||
60 | */ | ||
61 | static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) | ||
62 | { | ||
63 | struct fsnotify_mark *mark; | ||
64 | struct hlist_node *pos; | ||
65 | __u32 new_mask = 0; | ||
66 | |||
67 | assert_spin_locked(&mnt->mnt_root->d_lock); | ||
68 | |||
69 | hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) | ||
70 | new_mask |= mark->mask; | ||
71 | mnt->mnt_fsnotify_mask = new_mask; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types | ||
76 | * any notifier is interested in hearing for this mount point | ||
77 | */ | ||
78 | void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt) | ||
79 | { | ||
80 | spin_lock(&mnt->mnt_root->d_lock); | ||
81 | fsnotify_recalc_vfsmount_mask_locked(mnt); | ||
82 | spin_unlock(&mnt->mnt_root->d_lock); | ||
83 | } | ||
84 | |||
85 | void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) | ||
86 | { | ||
87 | struct vfsmount *mnt = mark->m.mnt; | ||
88 | |||
89 | assert_spin_locked(&mark->lock); | ||
90 | assert_spin_locked(&mark->group->mark_lock); | ||
91 | |||
92 | spin_lock(&mnt->mnt_root->d_lock); | ||
93 | |||
94 | hlist_del_init_rcu(&mark->m.m_list); | ||
95 | mark->m.mnt = NULL; | ||
96 | |||
97 | fsnotify_recalc_vfsmount_mask_locked(mnt); | ||
98 | |||
99 | spin_unlock(&mnt->mnt_root->d_lock); | ||
100 | } | ||
101 | |||
102 | static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group, | ||
103 | struct vfsmount *mnt) | ||
104 | { | ||
105 | struct fsnotify_mark *mark; | ||
106 | struct hlist_node *pos; | ||
107 | |||
108 | assert_spin_locked(&mnt->mnt_root->d_lock); | ||
109 | |||
110 | hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) { | ||
111 | if (mark->group == group) { | ||
112 | fsnotify_get_mark(mark); | ||
113 | return mark; | ||
114 | } | ||
115 | } | ||
116 | return NULL; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * given a group and vfsmount, find the mark associated with that combination. | ||
121 | * if found take a reference to that mark and return it, else return NULL | ||
122 | */ | ||
123 | struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, | ||
124 | struct vfsmount *mnt) | ||
125 | { | ||
126 | struct fsnotify_mark *mark; | ||
127 | |||
128 | spin_lock(&mnt->mnt_root->d_lock); | ||
129 | mark = fsnotify_find_vfsmount_mark_locked(group, mnt); | ||
130 | spin_unlock(&mnt->mnt_root->d_lock); | ||
131 | |||
132 | return mark; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Attach an initialized mark to a given group and vfsmount. | ||
137 | * These marks may be used for the fsnotify backend to determine which | ||
138 | * event types should be delivered to which groups. | ||
139 | */ | ||
140 | int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, | ||
141 | struct fsnotify_group *group, struct vfsmount *mnt, | ||
142 | int allow_dups) | ||
143 | { | ||
144 | struct fsnotify_mark *lmark; | ||
145 | struct hlist_node *node, *last = NULL; | ||
146 | int ret = 0; | ||
147 | |||
148 | mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; | ||
149 | |||
150 | assert_spin_locked(&mark->lock); | ||
151 | assert_spin_locked(&group->mark_lock); | ||
152 | |||
153 | spin_lock(&mnt->mnt_root->d_lock); | ||
154 | |||
155 | mark->m.mnt = mnt; | ||
156 | |||
157 | /* is mark the first mark? */ | ||
158 | if (hlist_empty(&mnt->mnt_fsnotify_marks)) { | ||
159 | hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks); | ||
160 | goto out; | ||
161 | } | ||
162 | |||
163 | /* should mark be in the middle of the current list? */ | ||
164 | hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) { | ||
165 | last = node; | ||
166 | |||
167 | if ((lmark->group == group) && !allow_dups) { | ||
168 | ret = -EEXIST; | ||
169 | goto out; | ||
170 | } | ||
171 | |||
172 | if (mark->group < lmark->group) | ||
173 | continue; | ||
174 | |||
175 | hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); | ||
176 | goto out; | ||
177 | } | ||
178 | |||
179 | BUG_ON(last == NULL); | ||
180 | /* mark should be the last entry. last is the current last entry */ | ||
181 | hlist_add_after_rcu(last, &mark->m.m_list); | ||
182 | out: | ||
183 | fsnotify_recalc_vfsmount_mask_locked(mnt); | ||
184 | spin_unlock(&mnt->mnt_root->d_lock); | ||
185 | |||
186 | return ret; | ||
187 | } | ||
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4b57fb1eac2a..93622b175fc7 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -2238,7 +2238,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) | |||
2238 | } | 2238 | } |
2239 | 2239 | ||
2240 | /** | 2240 | /** |
2241 | * ntfs_clear_big_inode - clean up the ntfs specific part of an inode | 2241 | * ntfs_evict_big_inode - clean up the ntfs specific part of an inode |
2242 | * @vi: vfs inode pending annihilation | 2242 | * @vi: vfs inode pending annihilation |
2243 | * | 2243 | * |
2244 | * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() | 2244 | * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() |
@@ -2247,10 +2247,13 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) | |||
2247 | * | 2247 | * |
2248 | * If the MFT record is dirty, we commit it before doing anything else. | 2248 | * If the MFT record is dirty, we commit it before doing anything else. |
2249 | */ | 2249 | */ |
2250 | void ntfs_clear_big_inode(struct inode *vi) | 2250 | void ntfs_evict_big_inode(struct inode *vi) |
2251 | { | 2251 | { |
2252 | ntfs_inode *ni = NTFS_I(vi); | 2252 | ntfs_inode *ni = NTFS_I(vi); |
2253 | 2253 | ||
2254 | truncate_inode_pages(&vi->i_data, 0); | ||
2255 | end_writeback(vi); | ||
2256 | |||
2254 | #ifdef NTFS_RW | 2257 | #ifdef NTFS_RW |
2255 | if (NInoDirty(ni)) { | 2258 | if (NInoDirty(ni)) { |
2256 | bool was_bad = (is_bad_inode(vi)); | 2259 | bool was_bad = (is_bad_inode(vi)); |
@@ -2879,9 +2882,6 @@ void ntfs_truncate_vfs(struct inode *vi) { | |||
2879 | * | 2882 | * |
2880 | * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also | 2883 | * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also |
2881 | * called with ->i_alloc_sem held for writing. | 2884 | * called with ->i_alloc_sem held for writing. |
2882 | * | ||
2883 | * Basically this is a copy of generic notify_change() and inode_setattr() | ||
2884 | * functionality, except we intercept and abort changes in i_size. | ||
2885 | */ | 2885 | */ |
2886 | int ntfs_setattr(struct dentry *dentry, struct iattr *attr) | 2886 | int ntfs_setattr(struct dentry *dentry, struct iattr *attr) |
2887 | { | 2887 | { |
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 9a113544605d..2dabf813456c 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h | |||
@@ -279,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name, | |||
279 | 279 | ||
280 | extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); | 280 | extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); |
281 | extern void ntfs_destroy_big_inode(struct inode *inode); | 281 | extern void ntfs_destroy_big_inode(struct inode *inode); |
282 | extern void ntfs_clear_big_inode(struct inode *vi); | 282 | extern void ntfs_evict_big_inode(struct inode *vi); |
283 | 283 | ||
284 | extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); | 284 | extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); |
285 | 285 | ||
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0de1db6cddbf..512806171bfa 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -2700,7 +2700,7 @@ static const struct super_operations ntfs_sops = { | |||
2700 | .put_super = ntfs_put_super, /* Syscall: umount. */ | 2700 | .put_super = ntfs_put_super, /* Syscall: umount. */ |
2701 | .statfs = ntfs_statfs, /* Syscall: statfs */ | 2701 | .statfs = ntfs_statfs, /* Syscall: statfs */ |
2702 | .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ | 2702 | .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ |
2703 | .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is | 2703 | .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is |
2704 | removed from memory. */ | 2704 | removed from memory. */ |
2705 | //.umount_begin = NULL, /* Forced umount. */ | 2705 | //.umount_begin = NULL, /* Forced umount. */ |
2706 | .show_options = ntfs_show_options, /* Show mount options in | 2706 | .show_options = ntfs_show_options, /* Show mount options in |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 96337a4fbbdf..0de69c9a08be 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -643,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
643 | if (i_size_read(inode) <= offset) | 643 | if (i_size_read(inode) <= offset) |
644 | return 0; | 644 | return 0; |
645 | 645 | ||
646 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, | 646 | ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, |
647 | inode->i_sb->s_bdev, iov, offset, | 647 | iov, offset, nr_segs, |
648 | nr_segs, | 648 | ocfs2_direct_IO_get_blocks, |
649 | ocfs2_direct_IO_get_blocks, | 649 | ocfs2_dio_end_io, NULL, 0); |
650 | ocfs2_dio_end_io); | ||
651 | 650 | ||
652 | mlog_exit(ret); | 651 | mlog_exit(ret); |
653 | return ret; | 652 | return ret; |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0cd24cf54396..5efdd37dfe48 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -419,7 +419,7 @@ static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) | |||
419 | 419 | ||
420 | static int debug_buffer_release(struct inode *inode, struct file *file) | 420 | static int debug_buffer_release(struct inode *inode, struct file *file) |
421 | { | 421 | { |
422 | struct debug_buffer *db = (struct debug_buffer *)file->private_data; | 422 | struct debug_buffer *db = file->private_data; |
423 | 423 | ||
424 | if (db) | 424 | if (db) |
425 | kfree(db->buf); | 425 | kfree(db->buf); |
@@ -715,7 +715,7 @@ static int debug_lockres_open(struct inode *inode, struct file *file) | |||
715 | goto bail; | 715 | goto bail; |
716 | } | 716 | } |
717 | 717 | ||
718 | seq = (struct seq_file *) file->private_data; | 718 | seq = file->private_data; |
719 | seq->private = dl; | 719 | seq->private = dl; |
720 | 720 | ||
721 | dlm_grab(dlm); | 721 | dlm_grab(dlm); |
@@ -731,7 +731,7 @@ bail: | |||
731 | 731 | ||
732 | static int debug_lockres_release(struct inode *inode, struct file *file) | 732 | static int debug_lockres_release(struct inode *inode, struct file *file) |
733 | { | 733 | { |
734 | struct seq_file *seq = (struct seq_file *)file->private_data; | 734 | struct seq_file *seq = file->private_data; |
735 | struct debug_lockres *dl = (struct debug_lockres *)seq->private; | 735 | struct debug_lockres *dl = (struct debug_lockres *)seq->private; |
736 | 736 | ||
737 | if (dl->dl_res) | 737 | if (dl->dl_res) |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index b83d6107a1f5..c2903b84bb7a 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -182,8 +182,7 @@ static int dlmfs_file_release(struct inode *inode, | |||
182 | { | 182 | { |
183 | int level, status; | 183 | int level, status; |
184 | struct dlmfs_inode_private *ip = DLMFS_I(inode); | 184 | struct dlmfs_inode_private *ip = DLMFS_I(inode); |
185 | struct dlmfs_filp_private *fp = | 185 | struct dlmfs_filp_private *fp = file->private_data; |
186 | (struct dlmfs_filp_private *) file->private_data; | ||
187 | 186 | ||
188 | if (S_ISDIR(inode->i_mode)) | 187 | if (S_ISDIR(inode->i_mode)) |
189 | BUG(); | 188 | BUG(); |
@@ -214,10 +213,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr) | |||
214 | 213 | ||
215 | attr->ia_valid &= ~ATTR_SIZE; | 214 | attr->ia_valid &= ~ATTR_SIZE; |
216 | error = inode_change_ok(inode, attr); | 215 | error = inode_change_ok(inode, attr); |
217 | if (!error) | 216 | if (error) |
218 | error = inode_setattr(inode, attr); | 217 | return error; |
219 | 218 | ||
220 | return error; | 219 | setattr_copy(inode, attr); |
220 | mark_inode_dirty(inode); | ||
221 | return 0; | ||
221 | } | 222 | } |
222 | 223 | ||
223 | static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) | 224 | static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) |
@@ -355,13 +356,12 @@ static void dlmfs_destroy_inode(struct inode *inode) | |||
355 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); | 356 | kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); |
356 | } | 357 | } |
357 | 358 | ||
358 | static void dlmfs_clear_inode(struct inode *inode) | 359 | static void dlmfs_evict_inode(struct inode *inode) |
359 | { | 360 | { |
360 | int status; | 361 | int status; |
361 | struct dlmfs_inode_private *ip; | 362 | struct dlmfs_inode_private *ip; |
362 | 363 | ||
363 | if (!inode) | 364 | end_writeback(inode); |
364 | return; | ||
365 | 365 | ||
366 | mlog(0, "inode %lu\n", inode->i_ino); | 366 | mlog(0, "inode %lu\n", inode->i_ino); |
367 | 367 | ||
@@ -631,7 +631,7 @@ static const struct super_operations dlmfs_ops = { | |||
631 | .statfs = simple_statfs, | 631 | .statfs = simple_statfs, |
632 | .alloc_inode = dlmfs_alloc_inode, | 632 | .alloc_inode = dlmfs_alloc_inode, |
633 | .destroy_inode = dlmfs_destroy_inode, | 633 | .destroy_inode = dlmfs_destroy_inode, |
634 | .clear_inode = dlmfs_clear_inode, | 634 | .evict_inode = dlmfs_evict_inode, |
635 | .drop_inode = generic_delete_inode, | 635 | .drop_inode = generic_delete_inode, |
636 | }; | 636 | }; |
637 | 637 | ||
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 39eb16ac5f98..5e02a893f46e 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -2966,7 +2966,7 @@ static const struct seq_operations ocfs2_dlm_seq_ops = { | |||
2966 | 2966 | ||
2967 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) | 2967 | static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file) |
2968 | { | 2968 | { |
2969 | struct seq_file *seq = (struct seq_file *) file->private_data; | 2969 | struct seq_file *seq = file->private_data; |
2970 | struct ocfs2_dlm_seq_priv *priv = seq->private; | 2970 | struct ocfs2_dlm_seq_priv *priv = seq->private; |
2971 | struct ocfs2_lock_res *res = &priv->p_iter_res; | 2971 | struct ocfs2_lock_res *res = &priv->p_iter_res; |
2972 | 2972 | ||
@@ -3000,7 +3000,7 @@ static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file) | |||
3000 | goto out; | 3000 | goto out; |
3001 | } | 3001 | } |
3002 | 3002 | ||
3003 | seq = (struct seq_file *) file->private_data; | 3003 | seq = file->private_data; |
3004 | seq->private = priv; | 3004 | seq->private = priv; |
3005 | 3005 | ||
3006 | ocfs2_add_lockres_tracking(&priv->p_iter_res, | 3006 | ocfs2_add_lockres_tracking(&priv->p_iter_res, |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2b10b36d1577..81296b4e3646 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1233,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1233 | } | 1233 | } |
1234 | 1234 | ||
1235 | /* | 1235 | /* |
1236 | * This will intentionally not wind up calling simple_setsize(), | 1236 | * This will intentionally not wind up calling truncate_setsize(), |
1237 | * since all the work for a size change has been done above. | 1237 | * since all the work for a size change has been done above. |
1238 | * Otherwise, we could get into problems with truncate as | 1238 | * Otherwise, we could get into problems with truncate as |
1239 | * ip_alloc_sem is used there to protect against i_size | 1239 | * ip_alloc_sem is used there to protect against i_size |
1240 | * changes. | 1240 | * changes. |
1241 | * | ||
1242 | * XXX: this means the conditional below can probably be removed. | ||
1241 | */ | 1243 | */ |
1242 | status = inode_setattr(inode, attr); | 1244 | if ((attr->ia_valid & ATTR_SIZE) && |
1243 | if (status < 0) { | 1245 | attr->ia_size != i_size_read(inode)) { |
1244 | mlog_errno(status); | 1246 | status = vmtruncate(inode, attr->ia_size); |
1245 | goto bail_commit; | 1247 | if (status) { |
1248 | mlog_errno(status); | ||
1249 | goto bail_commit; | ||
1250 | } | ||
1246 | } | 1251 | } |
1247 | 1252 | ||
1253 | setattr_copy(inode, attr); | ||
1254 | mark_inode_dirty(inode); | ||
1255 | |||
1248 | status = ocfs2_mark_inode_dirty(handle, inode, bh); | 1256 | status = ocfs2_mark_inode_dirty(handle, inode, bh); |
1249 | if (status < 0) | 1257 | if (status < 0) |
1250 | mlog_errno(status); | 1258 | mlog_errno(status); |
@@ -2300,12 +2308,12 @@ relock: | |||
2300 | * blocks outside i_size. Trim these off again. | 2308 | * blocks outside i_size. Trim these off again. |
2301 | * Don't need i_size_read because we hold i_mutex. | 2309 | * Don't need i_size_read because we hold i_mutex. |
2302 | * | 2310 | * |
2303 | * XXX(hch): this looks buggy because ocfs2 did not | 2311 | * XXX(truncate): this looks buggy because ocfs2 did not |
2304 | * actually implement ->truncate. Take a look at | 2312 | * actually implement ->truncate. Take a look at |
2305 | * the new truncate sequence and update this accordingly | 2313 | * the new truncate sequence and update this accordingly |
2306 | */ | 2314 | */ |
2307 | if (*ppos + count > inode->i_size) | 2315 | if (*ppos + count > inode->i_size) |
2308 | simple_setsize(inode, inode->i_size); | 2316 | truncate_setsize(inode, inode->i_size); |
2309 | ret = written; | 2317 | ret = written; |
2310 | goto out_dio; | 2318 | goto out_dio; |
2311 | } | 2319 | } |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index abb0a95cc717..0492464916b1 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, | |||
969 | truncate_inode_pages(&inode->i_data, 0); | 969 | truncate_inode_pages(&inode->i_data, 0); |
970 | } | 970 | } |
971 | 971 | ||
972 | void ocfs2_delete_inode(struct inode *inode) | 972 | static void ocfs2_delete_inode(struct inode *inode) |
973 | { | 973 | { |
974 | int wipe, status; | 974 | int wipe, status; |
975 | sigset_t oldset; | 975 | sigset_t oldset; |
@@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync: | |||
1075 | bail_unblock: | 1075 | bail_unblock: |
1076 | ocfs2_unblock_signals(&oldset); | 1076 | ocfs2_unblock_signals(&oldset); |
1077 | bail: | 1077 | bail: |
1078 | clear_inode(inode); | ||
1079 | mlog_exit_void(); | 1078 | mlog_exit_void(); |
1080 | } | 1079 | } |
1081 | 1080 | ||
1082 | void ocfs2_clear_inode(struct inode *inode) | 1081 | static void ocfs2_clear_inode(struct inode *inode) |
1083 | { | 1082 | { |
1084 | int status; | 1083 | int status; |
1085 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1084 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1086 | 1085 | ||
1087 | mlog_entry_void(); | 1086 | mlog_entry_void(); |
1088 | 1087 | ||
1089 | if (!inode) | 1088 | end_writeback(inode); |
1090 | goto bail; | ||
1091 | |||
1092 | mlog(0, "Clearing inode: %llu, nlink = %u\n", | 1089 | mlog(0, "Clearing inode: %llu, nlink = %u\n", |
1093 | (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); | 1090 | (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); |
1094 | 1091 | ||
@@ -1180,16 +1177,27 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1180 | jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, | 1177 | jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, |
1181 | &oi->ip_jinode); | 1178 | &oi->ip_jinode); |
1182 | 1179 | ||
1183 | bail: | ||
1184 | mlog_exit_void(); | 1180 | mlog_exit_void(); |
1185 | } | 1181 | } |
1186 | 1182 | ||
1183 | void ocfs2_evict_inode(struct inode *inode) | ||
1184 | { | ||
1185 | if (!inode->i_nlink || | ||
1186 | (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { | ||
1187 | ocfs2_delete_inode(inode); | ||
1188 | } else { | ||
1189 | truncate_inode_pages(&inode->i_data, 0); | ||
1190 | } | ||
1191 | ocfs2_clear_inode(inode); | ||
1192 | } | ||
1193 | |||
1187 | /* Called under inode_lock, with no more references on the | 1194 | /* Called under inode_lock, with no more references on the |
1188 | * struct inode, so it's safe here to check the flags field | 1195 | * struct inode, so it's safe here to check the flags field |
1189 | * and to manipulate i_nlink without any other locks. */ | 1196 | * and to manipulate i_nlink without any other locks. */ |
1190 | void ocfs2_drop_inode(struct inode *inode) | 1197 | int ocfs2_drop_inode(struct inode *inode) |
1191 | { | 1198 | { |
1192 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1199 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1200 | int res; | ||
1193 | 1201 | ||
1194 | mlog_entry_void(); | 1202 | mlog_entry_void(); |
1195 | 1203 | ||
@@ -1197,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode) | |||
1197 | (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); | 1205 | (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); |
1198 | 1206 | ||
1199 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) | 1207 | if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) |
1200 | generic_delete_inode(inode); | 1208 | res = 1; |
1201 | else | 1209 | else |
1202 | generic_drop_inode(inode); | 1210 | res = generic_drop_inode(inode); |
1203 | 1211 | ||
1204 | mlog_exit_void(); | 1212 | mlog_exit_void(); |
1213 | return res; | ||
1205 | } | 1214 | } |
1206 | 1215 | ||
1207 | /* | 1216 | /* |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 9f5f5fcadc45..6de5a869db30 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -123,9 +123,8 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode) | |||
123 | return &OCFS2_I(inode)->ip_metadata_cache; | 123 | return &OCFS2_I(inode)->ip_metadata_cache; |
124 | } | 124 | } |
125 | 125 | ||
126 | void ocfs2_clear_inode(struct inode *inode); | 126 | void ocfs2_evict_inode(struct inode *inode); |
127 | void ocfs2_delete_inode(struct inode *inode); | 127 | int ocfs2_drop_inode(struct inode *inode); |
128 | void ocfs2_drop_inode(struct inode *inode); | ||
129 | 128 | ||
130 | /* Flags for ocfs2_iget() */ | 129 | /* Flags for ocfs2_iget() */ |
131 | #define OCFS2_FI_FLAG_SYSFILE 0x1 | 130 | #define OCFS2_FI_FLAG_SYSFILE 0x1 |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 625de9d7088c..9b57c0350ff9 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) | |||
760 | if (osb->osb_commit_interval) | 760 | if (osb->osb_commit_interval) |
761 | commit_interval = osb->osb_commit_interval; | 761 | commit_interval = osb->osb_commit_interval; |
762 | 762 | ||
763 | spin_lock(&journal->j_state_lock); | 763 | write_lock(&journal->j_state_lock); |
764 | journal->j_commit_interval = commit_interval; | 764 | journal->j_commit_interval = commit_interval; |
765 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 765 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
766 | journal->j_flags |= JBD2_BARRIER; | 766 | journal->j_flags |= JBD2_BARRIER; |
767 | else | 767 | else |
768 | journal->j_flags &= ~JBD2_BARRIER; | 768 | journal->j_flags &= ~JBD2_BARRIER; |
769 | spin_unlock(&journal->j_state_lock); | 769 | write_unlock(&journal->j_state_lock); |
770 | } | 770 | } |
771 | 771 | ||
772 | int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | 772 | int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0eaa929a4dbf..fa1be1b304d1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = { | |||
145 | .alloc_inode = ocfs2_alloc_inode, | 145 | .alloc_inode = ocfs2_alloc_inode, |
146 | .destroy_inode = ocfs2_destroy_inode, | 146 | .destroy_inode = ocfs2_destroy_inode, |
147 | .drop_inode = ocfs2_drop_inode, | 147 | .drop_inode = ocfs2_drop_inode, |
148 | .clear_inode = ocfs2_clear_inode, | 148 | .evict_inode = ocfs2_evict_inode, |
149 | .delete_inode = ocfs2_delete_inode, | ||
150 | .sync_fs = ocfs2_sync_fs, | 149 | .sync_fs = ocfs2_sync_fs, |
151 | .put_super = ocfs2_put_super, | 150 | .put_super = ocfs2_put_super, |
152 | .remount_fs = ocfs2_remount, | 151 | .remount_fs = ocfs2_remount, |
@@ -2472,7 +2471,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
2472 | kfree(osb->slot_recovery_generations); | 2471 | kfree(osb->slot_recovery_generations); |
2473 | /* FIXME | 2472 | /* FIXME |
2474 | * This belongs in journal shutdown, but because we have to | 2473 | * This belongs in journal shutdown, but because we have to |
2475 | * allocate osb->journal at the start of ocfs2_initalize_osb(), | 2474 | * allocate osb->journal at the start of ocfs2_initialize_osb(), |
2476 | * we free it here. | 2475 | * we free it here. |
2477 | */ | 2476 | */ |
2478 | kfree(osb->journal); | 2477 | kfree(osb->journal); |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index b42d62419034..393f3f659da7 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -25,11 +25,10 @@ static struct buffer_head *omfs_get_bucket(struct inode *dir, | |||
25 | const char *name, int namelen, int *ofs) | 25 | const char *name, int namelen, int *ofs) |
26 | { | 26 | { |
27 | int nbuckets = (dir->i_size - OMFS_DIR_START)/8; | 27 | int nbuckets = (dir->i_size - OMFS_DIR_START)/8; |
28 | int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino); | ||
29 | int bucket = omfs_hash(name, namelen, nbuckets); | 28 | int bucket = omfs_hash(name, namelen, nbuckets); |
30 | 29 | ||
31 | *ofs = OMFS_DIR_START + bucket * 8; | 30 | *ofs = OMFS_DIR_START + bucket * 8; |
32 | return sb_bread(dir->i_sb, block); | 31 | return omfs_bread(dir->i_sb, dir->i_ino); |
33 | } | 32 | } |
34 | 33 | ||
35 | static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block, | 34 | static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block, |
@@ -42,8 +41,7 @@ static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block, | |||
42 | *prev_block = ~0; | 41 | *prev_block = ~0; |
43 | 42 | ||
44 | while (block != ~0) { | 43 | while (block != ~0) { |
45 | bh = sb_bread(dir->i_sb, | 44 | bh = omfs_bread(dir->i_sb, block); |
46 | clus_to_blk(OMFS_SB(dir->i_sb), block)); | ||
47 | if (!bh) { | 45 | if (!bh) { |
48 | err = -EIO; | 46 | err = -EIO; |
49 | goto err; | 47 | goto err; |
@@ -86,11 +84,10 @@ static struct buffer_head *omfs_find_entry(struct inode *dir, | |||
86 | int omfs_make_empty(struct inode *inode, struct super_block *sb) | 84 | int omfs_make_empty(struct inode *inode, struct super_block *sb) |
87 | { | 85 | { |
88 | struct omfs_sb_info *sbi = OMFS_SB(sb); | 86 | struct omfs_sb_info *sbi = OMFS_SB(sb); |
89 | int block = clus_to_blk(sbi, inode->i_ino); | ||
90 | struct buffer_head *bh; | 87 | struct buffer_head *bh; |
91 | struct omfs_inode *oi; | 88 | struct omfs_inode *oi; |
92 | 89 | ||
93 | bh = sb_bread(sb, block); | 90 | bh = omfs_bread(sb, inode->i_ino); |
94 | if (!bh) | 91 | if (!bh) |
95 | return -ENOMEM; | 92 | return -ENOMEM; |
96 | 93 | ||
@@ -134,7 +131,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode) | |||
134 | brelse(bh); | 131 | brelse(bh); |
135 | 132 | ||
136 | /* now set the sibling and parent pointers on the new inode */ | 133 | /* now set the sibling and parent pointers on the new inode */ |
137 | bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino)); | 134 | bh = omfs_bread(dir->i_sb, inode->i_ino); |
138 | if (!bh) | 135 | if (!bh) |
139 | goto out; | 136 | goto out; |
140 | 137 | ||
@@ -190,8 +187,7 @@ static int omfs_delete_entry(struct dentry *dentry) | |||
190 | if (prev != ~0) { | 187 | if (prev != ~0) { |
191 | /* found in middle of list, get list ptr */ | 188 | /* found in middle of list, get list ptr */ |
192 | brelse(bh); | 189 | brelse(bh); |
193 | bh = sb_bread(dir->i_sb, | 190 | bh = omfs_bread(dir->i_sb, prev); |
194 | clus_to_blk(OMFS_SB(dir->i_sb), prev)); | ||
195 | if (!bh) | 191 | if (!bh) |
196 | goto out; | 192 | goto out; |
197 | 193 | ||
@@ -224,8 +220,7 @@ static int omfs_dir_is_empty(struct inode *inode) | |||
224 | u64 *ptr; | 220 | u64 *ptr; |
225 | int i; | 221 | int i; |
226 | 222 | ||
227 | bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb), | 223 | bh = omfs_bread(inode->i_sb, inode->i_ino); |
228 | inode->i_ino)); | ||
229 | 224 | ||
230 | if (!bh) | 225 | if (!bh) |
231 | return 0; | 226 | return 0; |
@@ -353,8 +348,7 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, | |||
353 | 348 | ||
354 | /* follow chain in this bucket */ | 349 | /* follow chain in this bucket */ |
355 | while (fsblock != ~0) { | 350 | while (fsblock != ~0) { |
356 | bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), | 351 | bh = omfs_bread(dir->i_sb, fsblock); |
357 | fsblock)); | ||
358 | if (!bh) | 352 | if (!bh) |
359 | goto out; | 353 | goto out; |
360 | 354 | ||
@@ -466,7 +460,7 @@ static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
466 | hchain = (filp->f_pos >> 20) - 1; | 460 | hchain = (filp->f_pos >> 20) - 1; |
467 | hindex = filp->f_pos & 0xfffff; | 461 | hindex = filp->f_pos & 0xfffff; |
468 | 462 | ||
469 | bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino)); | 463 | bh = omfs_bread(dir->i_sb, dir->i_ino); |
470 | if (!bh) | 464 | if (!bh) |
471 | goto out; | 465 | goto out; |
472 | 466 | ||
diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 6e7a3291bbe8..8a6d34fa668a 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c | |||
@@ -50,7 +50,7 @@ int omfs_shrink_inode(struct inode *inode) | |||
50 | if (inode->i_size != 0) | 50 | if (inode->i_size != 0) |
51 | goto out; | 51 | goto out; |
52 | 52 | ||
53 | bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); | 53 | bh = omfs_bread(inode->i_sb, next); |
54 | if (!bh) | 54 | if (!bh) |
55 | goto out; | 55 | goto out; |
56 | 56 | ||
@@ -90,7 +90,7 @@ int omfs_shrink_inode(struct inode *inode) | |||
90 | if (next == ~0) | 90 | if (next == ~0) |
91 | break; | 91 | break; |
92 | 92 | ||
93 | bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); | 93 | bh = omfs_bread(inode->i_sb, next); |
94 | if (!bh) | 94 | if (!bh) |
95 | goto out; | 95 | goto out; |
96 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); | 96 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); |
@@ -222,7 +222,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
222 | struct buffer_head *bh; | 222 | struct buffer_head *bh; |
223 | sector_t next, offset; | 223 | sector_t next, offset; |
224 | int ret; | 224 | int ret; |
225 | u64 new_block; | 225 | u64 uninitialized_var(new_block); |
226 | u32 max_extents; | 226 | u32 max_extents; |
227 | int extent_count; | 227 | int extent_count; |
228 | struct omfs_extent *oe; | 228 | struct omfs_extent *oe; |
@@ -232,7 +232,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
232 | int remain; | 232 | int remain; |
233 | 233 | ||
234 | ret = -EIO; | 234 | ret = -EIO; |
235 | bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino)); | 235 | bh = omfs_bread(inode->i_sb, inode->i_ino); |
236 | if (!bh) | 236 | if (!bh) |
237 | goto out; | 237 | goto out; |
238 | 238 | ||
@@ -265,7 +265,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, | |||
265 | break; | 265 | break; |
266 | 266 | ||
267 | brelse(bh); | 267 | brelse(bh); |
268 | bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); | 268 | bh = omfs_bread(inode->i_sb, next); |
269 | if (!bh) | 269 | if (!bh) |
270 | goto out; | 270 | goto out; |
271 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); | 271 | oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); |
@@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping, | |||
312 | loff_t pos, unsigned len, unsigned flags, | 312 | loff_t pos, unsigned len, unsigned flags, |
313 | struct page **pagep, void **fsdata) | 313 | struct page **pagep, void **fsdata) |
314 | { | 314 | { |
315 | *pagep = NULL; | 315 | int ret; |
316 | return block_write_begin(file, mapping, pos, len, flags, | 316 | |
317 | pagep, fsdata, omfs_get_block); | 317 | ret = block_write_begin(mapping, pos, len, flags, pagep, |
318 | omfs_get_block); | ||
319 | if (unlikely(ret)) { | ||
320 | loff_t isize = mapping->host->i_size; | ||
321 | if (pos + len > isize) | ||
322 | vmtruncate(mapping->host, isize); | ||
323 | } | ||
324 | |||
325 | return ret; | ||
318 | } | 326 | } |
319 | 327 | ||
320 | static sector_t omfs_bmap(struct address_space *mapping, sector_t block) | 328 | static sector_t omfs_bmap(struct address_space *mapping, sector_t block) |
@@ -333,7 +341,29 @@ const struct file_operations omfs_file_operations = { | |||
333 | .splice_read = generic_file_splice_read, | 341 | .splice_read = generic_file_splice_read, |
334 | }; | 342 | }; |
335 | 343 | ||
344 | static int omfs_setattr(struct dentry *dentry, struct iattr *attr) | ||
345 | { | ||
346 | struct inode *inode = dentry->d_inode; | ||
347 | int error; | ||
348 | |||
349 | error = inode_change_ok(inode, attr); | ||
350 | if (error) | ||
351 | return error; | ||
352 | |||
353 | if ((attr->ia_valid & ATTR_SIZE) && | ||
354 | attr->ia_size != i_size_read(inode)) { | ||
355 | error = vmtruncate(inode, attr->ia_size); | ||
356 | if (error) | ||
357 | return error; | ||
358 | } | ||
359 | |||
360 | setattr_copy(inode, attr); | ||
361 | mark_inode_dirty(inode); | ||
362 | return 0; | ||
363 | } | ||
364 | |||
336 | const struct inode_operations omfs_file_inops = { | 365 | const struct inode_operations omfs_file_inops = { |
366 | .setattr = omfs_setattr, | ||
337 | .truncate = omfs_truncate | 367 | .truncate = omfs_truncate |
338 | }; | 368 | }; |
339 | 369 | ||
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 089839a6cc64..14a22863291a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -19,6 +19,15 @@ MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>"); | |||
19 | MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux"); | 19 | MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux"); |
20 | MODULE_LICENSE("GPL"); | 20 | MODULE_LICENSE("GPL"); |
21 | 21 | ||
22 | struct buffer_head *omfs_bread(struct super_block *sb, sector_t block) | ||
23 | { | ||
24 | struct omfs_sb_info *sbi = OMFS_SB(sb); | ||
25 | if (block >= sbi->s_num_blocks) | ||
26 | return NULL; | ||
27 | |||
28 | return sb_bread(sb, clus_to_blk(sbi, block)); | ||
29 | } | ||
30 | |||
22 | struct inode *omfs_new_inode(struct inode *dir, int mode) | 31 | struct inode *omfs_new_inode(struct inode *dir, int mode) |
23 | { | 32 | { |
24 | struct inode *inode; | 33 | struct inode *inode; |
@@ -93,15 +102,13 @@ static int __omfs_write_inode(struct inode *inode, int wait) | |||
93 | struct omfs_inode *oi; | 102 | struct omfs_inode *oi; |
94 | struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); | 103 | struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); |
95 | struct buffer_head *bh, *bh2; | 104 | struct buffer_head *bh, *bh2; |
96 | unsigned int block; | ||
97 | u64 ctime; | 105 | u64 ctime; |
98 | int i; | 106 | int i; |
99 | int ret = -EIO; | 107 | int ret = -EIO; |
100 | int sync_failed = 0; | 108 | int sync_failed = 0; |
101 | 109 | ||
102 | /* get current inode since we may have written sibling ptrs etc. */ | 110 | /* get current inode since we may have written sibling ptrs etc. */ |
103 | block = clus_to_blk(sbi, inode->i_ino); | 111 | bh = omfs_bread(inode->i_sb, inode->i_ino); |
104 | bh = sb_bread(inode->i_sb, block); | ||
105 | if (!bh) | 112 | if (!bh) |
106 | goto out; | 113 | goto out; |
107 | 114 | ||
@@ -140,8 +147,7 @@ static int __omfs_write_inode(struct inode *inode, int wait) | |||
140 | 147 | ||
141 | /* if mirroring writes, copy to next fsblock */ | 148 | /* if mirroring writes, copy to next fsblock */ |
142 | for (i = 1; i < sbi->s_mirrors; i++) { | 149 | for (i = 1; i < sbi->s_mirrors; i++) { |
143 | bh2 = sb_bread(inode->i_sb, block + i * | 150 | bh2 = omfs_bread(inode->i_sb, inode->i_ino + i); |
144 | (sbi->s_blocksize / sbi->s_sys_blocksize)); | ||
145 | if (!bh2) | 151 | if (!bh2) |
146 | goto out_brelse; | 152 | goto out_brelse; |
147 | 153 | ||
@@ -175,9 +181,13 @@ int omfs_sync_inode(struct inode *inode) | |||
175 | * called when an entry is deleted, need to clear the bits in the | 181 | * called when an entry is deleted, need to clear the bits in the |
176 | * bitmaps. | 182 | * bitmaps. |
177 | */ | 183 | */ |
178 | static void omfs_delete_inode(struct inode *inode) | 184 | static void omfs_evict_inode(struct inode *inode) |
179 | { | 185 | { |
180 | truncate_inode_pages(&inode->i_data, 0); | 186 | truncate_inode_pages(&inode->i_data, 0); |
187 | end_writeback(inode); | ||
188 | |||
189 | if (inode->i_nlink) | ||
190 | return; | ||
181 | 191 | ||
182 | if (S_ISREG(inode->i_mode)) { | 192 | if (S_ISREG(inode->i_mode)) { |
183 | inode->i_size = 0; | 193 | inode->i_size = 0; |
@@ -185,7 +195,6 @@ static void omfs_delete_inode(struct inode *inode) | |||
185 | } | 195 | } |
186 | 196 | ||
187 | omfs_clear_range(inode->i_sb, inode->i_ino, 2); | 197 | omfs_clear_range(inode->i_sb, inode->i_ino, 2); |
188 | clear_inode(inode); | ||
189 | } | 198 | } |
190 | 199 | ||
191 | struct inode *omfs_iget(struct super_block *sb, ino_t ino) | 200 | struct inode *omfs_iget(struct super_block *sb, ino_t ino) |
@@ -193,7 +202,6 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino) | |||
193 | struct omfs_sb_info *sbi = OMFS_SB(sb); | 202 | struct omfs_sb_info *sbi = OMFS_SB(sb); |
194 | struct omfs_inode *oi; | 203 | struct omfs_inode *oi; |
195 | struct buffer_head *bh; | 204 | struct buffer_head *bh; |
196 | unsigned int block; | ||
197 | u64 ctime; | 205 | u64 ctime; |
198 | unsigned long nsecs; | 206 | unsigned long nsecs; |
199 | struct inode *inode; | 207 | struct inode *inode; |
@@ -204,8 +212,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino) | |||
204 | if (!(inode->i_state & I_NEW)) | 212 | if (!(inode->i_state & I_NEW)) |
205 | return inode; | 213 | return inode; |
206 | 214 | ||
207 | block = clus_to_blk(sbi, ino); | 215 | bh = omfs_bread(inode->i_sb, ino); |
208 | bh = sb_bread(inode->i_sb, block); | ||
209 | if (!bh) | 216 | if (!bh) |
210 | goto iget_failed; | 217 | goto iget_failed; |
211 | 218 | ||
@@ -284,7 +291,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
284 | 291 | ||
285 | static const struct super_operations omfs_sops = { | 292 | static const struct super_operations omfs_sops = { |
286 | .write_inode = omfs_write_inode, | 293 | .write_inode = omfs_write_inode, |
287 | .delete_inode = omfs_delete_inode, | 294 | .evict_inode = omfs_evict_inode, |
288 | .put_super = omfs_put_super, | 295 | .put_super = omfs_put_super, |
289 | .statfs = omfs_statfs, | 296 | .statfs = omfs_statfs, |
290 | .show_options = generic_show_options, | 297 | .show_options = generic_show_options, |
@@ -319,6 +326,9 @@ static int omfs_get_imap(struct super_block *sb) | |||
319 | goto nomem; | 326 | goto nomem; |
320 | 327 | ||
321 | block = clus_to_blk(sbi, sbi->s_bitmap_ino); | 328 | block = clus_to_blk(sbi, sbi->s_bitmap_ino); |
329 | if (block >= sbi->s_num_blocks) | ||
330 | goto nomem; | ||
331 | |||
322 | ptr = sbi->s_imap; | 332 | ptr = sbi->s_imap; |
323 | for (count = bitmap_size; count > 0; count -= sb->s_blocksize) { | 333 | for (count = bitmap_size; count > 0; count -= sb->s_blocksize) { |
324 | bh = sb_bread(sb, block++); | 334 | bh = sb_bread(sb, block++); |
@@ -417,7 +427,6 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) | |||
417 | struct omfs_root_block *omfs_rb; | 427 | struct omfs_root_block *omfs_rb; |
418 | struct omfs_sb_info *sbi; | 428 | struct omfs_sb_info *sbi; |
419 | struct inode *root; | 429 | struct inode *root; |
420 | sector_t start; | ||
421 | int ret = -EINVAL; | 430 | int ret = -EINVAL; |
422 | 431 | ||
423 | save_mount_options(sb, (char *) data); | 432 | save_mount_options(sb, (char *) data); |
@@ -486,8 +495,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) | |||
486 | sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) - | 495 | sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) - |
487 | get_bitmask_order(sbi->s_sys_blocksize); | 496 | get_bitmask_order(sbi->s_sys_blocksize); |
488 | 497 | ||
489 | start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block)); | 498 | bh2 = omfs_bread(sb, be64_to_cpu(omfs_sb->s_root_block)); |
490 | bh2 = sb_bread(sb, start); | ||
491 | if (!bh2) | 499 | if (!bh2) |
492 | goto out_brelse_bh; | 500 | goto out_brelse_bh; |
493 | 501 | ||
@@ -504,6 +512,21 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) | |||
504 | goto out_brelse_bh2; | 512 | goto out_brelse_bh2; |
505 | } | 513 | } |
506 | 514 | ||
515 | if (sbi->s_bitmap_ino != ~0ULL && | ||
516 | sbi->s_bitmap_ino > sbi->s_num_blocks) { | ||
517 | printk(KERN_ERR "omfs: free space bitmap location is corrupt " | ||
518 | "(%llx, total blocks %llx)\n", | ||
519 | (unsigned long long) sbi->s_bitmap_ino, | ||
520 | (unsigned long long) sbi->s_num_blocks); | ||
521 | goto out_brelse_bh2; | ||
522 | } | ||
523 | if (sbi->s_clustersize < 1 || | ||
524 | sbi->s_clustersize > OMFS_MAX_CLUSTER_SIZE) { | ||
525 | printk(KERN_ERR "omfs: cluster size out of range (%d)", | ||
526 | sbi->s_clustersize); | ||
527 | goto out_brelse_bh2; | ||
528 | } | ||
529 | |||
507 | ret = omfs_get_imap(sb); | 530 | ret = omfs_get_imap(sb); |
508 | if (ret) | 531 | if (ret) |
509 | goto out_brelse_bh2; | 532 | goto out_brelse_bh2; |
@@ -529,6 +552,8 @@ out_brelse_bh2: | |||
529 | out_brelse_bh: | 552 | out_brelse_bh: |
530 | brelse(bh); | 553 | brelse(bh); |
531 | end: | 554 | end: |
555 | if (ret) | ||
556 | kfree(sbi); | ||
532 | return ret; | 557 | return ret; |
533 | } | 558 | } |
534 | 559 | ||
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h index ebe2fdbe535e..7d414fef501a 100644 --- a/fs/omfs/omfs.h +++ b/fs/omfs/omfs.h | |||
@@ -58,6 +58,7 @@ extern void omfs_make_empty_table(struct buffer_head *bh, int offset); | |||
58 | extern int omfs_shrink_inode(struct inode *inode); | 58 | extern int omfs_shrink_inode(struct inode *inode); |
59 | 59 | ||
60 | /* inode.c */ | 60 | /* inode.c */ |
61 | extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block); | ||
61 | extern struct inode *omfs_iget(struct super_block *sb, ino_t inode); | 62 | extern struct inode *omfs_iget(struct super_block *sb, ino_t inode); |
62 | extern struct inode *omfs_new_inode(struct inode *dir, int mode); | 63 | extern struct inode *omfs_new_inode(struct inode *dir, int mode); |
63 | extern int omfs_reserve_block(struct super_block *sb, sector_t block); | 64 | extern int omfs_reserve_block(struct super_block *sb, sector_t block); |
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h index 12cca245d6e8..ee5e4327de92 100644 --- a/fs/omfs/omfs_fs.h +++ b/fs/omfs/omfs_fs.h | |||
@@ -17,6 +17,7 @@ | |||
17 | #define OMFS_EXTENT_CONT 0x40 | 17 | #define OMFS_EXTENT_CONT 0x40 |
18 | #define OMFS_XOR_COUNT 19 | 18 | #define OMFS_XOR_COUNT 19 |
19 | #define OMFS_MAX_BLOCK_SIZE 8192 | 19 | #define OMFS_MAX_BLOCK_SIZE 8192 |
20 | #define OMFS_MAX_CLUSTER_SIZE 8 | ||
20 | 21 | ||
21 | struct omfs_super_block { | 22 | struct omfs_super_block { |
22 | char s_fill1[256]; | 23 | char s_fill1[256]; |
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/falloc.h> | 29 | #include <linux/falloc.h> |
30 | #include <linux/fs_struct.h> | 30 | #include <linux/fs_struct.h> |
31 | #include <linux/ima.h> | 31 | #include <linux/ima.h> |
32 | #include <linux/dnotify.h> | ||
32 | 33 | ||
33 | #include "internal.h" | 34 | #include "internal.h" |
34 | 35 | ||
@@ -110,7 +111,7 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) | |||
110 | 111 | ||
111 | error = locks_verify_truncate(inode, NULL, length); | 112 | error = locks_verify_truncate(inode, NULL, length); |
112 | if (!error) | 113 | if (!error) |
113 | error = security_path_truncate(&path, length, 0); | 114 | error = security_path_truncate(&path); |
114 | if (!error) | 115 | if (!error) |
115 | error = do_truncate(path.dentry, length, 0, NULL); | 116 | error = do_truncate(path.dentry, length, 0, NULL); |
116 | 117 | ||
@@ -165,8 +166,7 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) | |||
165 | 166 | ||
166 | error = locks_verify_truncate(inode, file, length); | 167 | error = locks_verify_truncate(inode, file, length); |
167 | if (!error) | 168 | if (!error) |
168 | error = security_path_truncate(&file->f_path, length, | 169 | error = security_path_truncate(&file->f_path); |
169 | ATTR_MTIME|ATTR_CTIME); | ||
170 | if (!error) | 170 | if (!error) |
171 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); | 171 | error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); |
172 | out_putf: | 172 | out_putf: |
@@ -367,7 +367,7 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename) | |||
367 | if (error) | 367 | if (error) |
368 | goto out; | 368 | goto out; |
369 | 369 | ||
370 | error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); | 370 | error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); |
371 | if (error) | 371 | if (error) |
372 | goto dput_and_out; | 372 | goto dput_and_out; |
373 | 373 | ||
@@ -396,7 +396,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) | |||
396 | if (!S_ISDIR(inode->i_mode)) | 396 | if (!S_ISDIR(inode->i_mode)) |
397 | goto out_putf; | 397 | goto out_putf; |
398 | 398 | ||
399 | error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); | 399 | error = inode_permission(inode, MAY_EXEC | MAY_CHDIR); |
400 | if (!error) | 400 | if (!error) |
401 | set_fs_pwd(current->fs, &file->f_path); | 401 | set_fs_pwd(current->fs, &file->f_path); |
402 | out_putf: | 402 | out_putf: |
@@ -414,7 +414,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) | |||
414 | if (error) | 414 | if (error) |
415 | goto out; | 415 | goto out; |
416 | 416 | ||
417 | error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); | 417 | error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); |
418 | if (error) | 418 | if (error) |
419 | goto dput_and_out; | 419 | goto dput_and_out; |
420 | 420 | ||
@@ -888,7 +888,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) | |||
888 | put_unused_fd(fd); | 888 | put_unused_fd(fd); |
889 | fd = PTR_ERR(f); | 889 | fd = PTR_ERR(f); |
890 | } else { | 890 | } else { |
891 | fsnotify_open(f->f_path.dentry); | 891 | fsnotify_open(f); |
892 | fd_install(fd, f); | 892 | fd_install(fd, f); |
893 | } | 893 | } |
894 | } | 894 | } |
@@ -1031,7 +1031,9 @@ EXPORT_SYMBOL(generic_file_open); | |||
1031 | 1031 | ||
1032 | /* | 1032 | /* |
1033 | * This is used by subsystems that don't want seekable | 1033 | * This is used by subsystems that don't want seekable |
1034 | * file descriptors | 1034 | * file descriptors. The function is not supposed to ever fail, the only |
1035 | * reason it returns an 'int' and not 'void' is so that it can be plugged | ||
1036 | * directly into file_operations structure. | ||
1035 | */ | 1037 | */ |
1036 | int nonseekable_open(struct inode *inode, struct file *filp) | 1038 | int nonseekable_open(struct inode *inode, struct file *filp) |
1037 | { | 1039 | { |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 6921e7890be6..fbeb697374d5 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
@@ -45,8 +45,11 @@ adfs_partition(struct parsed_partitions *state, char *name, char *data, | |||
45 | nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | | 45 | nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | |
46 | (le32_to_cpu(dr->disc_size) >> 9); | 46 | (le32_to_cpu(dr->disc_size) >> 9); |
47 | 47 | ||
48 | if (name) | 48 | if (name) { |
49 | printk(" [%s]", name); | 49 | strlcat(state->pp_buf, " [", PAGE_SIZE); |
50 | strlcat(state->pp_buf, name, PAGE_SIZE); | ||
51 | strlcat(state->pp_buf, "]", PAGE_SIZE); | ||
52 | } | ||
50 | put_partition(state, slot, first_sector, nr_sects); | 53 | put_partition(state, slot, first_sector, nr_sects); |
51 | return dr; | 54 | return dr; |
52 | } | 55 | } |
@@ -81,14 +84,14 @@ static int riscix_partition(struct parsed_partitions *state, | |||
81 | if (!rr) | 84 | if (!rr) |
82 | return -1; | 85 | return -1; |
83 | 86 | ||
84 | printk(" [RISCiX]"); | 87 | strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE); |
85 | 88 | ||
86 | 89 | ||
87 | if (rr->magic == RISCIX_MAGIC) { | 90 | if (rr->magic == RISCIX_MAGIC) { |
88 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; | 91 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; |
89 | int part; | 92 | int part; |
90 | 93 | ||
91 | printk(" <"); | 94 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
92 | 95 | ||
93 | put_partition(state, slot++, first_sect, size); | 96 | put_partition(state, slot++, first_sect, size); |
94 | for (part = 0; part < 8; part++) { | 97 | for (part = 0; part < 8; part++) { |
@@ -97,11 +100,13 @@ static int riscix_partition(struct parsed_partitions *state, | |||
97 | put_partition(state, slot++, | 100 | put_partition(state, slot++, |
98 | le32_to_cpu(rr->part[part].start), | 101 | le32_to_cpu(rr->part[part].start), |
99 | le32_to_cpu(rr->part[part].length)); | 102 | le32_to_cpu(rr->part[part].length)); |
100 | printk("(%s)", rr->part[part].name); | 103 | strlcat(state->pp_buf, "(", PAGE_SIZE); |
104 | strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE); | ||
105 | strlcat(state->pp_buf, ")", PAGE_SIZE); | ||
101 | } | 106 | } |
102 | } | 107 | } |
103 | 108 | ||
104 | printk(" >\n"); | 109 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
105 | } else { | 110 | } else { |
106 | put_partition(state, slot++, first_sect, nr_sects); | 111 | put_partition(state, slot++, first_sect, nr_sects); |
107 | } | 112 | } |
@@ -131,7 +136,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
131 | struct linux_part *linuxp; | 136 | struct linux_part *linuxp; |
132 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; | 137 | unsigned long size = nr_sects > 2 ? 2 : nr_sects; |
133 | 138 | ||
134 | printk(" [Linux]"); | 139 | strlcat(state->pp_buf, " [Linux]", PAGE_SIZE); |
135 | 140 | ||
136 | put_partition(state, slot++, first_sect, size); | 141 | put_partition(state, slot++, first_sect, size); |
137 | 142 | ||
@@ -139,7 +144,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
139 | if (!linuxp) | 144 | if (!linuxp) |
140 | return -1; | 145 | return -1; |
141 | 146 | ||
142 | printk(" <"); | 147 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
143 | while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || | 148 | while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || |
144 | linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { | 149 | linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { |
145 | if (slot == state->limit) | 150 | if (slot == state->limit) |
@@ -149,7 +154,7 @@ static int linux_partition(struct parsed_partitions *state, | |||
149 | le32_to_cpu(linuxp->nr_sects)); | 154 | le32_to_cpu(linuxp->nr_sects)); |
150 | linuxp ++; | 155 | linuxp ++; |
151 | } | 156 | } |
152 | printk(" >"); | 157 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
153 | 158 | ||
154 | put_dev_sector(sect); | 159 | put_dev_sector(sect); |
155 | return slot; | 160 | return slot; |
@@ -294,7 +299,7 @@ int adfspart_check_ADFS(struct parsed_partitions *state) | |||
294 | break; | 299 | break; |
295 | } | 300 | } |
296 | } | 301 | } |
297 | printk("\n"); | 302 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
298 | return 1; | 303 | return 1; |
299 | } | 304 | } |
300 | #endif | 305 | #endif |
@@ -367,7 +372,7 @@ int adfspart_check_ICS(struct parsed_partitions *state) | |||
367 | return 0; | 372 | return 0; |
368 | } | 373 | } |
369 | 374 | ||
370 | printk(" [ICS]"); | 375 | strlcat(state->pp_buf, " [ICS]", PAGE_SIZE); |
371 | 376 | ||
372 | for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { | 377 | for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { |
373 | u32 start = le32_to_cpu(p->start); | 378 | u32 start = le32_to_cpu(p->start); |
@@ -401,7 +406,7 @@ int adfspart_check_ICS(struct parsed_partitions *state) | |||
401 | } | 406 | } |
402 | 407 | ||
403 | put_dev_sector(sect); | 408 | put_dev_sector(sect); |
404 | printk("\n"); | 409 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
405 | return 1; | 410 | return 1; |
406 | } | 411 | } |
407 | #endif | 412 | #endif |
@@ -461,7 +466,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state) | |||
461 | return 0; | 466 | return 0; |
462 | } | 467 | } |
463 | 468 | ||
464 | printk(" [POWERTEC]"); | 469 | strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE); |
465 | 470 | ||
466 | for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { | 471 | for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { |
467 | u32 start = le32_to_cpu(p->start); | 472 | u32 start = le32_to_cpu(p->start); |
@@ -472,7 +477,7 @@ int adfspart_check_POWERTEC(struct parsed_partitions *state) | |||
472 | } | 477 | } |
473 | 478 | ||
474 | put_dev_sector(sect); | 479 | put_dev_sector(sect); |
475 | printk("\n"); | 480 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
476 | return 1; | 481 | return 1; |
477 | } | 482 | } |
478 | #endif | 483 | #endif |
@@ -543,7 +548,7 @@ int adfspart_check_EESOX(struct parsed_partitions *state) | |||
543 | 548 | ||
544 | size = get_capacity(state->bdev->bd_disk); | 549 | size = get_capacity(state->bdev->bd_disk); |
545 | put_partition(state, slot++, start, size - start); | 550 | put_partition(state, slot++, start, size - start); |
546 | printk("\n"); | 551 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
547 | } | 552 | } |
548 | 553 | ||
549 | return i ? 1 : 0; | 554 | return i ? 1 : 0; |
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c index ba443d4229f8..70cbf44a1560 100644 --- a/fs/partitions/amiga.c +++ b/fs/partitions/amiga.c | |||
@@ -69,7 +69,13 @@ int amiga_partition(struct parsed_partitions *state) | |||
69 | /* blksize is blocks per 512 byte standard block */ | 69 | /* blksize is blocks per 512 byte standard block */ |
70 | blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; | 70 | blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; |
71 | 71 | ||
72 | printk(" RDSK (%d)", blksize * 512); /* Be more informative */ | 72 | { |
73 | char tmp[7 + 10 + 1 + 1]; | ||
74 | |||
75 | /* Be more informative */ | ||
76 | snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512); | ||
77 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
78 | } | ||
73 | blk = be32_to_cpu(rdb->rdb_PartitionList); | 79 | blk = be32_to_cpu(rdb->rdb_PartitionList); |
74 | put_dev_sector(sect); | 80 | put_dev_sector(sect); |
75 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { | 81 | for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { |
@@ -106,23 +112,27 @@ int amiga_partition(struct parsed_partitions *state) | |||
106 | { | 112 | { |
107 | /* Be even more informative to aid mounting */ | 113 | /* Be even more informative to aid mounting */ |
108 | char dostype[4]; | 114 | char dostype[4]; |
115 | char tmp[42]; | ||
116 | |||
109 | __be32 *dt = (__be32 *)dostype; | 117 | __be32 *dt = (__be32 *)dostype; |
110 | *dt = pb->pb_Environment[16]; | 118 | *dt = pb->pb_Environment[16]; |
111 | if (dostype[3] < ' ') | 119 | if (dostype[3] < ' ') |
112 | printk(" (%c%c%c^%c)", | 120 | snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)", |
113 | dostype[0], dostype[1], | 121 | dostype[0], dostype[1], |
114 | dostype[2], dostype[3] + '@' ); | 122 | dostype[2], dostype[3] + '@' ); |
115 | else | 123 | else |
116 | printk(" (%c%c%c%c)", | 124 | snprintf(tmp, sizeof(tmp), " (%c%c%c%c)", |
117 | dostype[0], dostype[1], | 125 | dostype[0], dostype[1], |
118 | dostype[2], dostype[3]); | 126 | dostype[2], dostype[3]); |
119 | printk("(res %d spb %d)", | 127 | strlcat(state->pp_buf, tmp, PAGE_SIZE); |
128 | snprintf(tmp, sizeof(tmp), "(res %d spb %d)", | ||
120 | be32_to_cpu(pb->pb_Environment[6]), | 129 | be32_to_cpu(pb->pb_Environment[6]), |
121 | be32_to_cpu(pb->pb_Environment[4])); | 130 | be32_to_cpu(pb->pb_Environment[4])); |
131 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
122 | } | 132 | } |
123 | res = 1; | 133 | res = 1; |
124 | } | 134 | } |
125 | printk("\n"); | 135 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
126 | 136 | ||
127 | rdb_done: | 137 | rdb_done: |
128 | return res; | 138 | return res; |
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c index 4439ff1b6cec..9875b05e80a2 100644 --- a/fs/partitions/atari.c +++ b/fs/partitions/atari.c | |||
@@ -62,7 +62,7 @@ int atari_partition(struct parsed_partitions *state) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | pi = &rs->part[0]; | 64 | pi = &rs->part[0]; |
65 | printk (" AHDI"); | 65 | strlcat(state->pp_buf, " AHDI", PAGE_SIZE); |
66 | for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { | 66 | for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { |
67 | struct rootsector *xrs; | 67 | struct rootsector *xrs; |
68 | Sector sect2; | 68 | Sector sect2; |
@@ -81,7 +81,7 @@ int atari_partition(struct parsed_partitions *state) | |||
81 | #ifdef ICD_PARTS | 81 | #ifdef ICD_PARTS |
82 | part_fmt = 1; | 82 | part_fmt = 1; |
83 | #endif | 83 | #endif |
84 | printk(" XGM<"); | 84 | strlcat(state->pp_buf, " XGM<", PAGE_SIZE); |
85 | partsect = extensect = be32_to_cpu(pi->st); | 85 | partsect = extensect = be32_to_cpu(pi->st); |
86 | while (1) { | 86 | while (1) { |
87 | xrs = read_part_sector(state, partsect, §2); | 87 | xrs = read_part_sector(state, partsect, §2); |
@@ -120,14 +120,14 @@ int atari_partition(struct parsed_partitions *state) | |||
120 | break; | 120 | break; |
121 | } | 121 | } |
122 | } | 122 | } |
123 | printk(" >"); | 123 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
124 | } | 124 | } |
125 | #ifdef ICD_PARTS | 125 | #ifdef ICD_PARTS |
126 | if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ | 126 | if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ |
127 | pi = &rs->icdpart[0]; | 127 | pi = &rs->icdpart[0]; |
128 | /* sanity check: no ICD format if first partition invalid */ | 128 | /* sanity check: no ICD format if first partition invalid */ |
129 | if (OK_id(pi->id)) { | 129 | if (OK_id(pi->id)) { |
130 | printk(" ICD<"); | 130 | strlcat(state->pp_buf, " ICD<", PAGE_SIZE); |
131 | for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { | 131 | for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { |
132 | /* accept only GEM,BGM,RAW,LNX,SWP partitions */ | 132 | /* accept only GEM,BGM,RAW,LNX,SWP partitions */ |
133 | if (!((pi->flg & 1) && OK_id(pi->id))) | 133 | if (!((pi->flg & 1) && OK_id(pi->id))) |
@@ -137,13 +137,13 @@ int atari_partition(struct parsed_partitions *state) | |||
137 | be32_to_cpu(pi->st), | 137 | be32_to_cpu(pi->st), |
138 | be32_to_cpu(pi->siz)); | 138 | be32_to_cpu(pi->siz)); |
139 | } | 139 | } |
140 | printk(" >"); | 140 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
141 | } | 141 | } |
142 | } | 142 | } |
143 | #endif | 143 | #endif |
144 | put_dev_sector(sect); | 144 | put_dev_sector(sect); |
145 | 145 | ||
146 | printk ("\n"); | 146 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
147 | 147 | ||
148 | return 1; | 148 | return 1; |
149 | } | 149 | } |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 5dcd4b0c5533..79fbf3f390f0 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -164,10 +164,16 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); | 164 | state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); |
165 | if (!state) | 165 | if (!state) |
166 | return NULL; | 166 | return NULL; |
167 | state->pp_buf = (char *)__get_free_page(GFP_KERNEL); | ||
168 | if (!state->pp_buf) { | ||
169 | kfree(state); | ||
170 | return NULL; | ||
171 | } | ||
172 | state->pp_buf[0] = '\0'; | ||
167 | 173 | ||
168 | state->bdev = bdev; | 174 | state->bdev = bdev; |
169 | disk_name(hd, 0, state->name); | 175 | disk_name(hd, 0, state->name); |
170 | printk(KERN_INFO " %s:", state->name); | 176 | snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); |
171 | if (isdigit(state->name[strlen(state->name)-1])) | 177 | if (isdigit(state->name[strlen(state->name)-1])) |
172 | sprintf(state->name, "p"); | 178 | sprintf(state->name, "p"); |
173 | 179 | ||
@@ -185,17 +191,25 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
185 | } | 191 | } |
186 | 192 | ||
187 | } | 193 | } |
188 | if (res > 0) | 194 | if (res > 0) { |
195 | printk(KERN_INFO "%s", state->pp_buf); | ||
196 | |||
197 | free_page((unsigned long)state->pp_buf); | ||
189 | return state; | 198 | return state; |
199 | } | ||
190 | if (state->access_beyond_eod) | 200 | if (state->access_beyond_eod) |
191 | err = -ENOSPC; | 201 | err = -ENOSPC; |
192 | if (err) | 202 | if (err) |
193 | /* The partition is unrecognized. So report I/O errors if there were any */ | 203 | /* The partition is unrecognized. So report I/O errors if there were any */ |
194 | res = err; | 204 | res = err; |
195 | if (!res) | 205 | if (!res) |
196 | printk(" unknown partition table\n"); | 206 | strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); |
197 | else if (warn_no_part) | 207 | else if (warn_no_part) |
198 | printk(" unable to read partition table\n"); | 208 | strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); |
209 | |||
210 | printk(KERN_INFO "%s", state->pp_buf); | ||
211 | |||
212 | free_page((unsigned long)state->pp_buf); | ||
199 | kfree(state); | 213 | kfree(state); |
200 | return ERR_PTR(res); | 214 | return ERR_PTR(res); |
201 | } | 215 | } |
@@ -459,7 +473,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
459 | } | 473 | } |
460 | 474 | ||
461 | /* everything is up and running, commence */ | 475 | /* everything is up and running, commence */ |
462 | INIT_RCU_HEAD(&p->rcu_head); | ||
463 | rcu_assign_pointer(ptbl->part[partno], p); | 476 | rcu_assign_pointer(ptbl->part[partno], p); |
464 | 477 | ||
465 | /* suppress uevent if the disk supresses it */ | 478 | /* suppress uevent if the disk supresses it */ |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 52f8bd399396..8e4e103ba216 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -16,6 +16,7 @@ struct parsed_partitions { | |||
16 | int next; | 16 | int next; |
17 | int limit; | 17 | int limit; |
18 | bool access_beyond_eod; | 18 | bool access_beyond_eod; |
19 | char *pp_buf; | ||
19 | }; | 20 | }; |
20 | 21 | ||
21 | static inline void *read_part_sector(struct parsed_partitions *state, | 22 | static inline void *read_part_sector(struct parsed_partitions *state, |
@@ -32,9 +33,12 @@ static inline void | |||
32 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) | 33 | put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) |
33 | { | 34 | { |
34 | if (n < p->limit) { | 35 | if (n < p->limit) { |
36 | char tmp[1 + BDEVNAME_SIZE + 10 + 1]; | ||
37 | |||
35 | p->parts[n].from = from; | 38 | p->parts[n].from = from; |
36 | p->parts[n].size = size; | 39 | p->parts[n].size = size; |
37 | printk(" %s%d", p->name, n); | 40 | snprintf(tmp, sizeof(tmp), " %s%d", p->name, n); |
41 | strlcat(p->pp_buf, tmp, PAGE_SIZE); | ||
38 | } | 42 | } |
39 | } | 43 | } |
40 | 44 | ||
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index 9efb2cfe2410..dbb44d4bb8a7 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
@@ -630,6 +630,6 @@ int efi_partition(struct parsed_partitions *state) | |||
630 | } | 630 | } |
631 | kfree(ptes); | 631 | kfree(ptes); |
632 | kfree(gpt); | 632 | kfree(gpt); |
633 | printk("\n"); | 633 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
634 | return 1; | 634 | return 1; |
635 | } | 635 | } |
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index fc8497643fd0..d1b8a5c4bc0a 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c | |||
@@ -75,6 +75,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
75 | unsigned char *data; | 75 | unsigned char *data; |
76 | Sector sect; | 76 | Sector sect; |
77 | sector_t labelsect; | 77 | sector_t labelsect; |
78 | char tmp[64]; | ||
78 | 79 | ||
79 | res = 0; | 80 | res = 0; |
80 | blocksize = bdev_logical_block_size(bdev); | 81 | blocksize = bdev_logical_block_size(bdev); |
@@ -144,13 +145,15 @@ int ibm_partition(struct parsed_partitions *state) | |||
144 | */ | 145 | */ |
145 | blocksize = label->cms.block_size; | 146 | blocksize = label->cms.block_size; |
146 | if (label->cms.disk_offset != 0) { | 147 | if (label->cms.disk_offset != 0) { |
147 | printk("CMS1/%8s(MDSK):", name); | 148 | snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name); |
149 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
148 | /* disk is reserved minidisk */ | 150 | /* disk is reserved minidisk */ |
149 | offset = label->cms.disk_offset; | 151 | offset = label->cms.disk_offset; |
150 | size = (label->cms.block_count - 1) | 152 | size = (label->cms.block_count - 1) |
151 | * (blocksize >> 9); | 153 | * (blocksize >> 9); |
152 | } else { | 154 | } else { |
153 | printk("CMS1/%8s:", name); | 155 | snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name); |
156 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
154 | offset = (info->label_block + 1); | 157 | offset = (info->label_block + 1); |
155 | size = label->cms.block_count | 158 | size = label->cms.block_count |
156 | * (blocksize >> 9); | 159 | * (blocksize >> 9); |
@@ -159,7 +162,8 @@ int ibm_partition(struct parsed_partitions *state) | |||
159 | size-offset*(blocksize >> 9)); | 162 | size-offset*(blocksize >> 9)); |
160 | } else { | 163 | } else { |
161 | if (strncmp(type, "LNX1", 4) == 0) { | 164 | if (strncmp(type, "LNX1", 4) == 0) { |
162 | printk("LNX1/%8s:", name); | 165 | snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name); |
166 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
163 | if (label->lnx.ldl_version == 0xf2) { | 167 | if (label->lnx.ldl_version == 0xf2) { |
164 | fmt_size = label->lnx.formatted_blocks | 168 | fmt_size = label->lnx.formatted_blocks |
165 | * (blocksize >> 9); | 169 | * (blocksize >> 9); |
@@ -178,7 +182,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
178 | offset = (info->label_block + 1); | 182 | offset = (info->label_block + 1); |
179 | } else { | 183 | } else { |
180 | /* unlabeled disk */ | 184 | /* unlabeled disk */ |
181 | printk("(nonl)"); | 185 | strlcat(tmp, sizeof(tmp), "(nonl)", PAGE_SIZE); |
182 | size = i_size >> 9; | 186 | size = i_size >> 9; |
183 | offset = (info->label_block + 1); | 187 | offset = (info->label_block + 1); |
184 | } | 188 | } |
@@ -197,7 +201,8 @@ int ibm_partition(struct parsed_partitions *state) | |||
197 | * if not, something is wrong, skipping partition detection | 201 | * if not, something is wrong, skipping partition detection |
198 | */ | 202 | */ |
199 | if (strncmp(type, "VOL1", 4) == 0) { | 203 | if (strncmp(type, "VOL1", 4) == 0) { |
200 | printk("VOL1/%8s:", name); | 204 | snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name); |
205 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
201 | /* | 206 | /* |
202 | * get block number and read then go through format1 | 207 | * get block number and read then go through format1 |
203 | * labels | 208 | * labels |
@@ -253,7 +258,7 @@ int ibm_partition(struct parsed_partitions *state) | |||
253 | 258 | ||
254 | } | 259 | } |
255 | 260 | ||
256 | printk("\n"); | 261 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
257 | goto out_freeall; | 262 | goto out_freeall; |
258 | 263 | ||
259 | 264 | ||
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c index 1cc928bb762f..0ea19312706b 100644 --- a/fs/partitions/karma.c +++ b/fs/partitions/karma.c | |||
@@ -50,7 +50,7 @@ int karma_partition(struct parsed_partitions *state) | |||
50 | } | 50 | } |
51 | slot++; | 51 | slot++; |
52 | } | 52 | } |
53 | printk("\n"); | 53 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
54 | put_dev_sector(sect); | 54 | put_dev_sector(sect); |
55 | return 1; | 55 | return 1; |
56 | } | 56 | } |
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 648c9d8f3357..5bf8a04b5d9b 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -643,7 +643,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
643 | return false; | 643 | return false; |
644 | } | 644 | } |
645 | 645 | ||
646 | printk (" [LDM]"); | 646 | strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE); |
647 | 647 | ||
648 | /* Create the data partitions */ | 648 | /* Create the data partitions */ |
649 | list_for_each (item, &ldb->v_part) { | 649 | list_for_each (item, &ldb->v_part) { |
@@ -658,7 +658,7 @@ static bool ldm_create_data_partitions (struct parsed_partitions *pp, | |||
658 | part_num++; | 658 | part_num++; |
659 | } | 659 | } |
660 | 660 | ||
661 | printk ("\n"); | 661 | strlcat(pp->pp_buf, "\n", PAGE_SIZE); |
662 | return true; | 662 | return true; |
663 | } | 663 | } |
664 | 664 | ||
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c index 74465ff7c263..68d6a216ee79 100644 --- a/fs/partitions/mac.c +++ b/fs/partitions/mac.c | |||
@@ -59,7 +59,7 @@ int mac_partition(struct parsed_partitions *state) | |||
59 | put_dev_sector(sect); | 59 | put_dev_sector(sect); |
60 | return 0; /* not a MacOS disk */ | 60 | return 0; /* not a MacOS disk */ |
61 | } | 61 | } |
62 | printk(" [mac]"); | 62 | strlcat(state->pp_buf, " [mac]", PAGE_SIZE); |
63 | blocks_in_map = be32_to_cpu(part->map_count); | 63 | blocks_in_map = be32_to_cpu(part->map_count); |
64 | for (blk = 1; blk <= blocks_in_map; ++blk) { | 64 | for (blk = 1; blk <= blocks_in_map; ++blk) { |
65 | int pos = blk * secsize; | 65 | int pos = blk * secsize; |
@@ -128,6 +128,6 @@ int mac_partition(struct parsed_partitions *state) | |||
128 | #endif | 128 | #endif |
129 | 129 | ||
130 | put_dev_sector(sect); | 130 | put_dev_sector(sect); |
131 | printk("\n"); | 131 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
132 | return 1; | 132 | return 1; |
133 | } | 133 | } |
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 15bfb7b1e044..5f79a6677c69 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c | |||
@@ -213,10 +213,18 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
213 | put_dev_sector(sect); | 213 | put_dev_sector(sect); |
214 | return; | 214 | return; |
215 | } | 215 | } |
216 | printk(" %s%d: <solaris:", state->name, origin); | 216 | { |
217 | char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1]; | ||
218 | |||
219 | snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin); | ||
220 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
221 | } | ||
217 | if (le32_to_cpu(v->v_version) != 1) { | 222 | if (le32_to_cpu(v->v_version) != 1) { |
218 | printk(" cannot handle version %d vtoc>\n", | 223 | char tmp[64]; |
219 | le32_to_cpu(v->v_version)); | 224 | |
225 | snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n", | ||
226 | le32_to_cpu(v->v_version)); | ||
227 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
220 | put_dev_sector(sect); | 228 | put_dev_sector(sect); |
221 | return; | 229 | return; |
222 | } | 230 | } |
@@ -224,9 +232,12 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
224 | max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; | 232 | max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; |
225 | for (i=0; i<max_nparts && state->next<state->limit; i++) { | 233 | for (i=0; i<max_nparts && state->next<state->limit; i++) { |
226 | struct solaris_x86_slice *s = &v->v_slice[i]; | 234 | struct solaris_x86_slice *s = &v->v_slice[i]; |
235 | char tmp[3 + 10 + 1 + 1]; | ||
236 | |||
227 | if (s->s_size == 0) | 237 | if (s->s_size == 0) |
228 | continue; | 238 | continue; |
229 | printk(" [s%d]", i); | 239 | snprintf(tmp, sizeof(tmp), " [s%d]", i); |
240 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
230 | /* solaris partitions are relative to current MS-DOS | 241 | /* solaris partitions are relative to current MS-DOS |
231 | * one; must add the offset of the current partition */ | 242 | * one; must add the offset of the current partition */ |
232 | put_partition(state, state->next++, | 243 | put_partition(state, state->next++, |
@@ -234,7 +245,7 @@ static void parse_solaris_x86(struct parsed_partitions *state, | |||
234 | le32_to_cpu(s->s_size)); | 245 | le32_to_cpu(s->s_size)); |
235 | } | 246 | } |
236 | put_dev_sector(sect); | 247 | put_dev_sector(sect); |
237 | printk(" >\n"); | 248 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
238 | #endif | 249 | #endif |
239 | } | 250 | } |
240 | 251 | ||
@@ -250,6 +261,7 @@ static void parse_bsd(struct parsed_partitions *state, | |||
250 | Sector sect; | 261 | Sector sect; |
251 | struct bsd_disklabel *l; | 262 | struct bsd_disklabel *l; |
252 | struct bsd_partition *p; | 263 | struct bsd_partition *p; |
264 | char tmp[64]; | ||
253 | 265 | ||
254 | l = read_part_sector(state, offset + 1, §); | 266 | l = read_part_sector(state, offset + 1, §); |
255 | if (!l) | 267 | if (!l) |
@@ -258,7 +270,9 @@ static void parse_bsd(struct parsed_partitions *state, | |||
258 | put_dev_sector(sect); | 270 | put_dev_sector(sect); |
259 | return; | 271 | return; |
260 | } | 272 | } |
261 | printk(" %s%d: <%s:", state->name, origin, flavour); | 273 | |
274 | snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour); | ||
275 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
262 | 276 | ||
263 | if (le16_to_cpu(l->d_npartitions) < max_partitions) | 277 | if (le16_to_cpu(l->d_npartitions) < max_partitions) |
264 | max_partitions = le16_to_cpu(l->d_npartitions); | 278 | max_partitions = le16_to_cpu(l->d_npartitions); |
@@ -275,16 +289,18 @@ static void parse_bsd(struct parsed_partitions *state, | |||
275 | /* full parent partition, we have it already */ | 289 | /* full parent partition, we have it already */ |
276 | continue; | 290 | continue; |
277 | if (offset > bsd_start || offset+size < bsd_start+bsd_size) { | 291 | if (offset > bsd_start || offset+size < bsd_start+bsd_size) { |
278 | printk("bad subpartition - ignored\n"); | 292 | strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE); |
279 | continue; | 293 | continue; |
280 | } | 294 | } |
281 | put_partition(state, state->next++, bsd_start, bsd_size); | 295 | put_partition(state, state->next++, bsd_start, bsd_size); |
282 | } | 296 | } |
283 | put_dev_sector(sect); | 297 | put_dev_sector(sect); |
284 | if (le16_to_cpu(l->d_npartitions) > max_partitions) | 298 | if (le16_to_cpu(l->d_npartitions) > max_partitions) { |
285 | printk(" (ignored %d more)", | 299 | snprintf(tmp, sizeof(tmp), " (ignored %d more)", |
286 | le16_to_cpu(l->d_npartitions) - max_partitions); | 300 | le16_to_cpu(l->d_npartitions) - max_partitions); |
287 | printk(" >\n"); | 301 | strlcat(state->pp_buf, tmp, PAGE_SIZE); |
302 | } | ||
303 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); | ||
288 | } | 304 | } |
289 | #endif | 305 | #endif |
290 | 306 | ||
@@ -333,7 +349,12 @@ static void parse_unixware(struct parsed_partitions *state, | |||
333 | put_dev_sector(sect); | 349 | put_dev_sector(sect); |
334 | return; | 350 | return; |
335 | } | 351 | } |
336 | printk(" %s%d: <unixware:", state->name, origin); | 352 | { |
353 | char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1]; | ||
354 | |||
355 | snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin); | ||
356 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
357 | } | ||
337 | p = &l->vtoc.v_slice[1]; | 358 | p = &l->vtoc.v_slice[1]; |
338 | /* I omit the 0th slice as it is the same as whole disk. */ | 359 | /* I omit the 0th slice as it is the same as whole disk. */ |
339 | while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { | 360 | while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { |
@@ -347,7 +368,7 @@ static void parse_unixware(struct parsed_partitions *state, | |||
347 | p++; | 368 | p++; |
348 | } | 369 | } |
349 | put_dev_sector(sect); | 370 | put_dev_sector(sect); |
350 | printk(" >\n"); | 371 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
351 | #endif | 372 | #endif |
352 | } | 373 | } |
353 | 374 | ||
@@ -376,8 +397,10 @@ static void parse_minix(struct parsed_partitions *state, | |||
376 | * the normal boot sector. */ | 397 | * the normal boot sector. */ |
377 | if (msdos_magic_present (data + 510) && | 398 | if (msdos_magic_present (data + 510) && |
378 | SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ | 399 | SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ |
400 | char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; | ||
379 | 401 | ||
380 | printk(" %s%d: <minix:", state->name, origin); | 402 | snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin); |
403 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
381 | for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { | 404 | for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { |
382 | if (state->next == state->limit) | 405 | if (state->next == state->limit) |
383 | break; | 406 | break; |
@@ -386,7 +409,7 @@ static void parse_minix(struct parsed_partitions *state, | |||
386 | put_partition(state, state->next++, | 409 | put_partition(state, state->next++, |
387 | start_sect(p), nr_sects(p)); | 410 | start_sect(p), nr_sects(p)); |
388 | } | 411 | } |
389 | printk(" >\n"); | 412 | strlcat(state->pp_buf, " >\n", PAGE_SIZE); |
390 | } | 413 | } |
391 | put_dev_sector(sect); | 414 | put_dev_sector(sect); |
392 | #endif /* CONFIG_MINIX_SUBPARTITION */ | 415 | #endif /* CONFIG_MINIX_SUBPARTITION */ |
@@ -425,7 +448,7 @@ int msdos_partition(struct parsed_partitions *state) | |||
425 | 448 | ||
426 | if (aix_magic_present(state, data)) { | 449 | if (aix_magic_present(state, data)) { |
427 | put_dev_sector(sect); | 450 | put_dev_sector(sect); |
428 | printk( " [AIX]"); | 451 | strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); |
429 | return 0; | 452 | return 0; |
430 | } | 453 | } |
431 | 454 | ||
@@ -446,7 +469,7 @@ int msdos_partition(struct parsed_partitions *state) | |||
446 | fb = (struct fat_boot_sector *) data; | 469 | fb = (struct fat_boot_sector *) data; |
447 | if (slot == 1 && fb->reserved && fb->fats | 470 | if (slot == 1 && fb->reserved && fb->fats |
448 | && fat_valid_media(fb->media)) { | 471 | && fat_valid_media(fb->media)) { |
449 | printk("\n"); | 472 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
450 | put_dev_sector(sect); | 473 | put_dev_sector(sect); |
451 | return 1; | 474 | return 1; |
452 | } else { | 475 | } else { |
@@ -491,21 +514,21 @@ int msdos_partition(struct parsed_partitions *state) | |||
491 | n = min(size, max(sector_size, n)); | 514 | n = min(size, max(sector_size, n)); |
492 | put_partition(state, slot, start, n); | 515 | put_partition(state, slot, start, n); |
493 | 516 | ||
494 | printk(" <"); | 517 | strlcat(state->pp_buf, " <", PAGE_SIZE); |
495 | parse_extended(state, start, size); | 518 | parse_extended(state, start, size); |
496 | printk(" >"); | 519 | strlcat(state->pp_buf, " >", PAGE_SIZE); |
497 | continue; | 520 | continue; |
498 | } | 521 | } |
499 | put_partition(state, slot, start, size); | 522 | put_partition(state, slot, start, size); |
500 | if (SYS_IND(p) == LINUX_RAID_PARTITION) | 523 | if (SYS_IND(p) == LINUX_RAID_PARTITION) |
501 | state->parts[slot].flags = ADDPART_FLAG_RAID; | 524 | state->parts[slot].flags = ADDPART_FLAG_RAID; |
502 | if (SYS_IND(p) == DM6_PARTITION) | 525 | if (SYS_IND(p) == DM6_PARTITION) |
503 | printk("[DM]"); | 526 | strlcat(state->pp_buf, "[DM]", PAGE_SIZE); |
504 | if (SYS_IND(p) == EZD_PARTITION) | 527 | if (SYS_IND(p) == EZD_PARTITION) |
505 | printk("[EZD]"); | 528 | strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); |
506 | } | 529 | } |
507 | 530 | ||
508 | printk("\n"); | 531 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
509 | 532 | ||
510 | /* second pass - output for each on a separate line */ | 533 | /* second pass - output for each on a separate line */ |
511 | p = (struct partition *) (0x1be + data); | 534 | p = (struct partition *) (0x1be + data); |
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index fc22b85d436a..48cec7cbca17 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c | |||
@@ -72,7 +72,7 @@ int osf_partition(struct parsed_partitions *state) | |||
72 | le32_to_cpu(partition->p_size)); | 72 | le32_to_cpu(partition->p_size)); |
73 | slot++; | 73 | slot++; |
74 | } | 74 | } |
75 | printk("\n"); | 75 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
76 | put_dev_sector(sect); | 76 | put_dev_sector(sect); |
77 | return 1; | 77 | return 1; |
78 | } | 78 | } |
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c index 43b1df9aa16c..ea8a86dceaf4 100644 --- a/fs/partitions/sgi.c +++ b/fs/partitions/sgi.c | |||
@@ -76,7 +76,7 @@ int sgi_partition(struct parsed_partitions *state) | |||
76 | } | 76 | } |
77 | slot++; | 77 | slot++; |
78 | } | 78 | } |
79 | printk("\n"); | 79 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
80 | put_dev_sector(sect); | 80 | put_dev_sector(sect); |
81 | return 1; | 81 | return 1; |
82 | } | 82 | } |
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index a32660e25f7f..b5b6fcfb3d36 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c | |||
@@ -116,7 +116,7 @@ int sun_partition(struct parsed_partitions *state) | |||
116 | } | 116 | } |
117 | slot++; | 117 | slot++; |
118 | } | 118 | } |
119 | printk("\n"); | 119 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
120 | put_dev_sector(sect); | 120 | put_dev_sector(sect); |
121 | return 1; | 121 | return 1; |
122 | } | 122 | } |
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c index 9030c864428e..9627ccffc1c4 100644 --- a/fs/partitions/sysv68.c +++ b/fs/partitions/sysv68.c | |||
@@ -54,6 +54,7 @@ int sysv68_partition(struct parsed_partitions *state) | |||
54 | unsigned char *data; | 54 | unsigned char *data; |
55 | struct dkblk0 *b; | 55 | struct dkblk0 *b; |
56 | struct slice *slice; | 56 | struct slice *slice; |
57 | char tmp[64]; | ||
57 | 58 | ||
58 | data = read_part_sector(state, 0, §); | 59 | data = read_part_sector(state, 0, §); |
59 | if (!data) | 60 | if (!data) |
@@ -73,7 +74,8 @@ int sysv68_partition(struct parsed_partitions *state) | |||
73 | return -1; | 74 | return -1; |
74 | 75 | ||
75 | slices -= 1; /* last slice is the whole disk */ | 76 | slices -= 1; /* last slice is the whole disk */ |
76 | printk("sysV68: %s(s%u)", state->name, slices); | 77 | snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices); |
78 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
77 | slice = (struct slice *)data; | 79 | slice = (struct slice *)data; |
78 | for (i = 0; i < slices; i++, slice++) { | 80 | for (i = 0; i < slices; i++, slice++) { |
79 | if (slot == state->limit) | 81 | if (slot == state->limit) |
@@ -82,11 +84,12 @@ int sysv68_partition(struct parsed_partitions *state) | |||
82 | put_partition(state, slot, | 84 | put_partition(state, slot, |
83 | be32_to_cpu(slice->blkoff), | 85 | be32_to_cpu(slice->blkoff), |
84 | be32_to_cpu(slice->nblocks)); | 86 | be32_to_cpu(slice->nblocks)); |
85 | printk("(s%u)", i); | 87 | snprintf(tmp, sizeof(tmp), "(s%u)", i); |
88 | strlcat(state->pp_buf, tmp, PAGE_SIZE); | ||
86 | } | 89 | } |
87 | slot++; | 90 | slot++; |
88 | } | 91 | } |
89 | printk("\n"); | 92 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
90 | put_dev_sector(sect); | 93 | put_dev_sector(sect); |
91 | return 1; | 94 | return 1; |
92 | } | 95 | } |
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c index db9eef260364..8dbaf9f77a99 100644 --- a/fs/partitions/ultrix.c +++ b/fs/partitions/ultrix.c | |||
@@ -39,7 +39,7 @@ int ultrix_partition(struct parsed_partitions *state) | |||
39 | label->pt_part[i].pi_blkoff, | 39 | label->pt_part[i].pi_blkoff, |
40 | label->pt_part[i].pi_nblocks); | 40 | label->pt_part[i].pi_nblocks); |
41 | put_dev_sector(sect); | 41 | put_dev_sector(sect); |
42 | printk ("\n"); | 42 | strlcat(state->pp_buf, "\n", PAGE_SIZE); |
43 | return 1; | 43 | return 1; |
44 | } else { | 44 | } else { |
45 | put_dev_sector(sect); | 45 | put_dev_sector(sect); |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 11a7b5c68153..2758e2afc518 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for the Linux proc filesystem routines. | 2 | # Makefile for the Linux proc filesystem routines. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_PROC_FS) += proc.o | 5 | obj-y += proc.o |
6 | 6 | ||
7 | proc-y := nommu.o task_nommu.o | 7 | proc-y := nommu.o task_nommu.o |
8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o | 8 | proc-$(CONFIG_MMU) := mmu.o task_mmu.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index acb7ef80ea4f..c806dfb24e08 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include <linux/namei.h> | 63 | #include <linux/namei.h> |
64 | #include <linux/mnt_namespace.h> | 64 | #include <linux/mnt_namespace.h> |
65 | #include <linux/mm.h> | 65 | #include <linux/mm.h> |
66 | #include <linux/swap.h> | ||
66 | #include <linux/rcupdate.h> | 67 | #include <linux/rcupdate.h> |
67 | #include <linux/kallsyms.h> | 68 | #include <linux/kallsyms.h> |
68 | #include <linux/stacktrace.h> | 69 | #include <linux/stacktrace.h> |
@@ -427,17 +428,14 @@ static const struct file_operations proc_lstats_operations = { | |||
427 | 428 | ||
428 | #endif | 429 | #endif |
429 | 430 | ||
430 | /* The badness from the OOM killer */ | ||
431 | unsigned long badness(struct task_struct *p, unsigned long uptime); | ||
432 | static int proc_oom_score(struct task_struct *task, char *buffer) | 431 | static int proc_oom_score(struct task_struct *task, char *buffer) |
433 | { | 432 | { |
434 | unsigned long points = 0; | 433 | unsigned long points = 0; |
435 | struct timespec uptime; | ||
436 | 434 | ||
437 | do_posix_clock_monotonic_gettime(&uptime); | ||
438 | read_lock(&tasklist_lock); | 435 | read_lock(&tasklist_lock); |
439 | if (pid_alive(task)) | 436 | if (pid_alive(task)) |
440 | points = badness(task, uptime.tv_sec); | 437 | points = oom_badness(task, NULL, NULL, |
438 | totalram_pages + total_swap_pages); | ||
441 | read_unlock(&tasklist_lock); | 439 | read_unlock(&tasklist_lock); |
442 | return sprintf(buffer, "%lu\n", points); | 440 | return sprintf(buffer, "%lu\n", points); |
443 | } | 441 | } |
@@ -561,9 +559,19 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr) | |||
561 | return -EPERM; | 559 | return -EPERM; |
562 | 560 | ||
563 | error = inode_change_ok(inode, attr); | 561 | error = inode_change_ok(inode, attr); |
564 | if (!error) | 562 | if (error) |
565 | error = inode_setattr(inode, attr); | 563 | return error; |
566 | return error; | 564 | |
565 | if ((attr->ia_valid & ATTR_SIZE) && | ||
566 | attr->ia_size != i_size_read(inode)) { | ||
567 | error = vmtruncate(inode, attr->ia_size); | ||
568 | if (error) | ||
569 | return error; | ||
570 | } | ||
571 | |||
572 | setattr_copy(inode, attr); | ||
573 | mark_inode_dirty(inode); | ||
574 | return 0; | ||
567 | } | 575 | } |
568 | 576 | ||
569 | static const struct inode_operations proc_def_inode_operations = { | 577 | static const struct inode_operations proc_def_inode_operations = { |
@@ -1039,8 +1047,24 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1039 | return -EACCES; | 1047 | return -EACCES; |
1040 | } | 1048 | } |
1041 | 1049 | ||
1050 | /* | ||
1051 | * Warn that /proc/pid/oom_adj is deprecated, see | ||
1052 | * Documentation/feature-removal-schedule.txt. | ||
1053 | */ | ||
1054 | printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " | ||
1055 | "please use /proc/%d/oom_score_adj instead.\n", | ||
1056 | current->comm, task_pid_nr(current), | ||
1057 | task_pid_nr(task), task_pid_nr(task)); | ||
1042 | task->signal->oom_adj = oom_adjust; | 1058 | task->signal->oom_adj = oom_adjust; |
1043 | 1059 | /* | |
1060 | * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum | ||
1061 | * value is always attainable. | ||
1062 | */ | ||
1063 | if (task->signal->oom_adj == OOM_ADJUST_MAX) | ||
1064 | task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; | ||
1065 | else | ||
1066 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | ||
1067 | -OOM_DISABLE; | ||
1044 | unlock_task_sighand(task, &flags); | 1068 | unlock_task_sighand(task, &flags); |
1045 | put_task_struct(task); | 1069 | put_task_struct(task); |
1046 | 1070 | ||
@@ -1053,6 +1077,82 @@ static const struct file_operations proc_oom_adjust_operations = { | |||
1053 | .llseek = generic_file_llseek, | 1077 | .llseek = generic_file_llseek, |
1054 | }; | 1078 | }; |
1055 | 1079 | ||
1080 | static ssize_t oom_score_adj_read(struct file *file, char __user *buf, | ||
1081 | size_t count, loff_t *ppos) | ||
1082 | { | ||
1083 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
1084 | char buffer[PROC_NUMBUF]; | ||
1085 | int oom_score_adj = OOM_SCORE_ADJ_MIN; | ||
1086 | unsigned long flags; | ||
1087 | size_t len; | ||
1088 | |||
1089 | if (!task) | ||
1090 | return -ESRCH; | ||
1091 | if (lock_task_sighand(task, &flags)) { | ||
1092 | oom_score_adj = task->signal->oom_score_adj; | ||
1093 | unlock_task_sighand(task, &flags); | ||
1094 | } | ||
1095 | put_task_struct(task); | ||
1096 | len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); | ||
1097 | return simple_read_from_buffer(buf, count, ppos, buffer, len); | ||
1098 | } | ||
1099 | |||
1100 | static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | ||
1101 | size_t count, loff_t *ppos) | ||
1102 | { | ||
1103 | struct task_struct *task; | ||
1104 | char buffer[PROC_NUMBUF]; | ||
1105 | unsigned long flags; | ||
1106 | long oom_score_adj; | ||
1107 | int err; | ||
1108 | |||
1109 | memset(buffer, 0, sizeof(buffer)); | ||
1110 | if (count > sizeof(buffer) - 1) | ||
1111 | count = sizeof(buffer) - 1; | ||
1112 | if (copy_from_user(buffer, buf, count)) | ||
1113 | return -EFAULT; | ||
1114 | |||
1115 | err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); | ||
1116 | if (err) | ||
1117 | return -EINVAL; | ||
1118 | if (oom_score_adj < OOM_SCORE_ADJ_MIN || | ||
1119 | oom_score_adj > OOM_SCORE_ADJ_MAX) | ||
1120 | return -EINVAL; | ||
1121 | |||
1122 | task = get_proc_task(file->f_path.dentry->d_inode); | ||
1123 | if (!task) | ||
1124 | return -ESRCH; | ||
1125 | if (!lock_task_sighand(task, &flags)) { | ||
1126 | put_task_struct(task); | ||
1127 | return -ESRCH; | ||
1128 | } | ||
1129 | if (oom_score_adj < task->signal->oom_score_adj && | ||
1130 | !capable(CAP_SYS_RESOURCE)) { | ||
1131 | unlock_task_sighand(task, &flags); | ||
1132 | put_task_struct(task); | ||
1133 | return -EACCES; | ||
1134 | } | ||
1135 | |||
1136 | task->signal->oom_score_adj = oom_score_adj; | ||
1137 | /* | ||
1138 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | ||
1139 | * always attainable. | ||
1140 | */ | ||
1141 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1142 | task->signal->oom_adj = OOM_DISABLE; | ||
1143 | else | ||
1144 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / | ||
1145 | OOM_SCORE_ADJ_MAX; | ||
1146 | unlock_task_sighand(task, &flags); | ||
1147 | put_task_struct(task); | ||
1148 | return count; | ||
1149 | } | ||
1150 | |||
1151 | static const struct file_operations proc_oom_score_adj_operations = { | ||
1152 | .read = oom_score_adj_read, | ||
1153 | .write = oom_score_adj_write, | ||
1154 | }; | ||
1155 | |||
1056 | #ifdef CONFIG_AUDITSYSCALL | 1156 | #ifdef CONFIG_AUDITSYSCALL |
1057 | #define TMPBUFLEN 21 | 1157 | #define TMPBUFLEN 21 |
1058 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 1158 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
@@ -2625,6 +2725,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2625 | #endif | 2725 | #endif |
2626 | INF("oom_score", S_IRUGO, proc_oom_score), | 2726 | INF("oom_score", S_IRUGO, proc_oom_score), |
2627 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), | 2727 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), |
2728 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | ||
2628 | #ifdef CONFIG_AUDITSYSCALL | 2729 | #ifdef CONFIG_AUDITSYSCALL |
2629 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 2730 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
2630 | REG("sessionid", S_IRUGO, proc_sessionid_operations), | 2731 | REG("sessionid", S_IRUGO, proc_sessionid_operations), |
@@ -2959,6 +3060,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2959 | #endif | 3060 | #endif |
2960 | INF("oom_score", S_IRUGO, proc_oom_score), | 3061 | INF("oom_score", S_IRUGO, proc_oom_score), |
2961 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), | 3062 | REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), |
3063 | REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), | ||
2962 | #ifdef CONFIG_AUDITSYSCALL | 3064 | #ifdef CONFIG_AUDITSYSCALL |
2963 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), | 3065 | REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), |
2964 | REG("sessionid", S_IRUSR, proc_sessionid_operations), | 3066 | REG("sessionid", S_IRUSR, proc_sessionid_operations), |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2791907744ed..dd29f0337661 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/time.h> | 12 | #include <linux/time.h> |
13 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
14 | #include <linux/stat.h> | 14 | #include <linux/stat.h> |
15 | #include <linux/mm.h> | ||
15 | #include <linux/module.h> | 16 | #include <linux/module.h> |
16 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
17 | #include <linux/mount.h> | 18 | #include <linux/mount.h> |
@@ -258,17 +259,22 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) | |||
258 | 259 | ||
259 | error = inode_change_ok(inode, iattr); | 260 | error = inode_change_ok(inode, iattr); |
260 | if (error) | 261 | if (error) |
261 | goto out; | 262 | return error; |
262 | 263 | ||
263 | error = inode_setattr(inode, iattr); | 264 | if ((iattr->ia_valid & ATTR_SIZE) && |
264 | if (error) | 265 | iattr->ia_size != i_size_read(inode)) { |
265 | goto out; | 266 | error = vmtruncate(inode, iattr->ia_size); |
267 | if (error) | ||
268 | return error; | ||
269 | } | ||
270 | |||
271 | setattr_copy(inode, iattr); | ||
272 | mark_inode_dirty(inode); | ||
266 | 273 | ||
267 | de->uid = inode->i_uid; | 274 | de->uid = inode->i_uid; |
268 | de->gid = inode->i_gid; | 275 | de->gid = inode->i_gid; |
269 | de->mode = inode->i_mode; | 276 | de->mode = inode->i_mode; |
270 | out: | 277 | return 0; |
271 | return error; | ||
272 | } | 278 | } |
273 | 279 | ||
274 | static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, | 280 | static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index aea8502e58a3..23561cda7245 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -25,11 +25,12 @@ | |||
25 | 25 | ||
26 | #include "internal.h" | 26 | #include "internal.h" |
27 | 27 | ||
28 | static void proc_delete_inode(struct inode *inode) | 28 | static void proc_evict_inode(struct inode *inode) |
29 | { | 29 | { |
30 | struct proc_dir_entry *de; | 30 | struct proc_dir_entry *de; |
31 | 31 | ||
32 | truncate_inode_pages(&inode->i_data, 0); | 32 | truncate_inode_pages(&inode->i_data, 0); |
33 | end_writeback(inode); | ||
33 | 34 | ||
34 | /* Stop tracking associated processes */ | 35 | /* Stop tracking associated processes */ |
35 | put_pid(PROC_I(inode)->pid); | 36 | put_pid(PROC_I(inode)->pid); |
@@ -40,7 +41,6 @@ static void proc_delete_inode(struct inode *inode) | |||
40 | pde_put(de); | 41 | pde_put(de); |
41 | if (PROC_I(inode)->sysctl) | 42 | if (PROC_I(inode)->sysctl) |
42 | sysctl_head_put(PROC_I(inode)->sysctl); | 43 | sysctl_head_put(PROC_I(inode)->sysctl); |
43 | clear_inode(inode); | ||
44 | } | 44 | } |
45 | 45 | ||
46 | struct vfsmount *proc_mnt; | 46 | struct vfsmount *proc_mnt; |
@@ -91,7 +91,7 @@ static const struct super_operations proc_sops = { | |||
91 | .alloc_inode = proc_alloc_inode, | 91 | .alloc_inode = proc_alloc_inode, |
92 | .destroy_inode = proc_destroy_inode, | 92 | .destroy_inode = proc_destroy_inode, |
93 | .drop_inode = generic_delete_inode, | 93 | .drop_inode = generic_delete_inode, |
94 | .delete_inode = proc_delete_inode, | 94 | .evict_inode = proc_evict_inode, |
95 | .statfs = simple_statfs, | 95 | .statfs = simple_statfs, |
96 | }; | 96 | }; |
97 | 97 | ||
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6ff9981f0a18..5be436ea088e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -329,10 +329,19 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) | |||
329 | return -EPERM; | 329 | return -EPERM; |
330 | 330 | ||
331 | error = inode_change_ok(inode, attr); | 331 | error = inode_change_ok(inode, attr); |
332 | if (!error) | 332 | if (error) |
333 | error = inode_setattr(inode, attr); | 333 | return error; |
334 | |||
335 | if ((attr->ia_valid & ATTR_SIZE) && | ||
336 | attr->ia_size != i_size_read(inode)) { | ||
337 | error = vmtruncate(inode, attr->ia_size); | ||
338 | if (error) | ||
339 | return error; | ||
340 | } | ||
334 | 341 | ||
335 | return error; | 342 | setattr_copy(inode, attr); |
343 | mark_inode_dirty(inode); | ||
344 | return 0; | ||
336 | } | 345 | } |
337 | 346 | ||
338 | static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 347 | static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 277575ddc05c..16829722be93 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping, | |||
320 | struct page **pagep, void **fsdata) | 320 | struct page **pagep, void **fsdata) |
321 | { | 321 | { |
322 | struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); | 322 | struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); |
323 | int ret; | ||
324 | |||
323 | *pagep = NULL; | 325 | *pagep = NULL; |
324 | return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 326 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
325 | qnx4_get_block, | 327 | qnx4_get_block, |
326 | &qnx4_inode->mmu_private); | 328 | &qnx4_inode->mmu_private); |
329 | if (unlikely(ret)) { | ||
330 | loff_t isize = mapping->host->i_size; | ||
331 | if (pos + len > isize) | ||
332 | vmtruncate(mapping->host, isize); | ||
333 | } | ||
334 | |||
335 | return ret; | ||
327 | } | 336 | } |
328 | static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) | 337 | static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) |
329 | { | 338 | { |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 437d2ca2de97..aad1316a977f 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -132,6 +132,22 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock); | |||
132 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); | 132 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); |
133 | EXPORT_SYMBOL(dq_data_lock); | 133 | EXPORT_SYMBOL(dq_data_lock); |
134 | 134 | ||
135 | void __quota_error(struct super_block *sb, const char *func, | ||
136 | const char *fmt, ...) | ||
137 | { | ||
138 | va_list args; | ||
139 | |||
140 | if (printk_ratelimit()) { | ||
141 | va_start(args, fmt); | ||
142 | printk(KERN_ERR "Quota error (device %s): %s: ", | ||
143 | sb->s_id, func); | ||
144 | vprintk(fmt, args); | ||
145 | printk("\n"); | ||
146 | va_end(args); | ||
147 | } | ||
148 | } | ||
149 | EXPORT_SYMBOL(__quota_error); | ||
150 | |||
135 | #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) | 151 | #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) |
136 | static char *quotatypes[] = INITQFNAMES; | 152 | static char *quotatypes[] = INITQFNAMES; |
137 | #endif | 153 | #endif |
@@ -705,11 +721,8 @@ void dqput(struct dquot *dquot) | |||
705 | return; | 721 | return; |
706 | #ifdef CONFIG_QUOTA_DEBUG | 722 | #ifdef CONFIG_QUOTA_DEBUG |
707 | if (!atomic_read(&dquot->dq_count)) { | 723 | if (!atomic_read(&dquot->dq_count)) { |
708 | printk("VFS: dqput: trying to free free dquot\n"); | 724 | quota_error(dquot->dq_sb, "trying to free free dquot of %s %d", |
709 | printk("VFS: device %s, dquot of %s %d\n", | 725 | quotatypes[dquot->dq_type], dquot->dq_id); |
710 | dquot->dq_sb->s_id, | ||
711 | quotatypes[dquot->dq_type], | ||
712 | dquot->dq_id); | ||
713 | BUG(); | 726 | BUG(); |
714 | } | 727 | } |
715 | #endif | 728 | #endif |
@@ -732,9 +745,9 @@ we_slept: | |||
732 | /* Commit dquot before releasing */ | 745 | /* Commit dquot before releasing */ |
733 | ret = dquot->dq_sb->dq_op->write_dquot(dquot); | 746 | ret = dquot->dq_sb->dq_op->write_dquot(dquot); |
734 | if (ret < 0) { | 747 | if (ret < 0) { |
735 | printk(KERN_ERR "VFS: cannot write quota structure on " | 748 | quota_error(dquot->dq_sb, "Can't write quota structure" |
736 | "device %s (error %d). Quota may get out of " | 749 | " (error %d). Quota may get out of sync!", |
737 | "sync!\n", dquot->dq_sb->s_id, ret); | 750 | ret); |
738 | /* | 751 | /* |
739 | * We clear dirty bit anyway, so that we avoid | 752 | * We clear dirty bit anyway, so that we avoid |
740 | * infinite loop here | 753 | * infinite loop here |
@@ -885,7 +898,7 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
885 | 898 | ||
886 | spin_lock(&inode_lock); | 899 | spin_lock(&inode_lock); |
887 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 900 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
888 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 901 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
889 | continue; | 902 | continue; |
890 | #ifdef CONFIG_QUOTA_DEBUG | 903 | #ifdef CONFIG_QUOTA_DEBUG |
891 | if (unlikely(inode_get_rsv_space(inode) > 0)) | 904 | if (unlikely(inode_get_rsv_space(inode) > 0)) |
@@ -914,9 +927,9 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
914 | 927 | ||
915 | #ifdef CONFIG_QUOTA_DEBUG | 928 | #ifdef CONFIG_QUOTA_DEBUG |
916 | if (reserved) { | 929 | if (reserved) { |
917 | printk(KERN_WARNING "VFS (%s): Writes happened before quota" | 930 | quota_error(sb, "Writes happened before quota was turned on " |
918 | " was turned on thus quota information is probably " | 931 | "thus quota information is probably inconsistent. " |
919 | "inconsistent. Please run quotacheck(8).\n", sb->s_id); | 932 | "Please run quotacheck(8)"); |
920 | } | 933 | } |
921 | #endif | 934 | #endif |
922 | } | 935 | } |
@@ -947,7 +960,9 @@ static int remove_inode_dquot_ref(struct inode *inode, int type, | |||
947 | if (dqput_blocks(dquot)) { | 960 | if (dqput_blocks(dquot)) { |
948 | #ifdef CONFIG_QUOTA_DEBUG | 961 | #ifdef CONFIG_QUOTA_DEBUG |
949 | if (atomic_read(&dquot->dq_count) != 1) | 962 | if (atomic_read(&dquot->dq_count) != 1) |
950 | printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count)); | 963 | quota_error(inode->i_sb, "Adding dquot with " |
964 | "dq_count %d to dispose list", | ||
965 | atomic_read(&dquot->dq_count)); | ||
951 | #endif | 966 | #endif |
952 | spin_lock(&dq_list_lock); | 967 | spin_lock(&dq_list_lock); |
953 | /* As dquot must have currently users it can't be on | 968 | /* As dquot must have currently users it can't be on |
@@ -986,6 +1001,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
986 | struct list_head *tofree_head) | 1001 | struct list_head *tofree_head) |
987 | { | 1002 | { |
988 | struct inode *inode; | 1003 | struct inode *inode; |
1004 | int reserved = 0; | ||
989 | 1005 | ||
990 | spin_lock(&inode_lock); | 1006 | spin_lock(&inode_lock); |
991 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1007 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
@@ -995,10 +1011,20 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
995 | * only quota pointers and these have separate locking | 1011 | * only quota pointers and these have separate locking |
996 | * (dqptr_sem). | 1012 | * (dqptr_sem). |
997 | */ | 1013 | */ |
998 | if (!IS_NOQUOTA(inode)) | 1014 | if (!IS_NOQUOTA(inode)) { |
1015 | if (unlikely(inode_get_rsv_space(inode) > 0)) | ||
1016 | reserved = 1; | ||
999 | remove_inode_dquot_ref(inode, type, tofree_head); | 1017 | remove_inode_dquot_ref(inode, type, tofree_head); |
1018 | } | ||
1000 | } | 1019 | } |
1001 | spin_unlock(&inode_lock); | 1020 | spin_unlock(&inode_lock); |
1021 | #ifdef CONFIG_QUOTA_DEBUG | ||
1022 | if (reserved) { | ||
1023 | printk(KERN_WARNING "VFS (%s): Writes happened after quota" | ||
1024 | " was disabled thus quota information is probably " | ||
1025 | "inconsistent. Please run quotacheck(8).\n", sb->s_id); | ||
1026 | } | ||
1027 | #endif | ||
1002 | } | 1028 | } |
1003 | 1029 | ||
1004 | /* Gather all references from inodes and drop them */ | 1030 | /* Gather all references from inodes and drop them */ |
@@ -1304,6 +1330,15 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space) | |||
1304 | return QUOTA_NL_NOWARN; | 1330 | return QUOTA_NL_NOWARN; |
1305 | } | 1331 | } |
1306 | 1332 | ||
1333 | static int dquot_active(const struct inode *inode) | ||
1334 | { | ||
1335 | struct super_block *sb = inode->i_sb; | ||
1336 | |||
1337 | if (IS_NOQUOTA(inode)) | ||
1338 | return 0; | ||
1339 | return sb_any_quota_loaded(sb) & ~sb_any_quota_suspended(sb); | ||
1340 | } | ||
1341 | |||
1307 | /* | 1342 | /* |
1308 | * Initialize quota pointers in inode | 1343 | * Initialize quota pointers in inode |
1309 | * | 1344 | * |
@@ -1323,7 +1358,7 @@ static void __dquot_initialize(struct inode *inode, int type) | |||
1323 | 1358 | ||
1324 | /* First test before acquiring mutex - solves deadlocks when we | 1359 | /* First test before acquiring mutex - solves deadlocks when we |
1325 | * re-enter the quota code and are already holding the mutex */ | 1360 | * re-enter the quota code and are already holding the mutex */ |
1326 | if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) | 1361 | if (!dquot_active(inode)) |
1327 | return; | 1362 | return; |
1328 | 1363 | ||
1329 | /* First get references to structures we might need. */ | 1364 | /* First get references to structures we might need. */ |
@@ -1507,7 +1542,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) | |||
1507 | * First test before acquiring mutex - solves deadlocks when we | 1542 | * First test before acquiring mutex - solves deadlocks when we |
1508 | * re-enter the quota code and are already holding the mutex | 1543 | * re-enter the quota code and are already holding the mutex |
1509 | */ | 1544 | */ |
1510 | if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { | 1545 | if (!dquot_active(inode)) { |
1511 | inode_incr_space(inode, number, reserve); | 1546 | inode_incr_space(inode, number, reserve); |
1512 | goto out; | 1547 | goto out; |
1513 | } | 1548 | } |
@@ -1559,7 +1594,7 @@ int dquot_alloc_inode(const struct inode *inode) | |||
1559 | 1594 | ||
1560 | /* First test before acquiring mutex - solves deadlocks when we | 1595 | /* First test before acquiring mutex - solves deadlocks when we |
1561 | * re-enter the quota code and are already holding the mutex */ | 1596 | * re-enter the quota code and are already holding the mutex */ |
1562 | if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) | 1597 | if (!dquot_active(inode)) |
1563 | return 0; | 1598 | return 0; |
1564 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) | 1599 | for (cnt = 0; cnt < MAXQUOTAS; cnt++) |
1565 | warntype[cnt] = QUOTA_NL_NOWARN; | 1600 | warntype[cnt] = QUOTA_NL_NOWARN; |
@@ -1596,7 +1631,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) | |||
1596 | { | 1631 | { |
1597 | int cnt; | 1632 | int cnt; |
1598 | 1633 | ||
1599 | if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { | 1634 | if (!dquot_active(inode)) { |
1600 | inode_claim_rsv_space(inode, number); | 1635 | inode_claim_rsv_space(inode, number); |
1601 | return 0; | 1636 | return 0; |
1602 | } | 1637 | } |
@@ -1629,7 +1664,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) | |||
1629 | 1664 | ||
1630 | /* First test before acquiring mutex - solves deadlocks when we | 1665 | /* First test before acquiring mutex - solves deadlocks when we |
1631 | * re-enter the quota code and are already holding the mutex */ | 1666 | * re-enter the quota code and are already holding the mutex */ |
1632 | if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) { | 1667 | if (!dquot_active(inode)) { |
1633 | inode_decr_space(inode, number, reserve); | 1668 | inode_decr_space(inode, number, reserve); |
1634 | return; | 1669 | return; |
1635 | } | 1670 | } |
@@ -1667,7 +1702,7 @@ void dquot_free_inode(const struct inode *inode) | |||
1667 | 1702 | ||
1668 | /* First test before acquiring mutex - solves deadlocks when we | 1703 | /* First test before acquiring mutex - solves deadlocks when we |
1669 | * re-enter the quota code and are already holding the mutex */ | 1704 | * re-enter the quota code and are already holding the mutex */ |
1670 | if (!sb_any_quota_active(inode->i_sb) || IS_NOQUOTA(inode)) | 1705 | if (!dquot_active(inode)) |
1671 | return; | 1706 | return; |
1672 | 1707 | ||
1673 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); | 1708 | down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); |
@@ -1790,7 +1825,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr) | |||
1790 | struct super_block *sb = inode->i_sb; | 1825 | struct super_block *sb = inode->i_sb; |
1791 | int ret; | 1826 | int ret; |
1792 | 1827 | ||
1793 | if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode)) | 1828 | if (!dquot_active(inode)) |
1794 | return 0; | 1829 | return 0; |
1795 | 1830 | ||
1796 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) | 1831 | if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) |
@@ -1957,7 +1992,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) | |||
1957 | truncate_inode_pages(&toputinode[cnt]->i_data, | 1992 | truncate_inode_pages(&toputinode[cnt]->i_data, |
1958 | 0); | 1993 | 0); |
1959 | mutex_unlock(&toputinode[cnt]->i_mutex); | 1994 | mutex_unlock(&toputinode[cnt]->i_mutex); |
1960 | mark_inode_dirty(toputinode[cnt]); | 1995 | mark_inode_dirty_sync(toputinode[cnt]); |
1961 | } | 1996 | } |
1962 | mutex_unlock(&dqopt->dqonoff_mutex); | 1997 | mutex_unlock(&dqopt->dqonoff_mutex); |
1963 | } | 1998 | } |
@@ -2270,7 +2305,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) | |||
2270 | memset(di, 0, sizeof(*di)); | 2305 | memset(di, 0, sizeof(*di)); |
2271 | di->d_version = FS_DQUOT_VERSION; | 2306 | di->d_version = FS_DQUOT_VERSION; |
2272 | di->d_flags = dquot->dq_type == USRQUOTA ? | 2307 | di->d_flags = dquot->dq_type == USRQUOTA ? |
2273 | XFS_USER_QUOTA : XFS_GROUP_QUOTA; | 2308 | FS_USER_QUOTA : FS_GROUP_QUOTA; |
2274 | di->d_id = dquot->dq_id; | 2309 | di->d_id = dquot->dq_id; |
2275 | 2310 | ||
2276 | spin_lock(&dq_data_lock); | 2311 | spin_lock(&dq_data_lock); |
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 24f03407eeb5..9e48874eabcc 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c | |||
@@ -65,8 +65,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) | |||
65 | ret = sb->s_op->quota_write(sb, info->dqi_type, buf, | 65 | ret = sb->s_op->quota_write(sb, info->dqi_type, buf, |
66 | info->dqi_usable_bs, blk << info->dqi_blocksize_bits); | 66 | info->dqi_usable_bs, blk << info->dqi_blocksize_bits); |
67 | if (ret != info->dqi_usable_bs) { | 67 | if (ret != info->dqi_usable_bs) { |
68 | q_warn(KERN_WARNING "VFS: dquota write failed on " | 68 | quota_error(sb, "dquota write failed"); |
69 | "dev %s\n", sb->s_id); | ||
70 | if (ret >= 0) | 69 | if (ret >= 0) |
71 | ret = -EIO; | 70 | ret = -EIO; |
72 | } | 71 | } |
@@ -160,9 +159,8 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf, | |||
160 | dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); | 159 | dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); |
161 | /* No matter whether write succeeds block is out of list */ | 160 | /* No matter whether write succeeds block is out of list */ |
162 | if (write_blk(info, blk, buf) < 0) | 161 | if (write_blk(info, blk, buf) < 0) |
163 | q_warn(KERN_ERR | 162 | quota_error(info->dqi_sb, "Can't write block (%u) " |
164 | "VFS: Can't write block (%u) with free entries.\n", | 163 | "with free entries", blk); |
165 | blk); | ||
166 | return 0; | 164 | return 0; |
167 | out_buf: | 165 | out_buf: |
168 | kfree(tmpbuf); | 166 | kfree(tmpbuf); |
@@ -252,9 +250,8 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, | |||
252 | if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { | 250 | if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { |
253 | *err = remove_free_dqentry(info, buf, blk); | 251 | *err = remove_free_dqentry(info, buf, blk); |
254 | if (*err < 0) { | 252 | if (*err < 0) { |
255 | q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't " | 253 | quota_error(dquot->dq_sb, "Can't remove block (%u) " |
256 | "remove block (%u) from entry free list.\n", | 254 | "from entry free list", blk); |
257 | blk); | ||
258 | goto out_buf; | 255 | goto out_buf; |
259 | } | 256 | } |
260 | } | 257 | } |
@@ -268,16 +265,15 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info, | |||
268 | } | 265 | } |
269 | #ifdef __QUOTA_QT_PARANOIA | 266 | #ifdef __QUOTA_QT_PARANOIA |
270 | if (i == qtree_dqstr_in_blk(info)) { | 267 | if (i == qtree_dqstr_in_blk(info)) { |
271 | printk(KERN_ERR "VFS: find_free_dqentry(): Data block full " | 268 | quota_error(dquot->dq_sb, "Data block full but it shouldn't"); |
272 | "but it shouldn't.\n"); | ||
273 | *err = -EIO; | 269 | *err = -EIO; |
274 | goto out_buf; | 270 | goto out_buf; |
275 | } | 271 | } |
276 | #endif | 272 | #endif |
277 | *err = write_blk(info, blk, buf); | 273 | *err = write_blk(info, blk, buf); |
278 | if (*err < 0) { | 274 | if (*err < 0) { |
279 | q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " | 275 | quota_error(dquot->dq_sb, "Can't write quota data block %u", |
280 | "data block %u.\n", blk); | 276 | blk); |
281 | goto out_buf; | 277 | goto out_buf; |
282 | } | 278 | } |
283 | dquot->dq_off = (blk << info->dqi_blocksize_bits) + | 279 | dquot->dq_off = (blk << info->dqi_blocksize_bits) + |
@@ -311,8 +307,8 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
311 | } else { | 307 | } else { |
312 | ret = read_blk(info, *treeblk, buf); | 308 | ret = read_blk(info, *treeblk, buf); |
313 | if (ret < 0) { | 309 | if (ret < 0) { |
314 | q_warn(KERN_ERR "VFS: Can't read tree quota block " | 310 | quota_error(dquot->dq_sb, "Can't read tree quota " |
315 | "%u.\n", *treeblk); | 311 | "block %u", *treeblk); |
316 | goto out_buf; | 312 | goto out_buf; |
317 | } | 313 | } |
318 | } | 314 | } |
@@ -323,9 +319,9 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
323 | if (depth == info->dqi_qtree_depth - 1) { | 319 | if (depth == info->dqi_qtree_depth - 1) { |
324 | #ifdef __QUOTA_QT_PARANOIA | 320 | #ifdef __QUOTA_QT_PARANOIA |
325 | if (newblk) { | 321 | if (newblk) { |
326 | printk(KERN_ERR "VFS: Inserting already present quota " | 322 | quota_error(dquot->dq_sb, "Inserting already present " |
327 | "entry (block %u).\n", | 323 | "quota entry (block %u)", |
328 | le32_to_cpu(ref[get_index(info, | 324 | le32_to_cpu(ref[get_index(info, |
329 | dquot->dq_id, depth)])); | 325 | dquot->dq_id, depth)])); |
330 | ret = -EIO; | 326 | ret = -EIO; |
331 | goto out_buf; | 327 | goto out_buf; |
@@ -373,8 +369,8 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
373 | if (!dquot->dq_off) { | 369 | if (!dquot->dq_off) { |
374 | ret = dq_insert_tree(info, dquot); | 370 | ret = dq_insert_tree(info, dquot); |
375 | if (ret < 0) { | 371 | if (ret < 0) { |
376 | q_warn(KERN_ERR "VFS: Error %zd occurred while " | 372 | quota_error(sb, "Error %zd occurred while creating " |
377 | "creating quota.\n", ret); | 373 | "quota", ret); |
378 | kfree(ddquot); | 374 | kfree(ddquot); |
379 | return ret; | 375 | return ret; |
380 | } | 376 | } |
@@ -385,8 +381,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
385 | ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, | 381 | ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, |
386 | dquot->dq_off); | 382 | dquot->dq_off); |
387 | if (ret != info->dqi_entry_size) { | 383 | if (ret != info->dqi_entry_size) { |
388 | q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n", | 384 | quota_error(sb, "dquota write failed"); |
389 | sb->s_id); | ||
390 | if (ret >= 0) | 385 | if (ret >= 0) |
391 | ret = -ENOSPC; | 386 | ret = -ENOSPC; |
392 | } else { | 387 | } else { |
@@ -410,14 +405,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
410 | if (!buf) | 405 | if (!buf) |
411 | return -ENOMEM; | 406 | return -ENOMEM; |
412 | if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { | 407 | if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { |
413 | q_warn(KERN_ERR "VFS: Quota structure has offset to other " | 408 | quota_error(dquot->dq_sb, "Quota structure has offset to " |
414 | "block (%u) than it should (%u).\n", blk, | 409 | "other block (%u) than it should (%u)", blk, |
415 | (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); | 410 | (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); |
416 | goto out_buf; | 411 | goto out_buf; |
417 | } | 412 | } |
418 | ret = read_blk(info, blk, buf); | 413 | ret = read_blk(info, blk, buf); |
419 | if (ret < 0) { | 414 | if (ret < 0) { |
420 | q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk); | 415 | quota_error(dquot->dq_sb, "Can't read quota data block %u", |
416 | blk); | ||
421 | goto out_buf; | 417 | goto out_buf; |
422 | } | 418 | } |
423 | dh = (struct qt_disk_dqdbheader *)buf; | 419 | dh = (struct qt_disk_dqdbheader *)buf; |
@@ -427,8 +423,8 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
427 | if (ret >= 0) | 423 | if (ret >= 0) |
428 | ret = put_free_dqblk(info, buf, blk); | 424 | ret = put_free_dqblk(info, buf, blk); |
429 | if (ret < 0) { | 425 | if (ret < 0) { |
430 | q_warn(KERN_ERR "VFS: Can't move quota data block (%u) " | 426 | quota_error(dquot->dq_sb, "Can't move quota data block " |
431 | "to free list.\n", blk); | 427 | "(%u) to free list", blk); |
432 | goto out_buf; | 428 | goto out_buf; |
433 | } | 429 | } |
434 | } else { | 430 | } else { |
@@ -440,15 +436,15 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
440 | /* Insert will write block itself */ | 436 | /* Insert will write block itself */ |
441 | ret = insert_free_dqentry(info, buf, blk); | 437 | ret = insert_free_dqentry(info, buf, blk); |
442 | if (ret < 0) { | 438 | if (ret < 0) { |
443 | q_warn(KERN_ERR "VFS: Can't insert quota data " | 439 | quota_error(dquot->dq_sb, "Can't insert quota " |
444 | "block (%u) to free entry list.\n", blk); | 440 | "data block (%u) to free entry list", blk); |
445 | goto out_buf; | 441 | goto out_buf; |
446 | } | 442 | } |
447 | } else { | 443 | } else { |
448 | ret = write_blk(info, blk, buf); | 444 | ret = write_blk(info, blk, buf); |
449 | if (ret < 0) { | 445 | if (ret < 0) { |
450 | q_warn(KERN_ERR "VFS: Can't write quota data " | 446 | quota_error(dquot->dq_sb, "Can't write quota " |
451 | "block %u\n", blk); | 447 | "data block %u", blk); |
452 | goto out_buf; | 448 | goto out_buf; |
453 | } | 449 | } |
454 | } | 450 | } |
@@ -472,7 +468,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
472 | return -ENOMEM; | 468 | return -ENOMEM; |
473 | ret = read_blk(info, *blk, buf); | 469 | ret = read_blk(info, *blk, buf); |
474 | if (ret < 0) { | 470 | if (ret < 0) { |
475 | q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); | 471 | quota_error(dquot->dq_sb, "Can't read quota data " |
472 | "block %u", blk); | ||
476 | goto out_buf; | 473 | goto out_buf; |
477 | } | 474 | } |
478 | newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); | 475 | newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); |
@@ -496,8 +493,8 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, | |||
496 | } else { | 493 | } else { |
497 | ret = write_blk(info, *blk, buf); | 494 | ret = write_blk(info, *blk, buf); |
498 | if (ret < 0) | 495 | if (ret < 0) |
499 | q_warn(KERN_ERR "VFS: Can't write quota tree " | 496 | quota_error(dquot->dq_sb, "Can't write quota " |
500 | "block %u.\n", *blk); | 497 | "tree block %u", blk); |
501 | } | 498 | } |
502 | } | 499 | } |
503 | out_buf: | 500 | out_buf: |
@@ -529,7 +526,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, | |||
529 | return -ENOMEM; | 526 | return -ENOMEM; |
530 | ret = read_blk(info, blk, buf); | 527 | ret = read_blk(info, blk, buf); |
531 | if (ret < 0) { | 528 | if (ret < 0) { |
532 | q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); | 529 | quota_error(dquot->dq_sb, "Can't read quota tree " |
530 | "block %u", blk); | ||
533 | goto out_buf; | 531 | goto out_buf; |
534 | } | 532 | } |
535 | ddquot = buf + sizeof(struct qt_disk_dqdbheader); | 533 | ddquot = buf + sizeof(struct qt_disk_dqdbheader); |
@@ -539,8 +537,8 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, | |||
539 | ddquot += info->dqi_entry_size; | 537 | ddquot += info->dqi_entry_size; |
540 | } | 538 | } |
541 | if (i == qtree_dqstr_in_blk(info)) { | 539 | if (i == qtree_dqstr_in_blk(info)) { |
542 | q_warn(KERN_ERR "VFS: Quota for id %u referenced " | 540 | quota_error(dquot->dq_sb, "Quota for id %u referenced " |
543 | "but not present.\n", dquot->dq_id); | 541 | "but not present", dquot->dq_id); |
544 | ret = -EIO; | 542 | ret = -EIO; |
545 | goto out_buf; | 543 | goto out_buf; |
546 | } else { | 544 | } else { |
@@ -564,7 +562,8 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, | |||
564 | return -ENOMEM; | 562 | return -ENOMEM; |
565 | ret = read_blk(info, blk, buf); | 563 | ret = read_blk(info, blk, buf); |
566 | if (ret < 0) { | 564 | if (ret < 0) { |
567 | q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); | 565 | quota_error(dquot->dq_sb, "Can't read quota tree block %u", |
566 | blk); | ||
568 | goto out_buf; | 567 | goto out_buf; |
569 | } | 568 | } |
570 | ret = 0; | 569 | ret = 0; |
@@ -598,7 +597,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
598 | #ifdef __QUOTA_QT_PARANOIA | 597 | #ifdef __QUOTA_QT_PARANOIA |
599 | /* Invalidated quota? */ | 598 | /* Invalidated quota? */ |
600 | if (!sb_dqopt(dquot->dq_sb)->files[type]) { | 599 | if (!sb_dqopt(dquot->dq_sb)->files[type]) { |
601 | printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); | 600 | quota_error(sb, "Quota invalidated while reading!"); |
602 | return -EIO; | 601 | return -EIO; |
603 | } | 602 | } |
604 | #endif | 603 | #endif |
@@ -607,8 +606,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
607 | offset = find_dqentry(info, dquot); | 606 | offset = find_dqentry(info, dquot); |
608 | if (offset <= 0) { /* Entry not present? */ | 607 | if (offset <= 0) { /* Entry not present? */ |
609 | if (offset < 0) | 608 | if (offset < 0) |
610 | q_warn(KERN_ERR "VFS: Can't read quota " | 609 | quota_error(sb, "Can't read quota structure " |
611 | "structure for id %u.\n", dquot->dq_id); | 610 | "for id %u", dquot->dq_id); |
612 | dquot->dq_off = 0; | 611 | dquot->dq_off = 0; |
613 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 612 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
614 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); | 613 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); |
@@ -625,8 +624,8 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) | |||
625 | if (ret != info->dqi_entry_size) { | 624 | if (ret != info->dqi_entry_size) { |
626 | if (ret >= 0) | 625 | if (ret >= 0) |
627 | ret = -EIO; | 626 | ret = -EIO; |
628 | q_warn(KERN_ERR "VFS: Error while reading quota " | 627 | quota_error(sb, "Error while reading quota structure for id %u", |
629 | "structure for id %u.\n", dquot->dq_id); | 628 | dquot->dq_id); |
630 | set_bit(DQ_FAKE_B, &dquot->dq_flags); | 629 | set_bit(DQ_FAKE_B, &dquot->dq_flags); |
631 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); | 630 | memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); |
632 | kfree(ddquot); | 631 | kfree(ddquot); |
diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h index ccc3e71fb1d8..a1ab8db81a51 100644 --- a/fs/quota/quota_tree.h +++ b/fs/quota/quota_tree.h | |||
@@ -22,10 +22,4 @@ struct qt_disk_dqdbheader { | |||
22 | 22 | ||
23 | #define QT_TREEOFF 1 /* Offset of tree in file in blocks */ | 23 | #define QT_TREEOFF 1 /* Offset of tree in file in blocks */ |
24 | 24 | ||
25 | #define q_warn(fmt, args...) \ | ||
26 | do { \ | ||
27 | if (printk_ratelimit()) \ | ||
28 | printk(fmt, ## args); \ | ||
29 | } while(0) | ||
30 | |||
31 | #endif /* _LINUX_QUOTAIO_TREE_H */ | 25 | #endif /* _LINUX_QUOTAIO_TREE_H */ |
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c index 4af344c5852a..34b37a67bb16 100644 --- a/fs/quota/quota_v1.c +++ b/fs/quota/quota_v1.c | |||
@@ -95,8 +95,7 @@ static int v1_commit_dqblk(struct dquot *dquot) | |||
95 | (char *)&dqblk, sizeof(struct v1_disk_dqblk), | 95 | (char *)&dqblk, sizeof(struct v1_disk_dqblk), |
96 | v1_dqoff(dquot->dq_id)); | 96 | v1_dqoff(dquot->dq_id)); |
97 | if (ret != sizeof(struct v1_disk_dqblk)) { | 97 | if (ret != sizeof(struct v1_disk_dqblk)) { |
98 | printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", | 98 | quota_error(dquot->dq_sb, "dquota write failed"); |
99 | dquot->dq_sb->s_id); | ||
100 | if (ret >= 0) | 99 | if (ret >= 0) |
101 | ret = -EIO; | 100 | ret = -EIO; |
102 | goto out; | 101 | goto out; |
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 135206af1458..65444d29406b 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c | |||
@@ -63,9 +63,8 @@ static int v2_read_header(struct super_block *sb, int type, | |||
63 | size = sb->s_op->quota_read(sb, type, (char *)dqhead, | 63 | size = sb->s_op->quota_read(sb, type, (char *)dqhead, |
64 | sizeof(struct v2_disk_dqheader), 0); | 64 | sizeof(struct v2_disk_dqheader), 0); |
65 | if (size != sizeof(struct v2_disk_dqheader)) { | 65 | if (size != sizeof(struct v2_disk_dqheader)) { |
66 | q_warn(KERN_WARNING "quota_v2: Failed header read:" | 66 | quota_error(sb, "Failed header read: expected=%zd got=%zd", |
67 | " expected=%zd got=%zd\n", | 67 | sizeof(struct v2_disk_dqheader), size); |
68 | sizeof(struct v2_disk_dqheader), size); | ||
69 | return 0; | 68 | return 0; |
70 | } | 69 | } |
71 | return 1; | 70 | return 1; |
@@ -106,8 +105,7 @@ static int v2_read_file_info(struct super_block *sb, int type) | |||
106 | size = sb->s_op->quota_read(sb, type, (char *)&dinfo, | 105 | size = sb->s_op->quota_read(sb, type, (char *)&dinfo, |
107 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); | 106 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); |
108 | if (size != sizeof(struct v2_disk_dqinfo)) { | 107 | if (size != sizeof(struct v2_disk_dqinfo)) { |
109 | q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", | 108 | quota_error(sb, "Can't read info structure"); |
110 | sb->s_id); | ||
111 | return -1; | 109 | return -1; |
112 | } | 110 | } |
113 | info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS); | 111 | info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS); |
@@ -167,8 +165,7 @@ static int v2_write_file_info(struct super_block *sb, int type) | |||
167 | size = sb->s_op->quota_write(sb, type, (char *)&dinfo, | 165 | size = sb->s_op->quota_write(sb, type, (char *)&dinfo, |
168 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); | 166 | sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); |
169 | if (size != sizeof(struct v2_disk_dqinfo)) { | 167 | if (size != sizeof(struct v2_disk_dqinfo)) { |
170 | q_warn(KERN_WARNING "Can't write info structure on device %s.\n", | 168 | quota_error(sb, "Can't write info structure"); |
171 | sb->s_id); | ||
172 | return -1; | 169 | return -1; |
173 | } | 170 | } |
174 | return 0; | 171 | return 0; |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index d532c20fc179..9eead2c796b7 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
@@ -146,9 +146,8 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) | |||
146 | return ret; | 146 | return ret; |
147 | } | 147 | } |
148 | 148 | ||
149 | ret = simple_setsize(inode, newsize); | 149 | truncate_setsize(inode, newsize); |
150 | 150 | return 0; | |
151 | return ret; | ||
152 | } | 151 | } |
153 | 152 | ||
154 | /*****************************************************************************/ | 153 | /*****************************************************************************/ |
@@ -183,7 +182,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) | |||
183 | } | 182 | } |
184 | } | 183 | } |
185 | 184 | ||
186 | generic_setattr(inode, ia); | 185 | setattr_copy(inode, ia); |
187 | out: | 186 | out: |
188 | ia->ia_valid = old_ia_valid; | 187 | ia->ia_valid = old_ia_valid; |
189 | return ret; | 188 | return ret; |
diff --git a/fs/read_write.c b/fs/read_write.c index 9c0485236e68..74e36586e4d3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -311,7 +311,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | |||
311 | else | 311 | else |
312 | ret = do_sync_read(file, buf, count, pos); | 312 | ret = do_sync_read(file, buf, count, pos); |
313 | if (ret > 0) { | 313 | if (ret > 0) { |
314 | fsnotify_access(file->f_path.dentry); | 314 | fsnotify_access(file); |
315 | add_rchar(current, ret); | 315 | add_rchar(current, ret); |
316 | } | 316 | } |
317 | inc_syscr(current); | 317 | inc_syscr(current); |
@@ -367,7 +367,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
367 | else | 367 | else |
368 | ret = do_sync_write(file, buf, count, pos); | 368 | ret = do_sync_write(file, buf, count, pos); |
369 | if (ret > 0) { | 369 | if (ret > 0) { |
370 | fsnotify_modify(file->f_path.dentry); | 370 | fsnotify_modify(file); |
371 | add_wchar(current, ret); | 371 | add_wchar(current, ret); |
372 | } | 372 | } |
373 | inc_syscw(current); | 373 | inc_syscw(current); |
@@ -675,9 +675,9 @@ out: | |||
675 | kfree(iov); | 675 | kfree(iov); |
676 | if ((ret + (type == READ)) > 0) { | 676 | if ((ret + (type == READ)) > 0) { |
677 | if (type == READ) | 677 | if (type == READ) |
678 | fsnotify_access(file->f_path.dentry); | 678 | fsnotify_access(file); |
679 | else | 679 | else |
680 | fsnotify_modify(file->f_path.dentry); | 680 | fsnotify_modify(file); |
681 | } | 681 | } |
682 | return ret; | 682 | return ret; |
683 | } | 683 | } |
diff --git a/fs/readdir.c b/fs/readdir.c index 7723401f8d8b..356f71528ad6 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -4,6 +4,7 @@ | |||
4 | * Copyright (C) 1995 Linus Torvalds | 4 | * Copyright (C) 1995 Linus Torvalds |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/stddef.h> | ||
7 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
8 | #include <linux/module.h> | 9 | #include <linux/module.h> |
9 | #include <linux/time.h> | 10 | #include <linux/time.h> |
@@ -54,7 +55,6 @@ EXPORT_SYMBOL(vfs_readdir); | |||
54 | * anyway. Thus the special "fillonedir()" function for that | 55 | * anyway. Thus the special "fillonedir()" function for that |
55 | * case (the low-level handlers don't need to care about this). | 56 | * case (the low-level handlers don't need to care about this). |
56 | */ | 57 | */ |
57 | #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) | ||
58 | 58 | ||
59 | #ifdef __ARCH_WANT_OLD_READDIR | 59 | #ifdef __ARCH_WANT_OLD_READDIR |
60 | 60 | ||
@@ -152,7 +152,8 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, | |||
152 | struct linux_dirent __user * dirent; | 152 | struct linux_dirent __user * dirent; |
153 | struct getdents_callback * buf = (struct getdents_callback *) __buf; | 153 | struct getdents_callback * buf = (struct getdents_callback *) __buf; |
154 | unsigned long d_ino; | 154 | unsigned long d_ino; |
155 | int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 2, sizeof(long)); | 155 | int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, |
156 | sizeof(long)); | ||
156 | 157 | ||
157 | buf->error = -EINVAL; /* only used if we fail.. */ | 158 | buf->error = -EINVAL; /* only used if we fail.. */ |
158 | if (reclen > buf->count) | 159 | if (reclen > buf->count) |
@@ -237,7 +238,8 @@ static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, | |||
237 | { | 238 | { |
238 | struct linux_dirent64 __user *dirent; | 239 | struct linux_dirent64 __user *dirent; |
239 | struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; | 240 | struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; |
240 | int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(u64)); | 241 | int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, |
242 | sizeof(u64)); | ||
241 | 243 | ||
242 | buf->error = -EINVAL; /* only used if we fail.. */ | 244 | buf->error = -EINVAL; /* only used if we fail.. */ |
243 | if (reclen > buf->count) | 245 | if (reclen > buf->count) |
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index b82cdd8a45dd..6846371498b6 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -38,20 +38,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
38 | 38 | ||
39 | BUG_ON(!S_ISREG(inode->i_mode)); | 39 | BUG_ON(!S_ISREG(inode->i_mode)); |
40 | 40 | ||
41 | if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) | ||
42 | return 0; | ||
43 | |||
44 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | ||
45 | |||
46 | if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { | ||
47 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | ||
48 | return 0; | ||
49 | } | ||
50 | |||
41 | /* fast out for when nothing needs to be done */ | 51 | /* fast out for when nothing needs to be done */ |
42 | if ((atomic_read(&inode->i_count) > 1 || | 52 | if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || |
43 | !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || | ||
44 | !tail_has_to_be_packed(inode)) && | 53 | !tail_has_to_be_packed(inode)) && |
45 | REISERFS_I(inode)->i_prealloc_count <= 0) { | 54 | REISERFS_I(inode)->i_prealloc_count <= 0) { |
55 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | ||
46 | return 0; | 56 | return 0; |
47 | } | 57 | } |
48 | 58 | ||
49 | mutex_lock(&inode->i_mutex); | ||
50 | |||
51 | mutex_lock(&(REISERFS_I(inode)->i_mmap)); | ||
52 | if (REISERFS_I(inode)->i_flags & i_ever_mapped) | ||
53 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | ||
54 | |||
55 | reiserfs_write_lock(inode->i_sb); | 59 | reiserfs_write_lock(inode->i_sb); |
56 | /* freeing preallocation only involves relogging blocks that | 60 | /* freeing preallocation only involves relogging blocks that |
57 | * are already in the current transaction. preallocation gets | 61 | * are already in the current transaction. preallocation gets |
@@ -94,9 +98,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
94 | if (!err) | 98 | if (!err) |
95 | err = jbegin_failure; | 99 | err = jbegin_failure; |
96 | 100 | ||
97 | if (!err && atomic_read(&inode->i_count) <= 1 && | 101 | if (!err && |
98 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && | 102 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && |
99 | tail_has_to_be_packed(inode)) { | 103 | tail_has_to_be_packed(inode)) { |
104 | |||
100 | /* if regular file is released by last holder and it has been | 105 | /* if regular file is released by last holder and it has been |
101 | appended (we append by unformatted node only) or its direct | 106 | appended (we append by unformatted node only) or its direct |
102 | item(s) had to be converted, then it may have to be | 107 | item(s) had to be converted, then it may have to be |
@@ -104,27 +109,28 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
104 | err = reiserfs_truncate_file(inode, 0); | 109 | err = reiserfs_truncate_file(inode, 0); |
105 | } | 110 | } |
106 | out: | 111 | out: |
107 | mutex_unlock(&(REISERFS_I(inode)->i_mmap)); | ||
108 | mutex_unlock(&inode->i_mutex); | ||
109 | reiserfs_write_unlock(inode->i_sb); | 112 | reiserfs_write_unlock(inode->i_sb); |
113 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | ||
110 | return err; | 114 | return err; |
111 | } | 115 | } |
112 | 116 | ||
113 | static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) | 117 | static int reiserfs_file_open(struct inode *inode, struct file *file) |
114 | { | 118 | { |
115 | struct inode *inode; | 119 | int err = dquot_file_open(inode, file); |
116 | 120 | if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { | |
117 | inode = file->f_path.dentry->d_inode; | 121 | /* somebody might be tailpacking on final close; wait for it */ |
118 | mutex_lock(&(REISERFS_I(inode)->i_mmap)); | 122 | mutex_lock(&(REISERFS_I(inode)->tailpack)); |
119 | REISERFS_I(inode)->i_flags |= i_ever_mapped; | 123 | atomic_inc(&REISERFS_I(inode)->openers); |
120 | mutex_unlock(&(REISERFS_I(inode)->i_mmap)); | 124 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); |
121 | 125 | } | |
122 | return generic_file_mmap(file, vma); | 126 | return err; |
123 | } | 127 | } |
124 | 128 | ||
125 | static void reiserfs_vfs_truncate_file(struct inode *inode) | 129 | static void reiserfs_vfs_truncate_file(struct inode *inode) |
126 | { | 130 | { |
131 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | ||
127 | reiserfs_truncate_file(inode, 1); | 132 | reiserfs_truncate_file(inode, 1); |
133 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | ||
128 | } | 134 | } |
129 | 135 | ||
130 | /* Sync a reiserfs file. */ | 136 | /* Sync a reiserfs file. */ |
@@ -288,8 +294,8 @@ const struct file_operations reiserfs_file_operations = { | |||
288 | #ifdef CONFIG_COMPAT | 294 | #ifdef CONFIG_COMPAT |
289 | .compat_ioctl = reiserfs_compat_ioctl, | 295 | .compat_ioctl = reiserfs_compat_ioctl, |
290 | #endif | 296 | #endif |
291 | .mmap = reiserfs_file_mmap, | 297 | .mmap = generic_file_mmap, |
292 | .open = dquot_file_open, | 298 | .open = reiserfs_file_open, |
293 | .release = reiserfs_file_release, | 299 | .release = reiserfs_file_release, |
294 | .fsync = reiserfs_sync_file, | 300 | .fsync = reiserfs_sync_file, |
295 | .aio_read = generic_file_aio_read, | 301 | .aio_read = generic_file_aio_read, |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0f22fdaf54ac..ae35413dcbe1 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -25,7 +25,7 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
25 | int reiserfs_prepare_write(struct file *f, struct page *page, | 25 | int reiserfs_prepare_write(struct file *f, struct page *page, |
26 | unsigned from, unsigned to); | 26 | unsigned from, unsigned to); |
27 | 27 | ||
28 | void reiserfs_delete_inode(struct inode *inode) | 28 | void reiserfs_evict_inode(struct inode *inode) |
29 | { | 29 | { |
30 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ | 30 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ |
31 | int jbegin_count = | 31 | int jbegin_count = |
@@ -35,10 +35,12 @@ void reiserfs_delete_inode(struct inode *inode) | |||
35 | int depth; | 35 | int depth; |
36 | int err; | 36 | int err; |
37 | 37 | ||
38 | if (!is_bad_inode(inode)) | 38 | if (!inode->i_nlink && !is_bad_inode(inode)) |
39 | dquot_initialize(inode); | 39 | dquot_initialize(inode); |
40 | 40 | ||
41 | truncate_inode_pages(&inode->i_data, 0); | 41 | truncate_inode_pages(&inode->i_data, 0); |
42 | if (inode->i_nlink) | ||
43 | goto no_delete; | ||
42 | 44 | ||
43 | depth = reiserfs_write_lock_once(inode->i_sb); | 45 | depth = reiserfs_write_lock_once(inode->i_sb); |
44 | 46 | ||
@@ -77,9 +79,14 @@ void reiserfs_delete_inode(struct inode *inode) | |||
77 | ; | 79 | ; |
78 | } | 80 | } |
79 | out: | 81 | out: |
80 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ | 82 | end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ |
83 | dquot_drop(inode); | ||
81 | inode->i_blocks = 0; | 84 | inode->i_blocks = 0; |
82 | reiserfs_write_unlock_once(inode->i_sb, depth); | 85 | reiserfs_write_unlock_once(inode->i_sb, depth); |
86 | |||
87 | no_delete: | ||
88 | end_writeback(inode); | ||
89 | dquot_drop(inode); | ||
83 | } | 90 | } |
84 | 91 | ||
85 | static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, | 92 | static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, |
@@ -1138,7 +1145,6 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1138 | REISERFS_I(inode)->i_prealloc_count = 0; | 1145 | REISERFS_I(inode)->i_prealloc_count = 0; |
1139 | REISERFS_I(inode)->i_trans_id = 0; | 1146 | REISERFS_I(inode)->i_trans_id = 0; |
1140 | REISERFS_I(inode)->i_jl = NULL; | 1147 | REISERFS_I(inode)->i_jl = NULL; |
1141 | mutex_init(&(REISERFS_I(inode)->i_mmap)); | ||
1142 | reiserfs_init_xattr_rwsem(inode); | 1148 | reiserfs_init_xattr_rwsem(inode); |
1143 | 1149 | ||
1144 | if (stat_data_v1(ih)) { | 1150 | if (stat_data_v1(ih)) { |
@@ -1221,7 +1227,7 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1221 | inode_set_bytes(inode, | 1227 | inode_set_bytes(inode, |
1222 | to_real_used_space(inode, inode->i_blocks, | 1228 | to_real_used_space(inode, inode->i_blocks, |
1223 | SD_V2_SIZE)); | 1229 | SD_V2_SIZE)); |
1224 | /* read persistent inode attributes from sd and initalise | 1230 | /* read persistent inode attributes from sd and initialise |
1225 | generic inode flags from them */ | 1231 | generic inode flags from them */ |
1226 | REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); | 1232 | REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); |
1227 | sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); | 1233 | sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); |
@@ -1841,7 +1847,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1841 | REISERFS_I(inode)->i_attrs = | 1847 | REISERFS_I(inode)->i_attrs = |
1842 | REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; | 1848 | REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; |
1843 | sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); | 1849 | sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); |
1844 | mutex_init(&(REISERFS_I(inode)->i_mmap)); | ||
1845 | reiserfs_init_xattr_rwsem(inode); | 1850 | reiserfs_init_xattr_rwsem(inode); |
1846 | 1851 | ||
1847 | /* key to search for correct place for new stat data */ | 1852 | /* key to search for correct place for new stat data */ |
@@ -2587,8 +2592,7 @@ static int reiserfs_write_begin(struct file *file, | |||
2587 | old_ref = th->t_refcount; | 2592 | old_ref = th->t_refcount; |
2588 | th->t_refcount++; | 2593 | th->t_refcount++; |
2589 | } | 2594 | } |
2590 | ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 2595 | ret = __block_write_begin(page, pos, len, reiserfs_get_block); |
2591 | reiserfs_get_block); | ||
2592 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2596 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2593 | struct reiserfs_transaction_handle *th = current->journal_info; | 2597 | struct reiserfs_transaction_handle *th = current->journal_info; |
2594 | /* this gets a little ugly. If reiserfs_get_block returned an | 2598 | /* this gets a little ugly. If reiserfs_get_block returned an |
@@ -3059,10 +3063,25 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | |||
3059 | { | 3063 | { |
3060 | struct file *file = iocb->ki_filp; | 3064 | struct file *file = iocb->ki_filp; |
3061 | struct inode *inode = file->f_mapping->host; | 3065 | struct inode *inode = file->f_mapping->host; |
3066 | ssize_t ret; | ||
3062 | 3067 | ||
3063 | return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, | 3068 | ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, |
3064 | offset, nr_segs, | 3069 | offset, nr_segs, |
3065 | reiserfs_get_blocks_direct_io, NULL); | 3070 | reiserfs_get_blocks_direct_io, NULL); |
3071 | |||
3072 | /* | ||
3073 | * In case of error extending write may have instantiated a few | ||
3074 | * blocks outside i_size. Trim these off again. | ||
3075 | */ | ||
3076 | if (unlikely((rw & WRITE) && ret < 0)) { | ||
3077 | loff_t isize = i_size_read(inode); | ||
3078 | loff_t end = offset + iov_length(iov, nr_segs); | ||
3079 | |||
3080 | if (end > isize) | ||
3081 | vmtruncate(inode, isize); | ||
3082 | } | ||
3083 | |||
3084 | return ret; | ||
3066 | } | 3085 | } |
3067 | 3086 | ||
3068 | int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | 3087 | int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) |
@@ -3072,6 +3091,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3072 | int depth; | 3091 | int depth; |
3073 | int error; | 3092 | int error; |
3074 | 3093 | ||
3094 | error = inode_change_ok(inode, attr); | ||
3095 | if (error) | ||
3096 | return error; | ||
3097 | |||
3075 | /* must be turned off for recursive notify_change calls */ | 3098 | /* must be turned off for recursive notify_change calls */ |
3076 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); | 3099 | ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); |
3077 | 3100 | ||
@@ -3121,55 +3144,58 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3121 | goto out; | 3144 | goto out; |
3122 | } | 3145 | } |
3123 | 3146 | ||
3124 | error = inode_change_ok(inode, attr); | 3147 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || |
3125 | if (!error) { | 3148 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { |
3126 | if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || | 3149 | struct reiserfs_transaction_handle th; |
3127 | (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { | 3150 | int jbegin_count = |
3128 | error = reiserfs_chown_xattrs(inode, attr); | 3151 | 2 * |
3152 | (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + | ||
3153 | REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + | ||
3154 | 2; | ||
3129 | 3155 | ||
3130 | if (!error) { | 3156 | error = reiserfs_chown_xattrs(inode, attr); |
3131 | struct reiserfs_transaction_handle th; | 3157 | |
3132 | int jbegin_count = | 3158 | if (error) |
3133 | 2 * | 3159 | return error; |
3134 | (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + | 3160 | |
3135 | REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + | 3161 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ |
3136 | 2; | 3162 | error = journal_begin(&th, inode->i_sb, jbegin_count); |
3137 | 3163 | if (error) | |
3138 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ | 3164 | goto out; |
3139 | error = | 3165 | error = dquot_transfer(inode, attr); |
3140 | journal_begin(&th, inode->i_sb, | 3166 | if (error) { |
3141 | jbegin_count); | 3167 | journal_end(&th, inode->i_sb, jbegin_count); |
3142 | if (error) | 3168 | goto out; |
3143 | goto out; | ||
3144 | error = dquot_transfer(inode, attr); | ||
3145 | if (error) { | ||
3146 | journal_end(&th, inode->i_sb, | ||
3147 | jbegin_count); | ||
3148 | goto out; | ||
3149 | } | ||
3150 | /* Update corresponding info in inode so that everything is in | ||
3151 | * one transaction */ | ||
3152 | if (attr->ia_valid & ATTR_UID) | ||
3153 | inode->i_uid = attr->ia_uid; | ||
3154 | if (attr->ia_valid & ATTR_GID) | ||
3155 | inode->i_gid = attr->ia_gid; | ||
3156 | mark_inode_dirty(inode); | ||
3157 | error = | ||
3158 | journal_end(&th, inode->i_sb, jbegin_count); | ||
3159 | } | ||
3160 | } | ||
3161 | if (!error) { | ||
3162 | /* | ||
3163 | * Relax the lock here, as it might truncate the | ||
3164 | * inode pages and wait for inode pages locks. | ||
3165 | * To release such page lock, the owner needs the | ||
3166 | * reiserfs lock | ||
3167 | */ | ||
3168 | reiserfs_write_unlock_once(inode->i_sb, depth); | ||
3169 | error = inode_setattr(inode, attr); | ||
3170 | depth = reiserfs_write_lock_once(inode->i_sb); | ||
3171 | } | 3169 | } |
3170 | |||
3171 | /* Update corresponding info in inode so that everything is in | ||
3172 | * one transaction */ | ||
3173 | if (attr->ia_valid & ATTR_UID) | ||
3174 | inode->i_uid = attr->ia_uid; | ||
3175 | if (attr->ia_valid & ATTR_GID) | ||
3176 | inode->i_gid = attr->ia_gid; | ||
3177 | mark_inode_dirty(inode); | ||
3178 | error = journal_end(&th, inode->i_sb, jbegin_count); | ||
3179 | if (error) | ||
3180 | goto out; | ||
3181 | } | ||
3182 | |||
3183 | /* | ||
3184 | * Relax the lock here, as it might truncate the | ||
3185 | * inode pages and wait for inode pages locks. | ||
3186 | * To release such page lock, the owner needs the | ||
3187 | * reiserfs lock | ||
3188 | */ | ||
3189 | reiserfs_write_unlock_once(inode->i_sb, depth); | ||
3190 | if ((attr->ia_valid & ATTR_SIZE) && | ||
3191 | attr->ia_size != i_size_read(inode)) | ||
3192 | error = vmtruncate(inode, attr->ia_size); | ||
3193 | |||
3194 | if (!error) { | ||
3195 | setattr_copy(inode, attr); | ||
3196 | mark_inode_dirty(inode); | ||
3172 | } | 3197 | } |
3198 | depth = reiserfs_write_lock_once(inode->i_sb); | ||
3173 | 3199 | ||
3174 | if (!error && reiserfs_posixacl(inode->i_sb)) { | 3200 | if (!error && reiserfs_posixacl(inode->i_sb)) { |
3175 | if (attr->ia_valid & ATTR_MODE) | 3201 | if (attr->ia_valid & ATTR_MODE) |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 19fbc810e8e7..1ec952b1f036 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -983,7 +983,6 @@ static int flush_older_commits(struct super_block *s, | |||
983 | 983 | ||
984 | static int reiserfs_async_progress_wait(struct super_block *s) | 984 | static int reiserfs_async_progress_wait(struct super_block *s) |
985 | { | 985 | { |
986 | DEFINE_WAIT(wait); | ||
987 | struct reiserfs_journal *j = SB_JOURNAL(s); | 986 | struct reiserfs_journal *j = SB_JOURNAL(s); |
988 | 987 | ||
989 | if (atomic_read(&j->j_async_throttle)) { | 988 | if (atomic_read(&j->j_async_throttle)) { |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 9822fa15118b..e15ff612002d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -525,6 +525,8 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) | |||
525 | kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); | 525 | kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); |
526 | if (!ei) | 526 | if (!ei) |
527 | return NULL; | 527 | return NULL; |
528 | atomic_set(&ei->openers, 0); | ||
529 | mutex_init(&ei->tailpack); | ||
528 | return &ei->vfs_inode; | 530 | return &ei->vfs_inode; |
529 | } | 531 | } |
530 | 532 | ||
@@ -589,11 +591,6 @@ out: | |||
589 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); | 591 | reiserfs_write_unlock_once(inode->i_sb, lock_depth); |
590 | } | 592 | } |
591 | 593 | ||
592 | static void reiserfs_clear_inode(struct inode *inode) | ||
593 | { | ||
594 | dquot_drop(inode); | ||
595 | } | ||
596 | |||
597 | #ifdef CONFIG_QUOTA | 594 | #ifdef CONFIG_QUOTA |
598 | static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, | 595 | static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, |
599 | size_t, loff_t); | 596 | size_t, loff_t); |
@@ -606,8 +603,7 @@ static const struct super_operations reiserfs_sops = { | |||
606 | .destroy_inode = reiserfs_destroy_inode, | 603 | .destroy_inode = reiserfs_destroy_inode, |
607 | .write_inode = reiserfs_write_inode, | 604 | .write_inode = reiserfs_write_inode, |
608 | .dirty_inode = reiserfs_dirty_inode, | 605 | .dirty_inode = reiserfs_dirty_inode, |
609 | .clear_inode = reiserfs_clear_inode, | 606 | .evict_inode = reiserfs_evict_inode, |
610 | .delete_inode = reiserfs_delete_inode, | ||
611 | .put_super = reiserfs_put_super, | 607 | .put_super = reiserfs_put_super, |
612 | .write_super = reiserfs_write_super, | 608 | .write_super = reiserfs_write_super, |
613 | .sync_fs = reiserfs_sync_fs, | 609 | .sync_fs = reiserfs_sync_fs, |
diff --git a/fs/signalfd.c b/fs/signalfd.c index f329849ce3c0..1c5a6add779d 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
@@ -88,6 +88,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
88 | err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); | 88 | err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); |
89 | err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); | 89 | err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); |
90 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); | 90 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); |
91 | err |= __put_user(kinfo->si_int, &uinfo->ssi_int); | ||
91 | break; | 92 | break; |
92 | case __SI_POLL: | 93 | case __SI_POLL: |
93 | err |= __put_user(kinfo->si_band, &uinfo->ssi_band); | 94 | err |= __put_user(kinfo->si_band, &uinfo->ssi_band); |
@@ -111,6 +112,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
111 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); | 112 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); |
112 | err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); | 113 | err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); |
113 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); | 114 | err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); |
115 | err |= __put_user(kinfo->si_int, &uinfo->ssi_int); | ||
114 | break; | 116 | break; |
115 | default: | 117 | default: |
116 | /* | 118 | /* |
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 9551cb6f7fe4..450c91941988 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -46,7 +46,7 @@ | |||
46 | 46 | ||
47 | #define SMB_TTL_DEFAULT 1000 | 47 | #define SMB_TTL_DEFAULT 1000 |
48 | 48 | ||
49 | static void smb_delete_inode(struct inode *); | 49 | static void smb_evict_inode(struct inode *); |
50 | static void smb_put_super(struct super_block *); | 50 | static void smb_put_super(struct super_block *); |
51 | static int smb_statfs(struct dentry *, struct kstatfs *); | 51 | static int smb_statfs(struct dentry *, struct kstatfs *); |
52 | static int smb_show_options(struct seq_file *, struct vfsmount *); | 52 | static int smb_show_options(struct seq_file *, struct vfsmount *); |
@@ -102,7 +102,7 @@ static const struct super_operations smb_sops = | |||
102 | .alloc_inode = smb_alloc_inode, | 102 | .alloc_inode = smb_alloc_inode, |
103 | .destroy_inode = smb_destroy_inode, | 103 | .destroy_inode = smb_destroy_inode, |
104 | .drop_inode = generic_delete_inode, | 104 | .drop_inode = generic_delete_inode, |
105 | .delete_inode = smb_delete_inode, | 105 | .evict_inode = smb_evict_inode, |
106 | .put_super = smb_put_super, | 106 | .put_super = smb_put_super, |
107 | .statfs = smb_statfs, | 107 | .statfs = smb_statfs, |
108 | .show_options = smb_show_options, | 108 | .show_options = smb_show_options, |
@@ -324,15 +324,15 @@ out: | |||
324 | * All blocking cleanup operations need to go here to avoid races. | 324 | * All blocking cleanup operations need to go here to avoid races. |
325 | */ | 325 | */ |
326 | static void | 326 | static void |
327 | smb_delete_inode(struct inode *ino) | 327 | smb_evict_inode(struct inode *ino) |
328 | { | 328 | { |
329 | DEBUG1("ino=%ld\n", ino->i_ino); | 329 | DEBUG1("ino=%ld\n", ino->i_ino); |
330 | truncate_inode_pages(&ino->i_data, 0); | 330 | truncate_inode_pages(&ino->i_data, 0); |
331 | end_writeback(ino); | ||
331 | lock_kernel(); | 332 | lock_kernel(); |
332 | if (smb_close(ino)) | 333 | if (smb_close(ino)) |
333 | PARANOIA("could not close inode %ld\n", ino->i_ino); | 334 | PARANOIA("could not close inode %ld\n", ino->i_ino); |
334 | unlock_kernel(); | 335 | unlock_kernel(); |
335 | clear_inode(ino); | ||
336 | } | 336 | } |
337 | 337 | ||
338 | static struct option opts[] = { | 338 | static struct option opts[] = { |
@@ -714,9 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) | |||
714 | error = server->ops->truncate(inode, attr->ia_size); | 714 | error = server->ops->truncate(inode, attr->ia_size); |
715 | if (error) | 715 | if (error) |
716 | goto out; | 716 | goto out; |
717 | error = simple_setsize(inode, attr->ia_size); | 717 | truncate_setsize(inode, attr->ia_size); |
718 | if (error) | ||
719 | goto out; | ||
720 | refresh = 1; | 718 | refresh = 1; |
721 | } | 719 | } |
722 | 720 | ||
diff --git a/fs/splice.c b/fs/splice.c index efdbfece9932..8f1dfaecc8f0 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -399,17 +399,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, | |||
399 | * If the page isn't uptodate, we may need to start io on it | 399 | * If the page isn't uptodate, we may need to start io on it |
400 | */ | 400 | */ |
401 | if (!PageUptodate(page)) { | 401 | if (!PageUptodate(page)) { |
402 | /* | 402 | lock_page(page); |
403 | * If in nonblock mode then dont block on waiting | ||
404 | * for an in-flight io page | ||
405 | */ | ||
406 | if (flags & SPLICE_F_NONBLOCK) { | ||
407 | if (!trylock_page(page)) { | ||
408 | error = -EAGAIN; | ||
409 | break; | ||
410 | } | ||
411 | } else | ||
412 | lock_page(page); | ||
413 | 403 | ||
414 | /* | 404 | /* |
415 | * Page was truncated, or invalidated by the | 405 | * Page was truncated, or invalidated by the |
@@ -597,7 +587,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
597 | struct page *pages[PIPE_DEF_BUFFERS]; | 587 | struct page *pages[PIPE_DEF_BUFFERS]; |
598 | struct partial_page partial[PIPE_DEF_BUFFERS]; | 588 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
599 | struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; | 589 | struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; |
600 | pgoff_t index; | ||
601 | ssize_t res; | 590 | ssize_t res; |
602 | size_t this_len; | 591 | size_t this_len; |
603 | int error; | 592 | int error; |
@@ -621,7 +610,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, | |||
621 | goto shrink_ret; | 610 | goto shrink_ret; |
622 | } | 611 | } |
623 | 612 | ||
624 | index = *ppos >> PAGE_CACHE_SHIFT; | ||
625 | offset = *ppos & ~PAGE_CACHE_MASK; | 613 | offset = *ppos & ~PAGE_CACHE_MASK; |
626 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 614 | nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
627 | 615 | ||
diff --git a/fs/statfs.c b/fs/statfs.c index 4ef021f3b612..30ea8c8a996b 100644 --- a/fs/statfs.c +++ b/fs/statfs.c | |||
@@ -2,38 +2,83 @@ | |||
2 | #include <linux/module.h> | 2 | #include <linux/module.h> |
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/file.h> | 4 | #include <linux/file.h> |
5 | #include <linux/mount.h> | ||
5 | #include <linux/namei.h> | 6 | #include <linux/namei.h> |
6 | #include <linux/statfs.h> | 7 | #include <linux/statfs.h> |
7 | #include <linux/security.h> | 8 | #include <linux/security.h> |
8 | #include <linux/uaccess.h> | 9 | #include <linux/uaccess.h> |
9 | 10 | ||
10 | int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 11 | static int flags_by_mnt(int mnt_flags) |
11 | { | 12 | { |
12 | int retval = -ENODEV; | 13 | int flags = 0; |
13 | 14 | ||
14 | if (dentry) { | 15 | if (mnt_flags & MNT_READONLY) |
15 | retval = -ENOSYS; | 16 | flags |= ST_RDONLY; |
16 | if (dentry->d_sb->s_op->statfs) { | 17 | if (mnt_flags & MNT_NOSUID) |
17 | memset(buf, 0, sizeof(*buf)); | 18 | flags |= ST_NOSUID; |
18 | retval = security_sb_statfs(dentry); | 19 | if (mnt_flags & MNT_NODEV) |
19 | if (retval) | 20 | flags |= ST_NODEV; |
20 | return retval; | 21 | if (mnt_flags & MNT_NOEXEC) |
21 | retval = dentry->d_sb->s_op->statfs(dentry, buf); | 22 | flags |= ST_NOEXEC; |
22 | if (retval == 0 && buf->f_frsize == 0) | 23 | if (mnt_flags & MNT_NOATIME) |
23 | buf->f_frsize = buf->f_bsize; | 24 | flags |= ST_NOATIME; |
24 | } | 25 | if (mnt_flags & MNT_NODIRATIME) |
25 | } | 26 | flags |= ST_NODIRATIME; |
27 | if (mnt_flags & MNT_RELATIME) | ||
28 | flags |= ST_RELATIME; | ||
29 | return flags; | ||
30 | } | ||
31 | |||
32 | static int flags_by_sb(int s_flags) | ||
33 | { | ||
34 | int flags = 0; | ||
35 | if (s_flags & MS_SYNCHRONOUS) | ||
36 | flags |= ST_SYNCHRONOUS; | ||
37 | if (s_flags & MS_MANDLOCK) | ||
38 | flags |= ST_MANDLOCK; | ||
39 | return flags; | ||
40 | } | ||
41 | |||
42 | static int calculate_f_flags(struct vfsmount *mnt) | ||
43 | { | ||
44 | return ST_VALID | flags_by_mnt(mnt->mnt_flags) | | ||
45 | flags_by_sb(mnt->mnt_sb->s_flags); | ||
46 | } | ||
47 | |||
48 | int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) | ||
49 | { | ||
50 | int retval; | ||
51 | |||
52 | if (!dentry->d_sb->s_op->statfs) | ||
53 | return -ENOSYS; | ||
54 | |||
55 | memset(buf, 0, sizeof(*buf)); | ||
56 | retval = security_sb_statfs(dentry); | ||
57 | if (retval) | ||
58 | return retval; | ||
59 | retval = dentry->d_sb->s_op->statfs(dentry, buf); | ||
60 | if (retval == 0 && buf->f_frsize == 0) | ||
61 | buf->f_frsize = buf->f_bsize; | ||
26 | return retval; | 62 | return retval; |
27 | } | 63 | } |
28 | 64 | ||
65 | int vfs_statfs(struct path *path, struct kstatfs *buf) | ||
66 | { | ||
67 | int error; | ||
68 | |||
69 | error = statfs_by_dentry(path->dentry, buf); | ||
70 | if (!error) | ||
71 | buf->f_flags = calculate_f_flags(path->mnt); | ||
72 | return error; | ||
73 | } | ||
29 | EXPORT_SYMBOL(vfs_statfs); | 74 | EXPORT_SYMBOL(vfs_statfs); |
30 | 75 | ||
31 | static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) | 76 | static int do_statfs_native(struct path *path, struct statfs *buf) |
32 | { | 77 | { |
33 | struct kstatfs st; | 78 | struct kstatfs st; |
34 | int retval; | 79 | int retval; |
35 | 80 | ||
36 | retval = vfs_statfs(dentry, &st); | 81 | retval = vfs_statfs(path, &st); |
37 | if (retval) | 82 | if (retval) |
38 | return retval; | 83 | return retval; |
39 | 84 | ||
@@ -67,17 +112,18 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) | |||
67 | buf->f_fsid = st.f_fsid; | 112 | buf->f_fsid = st.f_fsid; |
68 | buf->f_namelen = st.f_namelen; | 113 | buf->f_namelen = st.f_namelen; |
69 | buf->f_frsize = st.f_frsize; | 114 | buf->f_frsize = st.f_frsize; |
115 | buf->f_flags = st.f_flags; | ||
70 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | 116 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); |
71 | } | 117 | } |
72 | return 0; | 118 | return 0; |
73 | } | 119 | } |
74 | 120 | ||
75 | static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) | 121 | static int do_statfs64(struct path *path, struct statfs64 *buf) |
76 | { | 122 | { |
77 | struct kstatfs st; | 123 | struct kstatfs st; |
78 | int retval; | 124 | int retval; |
79 | 125 | ||
80 | retval = vfs_statfs(dentry, &st); | 126 | retval = vfs_statfs(path, &st); |
81 | if (retval) | 127 | if (retval) |
82 | return retval; | 128 | return retval; |
83 | 129 | ||
@@ -94,6 +140,7 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) | |||
94 | buf->f_fsid = st.f_fsid; | 140 | buf->f_fsid = st.f_fsid; |
95 | buf->f_namelen = st.f_namelen; | 141 | buf->f_namelen = st.f_namelen; |
96 | buf->f_frsize = st.f_frsize; | 142 | buf->f_frsize = st.f_frsize; |
143 | buf->f_flags = st.f_flags; | ||
97 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | 144 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); |
98 | } | 145 | } |
99 | return 0; | 146 | return 0; |
@@ -107,7 +154,7 @@ SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, b | |||
107 | error = user_path(pathname, &path); | 154 | error = user_path(pathname, &path); |
108 | if (!error) { | 155 | if (!error) { |
109 | struct statfs tmp; | 156 | struct statfs tmp; |
110 | error = vfs_statfs_native(path.dentry, &tmp); | 157 | error = do_statfs_native(&path, &tmp); |
111 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | 158 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) |
112 | error = -EFAULT; | 159 | error = -EFAULT; |
113 | path_put(&path); | 160 | path_put(&path); |
@@ -125,7 +172,7 @@ SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct stat | |||
125 | error = user_path(pathname, &path); | 172 | error = user_path(pathname, &path); |
126 | if (!error) { | 173 | if (!error) { |
127 | struct statfs64 tmp; | 174 | struct statfs64 tmp; |
128 | error = vfs_statfs64(path.dentry, &tmp); | 175 | error = do_statfs64(&path, &tmp); |
129 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | 176 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) |
130 | error = -EFAULT; | 177 | error = -EFAULT; |
131 | path_put(&path); | 178 | path_put(&path); |
@@ -143,7 +190,7 @@ SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) | |||
143 | file = fget(fd); | 190 | file = fget(fd); |
144 | if (!file) | 191 | if (!file) |
145 | goto out; | 192 | goto out; |
146 | error = vfs_statfs_native(file->f_path.dentry, &tmp); | 193 | error = do_statfs_native(&file->f_path, &tmp); |
147 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | 194 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) |
148 | error = -EFAULT; | 195 | error = -EFAULT; |
149 | fput(file); | 196 | fput(file); |
@@ -164,7 +211,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user | |||
164 | file = fget(fd); | 211 | file = fget(fd); |
165 | if (!file) | 212 | if (!file) |
166 | goto out; | 213 | goto out; |
167 | error = vfs_statfs64(file->f_path.dentry, &tmp); | 214 | error = do_statfs64(&file->f_path, &tmp); |
168 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | 215 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) |
169 | error = -EFAULT; | 216 | error = -EFAULT; |
170 | fput(file); | 217 | fput(file); |
@@ -183,7 +230,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) | |||
183 | if (!s) | 230 | if (!s) |
184 | return -EINVAL; | 231 | return -EINVAL; |
185 | 232 | ||
186 | err = vfs_statfs(s->s_root, &sbuf); | 233 | err = statfs_by_dentry(s->s_root, &sbuf); |
187 | drop_super(s); | 234 | drop_super(s); |
188 | if (err) | 235 | if (err) |
189 | return err; | 236 | return err; |
diff --git a/fs/super.c b/fs/super.c index 938119ab8dcb..9674ab2c8718 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -305,8 +305,13 @@ retry: | |||
305 | if (s) { | 305 | if (s) { |
306 | up_write(&s->s_umount); | 306 | up_write(&s->s_umount); |
307 | destroy_super(s); | 307 | destroy_super(s); |
308 | s = NULL; | ||
308 | } | 309 | } |
309 | down_write(&old->s_umount); | 310 | down_write(&old->s_umount); |
311 | if (unlikely(!(old->s_flags & MS_BORN))) { | ||
312 | deactivate_locked_super(old); | ||
313 | goto retry; | ||
314 | } | ||
310 | return old; | 315 | return old; |
311 | } | 316 | } |
312 | } | 317 | } |
@@ -358,10 +363,10 @@ EXPORT_SYMBOL(drop_super); | |||
358 | */ | 363 | */ |
359 | void sync_supers(void) | 364 | void sync_supers(void) |
360 | { | 365 | { |
361 | struct super_block *sb, *n; | 366 | struct super_block *sb, *p = NULL; |
362 | 367 | ||
363 | spin_lock(&sb_lock); | 368 | spin_lock(&sb_lock); |
364 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { | 369 | list_for_each_entry(sb, &super_blocks, s_list) { |
365 | if (list_empty(&sb->s_instances)) | 370 | if (list_empty(&sb->s_instances)) |
366 | continue; | 371 | continue; |
367 | if (sb->s_op->write_super && sb->s_dirt) { | 372 | if (sb->s_op->write_super && sb->s_dirt) { |
@@ -374,11 +379,13 @@ void sync_supers(void) | |||
374 | up_read(&sb->s_umount); | 379 | up_read(&sb->s_umount); |
375 | 380 | ||
376 | spin_lock(&sb_lock); | 381 | spin_lock(&sb_lock); |
377 | /* lock was dropped, must reset next */ | 382 | if (p) |
378 | list_safe_reset_next(sb, n, s_list); | 383 | __put_super(p); |
379 | __put_super(sb); | 384 | p = sb; |
380 | } | 385 | } |
381 | } | 386 | } |
387 | if (p) | ||
388 | __put_super(p); | ||
382 | spin_unlock(&sb_lock); | 389 | spin_unlock(&sb_lock); |
383 | } | 390 | } |
384 | 391 | ||
@@ -392,10 +399,10 @@ void sync_supers(void) | |||
392 | */ | 399 | */ |
393 | void iterate_supers(void (*f)(struct super_block *, void *), void *arg) | 400 | void iterate_supers(void (*f)(struct super_block *, void *), void *arg) |
394 | { | 401 | { |
395 | struct super_block *sb, *n; | 402 | struct super_block *sb, *p = NULL; |
396 | 403 | ||
397 | spin_lock(&sb_lock); | 404 | spin_lock(&sb_lock); |
398 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { | 405 | list_for_each_entry(sb, &super_blocks, s_list) { |
399 | if (list_empty(&sb->s_instances)) | 406 | if (list_empty(&sb->s_instances)) |
400 | continue; | 407 | continue; |
401 | sb->s_count++; | 408 | sb->s_count++; |
@@ -407,10 +414,12 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg) | |||
407 | up_read(&sb->s_umount); | 414 | up_read(&sb->s_umount); |
408 | 415 | ||
409 | spin_lock(&sb_lock); | 416 | spin_lock(&sb_lock); |
410 | /* lock was dropped, must reset next */ | 417 | if (p) |
411 | list_safe_reset_next(sb, n, s_list); | 418 | __put_super(p); |
412 | __put_super(sb); | 419 | p = sb; |
413 | } | 420 | } |
421 | if (p) | ||
422 | __put_super(p); | ||
414 | spin_unlock(&sb_lock); | 423 | spin_unlock(&sb_lock); |
415 | } | 424 | } |
416 | 425 | ||
@@ -572,10 +581,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
572 | 581 | ||
573 | static void do_emergency_remount(struct work_struct *work) | 582 | static void do_emergency_remount(struct work_struct *work) |
574 | { | 583 | { |
575 | struct super_block *sb, *n; | 584 | struct super_block *sb, *p = NULL; |
576 | 585 | ||
577 | spin_lock(&sb_lock); | 586 | spin_lock(&sb_lock); |
578 | list_for_each_entry_safe(sb, n, &super_blocks, s_list) { | 587 | list_for_each_entry(sb, &super_blocks, s_list) { |
579 | if (list_empty(&sb->s_instances)) | 588 | if (list_empty(&sb->s_instances)) |
580 | continue; | 589 | continue; |
581 | sb->s_count++; | 590 | sb->s_count++; |
@@ -589,10 +598,12 @@ static void do_emergency_remount(struct work_struct *work) | |||
589 | } | 598 | } |
590 | up_write(&sb->s_umount); | 599 | up_write(&sb->s_umount); |
591 | spin_lock(&sb_lock); | 600 | spin_lock(&sb_lock); |
592 | /* lock was dropped, must reset next */ | 601 | if (p) |
593 | list_safe_reset_next(sb, n, s_list); | 602 | __put_super(p); |
594 | __put_super(sb); | 603 | p = sb; |
595 | } | 604 | } |
605 | if (p) | ||
606 | __put_super(p); | ||
596 | spin_unlock(&sb_lock); | 607 | spin_unlock(&sb_lock); |
597 | kfree(work); | 608 | kfree(work); |
598 | printk("Emergency Remount complete\n"); | 609 | printk("Emergency Remount complete\n"); |
@@ -773,7 +784,16 @@ int get_sb_bdev(struct file_system_type *fs_type, | |||
773 | goto error_bdev; | 784 | goto error_bdev; |
774 | } | 785 | } |
775 | 786 | ||
787 | /* | ||
788 | * s_umount nests inside bd_mutex during | ||
789 | * __invalidate_device(). close_bdev_exclusive() | ||
790 | * acquires bd_mutex and can't be called under | ||
791 | * s_umount. Drop s_umount temporarily. This is safe | ||
792 | * as we're holding an active reference. | ||
793 | */ | ||
794 | up_write(&s->s_umount); | ||
776 | close_bdev_exclusive(bdev, mode); | 795 | close_bdev_exclusive(bdev, mode); |
796 | down_write(&s->s_umount); | ||
777 | } else { | 797 | } else { |
778 | char b[BDEVNAME_SIZE]; | 798 | char b[BDEVNAME_SIZE]; |
779 | 799 | ||
@@ -909,6 +929,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void | |||
909 | goto out_free_secdata; | 929 | goto out_free_secdata; |
910 | BUG_ON(!mnt->mnt_sb); | 930 | BUG_ON(!mnt->mnt_sb); |
911 | WARN_ON(!mnt->mnt_sb->s_bdi); | 931 | WARN_ON(!mnt->mnt_sb->s_bdi); |
932 | mnt->mnt_sb->s_flags |= MS_BORN; | ||
912 | 933 | ||
913 | error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); | 934 | error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); |
914 | if (error) | 935 | if (error) |
@@ -128,31 +128,6 @@ void emergency_sync(void) | |||
128 | } | 128 | } |
129 | } | 129 | } |
130 | 130 | ||
131 | /* | ||
132 | * Generic function to fsync a file. | ||
133 | */ | ||
134 | int file_fsync(struct file *filp, int datasync) | ||
135 | { | ||
136 | struct inode *inode = filp->f_mapping->host; | ||
137 | struct super_block * sb; | ||
138 | int ret, err; | ||
139 | |||
140 | /* sync the inode to buffers */ | ||
141 | ret = write_inode_now(inode, 0); | ||
142 | |||
143 | /* sync the superblock to buffers */ | ||
144 | sb = inode->i_sb; | ||
145 | if (sb->s_dirt && sb->s_op->write_super) | ||
146 | sb->s_op->write_super(sb); | ||
147 | |||
148 | /* .. finally sync the buffers to disk */ | ||
149 | err = sync_blockdev(sb->s_bdev); | ||
150 | if (!ret) | ||
151 | ret = err; | ||
152 | return ret; | ||
153 | } | ||
154 | EXPORT_SYMBOL(file_fsync); | ||
155 | |||
156 | /** | 131 | /** |
157 | * vfs_fsync_range - helper to sync a range of data & metadata to disk | 132 | * vfs_fsync_range - helper to sync a range of data & metadata to disk |
158 | * @file: file to sync | 133 | * @file: file to sync |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1beaa739d0a6..1b27b5688f62 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -593,7 +593,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); | |||
593 | * @mode: file permissions. | 593 | * @mode: file permissions. |
594 | * | 594 | * |
595 | */ | 595 | */ |
596 | int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) | 596 | int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, |
597 | mode_t mode) | ||
597 | { | 598 | { |
598 | struct sysfs_dirent *sd; | 599 | struct sysfs_dirent *sd; |
599 | struct iattr newattrs; | 600 | struct iattr newattrs; |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 0835a3b70e03..cffb1fd8ba33 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -122,7 +122,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
122 | goto out; | 122 | goto out; |
123 | 123 | ||
124 | /* this ignores size changes */ | 124 | /* this ignores size changes */ |
125 | generic_setattr(inode, iattr); | 125 | setattr_copy(inode, iattr); |
126 | 126 | ||
127 | out: | 127 | out: |
128 | mutex_unlock(&sysfs_mutex); | 128 | mutex_unlock(&sysfs_mutex); |
@@ -312,15 +312,15 @@ struct inode * sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd) | |||
312 | * The sysfs_dirent serves as both an inode and a directory entry for sysfs. | 312 | * The sysfs_dirent serves as both an inode and a directory entry for sysfs. |
313 | * To prevent the sysfs inode numbers from being freed prematurely we take a | 313 | * To prevent the sysfs inode numbers from being freed prematurely we take a |
314 | * reference to sysfs_dirent from the sysfs inode. A | 314 | * reference to sysfs_dirent from the sysfs inode. A |
315 | * super_operations.delete_inode() implementation is needed to drop that | 315 | * super_operations.evict_inode() implementation is needed to drop that |
316 | * reference upon inode destruction. | 316 | * reference upon inode destruction. |
317 | */ | 317 | */ |
318 | void sysfs_delete_inode(struct inode *inode) | 318 | void sysfs_evict_inode(struct inode *inode) |
319 | { | 319 | { |
320 | struct sysfs_dirent *sd = inode->i_private; | 320 | struct sysfs_dirent *sd = inode->i_private; |
321 | 321 | ||
322 | truncate_inode_pages(&inode->i_data, 0); | 322 | truncate_inode_pages(&inode->i_data, 0); |
323 | clear_inode(inode); | 323 | end_writeback(inode); |
324 | sysfs_put(sd); | 324 | sysfs_put(sd); |
325 | } | 325 | } |
326 | 326 | ||
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 281c0c9bc39f..f2af22574c50 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -29,7 +29,7 @@ struct kmem_cache *sysfs_dir_cachep; | |||
29 | static const struct super_operations sysfs_ops = { | 29 | static const struct super_operations sysfs_ops = { |
30 | .statfs = simple_statfs, | 30 | .statfs = simple_statfs, |
31 | .drop_inode = generic_delete_inode, | 31 | .drop_inode = generic_delete_inode, |
32 | .delete_inode = sysfs_delete_inode, | 32 | .evict_inode = sysfs_evict_inode, |
33 | }; | 33 | }; |
34 | 34 | ||
35 | struct sysfs_dirent sysfs_root = { | 35 | struct sysfs_dirent sysfs_root = { |
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 6a13105b5594..d9be60a2e956 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -198,7 +198,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) | |||
198 | * inode.c | 198 | * inode.c |
199 | */ | 199 | */ |
200 | struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); | 200 | struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); |
201 | void sysfs_delete_inode(struct inode *inode); | 201 | void sysfs_evict_inode(struct inode *inode); |
202 | int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); | 202 | int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); |
203 | int sysfs_permission(struct inode *inode, int mask); | 203 | int sysfs_permission(struct inode *inode, int mask); |
204 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); | 204 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); |
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 79941e4964a4..a77c42157620 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c | |||
@@ -218,8 +218,7 @@ got_it: | |||
218 | pos = page_offset(page) + | 218 | pos = page_offset(page) + |
219 | (char*)de - (char*)page_address(page); | 219 | (char*)de - (char*)page_address(page); |
220 | lock_page(page); | 220 | lock_page(page); |
221 | err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE, | 221 | err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE); |
222 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
223 | if (err) | 222 | if (err) |
224 | goto out_unlock; | 223 | goto out_unlock; |
225 | memcpy (de->name, name, namelen); | 224 | memcpy (de->name, name, namelen); |
@@ -239,15 +238,13 @@ out_unlock: | |||
239 | 238 | ||
240 | int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) | 239 | int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) |
241 | { | 240 | { |
242 | struct address_space *mapping = page->mapping; | 241 | struct inode *inode = page->mapping->host; |
243 | struct inode *inode = (struct inode*)mapping->host; | ||
244 | char *kaddr = (char*)page_address(page); | 242 | char *kaddr = (char*)page_address(page); |
245 | loff_t pos = page_offset(page) + (char *)de - kaddr; | 243 | loff_t pos = page_offset(page) + (char *)de - kaddr; |
246 | int err; | 244 | int err; |
247 | 245 | ||
248 | lock_page(page); | 246 | lock_page(page); |
249 | err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, | 247 | err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE); |
250 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
251 | BUG_ON(err); | 248 | BUG_ON(err); |
252 | de->inode = 0; | 249 | de->inode = 0; |
253 | err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); | 250 | err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); |
@@ -259,16 +256,14 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) | |||
259 | 256 | ||
260 | int sysv_make_empty(struct inode *inode, struct inode *dir) | 257 | int sysv_make_empty(struct inode *inode, struct inode *dir) |
261 | { | 258 | { |
262 | struct address_space *mapping = inode->i_mapping; | 259 | struct page *page = grab_cache_page(inode->i_mapping, 0); |
263 | struct page *page = grab_cache_page(mapping, 0); | ||
264 | struct sysv_dir_entry * de; | 260 | struct sysv_dir_entry * de; |
265 | char *base; | 261 | char *base; |
266 | int err; | 262 | int err; |
267 | 263 | ||
268 | if (!page) | 264 | if (!page) |
269 | return -ENOMEM; | 265 | return -ENOMEM; |
270 | err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE, | 266 | err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE); |
271 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
272 | if (err) { | 267 | if (err) { |
273 | unlock_page(page); | 268 | unlock_page(page); |
274 | goto fail; | 269 | goto fail; |
@@ -341,15 +336,13 @@ not_empty: | |||
341 | void sysv_set_link(struct sysv_dir_entry *de, struct page *page, | 336 | void sysv_set_link(struct sysv_dir_entry *de, struct page *page, |
342 | struct inode *inode) | 337 | struct inode *inode) |
343 | { | 338 | { |
344 | struct address_space *mapping = page->mapping; | 339 | struct inode *dir = page->mapping->host; |
345 | struct inode *dir = mapping->host; | ||
346 | loff_t pos = page_offset(page) + | 340 | loff_t pos = page_offset(page) + |
347 | (char *)de-(char*)page_address(page); | 341 | (char *)de-(char*)page_address(page); |
348 | int err; | 342 | int err; |
349 | 343 | ||
350 | lock_page(page); | 344 | lock_page(page); |
351 | err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, | 345 | err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE); |
352 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
353 | BUG_ON(err); | 346 | BUG_ON(err); |
354 | de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); | 347 | de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); |
355 | err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); | 348 | err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); |
diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 750cc22349bd..0a65939508e9 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c | |||
@@ -30,7 +30,29 @@ const struct file_operations sysv_file_operations = { | |||
30 | .splice_read = generic_file_splice_read, | 30 | .splice_read = generic_file_splice_read, |
31 | }; | 31 | }; |
32 | 32 | ||
33 | static int sysv_setattr(struct dentry *dentry, struct iattr *attr) | ||
34 | { | ||
35 | struct inode *inode = dentry->d_inode; | ||
36 | int error; | ||
37 | |||
38 | error = inode_change_ok(inode, attr); | ||
39 | if (error) | ||
40 | return error; | ||
41 | |||
42 | if ((attr->ia_valid & ATTR_SIZE) && | ||
43 | attr->ia_size != i_size_read(inode)) { | ||
44 | error = vmtruncate(inode, attr->ia_size); | ||
45 | if (error) | ||
46 | return error; | ||
47 | } | ||
48 | |||
49 | setattr_copy(inode, attr); | ||
50 | mark_inode_dirty(inode); | ||
51 | return 0; | ||
52 | } | ||
53 | |||
33 | const struct inode_operations sysv_file_inode_operations = { | 54 | const struct inode_operations sysv_file_inode_operations = { |
34 | .truncate = sysv_truncate, | 55 | .truncate = sysv_truncate, |
56 | .setattr = sysv_setattr, | ||
35 | .getattr = sysv_getattr, | 57 | .getattr = sysv_getattr, |
36 | }; | 58 | }; |
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c index fcc498ec9b33..0c96c98bd1db 100644 --- a/fs/sysv/ialloc.c +++ b/fs/sysv/ialloc.c | |||
@@ -113,7 +113,6 @@ void sysv_free_inode(struct inode * inode) | |||
113 | return; | 113 | return; |
114 | } | 114 | } |
115 | raw_inode = sysv_raw_inode(sb, ino, &bh); | 115 | raw_inode = sysv_raw_inode(sb, ino, &bh); |
116 | clear_inode(inode); | ||
117 | if (!raw_inode) { | 116 | if (!raw_inode) { |
118 | printk("sysv_free_inode: unable to read inode block on device " | 117 | printk("sysv_free_inode: unable to read inode block on device " |
119 | "%s\n", inode->i_sb->s_id); | 118 | "%s\n", inode->i_sb->s_id); |
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index d4a5380b5669..de44d067b9e6 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -71,8 +71,8 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) | |||
71 | lock_super(sb); | 71 | lock_super(sb); |
72 | if (sbi->s_forced_ro) | 72 | if (sbi->s_forced_ro) |
73 | *flags |= MS_RDONLY; | 73 | *flags |= MS_RDONLY; |
74 | if (!(*flags & MS_RDONLY)) | 74 | if (*flags & MS_RDONLY) |
75 | sb->s_dirt = 1; | 75 | sysv_write_super(sb); |
76 | unlock_super(sb); | 76 | unlock_super(sb); |
77 | return 0; | 77 | return 0; |
78 | } | 78 | } |
@@ -308,12 +308,17 @@ int sysv_sync_inode(struct inode *inode) | |||
308 | return __sysv_write_inode(inode, 1); | 308 | return __sysv_write_inode(inode, 1); |
309 | } | 309 | } |
310 | 310 | ||
311 | static void sysv_delete_inode(struct inode *inode) | 311 | static void sysv_evict_inode(struct inode *inode) |
312 | { | 312 | { |
313 | truncate_inode_pages(&inode->i_data, 0); | 313 | truncate_inode_pages(&inode->i_data, 0); |
314 | inode->i_size = 0; | 314 | if (!inode->i_nlink) { |
315 | sysv_truncate(inode); | 315 | inode->i_size = 0; |
316 | sysv_free_inode(inode); | 316 | sysv_truncate(inode); |
317 | } | ||
318 | invalidate_inode_buffers(inode); | ||
319 | end_writeback(inode); | ||
320 | if (!inode->i_nlink) | ||
321 | sysv_free_inode(inode); | ||
317 | } | 322 | } |
318 | 323 | ||
319 | static struct kmem_cache *sysv_inode_cachep; | 324 | static struct kmem_cache *sysv_inode_cachep; |
@@ -344,7 +349,7 @@ const struct super_operations sysv_sops = { | |||
344 | .alloc_inode = sysv_alloc_inode, | 349 | .alloc_inode = sysv_alloc_inode, |
345 | .destroy_inode = sysv_destroy_inode, | 350 | .destroy_inode = sysv_destroy_inode, |
346 | .write_inode = sysv_write_inode, | 351 | .write_inode = sysv_write_inode, |
347 | .delete_inode = sysv_delete_inode, | 352 | .evict_inode = sysv_evict_inode, |
348 | .put_super = sysv_put_super, | 353 | .put_super = sysv_put_super, |
349 | .write_super = sysv_write_super, | 354 | .write_super = sysv_write_super, |
350 | .sync_fs = sysv_sync_fs, | 355 | .sync_fs = sysv_sync_fs, |
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index f042eec464c2..9ca66276315e 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c | |||
@@ -459,20 +459,25 @@ static int sysv_readpage(struct file *file, struct page *page) | |||
459 | return block_read_full_page(page,get_block); | 459 | return block_read_full_page(page,get_block); |
460 | } | 460 | } |
461 | 461 | ||
462 | int __sysv_write_begin(struct file *file, struct address_space *mapping, | 462 | int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) |
463 | loff_t pos, unsigned len, unsigned flags, | ||
464 | struct page **pagep, void **fsdata) | ||
465 | { | 463 | { |
466 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 464 | return __block_write_begin(page, pos, len, get_block); |
467 | get_block); | ||
468 | } | 465 | } |
469 | 466 | ||
470 | static int sysv_write_begin(struct file *file, struct address_space *mapping, | 467 | static int sysv_write_begin(struct file *file, struct address_space *mapping, |
471 | loff_t pos, unsigned len, unsigned flags, | 468 | loff_t pos, unsigned len, unsigned flags, |
472 | struct page **pagep, void **fsdata) | 469 | struct page **pagep, void **fsdata) |
473 | { | 470 | { |
474 | *pagep = NULL; | 471 | int ret; |
475 | return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata); | 472 | |
473 | ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); | ||
474 | if (unlikely(ret)) { | ||
475 | loff_t isize = mapping->host->i_size; | ||
476 | if (pos + len > isize) | ||
477 | vmtruncate(mapping->host, isize); | ||
478 | } | ||
479 | |||
480 | return ret; | ||
476 | } | 481 | } |
477 | 482 | ||
478 | static sector_t sysv_bmap(struct address_space *mapping, sector_t block) | 483 | static sector_t sysv_bmap(struct address_space *mapping, sector_t block) |
diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 5a903da54551..85359a8df605 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/init.h> | 24 | #include <linux/init.h> |
25 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
26 | #include <linux/buffer_head.h> | 26 | #include <linux/buffer_head.h> |
27 | #include <linux/parser.h> | ||
27 | #include "sysv.h" | 28 | #include "sysv.h" |
28 | 29 | ||
29 | /* | 30 | /* |
@@ -347,7 +348,6 @@ static int complete_read_super(struct super_block *sb, int silent, int size) | |||
347 | sb->s_flags |= MS_RDONLY; | 348 | sb->s_flags |= MS_RDONLY; |
348 | if (sbi->s_truncate) | 349 | if (sbi->s_truncate) |
349 | sb->s_root->d_op = &sysv_dentry_operations; | 350 | sb->s_root->d_op = &sysv_dentry_operations; |
350 | sb->s_dirt = 1; | ||
351 | return 1; | 351 | return 1; |
352 | } | 352 | } |
353 | 353 | ||
@@ -435,12 +435,46 @@ Ebadsize: | |||
435 | goto failed; | 435 | goto failed; |
436 | } | 436 | } |
437 | 437 | ||
438 | static int v7_fill_super(struct super_block *sb, void *data, int silent) | 438 | static int v7_sanity_check(struct super_block *sb, struct buffer_head *bh) |
439 | { | 439 | { |
440 | struct sysv_sb_info *sbi; | ||
441 | struct buffer_head *bh, *bh2 = NULL; | ||
442 | struct v7_super_block *v7sb; | 440 | struct v7_super_block *v7sb; |
443 | struct sysv_inode *v7i; | 441 | struct sysv_inode *v7i; |
442 | struct buffer_head *bh2; | ||
443 | struct sysv_sb_info *sbi; | ||
444 | |||
445 | sbi = sb->s_fs_info; | ||
446 | |||
447 | /* plausibility check on superblock */ | ||
448 | v7sb = (struct v7_super_block *) bh->b_data; | ||
449 | if (fs16_to_cpu(sbi, v7sb->s_nfree) > V7_NICFREE || | ||
450 | fs16_to_cpu(sbi, v7sb->s_ninode) > V7_NICINOD || | ||
451 | fs32_to_cpu(sbi, v7sb->s_fsize) > V7_MAXSIZE) | ||
452 | return 0; | ||
453 | |||
454 | /* plausibility check on root inode: it is a directory, | ||
455 | with a nonzero size that is a multiple of 16 */ | ||
456 | bh2 = sb_bread(sb, 2); | ||
457 | if (bh2 == NULL) | ||
458 | return 0; | ||
459 | |||
460 | v7i = (struct sysv_inode *)(bh2->b_data + 64); | ||
461 | if ((fs16_to_cpu(sbi, v7i->i_mode) & ~0777) != S_IFDIR || | ||
462 | (fs32_to_cpu(sbi, v7i->i_size) == 0) || | ||
463 | (fs32_to_cpu(sbi, v7i->i_size) & 017) || | ||
464 | (fs32_to_cpu(sbi, v7i->i_size) > V7_NFILES * | ||
465 | sizeof(struct sysv_dir_entry))) { | ||
466 | brelse(bh2); | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | brelse(bh2); | ||
471 | return 1; | ||
472 | } | ||
473 | |||
474 | static int v7_fill_super(struct super_block *sb, void *data, int silent) | ||
475 | { | ||
476 | struct sysv_sb_info *sbi; | ||
477 | struct buffer_head *bh; | ||
444 | 478 | ||
445 | if (440 != sizeof (struct v7_super_block)) | 479 | if (440 != sizeof (struct v7_super_block)) |
446 | panic("V7 FS: bad super-block size"); | 480 | panic("V7 FS: bad super-block size"); |
@@ -454,7 +488,6 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) | |||
454 | sbi->s_sb = sb; | 488 | sbi->s_sb = sb; |
455 | sbi->s_block_base = 0; | 489 | sbi->s_block_base = 0; |
456 | sbi->s_type = FSTYPE_V7; | 490 | sbi->s_type = FSTYPE_V7; |
457 | sbi->s_bytesex = BYTESEX_PDP; | ||
458 | sb->s_fs_info = sbi; | 491 | sb->s_fs_info = sbi; |
459 | 492 | ||
460 | sb_set_blocksize(sb, 512); | 493 | sb_set_blocksize(sb, 512); |
@@ -466,32 +499,27 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) | |||
466 | goto failed; | 499 | goto failed; |
467 | } | 500 | } |
468 | 501 | ||
469 | /* plausibility check on superblock */ | 502 | /* Try PDP-11 UNIX */ |
470 | v7sb = (struct v7_super_block *) bh->b_data; | 503 | sbi->s_bytesex = BYTESEX_PDP; |
471 | if (fs16_to_cpu(sbi, v7sb->s_nfree) > V7_NICFREE || | 504 | if (v7_sanity_check(sb, bh)) |
472 | fs16_to_cpu(sbi, v7sb->s_ninode) > V7_NICINOD || | 505 | goto detected; |
473 | fs32_to_cpu(sbi, v7sb->s_time) == 0) | ||
474 | goto failed; | ||
475 | 506 | ||
476 | /* plausibility check on root inode: it is a directory, | 507 | /* Try PC/IX, v7/x86 */ |
477 | with a nonzero size that is a multiple of 16 */ | 508 | sbi->s_bytesex = BYTESEX_LE; |
478 | if ((bh2 = sb_bread(sb, 2)) == NULL) | 509 | if (v7_sanity_check(sb, bh)) |
479 | goto failed; | 510 | goto detected; |
480 | v7i = (struct sysv_inode *)(bh2->b_data + 64); | ||
481 | if ((fs16_to_cpu(sbi, v7i->i_mode) & ~0777) != S_IFDIR || | ||
482 | (fs32_to_cpu(sbi, v7i->i_size) == 0) || | ||
483 | (fs32_to_cpu(sbi, v7i->i_size) & 017) != 0) | ||
484 | goto failed; | ||
485 | brelse(bh2); | ||
486 | bh2 = NULL; | ||
487 | 511 | ||
512 | goto failed; | ||
513 | |||
514 | detected: | ||
488 | sbi->s_bh1 = bh; | 515 | sbi->s_bh1 = bh; |
489 | sbi->s_bh2 = bh; | 516 | sbi->s_bh2 = bh; |
490 | if (complete_read_super(sb, silent, 1)) | 517 | if (complete_read_super(sb, silent, 1)) |
491 | return 0; | 518 | return 0; |
492 | 519 | ||
493 | failed: | 520 | failed: |
494 | brelse(bh2); | 521 | printk(KERN_ERR "VFS: could not find a valid V7 on %s.\n", |
522 | sb->s_id); | ||
495 | brelse(bh); | 523 | brelse(bh); |
496 | kfree(sbi); | 524 | kfree(sbi); |
497 | return -EINVAL; | 525 | return -EINVAL; |
@@ -560,4 +588,5 @@ static void __exit exit_sysv_fs(void) | |||
560 | 588 | ||
561 | module_init(init_sysv_fs) | 589 | module_init(init_sysv_fs) |
562 | module_exit(exit_sysv_fs) | 590 | module_exit(exit_sysv_fs) |
591 | MODULE_ALIAS("v7"); | ||
563 | MODULE_LICENSE("GPL"); | 592 | MODULE_LICENSE("GPL"); |
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 94cb9b4d76c2..bb55cdb394bf 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h | |||
@@ -136,9 +136,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *); | |||
136 | 136 | ||
137 | /* itree.c */ | 137 | /* itree.c */ |
138 | extern void sysv_truncate(struct inode *); | 138 | extern void sysv_truncate(struct inode *); |
139 | extern int __sysv_write_begin(struct file *file, struct address_space *mapping, | 139 | extern int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len); |
140 | loff_t pos, unsigned len, unsigned flags, | ||
141 | struct page **pagep, void **fsdata); | ||
142 | 140 | ||
143 | /* inode.c */ | 141 | /* inode.c */ |
144 | extern struct inode *sysv_iget(struct super_block *, unsigned int); | 142 | extern struct inode *sysv_iget(struct super_block *, unsigned int); |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 12f445cee9f7..03ae894c45de 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -967,14 +967,15 @@ static int do_writepage(struct page *page, int len) | |||
967 | * the page locked, and it locks @ui_mutex. However, write-back does take inode | 967 | * the page locked, and it locks @ui_mutex. However, write-back does take inode |
968 | * @i_mutex, which means other VFS operations may be run on this inode at the | 968 | * @i_mutex, which means other VFS operations may be run on this inode at the |
969 | * same time. And the problematic one is truncation to smaller size, from where | 969 | * same time. And the problematic one is truncation to smaller size, from where |
970 | * we have to call 'simple_setsize()', which first changes @inode->i_size, then | 970 | * we have to call 'truncate_setsize()', which first changes @inode->i_size, then |
971 | * drops the truncated pages. And while dropping the pages, it takes the page | 971 | * drops the truncated pages. And while dropping the pages, it takes the page |
972 | * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with | 972 | * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with |
973 | * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This | 973 | * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This |
974 | * means that @inode->i_size is changed while @ui_mutex is unlocked. | 974 | * means that @inode->i_size is changed while @ui_mutex is unlocked. |
975 | * | 975 | * |
976 | * XXX: with the new truncate the above is not true anymore, the simple_setsize | 976 | * XXX(truncate): with the new truncate sequence this is not true anymore, |
977 | * calls can be replaced with the individual components. | 977 | * and the calls to truncate_setsize can be move around freely. They should |
978 | * be moved to the very end of the truncate sequence. | ||
978 | * | 979 | * |
979 | * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond | 980 | * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond |
980 | * inode size. How do we do this if @inode->i_size may became smaller while we | 981 | * inode size. How do we do this if @inode->i_size may became smaller while we |
@@ -1128,9 +1129,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
1128 | budgeted = 0; | 1129 | budgeted = 0; |
1129 | } | 1130 | } |
1130 | 1131 | ||
1131 | err = simple_setsize(inode, new_size); | 1132 | truncate_setsize(inode, new_size); |
1132 | if (err) | ||
1133 | goto out_budg; | ||
1134 | 1133 | ||
1135 | if (offset) { | 1134 | if (offset) { |
1136 | pgoff_t index = new_size >> PAGE_CACHE_SHIFT; | 1135 | pgoff_t index = new_size >> PAGE_CACHE_SHIFT; |
@@ -1217,16 +1216,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, | |||
1217 | 1216 | ||
1218 | if (attr->ia_valid & ATTR_SIZE) { | 1217 | if (attr->ia_valid & ATTR_SIZE) { |
1219 | dbg_gen("size %lld -> %lld", inode->i_size, new_size); | 1218 | dbg_gen("size %lld -> %lld", inode->i_size, new_size); |
1220 | err = simple_setsize(inode, new_size); | 1219 | truncate_setsize(inode, new_size); |
1221 | if (err) | ||
1222 | goto out; | ||
1223 | } | 1220 | } |
1224 | 1221 | ||
1225 | mutex_lock(&ui->ui_mutex); | 1222 | mutex_lock(&ui->ui_mutex); |
1226 | if (attr->ia_valid & ATTR_SIZE) { | 1223 | if (attr->ia_valid & ATTR_SIZE) { |
1227 | /* Truncation changes inode [mc]time */ | 1224 | /* Truncation changes inode [mc]time */ |
1228 | inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); | 1225 | inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); |
1229 | /* 'simple_setsize()' changed @i_size, update @ui_size */ | 1226 | /* 'truncate_setsize()' changed @i_size, update @ui_size */ |
1230 | ui->ui_size = inode->i_size; | 1227 | ui->ui_size = inode->i_size; |
1231 | } | 1228 | } |
1232 | 1229 | ||
@@ -1248,10 +1245,6 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, | |||
1248 | if (IS_SYNC(inode)) | 1245 | if (IS_SYNC(inode)) |
1249 | err = inode->i_sb->s_op->write_inode(inode, NULL); | 1246 | err = inode->i_sb->s_op->write_inode(inode, NULL); |
1250 | return err; | 1247 | return err; |
1251 | |||
1252 | out: | ||
1253 | ubifs_release_budget(c, &req); | ||
1254 | return err; | ||
1255 | } | 1248 | } |
1256 | 1249 | ||
1257 | int ubifs_setattr(struct dentry *dentry, struct iattr *attr) | 1250 | int ubifs_setattr(struct dentry *dentry, struct iattr *attr) |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5fc5a0988970..cd5900b85d38 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -327,7 +327,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
327 | return err; | 327 | return err; |
328 | } | 328 | } |
329 | 329 | ||
330 | static void ubifs_delete_inode(struct inode *inode) | 330 | static void ubifs_evict_inode(struct inode *inode) |
331 | { | 331 | { |
332 | int err; | 332 | int err; |
333 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 333 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
@@ -343,9 +343,12 @@ static void ubifs_delete_inode(struct inode *inode) | |||
343 | 343 | ||
344 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); | 344 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); |
345 | ubifs_assert(!atomic_read(&inode->i_count)); | 345 | ubifs_assert(!atomic_read(&inode->i_count)); |
346 | ubifs_assert(inode->i_nlink == 0); | ||
347 | 346 | ||
348 | truncate_inode_pages(&inode->i_data, 0); | 347 | truncate_inode_pages(&inode->i_data, 0); |
348 | |||
349 | if (inode->i_nlink) | ||
350 | goto done; | ||
351 | |||
349 | if (is_bad_inode(inode)) | 352 | if (is_bad_inode(inode)) |
350 | goto out; | 353 | goto out; |
351 | 354 | ||
@@ -367,7 +370,8 @@ out: | |||
367 | c->nospace = c->nospace_rp = 0; | 370 | c->nospace = c->nospace_rp = 0; |
368 | smp_wmb(); | 371 | smp_wmb(); |
369 | } | 372 | } |
370 | clear_inode(inode); | 373 | done: |
374 | end_writeback(inode); | ||
371 | } | 375 | } |
372 | 376 | ||
373 | static void ubifs_dirty_inode(struct inode *inode) | 377 | static void ubifs_dirty_inode(struct inode *inode) |
@@ -1826,7 +1830,7 @@ const struct super_operations ubifs_super_operations = { | |||
1826 | .destroy_inode = ubifs_destroy_inode, | 1830 | .destroy_inode = ubifs_destroy_inode, |
1827 | .put_super = ubifs_put_super, | 1831 | .put_super = ubifs_put_super, |
1828 | .write_inode = ubifs_write_inode, | 1832 | .write_inode = ubifs_write_inode, |
1829 | .delete_inode = ubifs_delete_inode, | 1833 | .evict_inode = ubifs_evict_inode, |
1830 | .statfs = ubifs_statfs, | 1834 | .statfs = ubifs_statfs, |
1831 | .dirty_inode = ubifs_dirty_inode, | 1835 | .dirty_inode = ubifs_dirty_inode, |
1832 | .remount_fs = ubifs_remount_fs, | 1836 | .remount_fs = ubifs_remount_fs, |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 04310878f449..0c9876b396dd 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb { | |||
379 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses | 379 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses |
380 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot | 380 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot |
381 | * make sure @inode->i_size is always changed under @ui_mutex, because it | 381 | * make sure @inode->i_size is always changed under @ui_mutex, because it |
382 | * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock | 382 | * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock |
383 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are | 383 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are |
384 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one | 384 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one |
385 | * could consider to rework locking and base it on "shadow" fields. | 385 | * could consider to rework locking and base it on "shadow" fields. |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 94e06d6bddbd..66b9e7e7e4c5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <linux/pagemap.h> | 36 | #include <linux/pagemap.h> |
37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
38 | #include <linux/aio.h> | 38 | #include <linux/aio.h> |
39 | #include <linux/smp_lock.h> | ||
40 | 39 | ||
41 | #include "udf_i.h" | 40 | #include "udf_i.h" |
42 | #include "udf_sb.h" | 41 | #include "udf_sb.h" |
@@ -228,6 +227,28 @@ const struct file_operations udf_file_operations = { | |||
228 | .llseek = generic_file_llseek, | 227 | .llseek = generic_file_llseek, |
229 | }; | 228 | }; |
230 | 229 | ||
230 | static int udf_setattr(struct dentry *dentry, struct iattr *attr) | ||
231 | { | ||
232 | struct inode *inode = dentry->d_inode; | ||
233 | int error; | ||
234 | |||
235 | error = inode_change_ok(inode, attr); | ||
236 | if (error) | ||
237 | return error; | ||
238 | |||
239 | if ((attr->ia_valid & ATTR_SIZE) && | ||
240 | attr->ia_size != i_size_read(inode)) { | ||
241 | error = vmtruncate(inode, attr->ia_size); | ||
242 | if (error) | ||
243 | return error; | ||
244 | } | ||
245 | |||
246 | setattr_copy(inode, attr); | ||
247 | mark_inode_dirty(inode); | ||
248 | return 0; | ||
249 | } | ||
250 | |||
231 | const struct inode_operations udf_file_inode_operations = { | 251 | const struct inode_operations udf_file_inode_operations = { |
252 | .setattr = udf_setattr, | ||
232 | .truncate = udf_truncate, | 253 | .truncate = udf_truncate, |
233 | }; | 254 | }; |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 18cd7111185d..75d9304d0dc3 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
@@ -31,8 +31,6 @@ void udf_free_inode(struct inode *inode) | |||
31 | struct super_block *sb = inode->i_sb; | 31 | struct super_block *sb = inode->i_sb; |
32 | struct udf_sb_info *sbi = UDF_SB(sb); | 32 | struct udf_sb_info *sbi = UDF_SB(sb); |
33 | 33 | ||
34 | clear_inode(inode); | ||
35 | |||
36 | mutex_lock(&sbi->s_alloc_mutex); | 34 | mutex_lock(&sbi->s_alloc_mutex); |
37 | if (sbi->s_lvid_bh) { | 35 | if (sbi->s_lvid_bh) { |
38 | struct logicalVolIntegrityDescImpUse *lvidiu = | 36 | struct logicalVolIntegrityDescImpUse *lvidiu = |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 124852bcf6fe..fc48f37aa2dd 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -68,37 +68,23 @@ static void udf_update_extents(struct inode *, | |||
68 | static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); | 68 | static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); |
69 | 69 | ||
70 | 70 | ||
71 | void udf_delete_inode(struct inode *inode) | 71 | void udf_evict_inode(struct inode *inode) |
72 | { | ||
73 | truncate_inode_pages(&inode->i_data, 0); | ||
74 | |||
75 | if (is_bad_inode(inode)) | ||
76 | goto no_delete; | ||
77 | |||
78 | inode->i_size = 0; | ||
79 | udf_truncate(inode); | ||
80 | lock_kernel(); | ||
81 | |||
82 | udf_update_inode(inode, IS_SYNC(inode)); | ||
83 | udf_free_inode(inode); | ||
84 | |||
85 | unlock_kernel(); | ||
86 | return; | ||
87 | |||
88 | no_delete: | ||
89 | clear_inode(inode); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * If we are going to release inode from memory, we truncate last inode extent | ||
94 | * to proper length. We could use drop_inode() but it's called under inode_lock | ||
95 | * and thus we cannot mark inode dirty there. We use clear_inode() but we have | ||
96 | * to make sure to write inode as it's not written automatically. | ||
97 | */ | ||
98 | void udf_clear_inode(struct inode *inode) | ||
99 | { | 72 | { |
100 | struct udf_inode_info *iinfo = UDF_I(inode); | 73 | struct udf_inode_info *iinfo = UDF_I(inode); |
74 | int want_delete = 0; | ||
75 | |||
76 | truncate_inode_pages(&inode->i_data, 0); | ||
101 | 77 | ||
78 | if (!inode->i_nlink && !is_bad_inode(inode)) { | ||
79 | want_delete = 1; | ||
80 | inode->i_size = 0; | ||
81 | udf_truncate(inode); | ||
82 | lock_kernel(); | ||
83 | udf_update_inode(inode, IS_SYNC(inode)); | ||
84 | unlock_kernel(); | ||
85 | } | ||
86 | invalidate_inode_buffers(inode); | ||
87 | end_writeback(inode); | ||
102 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && | 88 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && |
103 | inode->i_size != iinfo->i_lenExtents) { | 89 | inode->i_size != iinfo->i_lenExtents) { |
104 | printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " | 90 | printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " |
@@ -108,9 +94,13 @@ void udf_clear_inode(struct inode *inode) | |||
108 | (unsigned long long)inode->i_size, | 94 | (unsigned long long)inode->i_size, |
109 | (unsigned long long)iinfo->i_lenExtents); | 95 | (unsigned long long)iinfo->i_lenExtents); |
110 | } | 96 | } |
111 | |||
112 | kfree(iinfo->i_ext.i_data); | 97 | kfree(iinfo->i_ext.i_data); |
113 | iinfo->i_ext.i_data = NULL; | 98 | iinfo->i_ext.i_data = NULL; |
99 | if (want_delete) { | ||
100 | lock_kernel(); | ||
101 | udf_free_inode(inode); | ||
102 | unlock_kernel(); | ||
103 | } | ||
114 | } | 104 | } |
115 | 105 | ||
116 | static int udf_writepage(struct page *page, struct writeback_control *wbc) | 106 | static int udf_writepage(struct page *page, struct writeback_control *wbc) |
@@ -127,9 +117,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, | |||
127 | loff_t pos, unsigned len, unsigned flags, | 117 | loff_t pos, unsigned len, unsigned flags, |
128 | struct page **pagep, void **fsdata) | 118 | struct page **pagep, void **fsdata) |
129 | { | 119 | { |
130 | *pagep = NULL; | 120 | int ret; |
131 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 121 | |
132 | udf_get_block); | 122 | ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); |
123 | if (unlikely(ret)) { | ||
124 | loff_t isize = mapping->host->i_size; | ||
125 | if (pos + len > isize) | ||
126 | vmtruncate(mapping->host, isize); | ||
127 | } | ||
128 | |||
129 | return ret; | ||
133 | } | 130 | } |
134 | 131 | ||
135 | static sector_t udf_bmap(struct address_space *mapping, sector_t block) | 132 | static sector_t udf_bmap(struct address_space *mapping, sector_t block) |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 612d1e2e285a..65412d84a45d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -175,8 +175,7 @@ static const struct super_operations udf_sb_ops = { | |||
175 | .alloc_inode = udf_alloc_inode, | 175 | .alloc_inode = udf_alloc_inode, |
176 | .destroy_inode = udf_destroy_inode, | 176 | .destroy_inode = udf_destroy_inode, |
177 | .write_inode = udf_write_inode, | 177 | .write_inode = udf_write_inode, |
178 | .delete_inode = udf_delete_inode, | 178 | .evict_inode = udf_evict_inode, |
179 | .clear_inode = udf_clear_inode, | ||
180 | .put_super = udf_put_super, | 179 | .put_super = udf_put_super, |
181 | .sync_fs = udf_sync_fs, | 180 | .sync_fs = udf_sync_fs, |
182 | .statfs = udf_statfs, | 181 | .statfs = udf_statfs, |
@@ -1579,9 +1578,7 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, | |||
1579 | { | 1578 | { |
1580 | struct anchorVolDescPtr *anchor; | 1579 | struct anchorVolDescPtr *anchor; |
1581 | long main_s, main_e, reserve_s, reserve_e; | 1580 | long main_s, main_e, reserve_s, reserve_e; |
1582 | struct udf_sb_info *sbi; | ||
1583 | 1581 | ||
1584 | sbi = UDF_SB(sb); | ||
1585 | anchor = (struct anchorVolDescPtr *)bh->b_data; | 1582 | anchor = (struct anchorVolDescPtr *)bh->b_data; |
1586 | 1583 | ||
1587 | /* Locate the main sequence */ | 1584 | /* Locate the main sequence */ |
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 2bac0354891f..6995ab1f4305 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h | |||
@@ -139,8 +139,7 @@ extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); | |||
139 | extern struct buffer_head *udf_bread(struct inode *, int, int, int *); | 139 | extern struct buffer_head *udf_bread(struct inode *, int, int, int *); |
140 | extern void udf_truncate(struct inode *); | 140 | extern void udf_truncate(struct inode *); |
141 | extern void udf_read_inode(struct inode *); | 141 | extern void udf_read_inode(struct inode *); |
142 | extern void udf_delete_inode(struct inode *); | 142 | extern void udf_evict_inode(struct inode *); |
143 | extern void udf_clear_inode(struct inode *); | ||
144 | extern int udf_write_inode(struct inode *, struct writeback_control *wbc); | 143 | extern int udf_write_inode(struct inode *, struct writeback_control *wbc); |
145 | extern long udf_block_map(struct inode *, sector_t); | 144 | extern long udf_block_map(struct inode *, sector_t); |
146 | extern int udf_extend_file(struct inode *, struct extent_position *, | 145 | extern int udf_extend_file(struct inode *, struct extent_position *, |
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index ec784756dc65..dbc90994715a 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c | |||
@@ -95,8 +95,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, | |||
95 | int err; | 95 | int err; |
96 | 96 | ||
97 | lock_page(page); | 97 | lock_page(page); |
98 | err = __ufs_write_begin(NULL, page->mapping, pos, len, | 98 | err = ufs_prepare_chunk(page, pos, len); |
99 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
100 | BUG_ON(err); | 99 | BUG_ON(err); |
101 | 100 | ||
102 | de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); | 101 | de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); |
@@ -381,8 +380,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode) | |||
381 | got_it: | 380 | got_it: |
382 | pos = page_offset(page) + | 381 | pos = page_offset(page) + |
383 | (char*)de - (char*)page_address(page); | 382 | (char*)de - (char*)page_address(page); |
384 | err = __ufs_write_begin(NULL, page->mapping, pos, rec_len, | 383 | err = ufs_prepare_chunk(page, pos, rec_len); |
385 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
386 | if (err) | 384 | if (err) |
387 | goto out_unlock; | 385 | goto out_unlock; |
388 | if (de->d_ino) { | 386 | if (de->d_ino) { |
@@ -518,7 +516,6 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, | |||
518 | struct page * page) | 516 | struct page * page) |
519 | { | 517 | { |
520 | struct super_block *sb = inode->i_sb; | 518 | struct super_block *sb = inode->i_sb; |
521 | struct address_space *mapping = page->mapping; | ||
522 | char *kaddr = page_address(page); | 519 | char *kaddr = page_address(page); |
523 | unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); | 520 | unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); |
524 | unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); | 521 | unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); |
@@ -549,8 +546,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir, | |||
549 | 546 | ||
550 | pos = page_offset(page) + from; | 547 | pos = page_offset(page) + from; |
551 | lock_page(page); | 548 | lock_page(page); |
552 | err = __ufs_write_begin(NULL, mapping, pos, to - from, | 549 | err = ufs_prepare_chunk(page, pos, to - from); |
553 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
554 | BUG_ON(err); | 550 | BUG_ON(err); |
555 | if (pde) | 551 | if (pde) |
556 | pde->d_reclen = cpu_to_fs16(sb, to - from); | 552 | pde->d_reclen = cpu_to_fs16(sb, to - from); |
@@ -577,8 +573,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir) | |||
577 | if (!page) | 573 | if (!page) |
578 | return -ENOMEM; | 574 | return -ENOMEM; |
579 | 575 | ||
580 | err = __ufs_write_begin(NULL, mapping, 0, chunk_size, | 576 | err = ufs_prepare_chunk(page, 0, chunk_size); |
581 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | ||
582 | if (err) { | 577 | if (err) { |
583 | unlock_page(page); | 578 | unlock_page(page); |
584 | goto fail; | 579 | goto fail; |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 594480e537d2..428017e018fe 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -94,8 +94,6 @@ void ufs_free_inode (struct inode * inode) | |||
94 | 94 | ||
95 | is_directory = S_ISDIR(inode->i_mode); | 95 | is_directory = S_ISDIR(inode->i_mode); |
96 | 96 | ||
97 | clear_inode (inode); | ||
98 | |||
99 | if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) | 97 | if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) |
100 | ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); | 98 | ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); |
101 | else { | 99 | else { |
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 73fe773aa034..2b251f2093af 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -558,20 +558,26 @@ static int ufs_readpage(struct file *file, struct page *page) | |||
558 | return block_read_full_page(page,ufs_getfrag_block); | 558 | return block_read_full_page(page,ufs_getfrag_block); |
559 | } | 559 | } |
560 | 560 | ||
561 | int __ufs_write_begin(struct file *file, struct address_space *mapping, | 561 | int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len) |
562 | loff_t pos, unsigned len, unsigned flags, | ||
563 | struct page **pagep, void **fsdata) | ||
564 | { | 562 | { |
565 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 563 | return __block_write_begin(page, pos, len, ufs_getfrag_block); |
566 | ufs_getfrag_block); | ||
567 | } | 564 | } |
568 | 565 | ||
569 | static int ufs_write_begin(struct file *file, struct address_space *mapping, | 566 | static int ufs_write_begin(struct file *file, struct address_space *mapping, |
570 | loff_t pos, unsigned len, unsigned flags, | 567 | loff_t pos, unsigned len, unsigned flags, |
571 | struct page **pagep, void **fsdata) | 568 | struct page **pagep, void **fsdata) |
572 | { | 569 | { |
573 | *pagep = NULL; | 570 | int ret; |
574 | return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); | 571 | |
572 | ret = block_write_begin(mapping, pos, len, flags, pagep, | ||
573 | ufs_getfrag_block); | ||
574 | if (unlikely(ret)) { | ||
575 | loff_t isize = mapping->host->i_size; | ||
576 | if (pos + len > isize) | ||
577 | vmtruncate(mapping->host, isize); | ||
578 | } | ||
579 | |||
580 | return ret; | ||
575 | } | 581 | } |
576 | 582 | ||
577 | static sector_t ufs_bmap(struct address_space *mapping, sector_t block) | 583 | static sector_t ufs_bmap(struct address_space *mapping, sector_t block) |
@@ -905,24 +911,33 @@ int ufs_sync_inode (struct inode *inode) | |||
905 | return ufs_update_inode (inode, 1); | 911 | return ufs_update_inode (inode, 1); |
906 | } | 912 | } |
907 | 913 | ||
908 | void ufs_delete_inode (struct inode * inode) | 914 | void ufs_evict_inode(struct inode * inode) |
909 | { | 915 | { |
910 | loff_t old_i_size; | 916 | int want_delete = 0; |
917 | |||
918 | if (!inode->i_nlink && !is_bad_inode(inode)) | ||
919 | want_delete = 1; | ||
911 | 920 | ||
912 | truncate_inode_pages(&inode->i_data, 0); | 921 | truncate_inode_pages(&inode->i_data, 0); |
913 | if (is_bad_inode(inode)) | 922 | if (want_delete) { |
914 | goto no_delete; | 923 | loff_t old_i_size; |
915 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ | 924 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ |
916 | lock_kernel(); | 925 | lock_kernel(); |
917 | mark_inode_dirty(inode); | 926 | mark_inode_dirty(inode); |
918 | ufs_update_inode(inode, IS_SYNC(inode)); | 927 | ufs_update_inode(inode, IS_SYNC(inode)); |
919 | old_i_size = inode->i_size; | 928 | old_i_size = inode->i_size; |
920 | inode->i_size = 0; | 929 | inode->i_size = 0; |
921 | if (inode->i_blocks && ufs_truncate(inode, old_i_size)) | 930 | if (inode->i_blocks && ufs_truncate(inode, old_i_size)) |
922 | ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); | 931 | ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); |
923 | ufs_free_inode (inode); | 932 | unlock_kernel(); |
924 | unlock_kernel(); | 933 | } |
925 | return; | 934 | |
926 | no_delete: | 935 | invalidate_inode_buffers(inode); |
927 | clear_inode(inode); /* We must guarantee clearing of inode... */ | 936 | end_writeback(inode); |
937 | |||
938 | if (want_delete) { | ||
939 | lock_kernel(); | ||
940 | ufs_free_inode (inode); | ||
941 | unlock_kernel(); | ||
942 | } | ||
928 | } | 943 | } |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3ec5a9eb6efb..d510c1b91817 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -1440,7 +1440,7 @@ static const struct super_operations ufs_super_ops = { | |||
1440 | .alloc_inode = ufs_alloc_inode, | 1440 | .alloc_inode = ufs_alloc_inode, |
1441 | .destroy_inode = ufs_destroy_inode, | 1441 | .destroy_inode = ufs_destroy_inode, |
1442 | .write_inode = ufs_write_inode, | 1442 | .write_inode = ufs_write_inode, |
1443 | .delete_inode = ufs_delete_inode, | 1443 | .evict_inode = ufs_evict_inode, |
1444 | .put_super = ufs_put_super, | 1444 | .put_super = ufs_put_super, |
1445 | .write_super = ufs_write_super, | 1445 | .write_super = ufs_write_super, |
1446 | .sync_fs = ufs_sync_fs, | 1446 | .sync_fs = ufs_sync_fs, |
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 589e01a465ba..34d5cb135320 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c | |||
@@ -500,11 +500,6 @@ out: | |||
500 | return err; | 500 | return err; |
501 | } | 501 | } |
502 | 502 | ||
503 | /* | ||
504 | * TODO: | ||
505 | * - truncate case should use proper ordering instead of using | ||
506 | * simple_setsize | ||
507 | */ | ||
508 | int ufs_setattr(struct dentry *dentry, struct iattr *attr) | 503 | int ufs_setattr(struct dentry *dentry, struct iattr *attr) |
509 | { | 504 | { |
510 | struct inode *inode = dentry->d_inode; | 505 | struct inode *inode = dentry->d_inode; |
@@ -518,14 +513,17 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) | |||
518 | if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { | 513 | if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { |
519 | loff_t old_i_size = inode->i_size; | 514 | loff_t old_i_size = inode->i_size; |
520 | 515 | ||
521 | error = simple_setsize(inode, attr->ia_size); | 516 | /* XXX(truncate): truncate_setsize should be called last */ |
522 | if (error) | 517 | truncate_setsize(inode, attr->ia_size); |
523 | return error; | 518 | |
524 | error = ufs_truncate(inode, old_i_size); | 519 | error = ufs_truncate(inode, old_i_size); |
525 | if (error) | 520 | if (error) |
526 | return error; | 521 | return error; |
527 | } | 522 | } |
528 | return inode_setattr(inode, attr); | 523 | |
524 | setattr_copy(inode, attr); | ||
525 | mark_inode_dirty(inode); | ||
526 | return 0; | ||
529 | } | 527 | } |
530 | 528 | ||
531 | const struct inode_operations ufs_file_inode_operations = { | 529 | const struct inode_operations ufs_file_inode_operations = { |
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 179ae6b3180a..c08782e1b48a 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h | |||
@@ -108,7 +108,7 @@ extern struct inode * ufs_new_inode (struct inode *, int); | |||
108 | extern struct inode *ufs_iget(struct super_block *, unsigned long); | 108 | extern struct inode *ufs_iget(struct super_block *, unsigned long); |
109 | extern int ufs_write_inode (struct inode *, struct writeback_control *); | 109 | extern int ufs_write_inode (struct inode *, struct writeback_control *); |
110 | extern int ufs_sync_inode (struct inode *); | 110 | extern int ufs_sync_inode (struct inode *); |
111 | extern void ufs_delete_inode (struct inode *); | 111 | extern void ufs_evict_inode (struct inode *); |
112 | extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); | 112 | extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); |
113 | extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); | 113 | extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); |
114 | 114 | ||
diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 23ceed8c8fb9..0466036912f1 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h | |||
@@ -257,9 +257,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value) | |||
257 | 257 | ||
258 | extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); | 258 | extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); |
259 | extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); | 259 | extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); |
260 | extern int __ufs_write_begin(struct file *file, struct address_space *mapping, | 260 | extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len); |
261 | loff_t pos, unsigned len, unsigned flags, | ||
262 | struct page **pagep, void **fsdata); | ||
263 | 261 | ||
264 | /* | 262 | /* |
265 | * These functions manipulate ufs buffers | 263 | * These functions manipulate ufs buffers |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index d24e78f32f3e..15412fe15c3a 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1478,22 +1478,38 @@ xfs_vm_direct_IO( | |||
1478 | if (rw & WRITE) { | 1478 | if (rw & WRITE) { |
1479 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); | 1479 | iocb->private = xfs_alloc_ioend(inode, IO_NEW); |
1480 | 1480 | ||
1481 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, | 1481 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1482 | offset, nr_segs, | 1482 | offset, nr_segs, |
1483 | xfs_get_blocks_direct, | 1483 | xfs_get_blocks_direct, |
1484 | xfs_end_io_direct_write); | 1484 | xfs_end_io_direct_write, NULL, 0); |
1485 | if (ret != -EIOCBQUEUED && iocb->private) | 1485 | if (ret != -EIOCBQUEUED && iocb->private) |
1486 | xfs_destroy_ioend(iocb->private); | 1486 | xfs_destroy_ioend(iocb->private); |
1487 | } else { | 1487 | } else { |
1488 | ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, | 1488 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1489 | offset, nr_segs, | 1489 | offset, nr_segs, |
1490 | xfs_get_blocks_direct, | 1490 | xfs_get_blocks_direct, |
1491 | NULL); | 1491 | NULL, NULL, 0); |
1492 | } | 1492 | } |
1493 | 1493 | ||
1494 | return ret; | 1494 | return ret; |
1495 | } | 1495 | } |
1496 | 1496 | ||
1497 | STATIC void | ||
1498 | xfs_vm_write_failed( | ||
1499 | struct address_space *mapping, | ||
1500 | loff_t to) | ||
1501 | { | ||
1502 | struct inode *inode = mapping->host; | ||
1503 | |||
1504 | if (to > inode->i_size) { | ||
1505 | struct iattr ia = { | ||
1506 | .ia_valid = ATTR_SIZE | ATTR_FORCE, | ||
1507 | .ia_size = inode->i_size, | ||
1508 | }; | ||
1509 | xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); | ||
1510 | } | ||
1511 | } | ||
1512 | |||
1497 | STATIC int | 1513 | STATIC int |
1498 | xfs_vm_write_begin( | 1514 | xfs_vm_write_begin( |
1499 | struct file *file, | 1515 | struct file *file, |
@@ -1504,9 +1520,31 @@ xfs_vm_write_begin( | |||
1504 | struct page **pagep, | 1520 | struct page **pagep, |
1505 | void **fsdata) | 1521 | void **fsdata) |
1506 | { | 1522 | { |
1507 | *pagep = NULL; | 1523 | int ret; |
1508 | return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, | 1524 | |
1509 | pagep, fsdata, xfs_get_blocks); | 1525 | ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, |
1526 | pagep, xfs_get_blocks); | ||
1527 | if (unlikely(ret)) | ||
1528 | xfs_vm_write_failed(mapping, pos + len); | ||
1529 | return ret; | ||
1530 | } | ||
1531 | |||
1532 | STATIC int | ||
1533 | xfs_vm_write_end( | ||
1534 | struct file *file, | ||
1535 | struct address_space *mapping, | ||
1536 | loff_t pos, | ||
1537 | unsigned len, | ||
1538 | unsigned copied, | ||
1539 | struct page *page, | ||
1540 | void *fsdata) | ||
1541 | { | ||
1542 | int ret; | ||
1543 | |||
1544 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | ||
1545 | if (unlikely(ret < len)) | ||
1546 | xfs_vm_write_failed(mapping, pos + len); | ||
1547 | return ret; | ||
1510 | } | 1548 | } |
1511 | 1549 | ||
1512 | STATIC sector_t | 1550 | STATIC sector_t |
@@ -1551,7 +1589,7 @@ const struct address_space_operations xfs_address_space_operations = { | |||
1551 | .releasepage = xfs_vm_releasepage, | 1589 | .releasepage = xfs_vm_releasepage, |
1552 | .invalidatepage = xfs_vm_invalidatepage, | 1590 | .invalidatepage = xfs_vm_invalidatepage, |
1553 | .write_begin = xfs_vm_write_begin, | 1591 | .write_begin = xfs_vm_write_begin, |
1554 | .write_end = generic_write_end, | 1592 | .write_end = xfs_vm_write_end, |
1555 | .bmap = xfs_vm_bmap, | 1593 | .bmap = xfs_vm_bmap, |
1556 | .direct_IO = xfs_vm_direct_IO, | 1594 | .direct_IO = xfs_vm_direct_IO, |
1557 | .migratepage = buffer_migrate_page, | 1595 | .migratepage = buffer_migrate_page, |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 536b81e63a3d..68be25dcd301 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -80,7 +80,7 @@ xfs_mark_inode_dirty_sync( | |||
80 | { | 80 | { |
81 | struct inode *inode = VFS_I(ip); | 81 | struct inode *inode = VFS_I(ip); |
82 | 82 | ||
83 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) | 83 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) |
84 | mark_inode_dirty_sync(inode); | 84 | mark_inode_dirty_sync(inode); |
85 | } | 85 | } |
86 | 86 | ||
@@ -90,7 +90,7 @@ xfs_mark_inode_dirty( | |||
90 | { | 90 | { |
91 | struct inode *inode = VFS_I(ip); | 91 | struct inode *inode = VFS_I(ip); |
92 | 92 | ||
93 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) | 93 | if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) |
94 | mark_inode_dirty(inode); | 94 | mark_inode_dirty(inode); |
95 | } | 95 | } |
96 | 96 | ||
@@ -540,21 +540,6 @@ xfs_vn_setattr( | |||
540 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); | 540 | return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); |
541 | } | 541 | } |
542 | 542 | ||
543 | /* | ||
544 | * block_truncate_page can return an error, but we can't propagate it | ||
545 | * at all here. Leave a complaint + stack trace in the syslog because | ||
546 | * this could be bad. If it is bad, we need to propagate the error further. | ||
547 | */ | ||
548 | STATIC void | ||
549 | xfs_vn_truncate( | ||
550 | struct inode *inode) | ||
551 | { | ||
552 | int error; | ||
553 | error = block_truncate_page(inode->i_mapping, inode->i_size, | ||
554 | xfs_get_blocks); | ||
555 | WARN_ON(error); | ||
556 | } | ||
557 | |||
558 | STATIC long | 543 | STATIC long |
559 | xfs_vn_fallocate( | 544 | xfs_vn_fallocate( |
560 | struct inode *inode, | 545 | struct inode *inode, |
@@ -694,7 +679,6 @@ xfs_vn_fiemap( | |||
694 | 679 | ||
695 | static const struct inode_operations xfs_inode_operations = { | 680 | static const struct inode_operations xfs_inode_operations = { |
696 | .check_acl = xfs_check_acl, | 681 | .check_acl = xfs_check_acl, |
697 | .truncate = xfs_vn_truncate, | ||
698 | .getattr = xfs_vn_getattr, | 682 | .getattr = xfs_vn_getattr, |
699 | .setattr = xfs_vn_setattr, | 683 | .setattr = xfs_vn_setattr, |
700 | .setxattr = generic_setxattr, | 684 | .setxattr = generic_setxattr, |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 998a9d7fb9c8..2fa0bd9ebc7f 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -156,8 +156,6 @@ | |||
156 | */ | 156 | */ |
157 | #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) | 157 | #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) |
158 | #define xfs_stack_trace() dump_stack() | 158 | #define xfs_stack_trace() dump_stack() |
159 | #define xfs_itruncate_data(ip, off) \ | ||
160 | (-vmtruncate(VFS_I(ip), (off))) | ||
161 | 159 | ||
162 | 160 | ||
163 | /* Move the kernel do_div definition off to one side */ | 161 | /* Move the kernel do_div definition off to one side */ |
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index bfd5ac9d1f6f..29b9d642e93d 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c | |||
@@ -68,15 +68,15 @@ xfs_fs_set_xstate( | |||
68 | if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) | 68 | if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) |
69 | return -ENOSYS; | 69 | return -ENOSYS; |
70 | 70 | ||
71 | if (uflags & XFS_QUOTA_UDQ_ACCT) | 71 | if (uflags & FS_QUOTA_UDQ_ACCT) |
72 | flags |= XFS_UQUOTA_ACCT; | 72 | flags |= XFS_UQUOTA_ACCT; |
73 | if (uflags & XFS_QUOTA_PDQ_ACCT) | 73 | if (uflags & FS_QUOTA_PDQ_ACCT) |
74 | flags |= XFS_PQUOTA_ACCT; | 74 | flags |= XFS_PQUOTA_ACCT; |
75 | if (uflags & XFS_QUOTA_GDQ_ACCT) | 75 | if (uflags & FS_QUOTA_GDQ_ACCT) |
76 | flags |= XFS_GQUOTA_ACCT; | 76 | flags |= XFS_GQUOTA_ACCT; |
77 | if (uflags & XFS_QUOTA_UDQ_ENFD) | 77 | if (uflags & FS_QUOTA_UDQ_ENFD) |
78 | flags |= XFS_UQUOTA_ENFD; | 78 | flags |= XFS_UQUOTA_ENFD; |
79 | if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD)) | 79 | if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) |
80 | flags |= XFS_OQUOTA_ENFD; | 80 | flags |= XFS_OQUOTA_ENFD; |
81 | 81 | ||
82 | switch (op) { | 82 | switch (op) { |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 758df94690ed..15c35b62ff14 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1100,13 +1100,15 @@ xfs_fs_write_inode( | |||
1100 | } | 1100 | } |
1101 | 1101 | ||
1102 | STATIC void | 1102 | STATIC void |
1103 | xfs_fs_clear_inode( | 1103 | xfs_fs_evict_inode( |
1104 | struct inode *inode) | 1104 | struct inode *inode) |
1105 | { | 1105 | { |
1106 | xfs_inode_t *ip = XFS_I(inode); | 1106 | xfs_inode_t *ip = XFS_I(inode); |
1107 | 1107 | ||
1108 | trace_xfs_clear_inode(ip); | 1108 | trace_xfs_evict_inode(ip); |
1109 | 1109 | ||
1110 | truncate_inode_pages(&inode->i_data, 0); | ||
1111 | end_writeback(inode); | ||
1110 | XFS_STATS_INC(vn_rele); | 1112 | XFS_STATS_INC(vn_rele); |
1111 | XFS_STATS_INC(vn_remove); | 1113 | XFS_STATS_INC(vn_remove); |
1112 | XFS_STATS_DEC(vn_active); | 1114 | XFS_STATS_DEC(vn_active); |
@@ -1622,7 +1624,7 @@ static const struct super_operations xfs_super_operations = { | |||
1622 | .destroy_inode = xfs_fs_destroy_inode, | 1624 | .destroy_inode = xfs_fs_destroy_inode, |
1623 | .dirty_inode = xfs_fs_dirty_inode, | 1625 | .dirty_inode = xfs_fs_dirty_inode, |
1624 | .write_inode = xfs_fs_write_inode, | 1626 | .write_inode = xfs_fs_write_inode, |
1625 | .clear_inode = xfs_fs_clear_inode, | 1627 | .evict_inode = xfs_fs_evict_inode, |
1626 | .put_super = xfs_fs_put_super, | 1628 | .put_super = xfs_fs_put_super, |
1627 | .sync_fs = xfs_fs_sync_fs, | 1629 | .sync_fs = xfs_fs_sync_fs, |
1628 | .freeze_fs = xfs_fs_freeze, | 1630 | .freeze_fs = xfs_fs_freeze, |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index c657cdca2cd2..be5dffd282a1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -581,7 +581,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr); | |||
581 | DEFINE_INODE_EVENT(xfs_file_fsync); | 581 | DEFINE_INODE_EVENT(xfs_file_fsync); |
582 | DEFINE_INODE_EVENT(xfs_destroy_inode); | 582 | DEFINE_INODE_EVENT(xfs_destroy_inode); |
583 | DEFINE_INODE_EVENT(xfs_write_inode); | 583 | DEFINE_INODE_EVENT(xfs_write_inode); |
584 | DEFINE_INODE_EVENT(xfs_clear_inode); | 584 | DEFINE_INODE_EVENT(xfs_evict_inode); |
585 | 585 | ||
586 | DEFINE_INODE_EVENT(xfs_dquot_dqalloc); | 586 | DEFINE_INODE_EVENT(xfs_dquot_dqalloc); |
587 | DEFINE_INODE_EVENT(xfs_dquot_dqdetach); | 587 | DEFINE_INODE_EVENT(xfs_dquot_dqdetach); |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index d257eb8557c4..45e5849df238 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -810,9 +810,9 @@ xfs_qm_export_dquot( | |||
810 | } | 810 | } |
811 | 811 | ||
812 | #ifdef DEBUG | 812 | #ifdef DEBUG |
813 | if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == XFS_USER_QUOTA) || | 813 | if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || |
814 | (XFS_IS_OQUOTA_ENFORCED(mp) && | 814 | (XFS_IS_OQUOTA_ENFORCED(mp) && |
815 | (dst->d_flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)))) && | 815 | (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && |
816 | dst->d_id != 0) { | 816 | dst->d_id != 0) { |
817 | if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && | 817 | if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && |
818 | (dst->d_blk_softlimit > 0)) { | 818 | (dst->d_blk_softlimit > 0)) { |
@@ -833,17 +833,17 @@ xfs_qm_export_qtype_flags( | |||
833 | /* | 833 | /* |
834 | * Can't be more than one, or none. | 834 | * Can't be more than one, or none. |
835 | */ | 835 | */ |
836 | ASSERT((flags & (XFS_PROJ_QUOTA | XFS_USER_QUOTA)) != | 836 | ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != |
837 | (XFS_PROJ_QUOTA | XFS_USER_QUOTA)); | 837 | (FS_PROJ_QUOTA | FS_USER_QUOTA)); |
838 | ASSERT((flags & (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)) != | 838 | ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != |
839 | (XFS_PROJ_QUOTA | XFS_GROUP_QUOTA)); | 839 | (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); |
840 | ASSERT((flags & (XFS_USER_QUOTA | XFS_GROUP_QUOTA)) != | 840 | ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != |
841 | (XFS_USER_QUOTA | XFS_GROUP_QUOTA)); | 841 | (FS_USER_QUOTA | FS_GROUP_QUOTA)); |
842 | ASSERT((flags & (XFS_PROJ_QUOTA|XFS_USER_QUOTA|XFS_GROUP_QUOTA)) != 0); | 842 | ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); |
843 | 843 | ||
844 | return (flags & XFS_DQ_USER) ? | 844 | return (flags & XFS_DQ_USER) ? |
845 | XFS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? | 845 | FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? |
846 | XFS_PROJ_QUOTA : XFS_GROUP_QUOTA; | 846 | FS_PROJ_QUOTA : FS_GROUP_QUOTA; |
847 | } | 847 | } |
848 | 848 | ||
849 | STATIC uint | 849 | STATIC uint |
@@ -854,16 +854,16 @@ xfs_qm_export_flags( | |||
854 | 854 | ||
855 | uflags = 0; | 855 | uflags = 0; |
856 | if (flags & XFS_UQUOTA_ACCT) | 856 | if (flags & XFS_UQUOTA_ACCT) |
857 | uflags |= XFS_QUOTA_UDQ_ACCT; | 857 | uflags |= FS_QUOTA_UDQ_ACCT; |
858 | if (flags & XFS_PQUOTA_ACCT) | 858 | if (flags & XFS_PQUOTA_ACCT) |
859 | uflags |= XFS_QUOTA_PDQ_ACCT; | 859 | uflags |= FS_QUOTA_PDQ_ACCT; |
860 | if (flags & XFS_GQUOTA_ACCT) | 860 | if (flags & XFS_GQUOTA_ACCT) |
861 | uflags |= XFS_QUOTA_GDQ_ACCT; | 861 | uflags |= FS_QUOTA_GDQ_ACCT; |
862 | if (flags & XFS_UQUOTA_ENFD) | 862 | if (flags & XFS_UQUOTA_ENFD) |
863 | uflags |= XFS_QUOTA_UDQ_ENFD; | 863 | uflags |= FS_QUOTA_UDQ_ENFD; |
864 | if (flags & (XFS_OQUOTA_ENFD)) { | 864 | if (flags & (XFS_OQUOTA_ENFD)) { |
865 | uflags |= (flags & XFS_GQUOTA_ACCT) ? | 865 | uflags |= (flags & XFS_GQUOTA_ACCT) ? |
866 | XFS_QUOTA_GDQ_ENFD : XFS_QUOTA_PDQ_ENFD; | 866 | FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; |
867 | } | 867 | } |
868 | return (uflags); | 868 | return (uflags); |
869 | } | 869 | } |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 3ac137dd531b..66d585c6917c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -221,8 +221,11 @@ xfs_setattr( | |||
221 | * transaction to modify the i_size. | 221 | * transaction to modify the i_size. |
222 | */ | 222 | */ |
223 | code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); | 223 | code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); |
224 | if (code) | ||
225 | goto error_return; | ||
224 | } | 226 | } |
225 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 227 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
228 | lock_flags &= ~XFS_ILOCK_EXCL; | ||
226 | 229 | ||
227 | /* | 230 | /* |
228 | * We are going to log the inode size change in this | 231 | * We are going to log the inode size change in this |
@@ -236,36 +239,35 @@ xfs_setattr( | |||
236 | * really care about here and prevents waiting for other data | 239 | * really care about here and prevents waiting for other data |
237 | * not within the range we care about here. | 240 | * not within the range we care about here. |
238 | */ | 241 | */ |
239 | if (!code && | 242 | if (ip->i_size != ip->i_d.di_size && |
240 | ip->i_size != ip->i_d.di_size && | ||
241 | iattr->ia_size > ip->i_d.di_size) { | 243 | iattr->ia_size > ip->i_d.di_size) { |
242 | code = xfs_flush_pages(ip, | 244 | code = xfs_flush_pages(ip, |
243 | ip->i_d.di_size, iattr->ia_size, | 245 | ip->i_d.di_size, iattr->ia_size, |
244 | XBF_ASYNC, FI_NONE); | 246 | XBF_ASYNC, FI_NONE); |
247 | if (code) | ||
248 | goto error_return; | ||
245 | } | 249 | } |
246 | 250 | ||
247 | /* wait for all I/O to complete */ | 251 | /* wait for all I/O to complete */ |
248 | xfs_ioend_wait(ip); | 252 | xfs_ioend_wait(ip); |
249 | 253 | ||
250 | if (!code) | 254 | code = -block_truncate_page(inode->i_mapping, iattr->ia_size, |
251 | code = xfs_itruncate_data(ip, iattr->ia_size); | 255 | xfs_get_blocks); |
252 | if (code) { | 256 | if (code) |
253 | ASSERT(tp == NULL); | ||
254 | lock_flags &= ~XFS_ILOCK_EXCL; | ||
255 | ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock); | ||
256 | goto error_return; | 257 | goto error_return; |
257 | } | 258 | |
258 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); | 259 | tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); |
259 | if ((code = xfs_trans_reserve(tp, 0, | 260 | code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, |
260 | XFS_ITRUNCATE_LOG_RES(mp), 0, | 261 | XFS_TRANS_PERM_LOG_RES, |
261 | XFS_TRANS_PERM_LOG_RES, | 262 | XFS_ITRUNCATE_LOG_COUNT); |
262 | XFS_ITRUNCATE_LOG_COUNT))) { | 263 | if (code) |
263 | xfs_trans_cancel(tp, 0); | 264 | goto error_return; |
264 | if (need_iolock) | 265 | |
265 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 266 | truncate_setsize(inode, iattr->ia_size); |
266 | return code; | 267 | |
267 | } | ||
268 | commit_flags = XFS_TRANS_RELEASE_LOG_RES; | 268 | commit_flags = XFS_TRANS_RELEASE_LOG_RES; |
269 | lock_flags |= XFS_ILOCK_EXCL; | ||
270 | |||
269 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 271 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
270 | 272 | ||
271 | xfs_trans_ijoin(tp, ip); | 273 | xfs_trans_ijoin(tp, ip); |