diff options
Diffstat (limited to 'fs')
109 files changed, 3339 insertions, 1835 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 814ac4e213a8..0a93dc1cb4ac 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config 9P_FS | 1 | config 9P_FS |
2 | tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" | 2 | tristate "Plan 9 Resource Sharing Support (9P2000)" |
3 | depends on INET && NET_9P && EXPERIMENTAL | 3 | depends on INET && NET_9P |
4 | help | 4 | help |
5 | If you say Y here, you will get experimental support for | 5 | If you say Y here, you will get experimental support for |
6 | Plan 9 resource sharing via the 9P2000 protocol. | 6 | Plan 9 resource sharing via the 9P2000 protocol. |
@@ -10,7 +10,6 @@ config 9P_FS | |||
10 | If unsure, say N. | 10 | If unsure, say N. |
11 | 11 | ||
12 | if 9P_FS | 12 | if 9P_FS |
13 | |||
14 | config 9P_FSCACHE | 13 | config 9P_FSCACHE |
15 | bool "Enable 9P client caching support (EXPERIMENTAL)" | 14 | bool "Enable 9P client caching support (EXPERIMENTAL)" |
16 | depends on EXPERIMENTAL | 15 | depends on EXPERIMENTAL |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7f6c67703195..8d7f3e69ae29 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -814,6 +814,7 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) | |||
814 | 814 | ||
815 | int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) | 815 | int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) |
816 | { | 816 | { |
817 | dentry_unhash(d); | ||
817 | return v9fs_remove(i, d, 1); | 818 | return v9fs_remove(i, d, 1); |
818 | } | 819 | } |
819 | 820 | ||
@@ -839,6 +840,9 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
839 | struct p9_fid *newdirfid; | 840 | struct p9_fid *newdirfid; |
840 | struct p9_wstat wstat; | 841 | struct p9_wstat wstat; |
841 | 842 | ||
843 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
844 | dentry_unhash(new_dentry); | ||
845 | |||
842 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 846 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); |
843 | retval = 0; | 847 | retval = 0; |
844 | old_inode = old_dentry->d_inode; | 848 | old_inode = old_dentry->d_inode; |
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 82a7c38ddad0..691c78f58bef 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c | |||
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
259 | if (IS_ERR(inode_fid)) { | 259 | if (IS_ERR(inode_fid)) { |
260 | err = PTR_ERR(inode_fid); | 260 | err = PTR_ERR(inode_fid); |
261 | mutex_unlock(&v9inode->v_mutex); | 261 | mutex_unlock(&v9inode->v_mutex); |
262 | goto error; | 262 | goto err_clunk_old_fid; |
263 | } | 263 | } |
264 | v9inode->writeback_fid = (void *) inode_fid; | 264 | v9inode->writeback_fid = (void *) inode_fid; |
265 | } | 265 | } |
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
267 | /* Since we are opening a file, assign the open fid to the file */ | 267 | /* Since we are opening a file, assign the open fid to the file */ |
268 | filp = lookup_instantiate_filp(nd, dentry, generic_file_open); | 268 | filp = lookup_instantiate_filp(nd, dentry, generic_file_open); |
269 | if (IS_ERR(filp)) { | 269 | if (IS_ERR(filp)) { |
270 | p9_client_clunk(ofid); | 270 | err = PTR_ERR(filp); |
271 | return PTR_ERR(filp); | 271 | goto err_clunk_old_fid; |
272 | } | 272 | } |
273 | filp->private_data = ofid; | 273 | filp->private_data = ofid; |
274 | #ifdef CONFIG_9P_FSCACHE | 274 | #ifdef CONFIG_9P_FSCACHE |
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
278 | return 0; | 278 | return 0; |
279 | 279 | ||
280 | error: | 280 | error: |
281 | if (ofid) | ||
282 | p9_client_clunk(ofid); | ||
283 | if (fid) | 281 | if (fid) |
284 | p9_client_clunk(fid); | 282 | p9_client_clunk(fid); |
283 | err_clunk_old_fid: | ||
284 | if (ofid) | ||
285 | p9_client_clunk(ofid); | ||
285 | return err; | 286 | return err; |
286 | } | 287 | } |
287 | 288 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index f3aa9b08b228..19891aab9c6e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -47,7 +47,7 @@ config FS_POSIX_ACL | |||
47 | def_bool n | 47 | def_bool n |
48 | 48 | ||
49 | config EXPORTFS | 49 | config EXPORTFS |
50 | bool | 50 | tristate |
51 | 51 | ||
52 | config FILE_LOCKING | 52 | config FILE_LOCKING |
53 | bool "Enable POSIX file locking API" if EXPERT | 53 | bool "Enable POSIX file locking API" if EXPERT |
@@ -124,6 +124,7 @@ config TMPFS | |||
124 | config TMPFS_POSIX_ACL | 124 | config TMPFS_POSIX_ACL |
125 | bool "Tmpfs POSIX Access Control Lists" | 125 | bool "Tmpfs POSIX Access Control Lists" |
126 | depends on TMPFS | 126 | depends on TMPFS |
127 | select TMPFS_XATTR | ||
127 | select GENERIC_ACL | 128 | select GENERIC_ACL |
128 | help | 129 | help |
129 | POSIX Access Control Lists (ACLs) support permissions for users and | 130 | POSIX Access Control Lists (ACLs) support permissions for users and |
@@ -134,6 +135,22 @@ config TMPFS_POSIX_ACL | |||
134 | 135 | ||
135 | If you don't know what Access Control Lists are, say N. | 136 | If you don't know what Access Control Lists are, say N. |
136 | 137 | ||
138 | config TMPFS_XATTR | ||
139 | bool "Tmpfs extended attributes" | ||
140 | depends on TMPFS | ||
141 | default n | ||
142 | help | ||
143 | Extended attributes are name:value pairs associated with inodes by | ||
144 | the kernel or by users (see the attr(5) manual page, or visit | ||
145 | <http://acl.bestbits.at/> for details). | ||
146 | |||
147 | Currently this enables support for the trusted.* and | ||
148 | security.* namespaces. | ||
149 | |||
150 | You need this for POSIX ACL support on tmpfs. | ||
151 | |||
152 | If unsure, say N. | ||
153 | |||
137 | config HUGETLBFS | 154 | config HUGETLBFS |
138 | bool "HugeTLB file system support" | 155 | bool "HugeTLB file system support" |
139 | depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ | 156 | depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ |
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index e3e9efc1fdd8..03330e2e390c 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c | |||
@@ -320,6 +320,8 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) | |||
320 | dentry->d_inode->i_ino, | 320 | dentry->d_inode->i_ino, |
321 | (int)dentry->d_name.len, dentry->d_name.name); | 321 | (int)dentry->d_name.len, dentry->d_name.name); |
322 | 322 | ||
323 | dentry_unhash(dentry); | ||
324 | |||
323 | return affs_remove_header(dentry); | 325 | return affs_remove_header(dentry); |
324 | } | 326 | } |
325 | 327 | ||
@@ -417,6 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
417 | struct buffer_head *bh = NULL; | 419 | struct buffer_head *bh = NULL; |
418 | int retval; | 420 | int retval; |
419 | 421 | ||
422 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
423 | dentry_unhash(new_dentry); | ||
424 | |||
420 | pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", | 425 | pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", |
421 | (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, | 426 | (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, |
422 | (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); | 427 | (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 20c106f24927..2c4e05160042 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -845,6 +845,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) | |||
845 | _enter("{%x:%u},{%s}", | 845 | _enter("{%x:%u},{%s}", |
846 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); | 846 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); |
847 | 847 | ||
848 | dentry_unhash(dentry); | ||
849 | |||
848 | ret = -ENAMETOOLONG; | 850 | ret = -ENAMETOOLONG; |
849 | if (dentry->d_name.len >= AFSNAMEMAX) | 851 | if (dentry->d_name.len >= AFSNAMEMAX) |
850 | goto error; | 852 | goto error; |
@@ -1146,6 +1148,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1146 | struct key *key; | 1148 | struct key *key; |
1147 | int ret; | 1149 | int ret; |
1148 | 1150 | ||
1151 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1152 | dentry_unhash(new_dentry); | ||
1153 | |||
1149 | vnode = AFS_FS_I(old_dentry->d_inode); | 1154 | vnode = AFS_FS_I(old_dentry->d_inode); |
1150 | orig_dvnode = AFS_FS_I(old_dir); | 1155 | orig_dvnode = AFS_FS_I(old_dir); |
1151 | new_dvnode = AFS_FS_I(new_dir); | 1156 | new_dvnode = AFS_FS_I(new_dir); |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index f55ae23b137e..87d95a8cddbc 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -583,6 +583,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
583 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 583 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
584 | return -EACCES; | 584 | return -EACCES; |
585 | 585 | ||
586 | dentry_unhash(dentry); | ||
587 | |||
586 | if (atomic_dec_and_test(&ino->count)) { | 588 | if (atomic_dec_and_test(&ino->count)) { |
587 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 589 | p_ino = autofs4_dentry_ino(dentry->d_parent); |
588 | if (p_ino && dentry->d_parent != dentry) | 590 | if (p_ino && dentry->d_parent != dentry) |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index b14cebfd9047..c7d1d06b0483 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -224,6 +224,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
224 | struct bfs_sb_info *info; | 224 | struct bfs_sb_info *info; |
225 | int error = -ENOENT; | 225 | int error = -ENOENT; |
226 | 226 | ||
227 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
228 | dentry_unhash(new_dentry); | ||
229 | |||
227 | old_bh = new_bh = NULL; | 230 | old_bh = new_bh = NULL; |
228 | old_inode = old_dentry->d_inode; | 231 | old_inode = old_dentry->d_inode; |
229 | if (S_ISDIR(old_inode->i_mode)) | 232 | if (S_ISDIR(old_inode->i_mode)) |
diff --git a/fs/block_dev.c b/fs/block_dev.c index bf9c7a720371..1f2b19978333 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1238,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |||
1238 | res = __blkdev_get(bdev, mode, 0); | 1238 | res = __blkdev_get(bdev, mode, 0); |
1239 | 1239 | ||
1240 | if (whole) { | 1240 | if (whole) { |
1241 | struct gendisk *disk = whole->bd_disk; | ||
1242 | |||
1241 | /* finish claiming */ | 1243 | /* finish claiming */ |
1242 | mutex_lock(&bdev->bd_mutex); | 1244 | mutex_lock(&bdev->bd_mutex); |
1243 | spin_lock(&bdev_lock); | 1245 | spin_lock(&bdev_lock); |
@@ -1264,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |||
1264 | spin_unlock(&bdev_lock); | 1266 | spin_unlock(&bdev_lock); |
1265 | 1267 | ||
1266 | /* | 1268 | /* |
1267 | * Block event polling for write claims. Any write | 1269 | * Block event polling for write claims if requested. Any |
1268 | * holder makes the write_holder state stick until all | 1270 | * write holder makes the write_holder state stick until |
1269 | * are released. This is good enough and tracking | 1271 | * all are released. This is good enough and tracking |
1270 | * individual writeable reference is too fragile given | 1272 | * individual writeable reference is too fragile given the |
1271 | * the way @mode is used in blkdev_get/put(). | 1273 | * way @mode is used in blkdev_get/put(). |
1272 | */ | 1274 | */ |
1273 | if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { | 1275 | if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && |
1276 | !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { | ||
1274 | bdev->bd_write_holder = true; | 1277 | bdev->bd_write_holder = true; |
1275 | disk_block_events(bdev->bd_disk); | 1278 | disk_block_events(disk); |
1276 | } | 1279 | } |
1277 | 1280 | ||
1278 | mutex_unlock(&bdev->bd_mutex); | 1281 | mutex_unlock(&bdev->bd_mutex); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 96fcfa522dab..4f9893243dae 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/writeback.h> | 11 | #include <linux/writeback.h> |
12 | #include <linux/pagevec.h> | 12 | #include <linux/pagevec.h> |
13 | #include <linux/prefetch.h> | 13 | #include <linux/prefetch.h> |
14 | #include <linux/cleancache.h> | ||
14 | #include "extent_io.h" | 15 | #include "extent_io.h" |
15 | #include "extent_map.h" | 16 | #include "extent_map.h" |
16 | #include "compat.h" | 17 | #include "compat.h" |
@@ -2016,6 +2017,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2016 | 2017 | ||
2017 | set_page_extent_mapped(page); | 2018 | set_page_extent_mapped(page); |
2018 | 2019 | ||
2020 | if (!PageUptodate(page)) { | ||
2021 | if (cleancache_get_page(page) == 0) { | ||
2022 | BUG_ON(blocksize != PAGE_SIZE); | ||
2023 | goto out; | ||
2024 | } | ||
2025 | } | ||
2026 | |||
2019 | end = page_end; | 2027 | end = page_end; |
2020 | while (1) { | 2028 | while (1) { |
2021 | lock_extent(tree, start, end, GFP_NOFS); | 2029 | lock_extent(tree, start, end, GFP_NOFS); |
@@ -2149,6 +2157,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2149 | cur = cur + iosize; | 2157 | cur = cur + iosize; |
2150 | page_offset += iosize; | 2158 | page_offset += iosize; |
2151 | } | 2159 | } |
2160 | out: | ||
2152 | if (!nr) { | 2161 | if (!nr) { |
2153 | if (!PageError(page)) | 2162 | if (!PageError(page)) |
2154 | SetPageUptodate(page); | 2163 | SetPageUptodate(page); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ac712efcdf2..be4ffa12f3ef 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/miscdevice.h> | 39 | #include <linux/miscdevice.h> |
40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/cleancache.h> | ||
42 | #include "compat.h" | 43 | #include "compat.h" |
43 | #include "ctree.h" | 44 | #include "ctree.h" |
44 | #include "disk-io.h" | 45 | #include "disk-io.h" |
@@ -624,6 +625,7 @@ static int btrfs_fill_super(struct super_block *sb, | |||
624 | sb->s_root = root_dentry; | 625 | sb->s_root = root_dentry; |
625 | 626 | ||
626 | save_mount_options(sb, data); | 627 | save_mount_options(sb, data); |
628 | cleancache_init_fs(sb); | ||
627 | return 0; | 629 | return 0; |
628 | 630 | ||
629 | fail_close: | 631 | fail_close: |
diff --git a/fs/buffer.c b/fs/buffer.c index a08bb8e61c6f..698c6b2cc462 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/bitops.h> | 41 | #include <linux/bitops.h> |
42 | #include <linux/mpage.h> | 42 | #include <linux/mpage.h> |
43 | #include <linux/bit_spinlock.h> | 43 | #include <linux/bit_spinlock.h> |
44 | #include <linux/cleancache.h> | ||
44 | 45 | ||
45 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); | 46 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); |
46 | 47 | ||
@@ -269,6 +270,10 @@ void invalidate_bdev(struct block_device *bdev) | |||
269 | invalidate_bh_lrus(); | 270 | invalidate_bh_lrus(); |
270 | lru_add_drain_all(); /* make sure all lru add caches are flushed */ | 271 | lru_add_drain_all(); /* make sure all lru add caches are flushed */ |
271 | invalidate_mapping_pages(mapping, 0, -1); | 272 | invalidate_mapping_pages(mapping, 0, -1); |
273 | /* 99% of the time, we don't need to flush the cleancache on the bdev. | ||
274 | * But, for the strange corners, lets be cautious | ||
275 | */ | ||
276 | cleancache_flush_inode(mapping); | ||
272 | } | 277 | } |
273 | EXPORT_SYMBOL(invalidate_bdev); | 278 | EXPORT_SYMBOL(invalidate_bdev); |
274 | 279 | ||
@@ -2331,24 +2336,26 @@ EXPORT_SYMBOL(block_commit_write); | |||
2331 | * page lock we can determine safely if the page is beyond EOF. If it is not | 2336 | * page lock we can determine safely if the page is beyond EOF. If it is not |
2332 | * beyond EOF, then the page is guaranteed safe against truncation until we | 2337 | * beyond EOF, then the page is guaranteed safe against truncation until we |
2333 | * unlock the page. | 2338 | * unlock the page. |
2339 | * | ||
2340 | * Direct callers of this function should call vfs_check_frozen() so that page | ||
2341 | * fault does not busyloop until the fs is thawed. | ||
2334 | */ | 2342 | */ |
2335 | int | 2343 | int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, |
2336 | block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | 2344 | get_block_t get_block) |
2337 | get_block_t get_block) | ||
2338 | { | 2345 | { |
2339 | struct page *page = vmf->page; | 2346 | struct page *page = vmf->page; |
2340 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 2347 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
2341 | unsigned long end; | 2348 | unsigned long end; |
2342 | loff_t size; | 2349 | loff_t size; |
2343 | int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 2350 | int ret; |
2344 | 2351 | ||
2345 | lock_page(page); | 2352 | lock_page(page); |
2346 | size = i_size_read(inode); | 2353 | size = i_size_read(inode); |
2347 | if ((page->mapping != inode->i_mapping) || | 2354 | if ((page->mapping != inode->i_mapping) || |
2348 | (page_offset(page) > size)) { | 2355 | (page_offset(page) > size)) { |
2349 | /* page got truncated out from underneath us */ | 2356 | /* We overload EFAULT to mean page got truncated */ |
2350 | unlock_page(page); | 2357 | ret = -EFAULT; |
2351 | goto out; | 2358 | goto out_unlock; |
2352 | } | 2359 | } |
2353 | 2360 | ||
2354 | /* page is wholly or partially inside EOF */ | 2361 | /* page is wholly or partially inside EOF */ |
@@ -2361,18 +2368,41 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2361 | if (!ret) | 2368 | if (!ret) |
2362 | ret = block_commit_write(page, 0, end); | 2369 | ret = block_commit_write(page, 0, end); |
2363 | 2370 | ||
2364 | if (unlikely(ret)) { | 2371 | if (unlikely(ret < 0)) |
2365 | unlock_page(page); | 2372 | goto out_unlock; |
2366 | if (ret == -ENOMEM) | 2373 | /* |
2367 | ret = VM_FAULT_OOM; | 2374 | * Freezing in progress? We check after the page is marked dirty and |
2368 | else /* -ENOSPC, -EIO, etc */ | 2375 | * with page lock held so if the test here fails, we are sure freezing |
2369 | ret = VM_FAULT_SIGBUS; | 2376 | * code will wait during syncing until the page fault is done - at that |
2370 | } else | 2377 | * point page will be dirty and unlocked so freezing code will write it |
2371 | ret = VM_FAULT_LOCKED; | 2378 | * and writeprotect it again. |
2372 | 2379 | */ | |
2373 | out: | 2380 | set_page_dirty(page); |
2381 | if (inode->i_sb->s_frozen != SB_UNFROZEN) { | ||
2382 | ret = -EAGAIN; | ||
2383 | goto out_unlock; | ||
2384 | } | ||
2385 | return 0; | ||
2386 | out_unlock: | ||
2387 | unlock_page(page); | ||
2374 | return ret; | 2388 | return ret; |
2375 | } | 2389 | } |
2390 | EXPORT_SYMBOL(__block_page_mkwrite); | ||
2391 | |||
2392 | int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | ||
2393 | get_block_t get_block) | ||
2394 | { | ||
2395 | int ret; | ||
2396 | struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; | ||
2397 | |||
2398 | /* | ||
2399 | * This check is racy but catches the common case. The check in | ||
2400 | * __block_page_mkwrite() is reliable. | ||
2401 | */ | ||
2402 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
2403 | ret = __block_page_mkwrite(vma, vmf, get_block); | ||
2404 | return block_page_mkwrite_return(ret); | ||
2405 | } | ||
2376 | EXPORT_SYMBOL(block_page_mkwrite); | 2406 | EXPORT_SYMBOL(block_page_mkwrite); |
2377 | 2407 | ||
2378 | /* | 2408 | /* |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 38b8ab554924..33da49dc3cc6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -848,7 +848,8 @@ get_more_pages: | |||
848 | op->payload_len = cpu_to_le32(len); | 848 | op->payload_len = cpu_to_le32(len); |
849 | req->r_request->hdr.data_len = cpu_to_le32(len); | 849 | req->r_request->hdr.data_len = cpu_to_le32(len); |
850 | 850 | ||
851 | ceph_osdc_start_request(&fsc->client->osdc, req, true); | 851 | rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); |
852 | BUG_ON(rc); | ||
852 | req = NULL; | 853 | req = NULL; |
853 | 854 | ||
854 | /* continue? */ | 855 | /* continue? */ |
@@ -880,8 +881,6 @@ release_pvec_pages: | |||
880 | out: | 881 | out: |
881 | if (req) | 882 | if (req) |
882 | ceph_osdc_put_request(req); | 883 | ceph_osdc_put_request(req); |
883 | if (rc > 0) | ||
884 | rc = 0; /* vfs expects us to return 0 */ | ||
885 | ceph_put_snap_context(snapc); | 884 | ceph_put_snap_context(snapc); |
886 | dout("writepages done, rc = %d\n", rc); | 885 | dout("writepages done, rc = %d\n", rc); |
887 | return rc; | 886 | return rc; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2a5404c1c42f..1f72b00447c4 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -569,7 +569,8 @@ retry: | |||
569 | list_add_tail(&cap->session_caps, &session->s_caps); | 569 | list_add_tail(&cap->session_caps, &session->s_caps); |
570 | session->s_nr_caps++; | 570 | session->s_nr_caps++; |
571 | spin_unlock(&session->s_cap_lock); | 571 | spin_unlock(&session->s_cap_lock); |
572 | } | 572 | } else if (new_cap) |
573 | ceph_put_cap(mdsc, new_cap); | ||
573 | 574 | ||
574 | if (!ci->i_snap_realm) { | 575 | if (!ci->i_snap_realm) { |
575 | /* | 576 | /* |
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2634 | struct ceph_mds_session *session, | 2635 | struct ceph_mds_session *session, |
2635 | int *open_target_sessions) | 2636 | int *open_target_sessions) |
2636 | { | 2637 | { |
2638 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; | ||
2637 | struct ceph_inode_info *ci = ceph_inode(inode); | 2639 | struct ceph_inode_info *ci = ceph_inode(inode); |
2638 | int mds = session->s_mds; | 2640 | int mds = session->s_mds; |
2639 | unsigned mseq = le32_to_cpu(ex->migrate_seq); | 2641 | unsigned mseq = le32_to_cpu(ex->migrate_seq); |
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
2670 | * export targets, so that we get the matching IMPORT | 2672 | * export targets, so that we get the matching IMPORT |
2671 | */ | 2673 | */ |
2672 | *open_target_sessions = 1; | 2674 | *open_target_sessions = 1; |
2675 | |||
2676 | /* | ||
2677 | * we can't flush dirty caps that we've seen the | ||
2678 | * EXPORT but no IMPORT for | ||
2679 | */ | ||
2680 | spin_lock(&mdsc->cap_dirty_lock); | ||
2681 | if (!list_empty(&ci->i_dirty_item)) { | ||
2682 | dout(" moving %p to cap_dirty_migrating\n", | ||
2683 | inode); | ||
2684 | list_move(&ci->i_dirty_item, | ||
2685 | &mdsc->cap_dirty_migrating); | ||
2686 | } | ||
2687 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2673 | } | 2688 | } |
2674 | __ceph_remove_cap(cap); | 2689 | __ceph_remove_cap(cap); |
2675 | } | 2690 | } |
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2707 | ci->i_cap_exporting_issued = 0; | 2722 | ci->i_cap_exporting_issued = 0; |
2708 | ci->i_cap_exporting_mseq = 0; | 2723 | ci->i_cap_exporting_mseq = 0; |
2709 | ci->i_cap_exporting_mds = -1; | 2724 | ci->i_cap_exporting_mds = -1; |
2725 | |||
2726 | spin_lock(&mdsc->cap_dirty_lock); | ||
2727 | if (!list_empty(&ci->i_dirty_item)) { | ||
2728 | dout(" moving %p back to cap_dirty\n", inode); | ||
2729 | list_move(&ci->i_dirty_item, &mdsc->cap_dirty); | ||
2730 | } | ||
2731 | spin_unlock(&mdsc->cap_dirty_lock); | ||
2710 | } else { | 2732 | } else { |
2711 | dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", | 2733 | dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", |
2712 | inode, ci, mds, mseq); | 2734 | inode, ci, mds, mseq); |
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | |||
2910 | */ | 2932 | */ |
2911 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | 2933 | void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) |
2912 | { | 2934 | { |
2913 | struct ceph_inode_info *ci, *nci = NULL; | 2935 | struct ceph_inode_info *ci; |
2914 | struct inode *inode, *ninode = NULL; | 2936 | struct inode *inode; |
2915 | struct list_head *p, *n; | ||
2916 | 2937 | ||
2917 | dout("flush_dirty_caps\n"); | 2938 | dout("flush_dirty_caps\n"); |
2918 | spin_lock(&mdsc->cap_dirty_lock); | 2939 | spin_lock(&mdsc->cap_dirty_lock); |
2919 | list_for_each_safe(p, n, &mdsc->cap_dirty) { | 2940 | while (!list_empty(&mdsc->cap_dirty)) { |
2920 | if (nci) { | 2941 | ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, |
2921 | ci = nci; | 2942 | i_dirty_item); |
2922 | inode = ninode; | 2943 | inode = igrab(&ci->vfs_inode); |
2923 | ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; | 2944 | dout("flush_dirty_caps %p\n", inode); |
2924 | dout("flush_dirty_caps inode %p (was next inode)\n", | ||
2925 | inode); | ||
2926 | } else { | ||
2927 | ci = list_entry(p, struct ceph_inode_info, | ||
2928 | i_dirty_item); | ||
2929 | inode = igrab(&ci->vfs_inode); | ||
2930 | BUG_ON(!inode); | ||
2931 | dout("flush_dirty_caps inode %p\n", inode); | ||
2932 | } | ||
2933 | if (n != &mdsc->cap_dirty) { | ||
2934 | nci = list_entry(n, struct ceph_inode_info, | ||
2935 | i_dirty_item); | ||
2936 | ninode = igrab(&nci->vfs_inode); | ||
2937 | BUG_ON(!ninode); | ||
2938 | nci->i_ceph_flags |= CEPH_I_NOFLUSH; | ||
2939 | dout("flush_dirty_caps next inode %p, noflush\n", | ||
2940 | ninode); | ||
2941 | } else { | ||
2942 | nci = NULL; | ||
2943 | ninode = NULL; | ||
2944 | } | ||
2945 | spin_unlock(&mdsc->cap_dirty_lock); | 2945 | spin_unlock(&mdsc->cap_dirty_lock); |
2946 | if (inode) { | 2946 | if (inode) { |
2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, | 2947 | ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, |
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | |||
2951 | spin_lock(&mdsc->cap_dirty_lock); | 2951 | spin_lock(&mdsc->cap_dirty_lock); |
2952 | } | 2952 | } |
2953 | spin_unlock(&mdsc->cap_dirty_lock); | 2953 | spin_unlock(&mdsc->cap_dirty_lock); |
2954 | dout("flush_dirty_caps done\n"); | ||
2954 | } | 2955 | } |
2955 | 2956 | ||
2956 | /* | 2957 | /* |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1a867a3601ae..33729e822bb9 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -360,7 +360,7 @@ more: | |||
360 | rinfo = &fi->last_readdir->r_reply_info; | 360 | rinfo = &fi->last_readdir->r_reply_info; |
361 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, | 361 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, |
362 | rinfo->dir_nr, off, fi->offset); | 362 | rinfo->dir_nr, off, fi->offset); |
363 | while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) { | 363 | while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { |
364 | u64 pos = ceph_make_fpos(frag, off); | 364 | u64 pos = ceph_make_fpos(frag, off); |
365 | struct ceph_mds_reply_inode *in = | 365 | struct ceph_mds_reply_inode *in = |
366 | rinfo->dir_in[off - fi->offset].in; | 366 | rinfo->dir_in[off - fi->offset].in; |
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1066 | struct inode *inode = file->f_dentry->d_inode; | 1066 | struct inode *inode = file->f_dentry->d_inode; |
1067 | struct ceph_inode_info *ci = ceph_inode(inode); | 1067 | struct ceph_inode_info *ci = ceph_inode(inode); |
1068 | int left; | 1068 | int left; |
1069 | const int bufsize = 1024; | ||
1069 | 1070 | ||
1070 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 1071 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1071 | return -EISDIR; | 1072 | return -EISDIR; |
1072 | 1073 | ||
1073 | if (!cf->dir_info) { | 1074 | if (!cf->dir_info) { |
1074 | cf->dir_info = kmalloc(1024, GFP_NOFS); | 1075 | cf->dir_info = kmalloc(bufsize, GFP_NOFS); |
1075 | if (!cf->dir_info) | 1076 | if (!cf->dir_info) |
1076 | return -ENOMEM; | 1077 | return -ENOMEM; |
1077 | cf->dir_info_len = | 1078 | cf->dir_info_len = |
1078 | sprintf(cf->dir_info, | 1079 | snprintf(cf->dir_info, bufsize, |
1079 | "entries: %20lld\n" | 1080 | "entries: %20lld\n" |
1080 | " files: %20lld\n" | 1081 | " files: %20lld\n" |
1081 | " subdirs: %20lld\n" | 1082 | " subdirs: %20lld\n" |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index e41056174bf8..a610d3d67488 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | |||
86 | static struct dentry *__fh_to_dentry(struct super_block *sb, | 86 | static struct dentry *__fh_to_dentry(struct super_block *sb, |
87 | struct ceph_nfs_fh *fh) | 87 | struct ceph_nfs_fh *fh) |
88 | { | 88 | { |
89 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | ||
89 | struct inode *inode; | 90 | struct inode *inode; |
90 | struct dentry *dentry; | 91 | struct dentry *dentry; |
91 | struct ceph_vino vino; | 92 | struct ceph_vino vino; |
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
95 | vino.ino = fh->ino; | 96 | vino.ino = fh->ino; |
96 | vino.snap = CEPH_NOSNAP; | 97 | vino.snap = CEPH_NOSNAP; |
97 | inode = ceph_find_inode(sb, vino); | 98 | inode = ceph_find_inode(sb, vino); |
98 | if (!inode) | 99 | if (!inode) { |
99 | return ERR_PTR(-ESTALE); | 100 | struct ceph_mds_request *req; |
101 | |||
102 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, | ||
103 | USE_ANY_MDS); | ||
104 | if (IS_ERR(req)) | ||
105 | return ERR_CAST(req); | ||
106 | |||
107 | req->r_ino1 = vino; | ||
108 | req->r_num_caps = 1; | ||
109 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
110 | inode = req->r_target_inode; | ||
111 | if (inode) | ||
112 | igrab(inode); | ||
113 | ceph_mdsc_put_request(req); | ||
114 | if (!inode) | ||
115 | return ERR_PTR(-ESTALE); | ||
116 | } | ||
100 | 117 | ||
101 | dentry = d_obtain_alias(inode); | 118 | dentry = d_obtain_alias(inode); |
102 | if (IS_ERR(dentry)) { | 119 | if (IS_ERR(dentry)) { |
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, | |||
148 | snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); | 165 | snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); |
149 | req->r_num_caps = 1; | 166 | req->r_num_caps = 1; |
150 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 167 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
168 | inode = req->r_target_inode; | ||
169 | if (inode) | ||
170 | igrab(inode); | ||
151 | ceph_mdsc_put_request(req); | 171 | ceph_mdsc_put_request(req); |
152 | inode = ceph_find_inode(sb, vino); | ||
153 | if (!inode) | 172 | if (!inode) |
154 | return ERR_PTR(err ? err : -ESTALE); | 173 | return ERR_PTR(err ? err : -ESTALE); |
155 | } | 174 | } |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d0fae4ce9ba5..79743d146be6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
578 | if (dir) { | 578 | if (dir) { |
579 | struct ceph_inode_info *ci = ceph_inode(dir); | 579 | struct ceph_inode_info *ci = ceph_inode(dir); |
580 | 580 | ||
581 | ihold(dir); | ||
581 | spin_lock(&ci->i_unsafe_lock); | 582 | spin_lock(&ci->i_unsafe_lock); |
582 | req->r_unsafe_dir = dir; | 583 | req->r_unsafe_dir = dir; |
583 | list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); | 584 | list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); |
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
598 | spin_lock(&ci->i_unsafe_lock); | 599 | spin_lock(&ci->i_unsafe_lock); |
599 | list_del_init(&req->r_unsafe_dir_item); | 600 | list_del_init(&req->r_unsafe_dir_item); |
600 | spin_unlock(&ci->i_unsafe_lock); | 601 | spin_unlock(&ci->i_unsafe_lock); |
602 | |||
603 | iput(req->r_unsafe_dir); | ||
604 | req->r_unsafe_dir = NULL; | ||
601 | } | 605 | } |
602 | 606 | ||
603 | ceph_mdsc_put_request(req); | 607 | ceph_mdsc_put_request(req); |
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2691 | { | 2695 | { |
2692 | struct super_block *sb = mdsc->fsc->sb; | 2696 | struct super_block *sb = mdsc->fsc->sb; |
2693 | struct inode *inode; | 2697 | struct inode *inode; |
2694 | struct ceph_inode_info *ci; | ||
2695 | struct dentry *parent, *dentry; | 2698 | struct dentry *parent, *dentry; |
2696 | struct ceph_dentry_info *di; | 2699 | struct ceph_dentry_info *di; |
2697 | int mds = session->s_mds; | 2700 | int mds = session->s_mds; |
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2728 | dout("handle_lease no inode %llx\n", vino.ino); | 2731 | dout("handle_lease no inode %llx\n", vino.ino); |
2729 | goto release; | 2732 | goto release; |
2730 | } | 2733 | } |
2731 | ci = ceph_inode(inode); | ||
2732 | 2734 | ||
2733 | /* dentry */ | 2735 | /* dentry */ |
2734 | parent = d_find_alias(inode); | 2736 | parent = d_find_alias(inode); |
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) | |||
3002 | spin_lock_init(&mdsc->snap_flush_lock); | 3004 | spin_lock_init(&mdsc->snap_flush_lock); |
3003 | mdsc->cap_flush_seq = 0; | 3005 | mdsc->cap_flush_seq = 0; |
3004 | INIT_LIST_HEAD(&mdsc->cap_dirty); | 3006 | INIT_LIST_HEAD(&mdsc->cap_dirty); |
3007 | INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); | ||
3005 | mdsc->num_cap_flushing = 0; | 3008 | mdsc->num_cap_flushing = 0; |
3006 | spin_lock_init(&mdsc->cap_dirty_lock); | 3009 | spin_lock_init(&mdsc->cap_dirty_lock); |
3007 | init_waitqueue_head(&mdsc->cap_flushing_wq); | 3010 | init_waitqueue_head(&mdsc->cap_flushing_wq); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4e3a9cc0bba6..7d8a0d662d56 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -278,6 +278,7 @@ struct ceph_mds_client { | |||
278 | 278 | ||
279 | u64 cap_flush_seq; | 279 | u64 cap_flush_seq; |
280 | struct list_head cap_dirty; /* inodes with dirty caps */ | 280 | struct list_head cap_dirty; /* inodes with dirty caps */ |
281 | struct list_head cap_dirty_migrating; /* ...that are migration... */ | ||
281 | int num_cap_flushing; /* # caps we are flushing */ | 282 | int num_cap_flushing; /* # caps we are flushing */ |
282 | spinlock_t cap_dirty_lock; /* protects above items */ | 283 | spinlock_t cap_dirty_lock; /* protects above items */ |
283 | wait_queue_head_t cap_flushing_wq; | 284 | wait_queue_head_t cap_flushing_wq; |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 2b8dae4d121e..a46126fd5735 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -336,6 +336,8 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) | |||
336 | int len = de->d_name.len; | 336 | int len = de->d_name.len; |
337 | int error; | 337 | int error; |
338 | 338 | ||
339 | dentry_unhash(de); | ||
340 | |||
339 | error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); | 341 | error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); |
340 | if (!error) { | 342 | if (!error) { |
341 | /* VFS may delete the child */ | 343 | /* VFS may delete the child */ |
@@ -359,6 +361,9 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
359 | int new_length = new_dentry->d_name.len; | 361 | int new_length = new_dentry->d_name.len; |
360 | int error; | 362 | int error; |
361 | 363 | ||
364 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
365 | dentry_unhash(new_dentry); | ||
366 | |||
362 | error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), | 367 | error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), |
363 | coda_i2f(new_dir), old_length, new_length, | 368 | coda_i2f(new_dir), old_length, new_length, |
364 | (const char *) old_name, (const char *)new_name); | 369 | (const char *) old_name, (const char *)new_name); |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9a37a9b6de3a..9d17d350abc5 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -1359,6 +1359,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1359 | struct module *subsys_owner = NULL, *dead_item_owner = NULL; | 1359 | struct module *subsys_owner = NULL, *dead_item_owner = NULL; |
1360 | int ret; | 1360 | int ret; |
1361 | 1361 | ||
1362 | dentry_unhash(dentry); | ||
1363 | |||
1362 | if (dentry->d_parent == configfs_sb->s_root) | 1364 | if (dentry->d_parent == configfs_sb->s_root) |
1363 | return -EPERM; | 1365 | return -EPERM; |
1364 | 1366 | ||
diff --git a/fs/dcache.c b/fs/dcache.c index 18b2a1f10ed8..37f72ee5bf7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent) | |||
1220 | EXPORT_SYMBOL(shrink_dcache_parent); | 1220 | EXPORT_SYMBOL(shrink_dcache_parent); |
1221 | 1221 | ||
1222 | /* | 1222 | /* |
1223 | * Scan `nr' dentries and return the number which remain. | 1223 | * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain. |
1224 | * | 1224 | * |
1225 | * We need to avoid reentering the filesystem if the caller is performing a | 1225 | * We need to avoid reentering the filesystem if the caller is performing a |
1226 | * GFP_NOFS allocation attempt. One example deadlock is: | 1226 | * GFP_NOFS allocation attempt. One example deadlock is: |
@@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent); | |||
1231 | * | 1231 | * |
1232 | * In this case we return -1 to tell the caller that we baled. | 1232 | * In this case we return -1 to tell the caller that we baled. |
1233 | */ | 1233 | */ |
1234 | static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 1234 | static int shrink_dcache_memory(struct shrinker *shrink, |
1235 | struct shrink_control *sc) | ||
1235 | { | 1236 | { |
1237 | int nr = sc->nr_to_scan; | ||
1238 | gfp_t gfp_mask = sc->gfp_mask; | ||
1239 | |||
1236 | if (nr) { | 1240 | if (nr) { |
1237 | if (!(gfp_mask & __GFP_FS)) | 1241 | if (!(gfp_mask & __GFP_FS)) |
1238 | return -1; | 1242 | return -1; |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 98b77c89494c..c00e055b6282 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) | |||
40 | static void drop_slab(void) | 40 | static void drop_slab(void) |
41 | { | 41 | { |
42 | int nr_objects; | 42 | int nr_objects; |
43 | struct shrink_control shrink = { | ||
44 | .gfp_mask = GFP_KERNEL, | ||
45 | }; | ||
43 | 46 | ||
44 | do { | 47 | do { |
45 | nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); | 48 | nr_objects = shrink_slab(&shrink, 1000, 1000); |
46 | } while (nr_objects > 10); | 49 | } while (nr_objects > 10); |
47 | } | 50 | } |
48 | 51 | ||
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4d4cc6a90cd5..227b409b8406 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -521,6 +521,8 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
521 | struct dentry *lower_dir_dentry; | 521 | struct dentry *lower_dir_dentry; |
522 | int rc; | 522 | int rc; |
523 | 523 | ||
524 | dentry_unhash(dentry); | ||
525 | |||
524 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 526 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
525 | dget(dentry); | 527 | dget(dentry); |
526 | lower_dir_dentry = lock_parent(lower_dentry); | 528 | lower_dir_dentry = lock_parent(lower_dentry); |
@@ -571,6 +573,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
571 | struct dentry *lower_new_dir_dentry; | 573 | struct dentry *lower_new_dir_dentry; |
572 | struct dentry *trap = NULL; | 574 | struct dentry *trap = NULL; |
573 | 575 | ||
576 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
577 | dentry_unhash(new_dentry); | ||
578 | |||
574 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); | 579 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); |
575 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); | 580 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); |
576 | dget(lower_old_dentry); | 581 | dget(lower_old_dentry); |
@@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | |||
200 | 200 | ||
201 | #ifdef CONFIG_STACK_GROWSUP | 201 | #ifdef CONFIG_STACK_GROWSUP |
202 | if (write) { | 202 | if (write) { |
203 | ret = expand_stack_downwards(bprm->vma, pos); | 203 | ret = expand_downwards(bprm->vma, pos); |
204 | if (ret < 0) | 204 | if (ret < 0) |
205 | return NULL; | 205 | return NULL; |
206 | } | 206 | } |
@@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
600 | unsigned long length = old_end - old_start; | 600 | unsigned long length = old_end - old_start; |
601 | unsigned long new_start = old_start - shift; | 601 | unsigned long new_start = old_start - shift; |
602 | unsigned long new_end = old_end - shift; | 602 | unsigned long new_end = old_end - shift; |
603 | struct mmu_gather *tlb; | 603 | struct mmu_gather tlb; |
604 | 604 | ||
605 | BUG_ON(new_start > new_end); | 605 | BUG_ON(new_start > new_end); |
606 | 606 | ||
@@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
626 | return -ENOMEM; | 626 | return -ENOMEM; |
627 | 627 | ||
628 | lru_add_drain(); | 628 | lru_add_drain(); |
629 | tlb = tlb_gather_mmu(mm, 0); | 629 | tlb_gather_mmu(&tlb, mm, 0); |
630 | if (new_end > old_start) { | 630 | if (new_end > old_start) { |
631 | /* | 631 | /* |
632 | * when the old and new regions overlap clear from new_end. | 632 | * when the old and new regions overlap clear from new_end. |
633 | */ | 633 | */ |
634 | free_pgd_range(tlb, new_end, old_end, new_end, | 634 | free_pgd_range(&tlb, new_end, old_end, new_end, |
635 | vma->vm_next ? vma->vm_next->vm_start : 0); | 635 | vma->vm_next ? vma->vm_next->vm_start : 0); |
636 | } else { | 636 | } else { |
637 | /* | 637 | /* |
@@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
640 | * have constraints on va-space that make this illegal (IA64) - | 640 | * have constraints on va-space that make this illegal (IA64) - |
641 | * for the others its just a little faster. | 641 | * for the others its just a little faster. |
642 | */ | 642 | */ |
643 | free_pgd_range(tlb, old_start, old_end, new_end, | 643 | free_pgd_range(&tlb, old_start, old_end, new_end, |
644 | vma->vm_next ? vma->vm_next->vm_start : 0); | 644 | vma->vm_next ? vma->vm_next->vm_start : 0); |
645 | } | 645 | } |
646 | tlb_finish_mmu(tlb, new_end, old_end); | 646 | tlb_finish_mmu(&tlb, new_end, old_end); |
647 | 647 | ||
648 | /* | 648 | /* |
649 | * Shrink the vma to just the new range. Always succeeds. | 649 | * Shrink the vma to just the new range. Always succeeds. |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c6a9e0eadc1..aad153ef6b78 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/quotaops.h> | 36 | #include <linux/quotaops.h> |
37 | #include <linux/seq_file.h> | 37 | #include <linux/seq_file.h> |
38 | #include <linux/log2.h> | 38 | #include <linux/log2.h> |
39 | #include <linux/cleancache.h> | ||
39 | 40 | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | 42 | ||
@@ -1367,6 +1368,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, | |||
1367 | } else { | 1368 | } else { |
1368 | ext3_msg(sb, KERN_INFO, "using internal journal"); | 1369 | ext3_msg(sb, KERN_INFO, "using internal journal"); |
1369 | } | 1370 | } |
1371 | cleancache_init_fs(sb); | ||
1370 | return res; | 1372 | return res; |
1371 | } | 1373 | } |
1372 | 1374 | ||
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index c947e36eda6c..04109460ba9e 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o | |||
6 | 6 | ||
7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ |
8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o | 9 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ |
10 | mmp.o | ||
10 | 11 | ||
11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 12 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o | 13 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1c67139ad4b4..264f6949511e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -362,130 +362,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
362 | } | 362 | } |
363 | 363 | ||
364 | /** | 364 | /** |
365 | * ext4_add_groupblocks() -- Add given blocks to an existing group | ||
366 | * @handle: handle to this transaction | ||
367 | * @sb: super block | ||
368 | * @block: start physcial block to add to the block group | ||
369 | * @count: number of blocks to free | ||
370 | * | ||
371 | * This marks the blocks as free in the bitmap. We ask the | ||
372 | * mballoc to reload the buddy after this by setting group | ||
373 | * EXT4_GROUP_INFO_NEED_INIT_BIT flag | ||
374 | */ | ||
375 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | ||
376 | ext4_fsblk_t block, unsigned long count) | ||
377 | { | ||
378 | struct buffer_head *bitmap_bh = NULL; | ||
379 | struct buffer_head *gd_bh; | ||
380 | ext4_group_t block_group; | ||
381 | ext4_grpblk_t bit; | ||
382 | unsigned int i; | ||
383 | struct ext4_group_desc *desc; | ||
384 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
385 | int err = 0, ret, blk_free_count; | ||
386 | ext4_grpblk_t blocks_freed; | ||
387 | struct ext4_group_info *grp; | ||
388 | |||
389 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | ||
390 | |||
391 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | ||
392 | grp = ext4_get_group_info(sb, block_group); | ||
393 | /* | ||
394 | * Check to see if we are freeing blocks across a group | ||
395 | * boundary. | ||
396 | */ | ||
397 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { | ||
398 | goto error_return; | ||
399 | } | ||
400 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | ||
401 | if (!bitmap_bh) | ||
402 | goto error_return; | ||
403 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); | ||
404 | if (!desc) | ||
405 | goto error_return; | ||
406 | |||
407 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || | ||
408 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | ||
409 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | ||
410 | in_range(block + count - 1, ext4_inode_table(sb, desc), | ||
411 | sbi->s_itb_per_group)) { | ||
412 | ext4_error(sb, "Adding blocks in system zones - " | ||
413 | "Block = %llu, count = %lu", | ||
414 | block, count); | ||
415 | goto error_return; | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * We are about to add blocks to the bitmap, | ||
420 | * so we need undo access. | ||
421 | */ | ||
422 | BUFFER_TRACE(bitmap_bh, "getting undo access"); | ||
423 | err = ext4_journal_get_undo_access(handle, bitmap_bh); | ||
424 | if (err) | ||
425 | goto error_return; | ||
426 | |||
427 | /* | ||
428 | * We are about to modify some metadata. Call the journal APIs | ||
429 | * to unshare ->b_data if a currently-committing transaction is | ||
430 | * using it | ||
431 | */ | ||
432 | BUFFER_TRACE(gd_bh, "get_write_access"); | ||
433 | err = ext4_journal_get_write_access(handle, gd_bh); | ||
434 | if (err) | ||
435 | goto error_return; | ||
436 | /* | ||
437 | * make sure we don't allow a parallel init on other groups in the | ||
438 | * same buddy cache | ||
439 | */ | ||
440 | down_write(&grp->alloc_sem); | ||
441 | for (i = 0, blocks_freed = 0; i < count; i++) { | ||
442 | BUFFER_TRACE(bitmap_bh, "clear bit"); | ||
443 | if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), | ||
444 | bit + i, bitmap_bh->b_data)) { | ||
445 | ext4_error(sb, "bit already cleared for block %llu", | ||
446 | (ext4_fsblk_t)(block + i)); | ||
447 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | ||
448 | } else { | ||
449 | blocks_freed++; | ||
450 | } | ||
451 | } | ||
452 | ext4_lock_group(sb, block_group); | ||
453 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | ||
454 | ext4_free_blks_set(sb, desc, blk_free_count); | ||
455 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | ||
456 | ext4_unlock_group(sb, block_group); | ||
457 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | ||
458 | |||
459 | if (sbi->s_log_groups_per_flex) { | ||
460 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
461 | atomic_add(blocks_freed, | ||
462 | &sbi->s_flex_groups[flex_group].free_blocks); | ||
463 | } | ||
464 | /* | ||
465 | * request to reload the buddy with the | ||
466 | * new bitmap information | ||
467 | */ | ||
468 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | ||
469 | grp->bb_free += blocks_freed; | ||
470 | up_write(&grp->alloc_sem); | ||
471 | |||
472 | /* We dirtied the bitmap block */ | ||
473 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
474 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
475 | |||
476 | /* And the group descriptor block */ | ||
477 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | ||
478 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | ||
479 | if (!err) | ||
480 | err = ret; | ||
481 | |||
482 | error_return: | ||
483 | brelse(bitmap_bh); | ||
484 | ext4_std_error(sb, err); | ||
485 | return; | ||
486 | } | ||
487 | |||
488 | /** | ||
489 | * ext4_has_free_blocks() | 365 | * ext4_has_free_blocks() |
490 | * @sbi: in-core super block structure. | 366 | * @sbi: in-core super block structure. |
491 | * @nblocks: number of needed blocks | 367 | * @nblocks: number of needed blocks |
@@ -493,7 +369,8 @@ error_return: | |||
493 | * Check if filesystem has nblocks free & available for allocation. | 369 | * Check if filesystem has nblocks free & available for allocation. |
494 | * On success return 1, return 0 on failure. | 370 | * On success return 1, return 0 on failure. |
495 | */ | 371 | */ |
496 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | 372 | static int ext4_has_free_blocks(struct ext4_sb_info *sbi, |
373 | s64 nblocks, unsigned int flags) | ||
497 | { | 374 | { |
498 | s64 free_blocks, dirty_blocks, root_blocks; | 375 | s64 free_blocks, dirty_blocks, root_blocks; |
499 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | 376 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; |
@@ -507,11 +384,6 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | |||
507 | EXT4_FREEBLOCKS_WATERMARK) { | 384 | EXT4_FREEBLOCKS_WATERMARK) { |
508 | free_blocks = percpu_counter_sum_positive(fbc); | 385 | free_blocks = percpu_counter_sum_positive(fbc); |
509 | dirty_blocks = percpu_counter_sum_positive(dbc); | 386 | dirty_blocks = percpu_counter_sum_positive(dbc); |
510 | if (dirty_blocks < 0) { | ||
511 | printk(KERN_CRIT "Dirty block accounting " | ||
512 | "went wrong %lld\n", | ||
513 | (long long)dirty_blocks); | ||
514 | } | ||
515 | } | 387 | } |
516 | /* Check whether we have space after | 388 | /* Check whether we have space after |
517 | * accounting for current dirty blocks & root reserved blocks. | 389 | * accounting for current dirty blocks & root reserved blocks. |
@@ -522,7 +394,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | |||
522 | /* Hm, nope. Are (enough) root reserved blocks available? */ | 394 | /* Hm, nope. Are (enough) root reserved blocks available? */ |
523 | if (sbi->s_resuid == current_fsuid() || | 395 | if (sbi->s_resuid == current_fsuid() || |
524 | ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || | 396 | ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || |
525 | capable(CAP_SYS_RESOURCE)) { | 397 | capable(CAP_SYS_RESOURCE) || |
398 | (flags & EXT4_MB_USE_ROOT_BLOCKS)) { | ||
399 | |||
526 | if (free_blocks >= (nblocks + dirty_blocks)) | 400 | if (free_blocks >= (nblocks + dirty_blocks)) |
527 | return 1; | 401 | return 1; |
528 | } | 402 | } |
@@ -531,9 +405,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) | |||
531 | } | 405 | } |
532 | 406 | ||
533 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, | 407 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, |
534 | s64 nblocks) | 408 | s64 nblocks, unsigned int flags) |
535 | { | 409 | { |
536 | if (ext4_has_free_blocks(sbi, nblocks)) { | 410 | if (ext4_has_free_blocks(sbi, nblocks, flags)) { |
537 | percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); | 411 | percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); |
538 | return 0; | 412 | return 0; |
539 | } else | 413 | } else |
@@ -554,7 +428,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, | |||
554 | */ | 428 | */ |
555 | int ext4_should_retry_alloc(struct super_block *sb, int *retries) | 429 | int ext4_should_retry_alloc(struct super_block *sb, int *retries) |
556 | { | 430 | { |
557 | if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || | 431 | if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) || |
558 | (*retries)++ > 3 || | 432 | (*retries)++ > 3 || |
559 | !EXT4_SB(sb)->s_journal) | 433 | !EXT4_SB(sb)->s_journal) |
560 | return 0; | 434 | return 0; |
@@ -577,7 +451,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
577 | * error stores in errp pointer | 451 | * error stores in errp pointer |
578 | */ | 452 | */ |
579 | ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | 453 | ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
580 | ext4_fsblk_t goal, unsigned long *count, int *errp) | 454 | ext4_fsblk_t goal, unsigned int flags, |
455 | unsigned long *count, int *errp) | ||
581 | { | 456 | { |
582 | struct ext4_allocation_request ar; | 457 | struct ext4_allocation_request ar; |
583 | ext4_fsblk_t ret; | 458 | ext4_fsblk_t ret; |
@@ -587,6 +462,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
587 | ar.inode = inode; | 462 | ar.inode = inode; |
588 | ar.goal = goal; | 463 | ar.goal = goal; |
589 | ar.len = count ? *count : 1; | 464 | ar.len = count ? *count : 1; |
465 | ar.flags = flags; | ||
590 | 466 | ||
591 | ret = ext4_mb_new_blocks(handle, &ar, errp); | 467 | ret = ext4_mb_new_blocks(handle, &ar, errp); |
592 | if (count) | 468 | if (count) |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4daaf2b753f4..a74b89c09f90 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t; | |||
108 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 | 108 | #define EXT4_MB_DELALLOC_RESERVED 0x0400 |
109 | /* We are doing stream allocation */ | 109 | /* We are doing stream allocation */ |
110 | #define EXT4_MB_STREAM_ALLOC 0x0800 | 110 | #define EXT4_MB_STREAM_ALLOC 0x0800 |
111 | 111 | /* Use reserved root blocks if needed */ | |
112 | #define EXT4_MB_USE_ROOT_BLOCKS 0x1000 | ||
112 | 113 | ||
113 | struct ext4_allocation_request { | 114 | struct ext4_allocation_request { |
114 | /* target inode for block we're allocating */ | 115 | /* target inode for block we're allocating */ |
@@ -209,6 +210,8 @@ struct ext4_io_submit { | |||
209 | */ | 210 | */ |
210 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ | 211 | #define EXT4_BAD_INO 1 /* Bad blocks inode */ |
211 | #define EXT4_ROOT_INO 2 /* Root inode */ | 212 | #define EXT4_ROOT_INO 2 /* Root inode */ |
213 | #define EXT4_USR_QUOTA_INO 3 /* User quota inode */ | ||
214 | #define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */ | ||
212 | #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ | 215 | #define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ |
213 | #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ | 216 | #define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ |
214 | #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ | 217 | #define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ |
@@ -512,6 +515,10 @@ struct ext4_new_group_data { | |||
512 | /* Convert extent to initialized after IO complete */ | 515 | /* Convert extent to initialized after IO complete */ |
513 | #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ | 516 | #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ |
514 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) | 517 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) |
518 | /* Punch out blocks of an extent */ | ||
519 | #define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 | ||
520 | /* Don't normalize allocation size (used for fallocate) */ | ||
521 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 | ||
515 | 522 | ||
516 | /* | 523 | /* |
517 | * Flags used by ext4_free_blocks | 524 | * Flags used by ext4_free_blocks |
@@ -1028,7 +1035,7 @@ struct ext4_super_block { | |||
1028 | __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ | 1035 | __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ |
1029 | __le32 s_flags; /* Miscellaneous flags */ | 1036 | __le32 s_flags; /* Miscellaneous flags */ |
1030 | __le16 s_raid_stride; /* RAID stride */ | 1037 | __le16 s_raid_stride; /* RAID stride */ |
1031 | __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ | 1038 | __le16 s_mmp_update_interval; /* # seconds to wait in MMP checking */ |
1032 | __le64 s_mmp_block; /* Block for multi-mount protection */ | 1039 | __le64 s_mmp_block; /* Block for multi-mount protection */ |
1033 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ | 1040 | __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ |
1034 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ | 1041 | __u8 s_log_groups_per_flex; /* FLEX_BG group size */ |
@@ -1144,6 +1151,9 @@ struct ext4_sb_info { | |||
1144 | unsigned long s_ext_blocks; | 1151 | unsigned long s_ext_blocks; |
1145 | unsigned long s_ext_extents; | 1152 | unsigned long s_ext_extents; |
1146 | #endif | 1153 | #endif |
1154 | /* ext4 extent cache stats */ | ||
1155 | unsigned long extent_cache_hits; | ||
1156 | unsigned long extent_cache_misses; | ||
1147 | 1157 | ||
1148 | /* for buddy allocator */ | 1158 | /* for buddy allocator */ |
1149 | struct ext4_group_info ***s_group_info; | 1159 | struct ext4_group_info ***s_group_info; |
@@ -1201,6 +1211,9 @@ struct ext4_sb_info { | |||
1201 | struct ext4_li_request *s_li_request; | 1211 | struct ext4_li_request *s_li_request; |
1202 | /* Wait multiplier for lazy initialization thread */ | 1212 | /* Wait multiplier for lazy initialization thread */ |
1203 | unsigned int s_li_wait_mult; | 1213 | unsigned int s_li_wait_mult; |
1214 | |||
1215 | /* Kernel thread for multiple mount protection */ | ||
1216 | struct task_struct *s_mmp_tsk; | ||
1204 | }; | 1217 | }; |
1205 | 1218 | ||
1206 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) | 1219 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
@@ -1338,6 +1351,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1338 | #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 | 1351 | #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 |
1339 | #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 | 1352 | #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 |
1340 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 | 1353 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 |
1354 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 | ||
1341 | 1355 | ||
1342 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 | 1356 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 |
1343 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 | 1357 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 |
@@ -1351,13 +1365,29 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1351 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | 1365 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ |
1352 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | 1366 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ |
1353 | 1367 | ||
1368 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | ||
1369 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | ||
1370 | EXT4_FEATURE_INCOMPAT_META_BG) | ||
1371 | #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | ||
1372 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | ||
1373 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR) | ||
1374 | |||
1375 | #define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | ||
1376 | #define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | ||
1377 | EXT4_FEATURE_INCOMPAT_RECOVER| \ | ||
1378 | EXT4_FEATURE_INCOMPAT_META_BG) | ||
1379 | #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | ||
1380 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | ||
1381 | EXT4_FEATURE_RO_COMPAT_BTREE_DIR) | ||
1382 | |||
1354 | #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR | 1383 | #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR |
1355 | #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1384 | #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
1356 | EXT4_FEATURE_INCOMPAT_RECOVER| \ | 1385 | EXT4_FEATURE_INCOMPAT_RECOVER| \ |
1357 | EXT4_FEATURE_INCOMPAT_META_BG| \ | 1386 | EXT4_FEATURE_INCOMPAT_META_BG| \ |
1358 | EXT4_FEATURE_INCOMPAT_EXTENTS| \ | 1387 | EXT4_FEATURE_INCOMPAT_EXTENTS| \ |
1359 | EXT4_FEATURE_INCOMPAT_64BIT| \ | 1388 | EXT4_FEATURE_INCOMPAT_64BIT| \ |
1360 | EXT4_FEATURE_INCOMPAT_FLEX_BG) | 1389 | EXT4_FEATURE_INCOMPAT_FLEX_BG| \ |
1390 | EXT4_FEATURE_INCOMPAT_MMP) | ||
1361 | #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ | 1391 | #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ |
1362 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ | 1392 | EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ |
1363 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ | 1393 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ |
@@ -1590,12 +1620,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | |||
1590 | */ | 1620 | */ |
1591 | struct ext4_lazy_init { | 1621 | struct ext4_lazy_init { |
1592 | unsigned long li_state; | 1622 | unsigned long li_state; |
1593 | |||
1594 | wait_queue_head_t li_wait_daemon; | ||
1595 | wait_queue_head_t li_wait_task; | ||
1596 | struct timer_list li_timer; | ||
1597 | struct task_struct *li_task; | ||
1598 | |||
1599 | struct list_head li_request_list; | 1623 | struct list_head li_request_list; |
1600 | struct mutex li_list_mtx; | 1624 | struct mutex li_list_mtx; |
1601 | }; | 1625 | }; |
@@ -1615,6 +1639,67 @@ struct ext4_features { | |||
1615 | }; | 1639 | }; |
1616 | 1640 | ||
1617 | /* | 1641 | /* |
1642 | * This structure will be used for multiple mount protection. It will be | ||
1643 | * written into the block number saved in the s_mmp_block field in the | ||
1644 | * superblock. Programs that check MMP should assume that if | ||
1645 | * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe | ||
1646 | * to use the filesystem, regardless of how old the timestamp is. | ||
1647 | */ | ||
1648 | #define EXT4_MMP_MAGIC 0x004D4D50U /* ASCII for MMP */ | ||
1649 | #define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */ | ||
1650 | #define EXT4_MMP_SEQ_FSCK 0xE24D4D50U /* mmp_seq value when being fscked */ | ||
1651 | #define EXT4_MMP_SEQ_MAX 0xE24D4D4FU /* maximum valid mmp_seq value */ | ||
1652 | |||
1653 | struct mmp_struct { | ||
1654 | __le32 mmp_magic; /* Magic number for MMP */ | ||
1655 | __le32 mmp_seq; /* Sequence no. updated periodically */ | ||
1656 | |||
1657 | /* | ||
1658 | * mmp_time, mmp_nodename & mmp_bdevname are only used for information | ||
1659 | * purposes and do not affect the correctness of the algorithm | ||
1660 | */ | ||
1661 | __le64 mmp_time; /* Time last updated */ | ||
1662 | char mmp_nodename[64]; /* Node which last updated MMP block */ | ||
1663 | char mmp_bdevname[32]; /* Bdev which last updated MMP block */ | ||
1664 | |||
1665 | /* | ||
1666 | * mmp_check_interval is used to verify if the MMP block has been | ||
1667 | * updated on the block device. The value is updated based on the | ||
1668 | * maximum time to write the MMP block during an update cycle. | ||
1669 | */ | ||
1670 | __le16 mmp_check_interval; | ||
1671 | |||
1672 | __le16 mmp_pad1; | ||
1673 | __le32 mmp_pad2[227]; | ||
1674 | }; | ||
1675 | |||
1676 | /* arguments passed to the mmp thread */ | ||
1677 | struct mmpd_data { | ||
1678 | struct buffer_head *bh; /* bh from initial read_mmp_block() */ | ||
1679 | struct super_block *sb; /* super block of the fs */ | ||
1680 | }; | ||
1681 | |||
1682 | /* | ||
1683 | * Check interval multiplier | ||
1684 | * The MMP block is written every update interval and initially checked every | ||
1685 | * update interval x the multiplier (the value is then adapted based on the | ||
1686 | * write latency). The reason is that writes can be delayed under load and we | ||
1687 | * don't want readers to incorrectly assume that the filesystem is no longer | ||
1688 | * in use. | ||
1689 | */ | ||
1690 | #define EXT4_MMP_CHECK_MULT 2UL | ||
1691 | |||
1692 | /* | ||
1693 | * Minimum interval for MMP checking in seconds. | ||
1694 | */ | ||
1695 | #define EXT4_MMP_MIN_CHECK_INTERVAL 5UL | ||
1696 | |||
1697 | /* | ||
1698 | * Maximum interval for MMP checking in seconds. | ||
1699 | */ | ||
1700 | #define EXT4_MMP_MAX_CHECK_INTERVAL 300UL | ||
1701 | |||
1702 | /* | ||
1618 | * Function prototypes | 1703 | * Function prototypes |
1619 | */ | 1704 | */ |
1620 | 1705 | ||
@@ -1638,10 +1723,12 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group); | |||
1638 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, | 1723 | extern unsigned long ext4_bg_num_gdb(struct super_block *sb, |
1639 | ext4_group_t group); | 1724 | ext4_group_t group); |
1640 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | 1725 | extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, |
1641 | ext4_fsblk_t goal, unsigned long *count, int *errp); | 1726 | ext4_fsblk_t goal, |
1642 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1727 | unsigned int flags, |
1643 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | 1728 | unsigned long *count, |
1644 | ext4_fsblk_t block, unsigned long count); | 1729 | int *errp); |
1730 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, | ||
1731 | s64 nblocks, unsigned int flags); | ||
1645 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); | 1732 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
1646 | extern void ext4_check_blocks_bitmap(struct super_block *); | 1733 | extern void ext4_check_blocks_bitmap(struct super_block *); |
1647 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1734 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
@@ -1706,6 +1793,8 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
1706 | unsigned long count, int flags); | 1793 | unsigned long count, int flags); |
1707 | extern int ext4_mb_add_groupinfo(struct super_block *sb, | 1794 | extern int ext4_mb_add_groupinfo(struct super_block *sb, |
1708 | ext4_group_t i, struct ext4_group_desc *desc); | 1795 | ext4_group_t i, struct ext4_group_desc *desc); |
1796 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | ||
1797 | ext4_fsblk_t block, unsigned long count); | ||
1709 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); | 1798 | extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); |
1710 | 1799 | ||
1711 | /* inode.c */ | 1800 | /* inode.c */ |
@@ -1729,6 +1818,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
1729 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1818 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1730 | extern int ext4_can_truncate(struct inode *inode); | 1819 | extern int ext4_can_truncate(struct inode *inode); |
1731 | extern void ext4_truncate(struct inode *); | 1820 | extern void ext4_truncate(struct inode *); |
1821 | extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); | ||
1732 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | 1822 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); |
1733 | extern void ext4_set_inode_flags(struct inode *); | 1823 | extern void ext4_set_inode_flags(struct inode *); |
1734 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1824 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
@@ -1738,6 +1828,8 @@ extern int ext4_writepage_trans_blocks(struct inode *); | |||
1738 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1828 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1739 | extern int ext4_block_truncate_page(handle_t *handle, | 1829 | extern int ext4_block_truncate_page(handle_t *handle, |
1740 | struct address_space *mapping, loff_t from); | 1830 | struct address_space *mapping, loff_t from); |
1831 | extern int ext4_block_zero_page_range(handle_t *handle, | ||
1832 | struct address_space *mapping, loff_t from, loff_t length); | ||
1741 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1833 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1742 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1834 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1743 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1835 | extern void ext4_da_update_reserve_space(struct inode *inode, |
@@ -1788,6 +1880,10 @@ extern void __ext4_warning(struct super_block *, const char *, unsigned int, | |||
1788 | __LINE__, ## message) | 1880 | __LINE__, ## message) |
1789 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) | 1881 | extern void ext4_msg(struct super_block *, const char *, const char *, ...) |
1790 | __attribute__ ((format (printf, 3, 4))); | 1882 | __attribute__ ((format (printf, 3, 4))); |
1883 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, | ||
1884 | const char *, unsigned int, const char *); | ||
1885 | #define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ | ||
1886 | __LINE__, msg) | ||
1791 | extern void __ext4_grp_locked_error(const char *, unsigned int, \ | 1887 | extern void __ext4_grp_locked_error(const char *, unsigned int, \ |
1792 | struct super_block *, ext4_group_t, \ | 1888 | struct super_block *, ext4_group_t, \ |
1793 | unsigned long, ext4_fsblk_t, \ | 1889 | unsigned long, ext4_fsblk_t, \ |
@@ -2064,6 +2160,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | |||
2064 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | 2160 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
2065 | struct ext4_map_blocks *map, int flags); | 2161 | struct ext4_map_blocks *map, int flags); |
2066 | extern void ext4_ext_truncate(struct inode *); | 2162 | extern void ext4_ext_truncate(struct inode *); |
2163 | extern int ext4_ext_punch_hole(struct file *file, loff_t offset, | ||
2164 | loff_t length); | ||
2067 | extern void ext4_ext_init(struct super_block *); | 2165 | extern void ext4_ext_init(struct super_block *); |
2068 | extern void ext4_ext_release(struct super_block *); | 2166 | extern void ext4_ext_release(struct super_block *); |
2069 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, | 2167 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
@@ -2092,6 +2190,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, | |||
2092 | int len, | 2190 | int len, |
2093 | struct writeback_control *wbc); | 2191 | struct writeback_control *wbc); |
2094 | 2192 | ||
2193 | /* mmp.c */ | ||
2194 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); | ||
2195 | |||
2095 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2196 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ |
2096 | enum ext4_state_bits { | 2197 | enum ext4_state_bits { |
2097 | BH_Uninit /* blocks are allocated but uninitialized on disk */ | 2198 | BH_Uninit /* blocks are allocated but uninitialized on disk */ |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6e272ef6ba96..f5240aa15601 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -6,20 +6,6 @@ | |||
6 | 6 | ||
7 | #include <trace/events/ext4.h> | 7 | #include <trace/events/ext4.h> |
8 | 8 | ||
9 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, | ||
10 | handle_t *handle, struct buffer_head *bh) | ||
11 | { | ||
12 | int err = 0; | ||
13 | |||
14 | if (ext4_handle_valid(handle)) { | ||
15 | err = jbd2_journal_get_undo_access(handle, bh); | ||
16 | if (err) | ||
17 | ext4_journal_abort_handle(where, line, __func__, bh, | ||
18 | handle, err); | ||
19 | } | ||
20 | return err; | ||
21 | } | ||
22 | |||
23 | int __ext4_journal_get_write_access(const char *where, unsigned int line, | 9 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
24 | handle_t *handle, struct buffer_head *bh) | 10 | handle_t *handle, struct buffer_head *bh) |
25 | { | 11 | { |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index d0f53538a57f..bb85757689b6 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -126,9 +126,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line, | |||
126 | const char *err_fn, | 126 | const char *err_fn, |
127 | struct buffer_head *bh, handle_t *handle, int err); | 127 | struct buffer_head *bh, handle_t *handle, int err); |
128 | 128 | ||
129 | int __ext4_journal_get_undo_access(const char *where, unsigned int line, | ||
130 | handle_t *handle, struct buffer_head *bh); | ||
131 | |||
132 | int __ext4_journal_get_write_access(const char *where, unsigned int line, | 129 | int __ext4_journal_get_write_access(const char *where, unsigned int line, |
133 | handle_t *handle, struct buffer_head *bh); | 130 | handle_t *handle, struct buffer_head *bh); |
134 | 131 | ||
@@ -146,8 +143,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
146 | int __ext4_handle_dirty_super(const char *where, unsigned int line, | 143 | int __ext4_handle_dirty_super(const char *where, unsigned int line, |
147 | handle_t *handle, struct super_block *sb); | 144 | handle_t *handle, struct super_block *sb); |
148 | 145 | ||
149 | #define ext4_journal_get_undo_access(handle, bh) \ | ||
150 | __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh)) | ||
151 | #define ext4_journal_get_write_access(handle, bh) \ | 146 | #define ext4_journal_get_write_access(handle, bh) \ |
152 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) | 147 | __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) |
153 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ | 148 | #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4890d6f3ad15..5199bac7fc62 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -46,6 +46,13 @@ | |||
46 | 46 | ||
47 | #include <trace/events/ext4.h> | 47 | #include <trace/events/ext4.h> |
48 | 48 | ||
49 | static int ext4_split_extent(handle_t *handle, | ||
50 | struct inode *inode, | ||
51 | struct ext4_ext_path *path, | ||
52 | struct ext4_map_blocks *map, | ||
53 | int split_flag, | ||
54 | int flags); | ||
55 | |||
49 | static int ext4_ext_truncate_extend_restart(handle_t *handle, | 56 | static int ext4_ext_truncate_extend_restart(handle_t *handle, |
50 | struct inode *inode, | 57 | struct inode *inode, |
51 | int needed) | 58 | int needed) |
@@ -192,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
192 | static ext4_fsblk_t | 199 | static ext4_fsblk_t |
193 | ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, | 200 | ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, |
194 | struct ext4_ext_path *path, | 201 | struct ext4_ext_path *path, |
195 | struct ext4_extent *ex, int *err) | 202 | struct ext4_extent *ex, int *err, unsigned int flags) |
196 | { | 203 | { |
197 | ext4_fsblk_t goal, newblock; | 204 | ext4_fsblk_t goal, newblock; |
198 | 205 | ||
199 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); | 206 | goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); |
200 | newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err); | 207 | newblock = ext4_new_meta_blocks(handle, inode, goal, flags, |
208 | NULL, err); | ||
201 | return newblock; | 209 | return newblock; |
202 | } | 210 | } |
203 | 211 | ||
@@ -474,9 +482,43 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
474 | } | 482 | } |
475 | ext_debug("\n"); | 483 | ext_debug("\n"); |
476 | } | 484 | } |
485 | |||
486 | static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path, | ||
487 | ext4_fsblk_t newblock, int level) | ||
488 | { | ||
489 | int depth = ext_depth(inode); | ||
490 | struct ext4_extent *ex; | ||
491 | |||
492 | if (depth != level) { | ||
493 | struct ext4_extent_idx *idx; | ||
494 | idx = path[level].p_idx; | ||
495 | while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) { | ||
496 | ext_debug("%d: move %d:%llu in new index %llu\n", level, | ||
497 | le32_to_cpu(idx->ei_block), | ||
498 | ext4_idx_pblock(idx), | ||
499 | newblock); | ||
500 | idx++; | ||
501 | } | ||
502 | |||
503 | return; | ||
504 | } | ||
505 | |||
506 | ex = path[depth].p_ext; | ||
507 | while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) { | ||
508 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | ||
509 | le32_to_cpu(ex->ee_block), | ||
510 | ext4_ext_pblock(ex), | ||
511 | ext4_ext_is_uninitialized(ex), | ||
512 | ext4_ext_get_actual_len(ex), | ||
513 | newblock); | ||
514 | ex++; | ||
515 | } | ||
516 | } | ||
517 | |||
477 | #else | 518 | #else |
478 | #define ext4_ext_show_path(inode, path) | 519 | #define ext4_ext_show_path(inode, path) |
479 | #define ext4_ext_show_leaf(inode, path) | 520 | #define ext4_ext_show_leaf(inode, path) |
521 | #define ext4_ext_show_move(inode, path, newblock, level) | ||
480 | #endif | 522 | #endif |
481 | 523 | ||
482 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 524 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
@@ -792,14 +834,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, | |||
792 | * - initializes subtree | 834 | * - initializes subtree |
793 | */ | 835 | */ |
794 | static int ext4_ext_split(handle_t *handle, struct inode *inode, | 836 | static int ext4_ext_split(handle_t *handle, struct inode *inode, |
795 | struct ext4_ext_path *path, | 837 | unsigned int flags, |
796 | struct ext4_extent *newext, int at) | 838 | struct ext4_ext_path *path, |
839 | struct ext4_extent *newext, int at) | ||
797 | { | 840 | { |
798 | struct buffer_head *bh = NULL; | 841 | struct buffer_head *bh = NULL; |
799 | int depth = ext_depth(inode); | 842 | int depth = ext_depth(inode); |
800 | struct ext4_extent_header *neh; | 843 | struct ext4_extent_header *neh; |
801 | struct ext4_extent_idx *fidx; | 844 | struct ext4_extent_idx *fidx; |
802 | struct ext4_extent *ex; | ||
803 | int i = at, k, m, a; | 845 | int i = at, k, m, a; |
804 | ext4_fsblk_t newblock, oldblock; | 846 | ext4_fsblk_t newblock, oldblock; |
805 | __le32 border; | 847 | __le32 border; |
@@ -847,7 +889,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
847 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); | 889 | ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); |
848 | for (a = 0; a < depth - at; a++) { | 890 | for (a = 0; a < depth - at; a++) { |
849 | newblock = ext4_ext_new_meta_block(handle, inode, path, | 891 | newblock = ext4_ext_new_meta_block(handle, inode, path, |
850 | newext, &err); | 892 | newext, &err, flags); |
851 | if (newblock == 0) | 893 | if (newblock == 0) |
852 | goto cleanup; | 894 | goto cleanup; |
853 | ablocks[a] = newblock; | 895 | ablocks[a] = newblock; |
@@ -876,7 +918,6 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
876 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); | 918 | neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); |
877 | neh->eh_magic = EXT4_EXT_MAGIC; | 919 | neh->eh_magic = EXT4_EXT_MAGIC; |
878 | neh->eh_depth = 0; | 920 | neh->eh_depth = 0; |
879 | ex = EXT_FIRST_EXTENT(neh); | ||
880 | 921 | ||
881 | /* move remainder of path[depth] to the new leaf */ | 922 | /* move remainder of path[depth] to the new leaf */ |
882 | if (unlikely(path[depth].p_hdr->eh_entries != | 923 | if (unlikely(path[depth].p_hdr->eh_entries != |
@@ -888,25 +929,12 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
888 | goto cleanup; | 929 | goto cleanup; |
889 | } | 930 | } |
890 | /* start copy from next extent */ | 931 | /* start copy from next extent */ |
891 | /* TODO: we could do it by single memmove */ | 932 | m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++; |
892 | m = 0; | 933 | ext4_ext_show_move(inode, path, newblock, depth); |
893 | path[depth].p_ext++; | ||
894 | while (path[depth].p_ext <= | ||
895 | EXT_MAX_EXTENT(path[depth].p_hdr)) { | ||
896 | ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", | ||
897 | le32_to_cpu(path[depth].p_ext->ee_block), | ||
898 | ext4_ext_pblock(path[depth].p_ext), | ||
899 | ext4_ext_is_uninitialized(path[depth].p_ext), | ||
900 | ext4_ext_get_actual_len(path[depth].p_ext), | ||
901 | newblock); | ||
902 | /*memmove(ex++, path[depth].p_ext++, | ||
903 | sizeof(struct ext4_extent)); | ||
904 | neh->eh_entries++;*/ | ||
905 | path[depth].p_ext++; | ||
906 | m++; | ||
907 | } | ||
908 | if (m) { | 934 | if (m) { |
909 | memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); | 935 | struct ext4_extent *ex; |
936 | ex = EXT_FIRST_EXTENT(neh); | ||
937 | memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m); | ||
910 | le16_add_cpu(&neh->eh_entries, m); | 938 | le16_add_cpu(&neh->eh_entries, m); |
911 | } | 939 | } |
912 | 940 | ||
@@ -968,12 +996,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
968 | 996 | ||
969 | ext_debug("int.index at %d (block %llu): %u -> %llu\n", | 997 | ext_debug("int.index at %d (block %llu): %u -> %llu\n", |
970 | i, newblock, le32_to_cpu(border), oldblock); | 998 | i, newblock, le32_to_cpu(border), oldblock); |
971 | /* copy indexes */ | ||
972 | m = 0; | ||
973 | path[i].p_idx++; | ||
974 | 999 | ||
975 | ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, | 1000 | /* move remainder of path[i] to the new index block */ |
976 | EXT_MAX_INDEX(path[i].p_hdr)); | ||
977 | if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != | 1001 | if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != |
978 | EXT_LAST_INDEX(path[i].p_hdr))) { | 1002 | EXT_LAST_INDEX(path[i].p_hdr))) { |
979 | EXT4_ERROR_INODE(inode, | 1003 | EXT4_ERROR_INODE(inode, |
@@ -982,20 +1006,13 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, | |||
982 | err = -EIO; | 1006 | err = -EIO; |
983 | goto cleanup; | 1007 | goto cleanup; |
984 | } | 1008 | } |
985 | while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { | 1009 | /* start copy indexes */ |
986 | ext_debug("%d: move %d:%llu in new index %llu\n", i, | 1010 | m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++; |
987 | le32_to_cpu(path[i].p_idx->ei_block), | 1011 | ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, |
988 | ext4_idx_pblock(path[i].p_idx), | 1012 | EXT_MAX_INDEX(path[i].p_hdr)); |
989 | newblock); | 1013 | ext4_ext_show_move(inode, path, newblock, i); |
990 | /*memmove(++fidx, path[i].p_idx++, | ||
991 | sizeof(struct ext4_extent_idx)); | ||
992 | neh->eh_entries++; | ||
993 | BUG_ON(neh->eh_entries > neh->eh_max);*/ | ||
994 | path[i].p_idx++; | ||
995 | m++; | ||
996 | } | ||
997 | if (m) { | 1014 | if (m) { |
998 | memmove(++fidx, path[i].p_idx - m, | 1015 | memmove(++fidx, path[i].p_idx, |
999 | sizeof(struct ext4_extent_idx) * m); | 1016 | sizeof(struct ext4_extent_idx) * m); |
1000 | le16_add_cpu(&neh->eh_entries, m); | 1017 | le16_add_cpu(&neh->eh_entries, m); |
1001 | } | 1018 | } |
@@ -1056,8 +1073,9 @@ cleanup: | |||
1056 | * just created block | 1073 | * just created block |
1057 | */ | 1074 | */ |
1058 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | 1075 | static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, |
1059 | struct ext4_ext_path *path, | 1076 | unsigned int flags, |
1060 | struct ext4_extent *newext) | 1077 | struct ext4_ext_path *path, |
1078 | struct ext4_extent *newext) | ||
1061 | { | 1079 | { |
1062 | struct ext4_ext_path *curp = path; | 1080 | struct ext4_ext_path *curp = path; |
1063 | struct ext4_extent_header *neh; | 1081 | struct ext4_extent_header *neh; |
@@ -1065,7 +1083,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, | |||
1065 | ext4_fsblk_t newblock; | 1083 | ext4_fsblk_t newblock; |
1066 | int err = 0; | 1084 | int err = 0; |
1067 | 1085 | ||
1068 | newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err); | 1086 | newblock = ext4_ext_new_meta_block(handle, inode, path, |
1087 | newext, &err, flags); | ||
1069 | if (newblock == 0) | 1088 | if (newblock == 0) |
1070 | return err; | 1089 | return err; |
1071 | 1090 | ||
@@ -1140,8 +1159,9 @@ out: | |||
1140 | * if no free index is found, then it requests in-depth growing. | 1159 | * if no free index is found, then it requests in-depth growing. |
1141 | */ | 1160 | */ |
1142 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, | 1161 | static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, |
1143 | struct ext4_ext_path *path, | 1162 | unsigned int flags, |
1144 | struct ext4_extent *newext) | 1163 | struct ext4_ext_path *path, |
1164 | struct ext4_extent *newext) | ||
1145 | { | 1165 | { |
1146 | struct ext4_ext_path *curp; | 1166 | struct ext4_ext_path *curp; |
1147 | int depth, i, err = 0; | 1167 | int depth, i, err = 0; |
@@ -1161,7 +1181,7 @@ repeat: | |||
1161 | if (EXT_HAS_FREE_INDEX(curp)) { | 1181 | if (EXT_HAS_FREE_INDEX(curp)) { |
1162 | /* if we found index with free entry, then use that | 1182 | /* if we found index with free entry, then use that |
1163 | * entry: create all needed subtree and add new leaf */ | 1183 | * entry: create all needed subtree and add new leaf */ |
1164 | err = ext4_ext_split(handle, inode, path, newext, i); | 1184 | err = ext4_ext_split(handle, inode, flags, path, newext, i); |
1165 | if (err) | 1185 | if (err) |
1166 | goto out; | 1186 | goto out; |
1167 | 1187 | ||
@@ -1174,7 +1194,8 @@ repeat: | |||
1174 | err = PTR_ERR(path); | 1194 | err = PTR_ERR(path); |
1175 | } else { | 1195 | } else { |
1176 | /* tree is full, time to grow in depth */ | 1196 | /* tree is full, time to grow in depth */ |
1177 | err = ext4_ext_grow_indepth(handle, inode, path, newext); | 1197 | err = ext4_ext_grow_indepth(handle, inode, flags, |
1198 | path, newext); | ||
1178 | if (err) | 1199 | if (err) |
1179 | goto out; | 1200 | goto out; |
1180 | 1201 | ||
@@ -1563,7 +1584,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1563 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns | 1584 | * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns |
1564 | * 1 if they got merged. | 1585 | * 1 if they got merged. |
1565 | */ | 1586 | */ |
1566 | static int ext4_ext_try_to_merge(struct inode *inode, | 1587 | static int ext4_ext_try_to_merge_right(struct inode *inode, |
1567 | struct ext4_ext_path *path, | 1588 | struct ext4_ext_path *path, |
1568 | struct ext4_extent *ex) | 1589 | struct ext4_extent *ex) |
1569 | { | 1590 | { |
@@ -1603,6 +1624,31 @@ static int ext4_ext_try_to_merge(struct inode *inode, | |||
1603 | } | 1624 | } |
1604 | 1625 | ||
1605 | /* | 1626 | /* |
1627 | * This function tries to merge the @ex extent to neighbours in the tree. | ||
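1628 | * Returns 1 only if @ex was merged with its right neighbour; returns 0 otherwise (including when @ex was merged into its left neighbour). | ||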
1629 | */ | ||
1630 | static int ext4_ext_try_to_merge(struct inode *inode, | ||
1631 | struct ext4_ext_path *path, | ||
1632 | struct ext4_extent *ex) { | ||
1633 | struct ext4_extent_header *eh; | ||
1634 | unsigned int depth; | ||
1635 | int merge_done = 0; | ||
1636 | int ret = 0; | ||
1637 | |||
1638 | depth = ext_depth(inode); | ||
1639 | BUG_ON(path[depth].p_hdr == NULL); | ||
1640 | eh = path[depth].p_hdr; | ||
1641 | |||
1642 | if (ex > EXT_FIRST_EXTENT(eh)) | ||
1643 | merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1); | ||
1644 | |||
1645 | if (!merge_done) | ||
1646 | ret = ext4_ext_try_to_merge_right(inode, path, ex); | ||
1647 | |||
1648 | return ret; | ||
1649 | } | ||
1650 | |||
1651 | /* | ||
1606 | * check if a portion of the "newext" extent overlaps with an | 1652 | * check if a portion of the "newext" extent overlaps with an |
1607 | * existing extent. | 1653 | * existing extent. |
1608 | * | 1654 | * |
@@ -1668,6 +1714,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1668 | int depth, len, err; | 1714 | int depth, len, err; |
1669 | ext4_lblk_t next; | 1715 | ext4_lblk_t next; |
1670 | unsigned uninitialized = 0; | 1716 | unsigned uninitialized = 0; |
1717 | int flags = 0; | ||
1671 | 1718 | ||
1672 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1719 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1673 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1720 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
@@ -1742,7 +1789,9 @@ repeat: | |||
1742 | * There is no free space in the found leaf. | 1789 | * There is no free space in the found leaf. |
1743 | * We're gonna add a new leaf in the tree. | 1790 | * We're gonna add a new leaf in the tree. |
1744 | */ | 1791 | */ |
1745 | err = ext4_ext_create_new_leaf(handle, inode, path, newext); | 1792 | if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) |
1793 | flags = EXT4_MB_USE_ROOT_BLOCKS; | ||
1794 | err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext); | ||
1746 | if (err) | 1795 | if (err) |
1747 | goto cleanup; | 1796 | goto cleanup; |
1748 | depth = ext_depth(inode); | 1797 | depth = ext_depth(inode); |
@@ -2003,13 +2052,25 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, | |||
2003 | } | 2052 | } |
2004 | 2053 | ||
2005 | /* | 2054 | /* |
2055 | * ext4_ext_check_cache() | ||
2056 | * Checks to see if the given block is in the cache. | ||
2057 | * If it is, the cached extent is stored in the given | ||
2058 | * cache extent pointer. If the cached extent is a hole, | ||
2059 | * this routine should be used instead of | ||
2060 | * ext4_ext_in_cache if the calling function needs to | ||
2061 | * know the size of the hole. | ||
2062 | * | ||
2063 | * @inode: The file's inode | ||
2064 | * @block: The block to look for in the cache | ||
2065 | * @ex: Pointer where the cached extent will be stored | ||
2066 | * if it contains block | ||
2067 | * | ||
2006 | * Return 0 if cache is invalid; 1 if the cache is valid | 2068 | * Return 0 if cache is invalid; 1 if the cache is valid |
2007 | */ | 2069 | */ |
2008 | static int | 2070 | static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, |
2009 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | 2071 | struct ext4_ext_cache *ex){ |
2010 | struct ext4_extent *ex) | ||
2011 | { | ||
2012 | struct ext4_ext_cache *cex; | 2072 | struct ext4_ext_cache *cex; |
2073 | struct ext4_sb_info *sbi; | ||
2013 | int ret = 0; | 2074 | int ret = 0; |
2014 | 2075 | ||
2015 | /* | 2076 | /* |
@@ -2017,26 +2078,60 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |||
2017 | */ | 2078 | */ |
2018 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 2079 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
2019 | cex = &EXT4_I(inode)->i_cached_extent; | 2080 | cex = &EXT4_I(inode)->i_cached_extent; |
2081 | sbi = EXT4_SB(inode->i_sb); | ||
2020 | 2082 | ||
2021 | /* has cache valid data? */ | 2083 | /* has cache valid data? */ |
2022 | if (cex->ec_len == 0) | 2084 | if (cex->ec_len == 0) |
2023 | goto errout; | 2085 | goto errout; |
2024 | 2086 | ||
2025 | if (in_range(block, cex->ec_block, cex->ec_len)) { | 2087 | if (in_range(block, cex->ec_block, cex->ec_len)) { |
2026 | ex->ee_block = cpu_to_le32(cex->ec_block); | 2088 | memcpy(ex, cex, sizeof(struct ext4_ext_cache)); |
2027 | ext4_ext_store_pblock(ex, cex->ec_start); | ||
2028 | ex->ee_len = cpu_to_le16(cex->ec_len); | ||
2029 | ext_debug("%u cached by %u:%u:%llu\n", | 2089 | ext_debug("%u cached by %u:%u:%llu\n", |
2030 | block, | 2090 | block, |
2031 | cex->ec_block, cex->ec_len, cex->ec_start); | 2091 | cex->ec_block, cex->ec_len, cex->ec_start); |
2032 | ret = 1; | 2092 | ret = 1; |
2033 | } | 2093 | } |
2034 | errout: | 2094 | errout: |
2095 | if (!ret) | ||
2096 | sbi->extent_cache_misses++; | ||
2097 | else | ||
2098 | sbi->extent_cache_hits++; | ||
2035 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 2099 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
2036 | return ret; | 2100 | return ret; |
2037 | } | 2101 | } |
2038 | 2102 | ||
2039 | /* | 2103 | /* |
2104 | * ext4_ext_in_cache() | ||
2105 | * Checks to see if the given block is in the cache. | ||
2106 | * If it is, the cached extent is stored in the given | ||
2107 | * extent pointer. | ||
2108 | * | ||
2109 | * @inode: The file's inode | ||
2110 | * @block: The block to look for in the cache | ||
2111 | * @ex: Pointer where the cached extent will be stored | ||
2112 | * if it contains block | ||
2113 | * | ||
2114 | * Return 0 if cache is invalid; 1 if the cache is valid | ||
2115 | */ | ||
2116 | static int | ||
2117 | ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | ||
2118 | struct ext4_extent *ex) | ||
2119 | { | ||
2120 | struct ext4_ext_cache cex; | ||
2121 | int ret = 0; | ||
2122 | |||
2123 | if (ext4_ext_check_cache(inode, block, &cex)) { | ||
2124 | ex->ee_block = cpu_to_le32(cex.ec_block); | ||
2125 | ext4_ext_store_pblock(ex, cex.ec_start); | ||
2126 | ex->ee_len = cpu_to_le16(cex.ec_len); | ||
2127 | ret = 1; | ||
2128 | } | ||
2129 | |||
2130 | return ret; | ||
2131 | } | ||
2132 | |||
2133 | |||
2134 | /* | ||
2040 | * ext4_ext_rm_idx: | 2135 | * ext4_ext_rm_idx: |
2041 | * removes index from the index block. | 2136 | * removes index from the index block. |
2042 | * It's used in truncate case only, thus all requests are for | 2137 | * It's used in truncate case only, thus all requests are for |
@@ -2163,8 +2258,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2163 | ext4_free_blocks(handle, inode, NULL, start, num, flags); | 2258 | ext4_free_blocks(handle, inode, NULL, start, num, flags); |
2164 | } else if (from == le32_to_cpu(ex->ee_block) | 2259 | } else if (from == le32_to_cpu(ex->ee_block) |
2165 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2260 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { |
2166 | printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", | 2261 | /* head removal */ |
2167 | from, to, le32_to_cpu(ex->ee_block), ee_len); | 2262 | ext4_lblk_t num; |
2263 | ext4_fsblk_t start; | ||
2264 | |||
2265 | num = to - from; | ||
2266 | start = ext4_ext_pblock(ex); | ||
2267 | |||
2268 | ext_debug("free first %u blocks starting %llu\n", num, start); | ||
2269 | ext4_free_blocks(handle, inode, 0, start, num, flags); | ||
2270 | |||
2168 | } else { | 2271 | } else { |
2169 | printk(KERN_INFO "strange request: removal(2) " | 2272 | printk(KERN_INFO "strange request: removal(2) " |
2170 | "%u-%u from %u:%u\n", | 2273 | "%u-%u from %u:%u\n", |
@@ -2173,9 +2276,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
2173 | return 0; | 2276 | return 0; |
2174 | } | 2277 | } |
2175 | 2278 | ||
2279 | |||
2280 | /* | ||
2281 | * ext4_ext_rm_leaf() Removes the extents associated with the | ||
2282 | * blocks appearing between "start" and "end", and splits the extents | ||
2283 | * if "start" and "end" appear in the same extent | ||
2284 | * | ||
2285 | * @handle: The journal handle | ||
2286 | * @inode: The files inode | ||
2287 | * @path: The path to the leaf | ||
2288 | * @start: The first block to remove | ||
2289 | * @end: The last block to remove | ||
2290 | */ | ||
2176 | static int | 2291 | static int |
2177 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2292 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
2178 | struct ext4_ext_path *path, ext4_lblk_t start) | 2293 | struct ext4_ext_path *path, ext4_lblk_t start, |
2294 | ext4_lblk_t end) | ||
2179 | { | 2295 | { |
2180 | int err = 0, correct_index = 0; | 2296 | int err = 0, correct_index = 0; |
2181 | int depth = ext_depth(inode), credits; | 2297 | int depth = ext_depth(inode), credits; |
@@ -2186,6 +2302,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2186 | unsigned short ex_ee_len; | 2302 | unsigned short ex_ee_len; |
2187 | unsigned uninitialized = 0; | 2303 | unsigned uninitialized = 0; |
2188 | struct ext4_extent *ex; | 2304 | struct ext4_extent *ex; |
2305 | struct ext4_map_blocks map; | ||
2189 | 2306 | ||
2190 | /* the header must be checked already in ext4_ext_remove_space() */ | 2307 | /* the header must be checked already in ext4_ext_remove_space() */ |
2191 | ext_debug("truncate since %u in leaf\n", start); | 2308 | ext_debug("truncate since %u in leaf\n", start); |
@@ -2215,31 +2332,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2215 | path[depth].p_ext = ex; | 2332 | path[depth].p_ext = ex; |
2216 | 2333 | ||
2217 | a = ex_ee_block > start ? ex_ee_block : start; | 2334 | a = ex_ee_block > start ? ex_ee_block : start; |
2218 | b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? | 2335 | b = ex_ee_block+ex_ee_len - 1 < end ? |
2219 | ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; | 2336 | ex_ee_block+ex_ee_len - 1 : end; |
2220 | 2337 | ||
2221 | ext_debug(" border %u:%u\n", a, b); | 2338 | ext_debug(" border %u:%u\n", a, b); |
2222 | 2339 | ||
2223 | if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { | 2340 | /* If this extent is beyond the end of the hole, skip it */ |
2224 | block = 0; | 2341 | if (end <= ex_ee_block) { |
2225 | num = 0; | 2342 | ex--; |
2226 | BUG(); | 2343 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2344 | ex_ee_len = ext4_ext_get_actual_len(ex); | ||
2345 | continue; | ||
2346 | } else if (a != ex_ee_block && | ||
2347 | b != ex_ee_block + ex_ee_len - 1) { | ||
2348 | /* | ||
2349 | * If this is a truncate, then this condition should | ||
2350 | * never happen because at least one of the end points | ||
2351 | * needs to be on the edge of the extent. | ||
2352 | */ | ||
2353 | if (end == EXT_MAX_BLOCK) { | ||
2354 | ext_debug(" bad truncate %u:%u\n", | ||
2355 | start, end); | ||
2356 | block = 0; | ||
2357 | num = 0; | ||
2358 | err = -EIO; | ||
2359 | goto out; | ||
2360 | } | ||
2361 | /* | ||
2362 | * else this is a hole punch, so the extent needs to | ||
2363 | * be split since neither edge of the hole is on the | ||
2364 | * extent edge | ||
2365 | */ | ||
2366 | else{ | ||
2367 | map.m_pblk = ext4_ext_pblock(ex); | ||
2368 | map.m_lblk = ex_ee_block; | ||
2369 | map.m_len = b - ex_ee_block; | ||
2370 | |||
2371 | err = ext4_split_extent(handle, | ||
2372 | inode, path, &map, 0, | ||
2373 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
2374 | EXT4_GET_BLOCKS_PRE_IO); | ||
2375 | |||
2376 | if (err < 0) | ||
2377 | goto out; | ||
2378 | |||
2379 | ex_ee_len = ext4_ext_get_actual_len(ex); | ||
2380 | |||
2381 | b = ex_ee_block+ex_ee_len - 1 < end ? | ||
2382 | ex_ee_block+ex_ee_len - 1 : end; | ||
2383 | |||
2384 | /* Then remove tail of this extent */ | ||
2385 | block = ex_ee_block; | ||
2386 | num = a - block; | ||
2387 | } | ||
2227 | } else if (a != ex_ee_block) { | 2388 | } else if (a != ex_ee_block) { |
2228 | /* remove tail of the extent */ | 2389 | /* remove tail of the extent */ |
2229 | block = ex_ee_block; | 2390 | block = ex_ee_block; |
2230 | num = a - block; | 2391 | num = a - block; |
2231 | } else if (b != ex_ee_block + ex_ee_len - 1) { | 2392 | } else if (b != ex_ee_block + ex_ee_len - 1) { |
2232 | /* remove head of the extent */ | 2393 | /* remove head of the extent */ |
2233 | block = a; | 2394 | block = b; |
2234 | num = b - a; | 2395 | num = ex_ee_block + ex_ee_len - b; |
2235 | /* there is no "make a hole" API yet */ | 2396 | |
2236 | BUG(); | 2397 | /* |
2398 | * If this is a truncate, this condition | ||
2399 | * should never happen | ||
2400 | */ | ||
2401 | if (end == EXT_MAX_BLOCK) { | ||
2402 | ext_debug(" bad truncate %u:%u\n", | ||
2403 | start, end); | ||
2404 | err = -EIO; | ||
2405 | goto out; | ||
2406 | } | ||
2237 | } else { | 2407 | } else { |
2238 | /* remove whole extent: excellent! */ | 2408 | /* remove whole extent: excellent! */ |
2239 | block = ex_ee_block; | 2409 | block = ex_ee_block; |
2240 | num = 0; | 2410 | num = 0; |
2241 | BUG_ON(a != ex_ee_block); | 2411 | if (a != ex_ee_block) { |
2242 | BUG_ON(b != ex_ee_block + ex_ee_len - 1); | 2412 | ext_debug(" bad truncate %u:%u\n", |
2413 | start, end); | ||
2414 | err = -EIO; | ||
2415 | goto out; | ||
2416 | } | ||
2417 | |||
2418 | if (b != ex_ee_block + ex_ee_len - 1) { | ||
2419 | ext_debug(" bad truncate %u:%u\n", | ||
2420 | start, end); | ||
2421 | err = -EIO; | ||
2422 | goto out; | ||
2423 | } | ||
2243 | } | 2424 | } |
2244 | 2425 | ||
2245 | /* | 2426 | /* |
@@ -2270,7 +2451,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2270 | if (num == 0) { | 2451 | if (num == 0) { |
2271 | /* this extent is removed; mark slot entirely unused */ | 2452 | /* this extent is removed; mark slot entirely unused */ |
2272 | ext4_ext_store_pblock(ex, 0); | 2453 | ext4_ext_store_pblock(ex, 0); |
2273 | le16_add_cpu(&eh->eh_entries, -1); | 2454 | } else if (block != ex_ee_block) { |
2455 | /* | ||
2456 | * If this was a head removal, then we need to update | ||
2457 | * the physical block since it is now at a different | ||
2458 | * location | ||
2459 | */ | ||
2460 | ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a)); | ||
2274 | } | 2461 | } |
2275 | 2462 | ||
2276 | ex->ee_block = cpu_to_le32(block); | 2463 | ex->ee_block = cpu_to_le32(block); |
@@ -2286,6 +2473,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2286 | if (err) | 2473 | if (err) |
2287 | goto out; | 2474 | goto out; |
2288 | 2475 | ||
2476 | /* | ||
2477 | * If the extent was completely released, | ||
2478 | * we need to remove it from the leaf | ||
2479 | */ | ||
2480 | if (num == 0) { | ||
2481 | if (end != EXT_MAX_BLOCK) { | ||
2482 | /* | ||
2483 | * For hole punching, we need to scoot all the | ||
2484 | * extents up when an extent is removed so that | ||
2485 | * we dont have blank extents in the middle | ||
2486 | */ | ||
2487 | memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) * | ||
2488 | sizeof(struct ext4_extent)); | ||
2489 | |||
2490 | /* Now get rid of the one at the end */ | ||
2491 | memset(EXT_LAST_EXTENT(eh), 0, | ||
2492 | sizeof(struct ext4_extent)); | ||
2493 | } | ||
2494 | le16_add_cpu(&eh->eh_entries, -1); | ||
2495 | } | ||
2496 | |||
2289 | ext_debug("new extent: %u:%u:%llu\n", block, num, | 2497 | ext_debug("new extent: %u:%u:%llu\n", block, num, |
2290 | ext4_ext_pblock(ex)); | 2498 | ext4_ext_pblock(ex)); |
2291 | ex--; | 2499 | ex--; |
@@ -2326,7 +2534,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) | |||
2326 | return 1; | 2534 | return 1; |
2327 | } | 2535 | } |
2328 | 2536 | ||
2329 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | 2537 | static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, |
2538 | ext4_lblk_t end) | ||
2330 | { | 2539 | { |
2331 | struct super_block *sb = inode->i_sb; | 2540 | struct super_block *sb = inode->i_sb; |
2332 | int depth = ext_depth(inode); | 2541 | int depth = ext_depth(inode); |
@@ -2365,7 +2574,8 @@ again: | |||
2365 | while (i >= 0 && err == 0) { | 2574 | while (i >= 0 && err == 0) { |
2366 | if (i == depth) { | 2575 | if (i == depth) { |
2367 | /* this is leaf block */ | 2576 | /* this is leaf block */ |
2368 | err = ext4_ext_rm_leaf(handle, inode, path, start); | 2577 | err = ext4_ext_rm_leaf(handle, inode, path, |
2578 | start, end); | ||
2369 | /* root level has p_bh == NULL, brelse() eats this */ | 2579 | /* root level has p_bh == NULL, brelse() eats this */ |
2370 | brelse(path[i].p_bh); | 2580 | brelse(path[i].p_bh); |
2371 | path[i].p_bh = NULL; | 2581 | path[i].p_bh = NULL; |
@@ -2529,6 +2739,195 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2529 | return ret; | 2739 | return ret; |
2530 | } | 2740 | } |
2531 | 2741 | ||
2742 | /* | ||
2743 | * used by extent splitting. | ||
2744 | */ | ||
2745 | #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ | ||
2746 | due to ENOSPC */ | ||
2747 | #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ | ||
2748 | #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ | ||
2749 | |||
2750 | /* | ||
2751 | * ext4_split_extent_at() splits an extent at given block. | ||
2752 | * | ||
2753 | * @handle: the journal handle | ||
2754 | * @inode: the file inode | ||
2755 | * @path: the path to the extent | ||
2756 | * @split: the logical block where the extent is splitted. | ||
2757 | * @split_flags: indicates if the extent could be zeroout if split fails, and | ||
2758 | * the states(init or uninit) of new extents. | ||
2759 | * @flags: flags used to insert new extent to extent tree. | ||
2760 | * | ||
2761 | * | ||
2762 | * Splits extent [a, b] into two extents [a, @split) and [@split, b], states | ||
2763 | * of which are deterimined by split_flag. | ||
2764 | * | ||
2765 | * There are two cases: | ||
2766 | * a> the extent are splitted into two extent. | ||
2767 | * b> split is not needed, and just mark the extent. | ||
2768 | * | ||
2769 | * return 0 on success. | ||
2770 | */ | ||
2771 | static int ext4_split_extent_at(handle_t *handle, | ||
2772 | struct inode *inode, | ||
2773 | struct ext4_ext_path *path, | ||
2774 | ext4_lblk_t split, | ||
2775 | int split_flag, | ||
2776 | int flags) | ||
2777 | { | ||
2778 | ext4_fsblk_t newblock; | ||
2779 | ext4_lblk_t ee_block; | ||
2780 | struct ext4_extent *ex, newex, orig_ex; | ||
2781 | struct ext4_extent *ex2 = NULL; | ||
2782 | unsigned int ee_len, depth; | ||
2783 | int err = 0; | ||
2784 | |||
2785 | ext_debug("ext4_split_extents_at: inode %lu, logical" | ||
2786 | "block %llu\n", inode->i_ino, (unsigned long long)split); | ||
2787 | |||
2788 | ext4_ext_show_leaf(inode, path); | ||
2789 | |||
2790 | depth = ext_depth(inode); | ||
2791 | ex = path[depth].p_ext; | ||
2792 | ee_block = le32_to_cpu(ex->ee_block); | ||
2793 | ee_len = ext4_ext_get_actual_len(ex); | ||
2794 | newblock = split - ee_block + ext4_ext_pblock(ex); | ||
2795 | |||
2796 | BUG_ON(split < ee_block || split >= (ee_block + ee_len)); | ||
2797 | |||
2798 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
2799 | if (err) | ||
2800 | goto out; | ||
2801 | |||
2802 | if (split == ee_block) { | ||
2803 | /* | ||
2804 | * case b: block @split is the block that the extent begins with | ||
2805 | * then we just change the state of the extent, and splitting | ||
2806 | * is not needed. | ||
2807 | */ | ||
2808 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | ||
2809 | ext4_ext_mark_uninitialized(ex); | ||
2810 | else | ||
2811 | ext4_ext_mark_initialized(ex); | ||
2812 | |||
2813 | if (!(flags & EXT4_GET_BLOCKS_PRE_IO)) | ||
2814 | ext4_ext_try_to_merge(inode, path, ex); | ||
2815 | |||
2816 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
2817 | goto out; | ||
2818 | } | ||
2819 | |||
2820 | /* case a */ | ||
2821 | memcpy(&orig_ex, ex, sizeof(orig_ex)); | ||
2822 | ex->ee_len = cpu_to_le16(split - ee_block); | ||
2823 | if (split_flag & EXT4_EXT_MARK_UNINIT1) | ||
2824 | ext4_ext_mark_uninitialized(ex); | ||
2825 | |||
2826 | /* | ||
2827 | * path may lead to new leaf, not to original leaf any more | ||
2828 | * after ext4_ext_insert_extent() returns, | ||
2829 | */ | ||
2830 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
2831 | if (err) | ||
2832 | goto fix_extent_len; | ||
2833 | |||
2834 | ex2 = &newex; | ||
2835 | ex2->ee_block = cpu_to_le32(split); | ||
2836 | ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); | ||
2837 | ext4_ext_store_pblock(ex2, newblock); | ||
2838 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | ||
2839 | ext4_ext_mark_uninitialized(ex2); | ||
2840 | |||
2841 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | ||
2842 | if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
2843 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2844 | if (err) | ||
2845 | goto fix_extent_len; | ||
2846 | /* update the extent length and mark as initialized */ | ||
2847 | ex->ee_len = cpu_to_le32(ee_len); | ||
2848 | ext4_ext_try_to_merge(inode, path, ex); | ||
2849 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
2850 | goto out; | ||
2851 | } else if (err) | ||
2852 | goto fix_extent_len; | ||
2853 | |||
2854 | out: | ||
2855 | ext4_ext_show_leaf(inode, path); | ||
2856 | return err; | ||
2857 | |||
2858 | fix_extent_len: | ||
2859 | ex->ee_len = orig_ex.ee_len; | ||
2860 | ext4_ext_dirty(handle, inode, path + depth); | ||
2861 | return err; | ||
2862 | } | ||
2863 | |||
2864 | /* | ||
2865 | * ext4_split_extents() splits an extent and mark extent which is covered | ||
2866 | * by @map as split_flags indicates | ||
2867 | * | ||
2868 | * It may result in splitting the extent into multiple extents (upto three) | ||
2869 | * There are three possibilities: | ||
2870 | * a> There is no split required | ||
2871 | * b> Splits in two extents: Split is happening at either end of the extent | ||
2872 | * c> Splits in three extents: Somone is splitting in middle of the extent | ||
2873 | * | ||
2874 | */ | ||
2875 | static int ext4_split_extent(handle_t *handle, | ||
2876 | struct inode *inode, | ||
2877 | struct ext4_ext_path *path, | ||
2878 | struct ext4_map_blocks *map, | ||
2879 | int split_flag, | ||
2880 | int flags) | ||
2881 | { | ||
2882 | ext4_lblk_t ee_block; | ||
2883 | struct ext4_extent *ex; | ||
2884 | unsigned int ee_len, depth; | ||
2885 | int err = 0; | ||
2886 | int uninitialized; | ||
2887 | int split_flag1, flags1; | ||
2888 | |||
2889 | depth = ext_depth(inode); | ||
2890 | ex = path[depth].p_ext; | ||
2891 | ee_block = le32_to_cpu(ex->ee_block); | ||
2892 | ee_len = ext4_ext_get_actual_len(ex); | ||
2893 | uninitialized = ext4_ext_is_uninitialized(ex); | ||
2894 | |||
2895 | if (map->m_lblk + map->m_len < ee_block + ee_len) { | ||
2896 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? | ||
2897 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
2898 | flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; | ||
2899 | if (uninitialized) | ||
2900 | split_flag1 |= EXT4_EXT_MARK_UNINIT1 | | ||
2901 | EXT4_EXT_MARK_UNINIT2; | ||
2902 | err = ext4_split_extent_at(handle, inode, path, | ||
2903 | map->m_lblk + map->m_len, split_flag1, flags1); | ||
2904 | if (err) | ||
2905 | goto out; | ||
2906 | } | ||
2907 | |||
2908 | ext4_ext_drop_refs(path); | ||
2909 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
2910 | if (IS_ERR(path)) | ||
2911 | return PTR_ERR(path); | ||
2912 | |||
2913 | if (map->m_lblk >= ee_block) { | ||
2914 | split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? | ||
2915 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
2916 | if (uninitialized) | ||
2917 | split_flag1 |= EXT4_EXT_MARK_UNINIT1; | ||
2918 | if (split_flag & EXT4_EXT_MARK_UNINIT2) | ||
2919 | split_flag1 |= EXT4_EXT_MARK_UNINIT2; | ||
2920 | err = ext4_split_extent_at(handle, inode, path, | ||
2921 | map->m_lblk, split_flag1, flags); | ||
2922 | if (err) | ||
2923 | goto out; | ||
2924 | } | ||
2925 | |||
2926 | ext4_ext_show_leaf(inode, path); | ||
2927 | out: | ||
2928 | return err ? err : map->m_len; | ||
2929 | } | ||
2930 | |||
2532 | #define EXT4_EXT_ZERO_LEN 7 | 2931 | #define EXT4_EXT_ZERO_LEN 7 |
2533 | /* | 2932 | /* |
2534 | * This function is called by ext4_ext_map_blocks() if someone tries to write | 2933 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
@@ -2545,17 +2944,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2545 | struct ext4_map_blocks *map, | 2944 | struct ext4_map_blocks *map, |
2546 | struct ext4_ext_path *path) | 2945 | struct ext4_ext_path *path) |
2547 | { | 2946 | { |
2548 | struct ext4_extent *ex, newex, orig_ex; | 2947 | struct ext4_map_blocks split_map; |
2549 | struct ext4_extent *ex1 = NULL; | 2948 | struct ext4_extent zero_ex; |
2550 | struct ext4_extent *ex2 = NULL; | 2949 | struct ext4_extent *ex; |
2551 | struct ext4_extent *ex3 = NULL; | ||
2552 | struct ext4_extent_header *eh; | ||
2553 | ext4_lblk_t ee_block, eof_block; | 2950 | ext4_lblk_t ee_block, eof_block; |
2554 | unsigned int allocated, ee_len, depth; | 2951 | unsigned int allocated, ee_len, depth; |
2555 | ext4_fsblk_t newblock; | ||
2556 | int err = 0; | 2952 | int err = 0; |
2557 | int ret = 0; | 2953 | int split_flag = 0; |
2558 | int may_zeroout; | ||
2559 | 2954 | ||
2560 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" | 2955 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" |
2561 | "block %llu, max_blocks %u\n", inode->i_ino, | 2956 | "block %llu, max_blocks %u\n", inode->i_ino, |
@@ -2567,280 +2962,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2567 | eof_block = map->m_lblk + map->m_len; | 2962 | eof_block = map->m_lblk + map->m_len; |
2568 | 2963 | ||
2569 | depth = ext_depth(inode); | 2964 | depth = ext_depth(inode); |
2570 | eh = path[depth].p_hdr; | ||
2571 | ex = path[depth].p_ext; | 2965 | ex = path[depth].p_ext; |
2572 | ee_block = le32_to_cpu(ex->ee_block); | 2966 | ee_block = le32_to_cpu(ex->ee_block); |
2573 | ee_len = ext4_ext_get_actual_len(ex); | 2967 | ee_len = ext4_ext_get_actual_len(ex); |
2574 | allocated = ee_len - (map->m_lblk - ee_block); | 2968 | allocated = ee_len - (map->m_lblk - ee_block); |
2575 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); | ||
2576 | |||
2577 | ex2 = ex; | ||
2578 | orig_ex.ee_block = ex->ee_block; | ||
2579 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2580 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); | ||
2581 | 2969 | ||
2970 | WARN_ON(map->m_lblk < ee_block); | ||
2582 | /* | 2971 | /* |
2583 | * It is safe to convert extent to initialized via explicit | 2972 | * It is safe to convert extent to initialized via explicit |
2584 | * zeroout only if extent is fully insde i_size or new_size. | 2973 | * zeroout only if extent is fully insde i_size or new_size. |
2585 | */ | 2974 | */ |
2586 | may_zeroout = ee_block + ee_len <= eof_block; | 2975 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
2587 | 2976 | ||
2588 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
2589 | if (err) | ||
2590 | goto out; | ||
2591 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 2977 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ |
2592 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { | 2978 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && |
2593 | err = ext4_ext_zeroout(inode, &orig_ex); | 2979 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { |
2980 | err = ext4_ext_zeroout(inode, ex); | ||
2594 | if (err) | 2981 | if (err) |
2595 | goto fix_extent_len; | ||
2596 | /* update the extent length and mark as initialized */ | ||
2597 | ex->ee_block = orig_ex.ee_block; | ||
2598 | ex->ee_len = orig_ex.ee_len; | ||
2599 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2600 | ext4_ext_dirty(handle, inode, path + depth); | ||
2601 | /* zeroed the full extent */ | ||
2602 | return allocated; | ||
2603 | } | ||
2604 | |||
2605 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ | ||
2606 | if (map->m_lblk > ee_block) { | ||
2607 | ex1 = ex; | ||
2608 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
2609 | ext4_ext_mark_uninitialized(ex1); | ||
2610 | ex2 = &newex; | ||
2611 | } | ||
2612 | /* | ||
2613 | * for sanity, update the length of the ex2 extent before | ||
2614 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | ||
2615 | * overlap of blocks. | ||
2616 | */ | ||
2617 | if (!ex1 && allocated > map->m_len) | ||
2618 | ex2->ee_len = cpu_to_le16(map->m_len); | ||
2619 | /* ex3: to ee_block + ee_len : uninitialised */ | ||
2620 | if (allocated > map->m_len) { | ||
2621 | unsigned int newdepth; | ||
2622 | /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ | ||
2623 | if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { | ||
2624 | /* | ||
2625 | * map->m_lblk == ee_block is handled by the zerouout | ||
2626 | * at the beginning. | ||
2627 | * Mark first half uninitialized. | ||
2628 | * Mark second half initialized and zero out the | ||
2629 | * initialized extent | ||
2630 | */ | ||
2631 | ex->ee_block = orig_ex.ee_block; | ||
2632 | ex->ee_len = cpu_to_le16(ee_len - allocated); | ||
2633 | ext4_ext_mark_uninitialized(ex); | ||
2634 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2635 | ext4_ext_dirty(handle, inode, path + depth); | ||
2636 | |||
2637 | ex3 = &newex; | ||
2638 | ex3->ee_block = cpu_to_le32(map->m_lblk); | ||
2639 | ext4_ext_store_pblock(ex3, newblock); | ||
2640 | ex3->ee_len = cpu_to_le16(allocated); | ||
2641 | err = ext4_ext_insert_extent(handle, inode, path, | ||
2642 | ex3, 0); | ||
2643 | if (err == -ENOSPC) { | ||
2644 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2645 | if (err) | ||
2646 | goto fix_extent_len; | ||
2647 | ex->ee_block = orig_ex.ee_block; | ||
2648 | ex->ee_len = orig_ex.ee_len; | ||
2649 | ext4_ext_store_pblock(ex, | ||
2650 | ext4_ext_pblock(&orig_ex)); | ||
2651 | ext4_ext_dirty(handle, inode, path + depth); | ||
2652 | /* blocks available from map->m_lblk */ | ||
2653 | return allocated; | ||
2654 | |||
2655 | } else if (err) | ||
2656 | goto fix_extent_len; | ||
2657 | |||
2658 | /* | ||
2659 | * We need to zero out the second half because | ||
2660 | * an fallocate request can update file size and | ||
2661 | * converting the second half to initialized extent | ||
2662 | * implies that we can leak some junk data to user | ||
2663 | * space. | ||
2664 | */ | ||
2665 | err = ext4_ext_zeroout(inode, ex3); | ||
2666 | if (err) { | ||
2667 | /* | ||
2668 | * We should actually mark the | ||
2669 | * second half as uninit and return error | ||
2670 | * Insert would have changed the extent | ||
2671 | */ | ||
2672 | depth = ext_depth(inode); | ||
2673 | ext4_ext_drop_refs(path); | ||
2674 | path = ext4_ext_find_extent(inode, map->m_lblk, | ||
2675 | path); | ||
2676 | if (IS_ERR(path)) { | ||
2677 | err = PTR_ERR(path); | ||
2678 | return err; | ||
2679 | } | ||
2680 | /* get the second half extent details */ | ||
2681 | ex = path[depth].p_ext; | ||
2682 | err = ext4_ext_get_access(handle, inode, | ||
2683 | path + depth); | ||
2684 | if (err) | ||
2685 | return err; | ||
2686 | ext4_ext_mark_uninitialized(ex); | ||
2687 | ext4_ext_dirty(handle, inode, path + depth); | ||
2688 | return err; | ||
2689 | } | ||
2690 | |||
2691 | /* zeroed the second half */ | ||
2692 | return allocated; | ||
2693 | } | ||
2694 | ex3 = &newex; | ||
2695 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); | ||
2696 | ext4_ext_store_pblock(ex3, newblock + map->m_len); | ||
2697 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); | ||
2698 | ext4_ext_mark_uninitialized(ex3); | ||
2699 | err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); | ||
2700 | if (err == -ENOSPC && may_zeroout) { | ||
2701 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2702 | if (err) | ||
2703 | goto fix_extent_len; | ||
2704 | /* update the extent length and mark as initialized */ | ||
2705 | ex->ee_block = orig_ex.ee_block; | ||
2706 | ex->ee_len = orig_ex.ee_len; | ||
2707 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2708 | ext4_ext_dirty(handle, inode, path + depth); | ||
2709 | /* zeroed the full extent */ | ||
2710 | /* blocks available from map->m_lblk */ | ||
2711 | return allocated; | ||
2712 | |||
2713 | } else if (err) | ||
2714 | goto fix_extent_len; | ||
2715 | /* | ||
2716 | * The depth, and hence eh & ex might change | ||
2717 | * as part of the insert above. | ||
2718 | */ | ||
2719 | newdepth = ext_depth(inode); | ||
2720 | /* | ||
2721 | * update the extent length after successful insert of the | ||
2722 | * split extent | ||
2723 | */ | ||
2724 | ee_len -= ext4_ext_get_actual_len(ex3); | ||
2725 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2726 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2727 | |||
2728 | depth = newdepth; | ||
2729 | ext4_ext_drop_refs(path); | ||
2730 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
2731 | if (IS_ERR(path)) { | ||
2732 | err = PTR_ERR(path); | ||
2733 | goto out; | 2982 | goto out; |
2734 | } | ||
2735 | eh = path[depth].p_hdr; | ||
2736 | ex = path[depth].p_ext; | ||
2737 | if (ex2 != &newex) | ||
2738 | ex2 = ex; | ||
2739 | 2983 | ||
2740 | err = ext4_ext_get_access(handle, inode, path + depth); | 2984 | err = ext4_ext_get_access(handle, inode, path + depth); |
2741 | if (err) | 2985 | if (err) |
2742 | goto out; | 2986 | goto out; |
2743 | 2987 | ext4_ext_mark_initialized(ex); | |
2744 | allocated = map->m_len; | 2988 | ext4_ext_try_to_merge(inode, path, ex); |
2745 | 2989 | err = ext4_ext_dirty(handle, inode, path + depth); | |
2746 | /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying | 2990 | goto out; |
2747 | * to insert a extent in the middle zerout directly | ||
2748 | * otherwise give the extent a chance to merge to left | ||
2749 | */ | ||
2750 | if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && | ||
2751 | map->m_lblk != ee_block && may_zeroout) { | ||
2752 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2753 | if (err) | ||
2754 | goto fix_extent_len; | ||
2755 | /* update the extent length and mark as initialized */ | ||
2756 | ex->ee_block = orig_ex.ee_block; | ||
2757 | ex->ee_len = orig_ex.ee_len; | ||
2758 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2759 | ext4_ext_dirty(handle, inode, path + depth); | ||
2760 | /* zero out the first half */ | ||
2761 | /* blocks available from map->m_lblk */ | ||
2762 | return allocated; | ||
2763 | } | ||
2764 | } | ||
2765 | /* | ||
2766 | * If there was a change of depth as part of the | ||
2767 | * insertion of ex3 above, we need to update the length | ||
2768 | * of the ex1 extent again here | ||
2769 | */ | ||
2770 | if (ex1 && ex1 != ex) { | ||
2771 | ex1 = ex; | ||
2772 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
2773 | ext4_ext_mark_uninitialized(ex1); | ||
2774 | ex2 = &newex; | ||
2775 | } | ||
2776 | /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ | ||
2777 | ex2->ee_block = cpu_to_le32(map->m_lblk); | ||
2778 | ext4_ext_store_pblock(ex2, newblock); | ||
2779 | ex2->ee_len = cpu_to_le16(allocated); | ||
2780 | if (ex2 != ex) | ||
2781 | goto insert; | ||
2782 | /* | ||
2783 | * New (initialized) extent starts from the first block | ||
2784 | * in the current extent. i.e., ex2 == ex | ||
2785 | * We have to see if it can be merged with the extent | ||
2786 | * on the left. | ||
2787 | */ | ||
2788 | if (ex2 > EXT_FIRST_EXTENT(eh)) { | ||
2789 | /* | ||
2790 | * To merge left, pass "ex2 - 1" to try_to_merge(), | ||
2791 | * since it merges towards right _only_. | ||
2792 | */ | ||
2793 | ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); | ||
2794 | if (ret) { | ||
2795 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
2796 | if (err) | ||
2797 | goto out; | ||
2798 | depth = ext_depth(inode); | ||
2799 | ex2--; | ||
2800 | } | ||
2801 | } | 2991 | } |
2992 | |||
2802 | /* | 2993 | /* |
2803 | * Try to Merge towards right. This might be required | 2994 | * four cases: |
2804 | * only when the whole extent is being written to. | 2995 | * 1. split the extent into three extents. |
2805 | * i.e. ex2 == ex and ex3 == NULL. | 2996 | * 2. split the extent into two extents, zeroout the first half. |
2997 | * 3. split the extent into two extents, zeroout the second half. | ||
2998 | * 4. split the extent into two extents with out zeroout. | ||
2806 | */ | 2999 | */ |
2807 | if (!ex3) { | 3000 | split_map.m_lblk = map->m_lblk; |
2808 | ret = ext4_ext_try_to_merge(inode, path, ex2); | 3001 | split_map.m_len = map->m_len; |
2809 | if (ret) { | 3002 | |
2810 | err = ext4_ext_correct_indexes(handle, inode, path); | 3003 | if (allocated > map->m_len) { |
3004 | if (allocated <= EXT4_EXT_ZERO_LEN && | ||
3005 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3006 | /* case 3 */ | ||
3007 | zero_ex.ee_block = | ||
3008 | cpu_to_le32(map->m_lblk); | ||
3009 | zero_ex.ee_len = cpu_to_le16(allocated); | ||
3010 | ext4_ext_store_pblock(&zero_ex, | ||
3011 | ext4_ext_pblock(ex) + map->m_lblk - ee_block); | ||
3012 | err = ext4_ext_zeroout(inode, &zero_ex); | ||
2811 | if (err) | 3013 | if (err) |
2812 | goto out; | 3014 | goto out; |
3015 | split_map.m_lblk = map->m_lblk; | ||
3016 | split_map.m_len = allocated; | ||
3017 | } else if ((map->m_lblk - ee_block + map->m_len < | ||
3018 | EXT4_EXT_ZERO_LEN) && | ||
3019 | (EXT4_EXT_MAY_ZEROOUT & split_flag)) { | ||
3020 | /* case 2 */ | ||
3021 | if (map->m_lblk != ee_block) { | ||
3022 | zero_ex.ee_block = ex->ee_block; | ||
3023 | zero_ex.ee_len = cpu_to_le16(map->m_lblk - | ||
3024 | ee_block); | ||
3025 | ext4_ext_store_pblock(&zero_ex, | ||
3026 | ext4_ext_pblock(ex)); | ||
3027 | err = ext4_ext_zeroout(inode, &zero_ex); | ||
3028 | if (err) | ||
3029 | goto out; | ||
3030 | } | ||
3031 | |||
3032 | split_map.m_lblk = ee_block; | ||
3033 | split_map.m_len = map->m_lblk - ee_block + map->m_len; | ||
3034 | allocated = map->m_len; | ||
2813 | } | 3035 | } |
2814 | } | 3036 | } |
2815 | /* Mark modified extent as dirty */ | 3037 | |
2816 | err = ext4_ext_dirty(handle, inode, path + depth); | 3038 | allocated = ext4_split_extent(handle, inode, path, |
2817 | goto out; | 3039 | &split_map, split_flag, 0); |
2818 | insert: | 3040 | if (allocated < 0) |
2819 | err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); | 3041 | err = allocated; |
2820 | if (err == -ENOSPC && may_zeroout) { | 3042 | |
2821 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2822 | if (err) | ||
2823 | goto fix_extent_len; | ||
2824 | /* update the extent length and mark as initialized */ | ||
2825 | ex->ee_block = orig_ex.ee_block; | ||
2826 | ex->ee_len = orig_ex.ee_len; | ||
2827 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2828 | ext4_ext_dirty(handle, inode, path + depth); | ||
2829 | /* zero out the first half */ | ||
2830 | return allocated; | ||
2831 | } else if (err) | ||
2832 | goto fix_extent_len; | ||
2833 | out: | 3043 | out: |
2834 | ext4_ext_show_leaf(inode, path); | ||
2835 | return err ? err : allocated; | 3044 | return err ? err : allocated; |
2836 | |||
2837 | fix_extent_len: | ||
2838 | ex->ee_block = orig_ex.ee_block; | ||
2839 | ex->ee_len = orig_ex.ee_len; | ||
2840 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2841 | ext4_ext_mark_uninitialized(ex); | ||
2842 | ext4_ext_dirty(handle, inode, path + depth); | ||
2843 | return err; | ||
2844 | } | 3045 | } |
2845 | 3046 | ||
2846 | /* | 3047 | /* |
@@ -2871,15 +3072,11 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2871 | struct ext4_ext_path *path, | 3072 | struct ext4_ext_path *path, |
2872 | int flags) | 3073 | int flags) |
2873 | { | 3074 | { |
2874 | struct ext4_extent *ex, newex, orig_ex; | 3075 | ext4_lblk_t eof_block; |
2875 | struct ext4_extent *ex1 = NULL; | 3076 | ext4_lblk_t ee_block; |
2876 | struct ext4_extent *ex2 = NULL; | 3077 | struct ext4_extent *ex; |
2877 | struct ext4_extent *ex3 = NULL; | 3078 | unsigned int ee_len; |
2878 | ext4_lblk_t ee_block, eof_block; | 3079 | int split_flag = 0, depth; |
2879 | unsigned int allocated, ee_len, depth; | ||
2880 | ext4_fsblk_t newblock; | ||
2881 | int err = 0; | ||
2882 | int may_zeroout; | ||
2883 | 3080 | ||
2884 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" | 3081 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" |
2885 | "block %llu, max_blocks %u\n", inode->i_ino, | 3082 | "block %llu, max_blocks %u\n", inode->i_ino, |
@@ -2889,156 +3086,22 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2889 | inode->i_sb->s_blocksize_bits; | 3086 | inode->i_sb->s_blocksize_bits; |
2890 | if (eof_block < map->m_lblk + map->m_len) | 3087 | if (eof_block < map->m_lblk + map->m_len) |
2891 | eof_block = map->m_lblk + map->m_len; | 3088 | eof_block = map->m_lblk + map->m_len; |
2892 | |||
2893 | depth = ext_depth(inode); | ||
2894 | ex = path[depth].p_ext; | ||
2895 | ee_block = le32_to_cpu(ex->ee_block); | ||
2896 | ee_len = ext4_ext_get_actual_len(ex); | ||
2897 | allocated = ee_len - (map->m_lblk - ee_block); | ||
2898 | newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex); | ||
2899 | |||
2900 | ex2 = ex; | ||
2901 | orig_ex.ee_block = ex->ee_block; | ||
2902 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2903 | ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex)); | ||
2904 | |||
2905 | /* | 3089 | /* |
2906 | * It is safe to convert extent to initialized via explicit | 3090 | * It is safe to convert extent to initialized via explicit |
2907 | * zeroout only if extent is fully insde i_size or new_size. | 3091 | * zeroout only if extent is fully insde i_size or new_size. |
2908 | */ | 3092 | */ |
2909 | may_zeroout = ee_block + ee_len <= eof_block; | 3093 | depth = ext_depth(inode); |
2910 | 3094 | ex = path[depth].p_ext; | |
2911 | /* | 3095 | ee_block = le32_to_cpu(ex->ee_block); |
2912 | * If the uninitialized extent begins at the same logical | 3096 | ee_len = ext4_ext_get_actual_len(ex); |
2913 | * block where the write begins, and the write completely | ||
2914 | * covers the extent, then we don't need to split it. | ||
2915 | */ | ||
2916 | if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) | ||
2917 | return allocated; | ||
2918 | |||
2919 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
2920 | if (err) | ||
2921 | goto out; | ||
2922 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ | ||
2923 | if (map->m_lblk > ee_block) { | ||
2924 | ex1 = ex; | ||
2925 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
2926 | ext4_ext_mark_uninitialized(ex1); | ||
2927 | ex2 = &newex; | ||
2928 | } | ||
2929 | /* | ||
2930 | * for sanity, update the length of the ex2 extent before | ||
2931 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | ||
2932 | * overlap of blocks. | ||
2933 | */ | ||
2934 | if (!ex1 && allocated > map->m_len) | ||
2935 | ex2->ee_len = cpu_to_le16(map->m_len); | ||
2936 | /* ex3: to ee_block + ee_len : uninitialised */ | ||
2937 | if (allocated > map->m_len) { | ||
2938 | unsigned int newdepth; | ||
2939 | ex3 = &newex; | ||
2940 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); | ||
2941 | ext4_ext_store_pblock(ex3, newblock + map->m_len); | ||
2942 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); | ||
2943 | ext4_ext_mark_uninitialized(ex3); | ||
2944 | err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); | ||
2945 | if (err == -ENOSPC && may_zeroout) { | ||
2946 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
2947 | if (err) | ||
2948 | goto fix_extent_len; | ||
2949 | /* update the extent length and mark as initialized */ | ||
2950 | ex->ee_block = orig_ex.ee_block; | ||
2951 | ex->ee_len = orig_ex.ee_len; | ||
2952 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
2953 | ext4_ext_dirty(handle, inode, path + depth); | ||
2954 | /* zeroed the full extent */ | ||
2955 | /* blocks available from map->m_lblk */ | ||
2956 | return allocated; | ||
2957 | |||
2958 | } else if (err) | ||
2959 | goto fix_extent_len; | ||
2960 | /* | ||
2961 | * The depth, and hence eh & ex might change | ||
2962 | * as part of the insert above. | ||
2963 | */ | ||
2964 | newdepth = ext_depth(inode); | ||
2965 | /* | ||
2966 | * update the extent length after successful insert of the | ||
2967 | * split extent | ||
2968 | */ | ||
2969 | ee_len -= ext4_ext_get_actual_len(ex3); | ||
2970 | orig_ex.ee_len = cpu_to_le16(ee_len); | ||
2971 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2972 | |||
2973 | depth = newdepth; | ||
2974 | ext4_ext_drop_refs(path); | ||
2975 | path = ext4_ext_find_extent(inode, map->m_lblk, path); | ||
2976 | if (IS_ERR(path)) { | ||
2977 | err = PTR_ERR(path); | ||
2978 | goto out; | ||
2979 | } | ||
2980 | ex = path[depth].p_ext; | ||
2981 | if (ex2 != &newex) | ||
2982 | ex2 = ex; | ||
2983 | 3097 | ||
2984 | err = ext4_ext_get_access(handle, inode, path + depth); | 3098 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; |
2985 | if (err) | 3099 | split_flag |= EXT4_EXT_MARK_UNINIT2; |
2986 | goto out; | ||
2987 | 3100 | ||
2988 | allocated = map->m_len; | 3101 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
2989 | } | 3102 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); |
2990 | /* | ||
2991 | * If there was a change of depth as part of the | ||
2992 | * insertion of ex3 above, we need to update the length | ||
2993 | * of the ex1 extent again here | ||
2994 | */ | ||
2995 | if (ex1 && ex1 != ex) { | ||
2996 | ex1 = ex; | ||
2997 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); | ||
2998 | ext4_ext_mark_uninitialized(ex1); | ||
2999 | ex2 = &newex; | ||
3000 | } | ||
3001 | /* | ||
3002 | * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written | ||
3003 | * using direct I/O, uninitialised still. | ||
3004 | */ | ||
3005 | ex2->ee_block = cpu_to_le32(map->m_lblk); | ||
3006 | ext4_ext_store_pblock(ex2, newblock); | ||
3007 | ex2->ee_len = cpu_to_le16(allocated); | ||
3008 | ext4_ext_mark_uninitialized(ex2); | ||
3009 | if (ex2 != ex) | ||
3010 | goto insert; | ||
3011 | /* Mark modified extent as dirty */ | ||
3012 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
3013 | ext_debug("out here\n"); | ||
3014 | goto out; | ||
3015 | insert: | ||
3016 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | ||
3017 | if (err == -ENOSPC && may_zeroout) { | ||
3018 | err = ext4_ext_zeroout(inode, &orig_ex); | ||
3019 | if (err) | ||
3020 | goto fix_extent_len; | ||
3021 | /* update the extent length and mark as initialized */ | ||
3022 | ex->ee_block = orig_ex.ee_block; | ||
3023 | ex->ee_len = orig_ex.ee_len; | ||
3024 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
3025 | ext4_ext_dirty(handle, inode, path + depth); | ||
3026 | /* zero out the first half */ | ||
3027 | return allocated; | ||
3028 | } else if (err) | ||
3029 | goto fix_extent_len; | ||
3030 | out: | ||
3031 | ext4_ext_show_leaf(inode, path); | ||
3032 | return err ? err : allocated; | ||
3033 | |||
3034 | fix_extent_len: | ||
3035 | ex->ee_block = orig_ex.ee_block; | ||
3036 | ex->ee_len = orig_ex.ee_len; | ||
3037 | ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex)); | ||
3038 | ext4_ext_mark_uninitialized(ex); | ||
3039 | ext4_ext_dirty(handle, inode, path + depth); | ||
3040 | return err; | ||
3041 | } | 3103 | } |
3104 | |||
3042 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3105 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
3043 | struct inode *inode, | 3106 | struct inode *inode, |
3044 | struct ext4_ext_path *path) | 3107 | struct ext4_ext_path *path) |
@@ -3047,46 +3110,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3047 | struct ext4_extent_header *eh; | 3110 | struct ext4_extent_header *eh; |
3048 | int depth; | 3111 | int depth; |
3049 | int err = 0; | 3112 | int err = 0; |
3050 | int ret = 0; | ||
3051 | 3113 | ||
3052 | depth = ext_depth(inode); | 3114 | depth = ext_depth(inode); |
3053 | eh = path[depth].p_hdr; | 3115 | eh = path[depth].p_hdr; |
3054 | ex = path[depth].p_ext; | 3116 | ex = path[depth].p_ext; |
3055 | 3117 | ||
3118 | ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" | ||
3119 | "block %llu, max_blocks %u\n", inode->i_ino, | ||
3120 | (unsigned long long)le32_to_cpu(ex->ee_block), | ||
3121 | ext4_ext_get_actual_len(ex)); | ||
3122 | |||
3056 | err = ext4_ext_get_access(handle, inode, path + depth); | 3123 | err = ext4_ext_get_access(handle, inode, path + depth); |
3057 | if (err) | 3124 | if (err) |
3058 | goto out; | 3125 | goto out; |
3059 | /* first mark the extent as initialized */ | 3126 | /* first mark the extent as initialized */ |
3060 | ext4_ext_mark_initialized(ex); | 3127 | ext4_ext_mark_initialized(ex); |
3061 | 3128 | ||
3062 | /* | 3129 | /* note: ext4_ext_correct_indexes() isn't needed here because |
3063 | * We have to see if it can be merged with the extent | 3130 | * borders are not changed |
3064 | * on the left. | ||
3065 | */ | ||
3066 | if (ex > EXT_FIRST_EXTENT(eh)) { | ||
3067 | /* | ||
3068 | * To merge left, pass "ex - 1" to try_to_merge(), | ||
3069 | * since it merges towards right _only_. | ||
3070 | */ | ||
3071 | ret = ext4_ext_try_to_merge(inode, path, ex - 1); | ||
3072 | if (ret) { | ||
3073 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
3074 | if (err) | ||
3075 | goto out; | ||
3076 | depth = ext_depth(inode); | ||
3077 | ex--; | ||
3078 | } | ||
3079 | } | ||
3080 | /* | ||
3081 | * Try to Merge towards right. | ||
3082 | */ | 3131 | */ |
3083 | ret = ext4_ext_try_to_merge(inode, path, ex); | 3132 | ext4_ext_try_to_merge(inode, path, ex); |
3084 | if (ret) { | 3133 | |
3085 | err = ext4_ext_correct_indexes(handle, inode, path); | ||
3086 | if (err) | ||
3087 | goto out; | ||
3088 | depth = ext_depth(inode); | ||
3089 | } | ||
3090 | /* Mark modified extent as dirty */ | 3134 | /* Mark modified extent as dirty */ |
3091 | err = ext4_ext_dirty(handle, inode, path + depth); | 3135 | err = ext4_ext_dirty(handle, inode, path + depth); |
3092 | out: | 3136 | out: |
@@ -3302,15 +3346,19 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3302 | ext4_fsblk_t newblock = 0; | 3346 | ext4_fsblk_t newblock = 0; |
3303 | int err = 0, depth, ret; | 3347 | int err = 0, depth, ret; |
3304 | unsigned int allocated = 0; | 3348 | unsigned int allocated = 0; |
3349 | unsigned int punched_out = 0; | ||
3350 | unsigned int result = 0; | ||
3305 | struct ext4_allocation_request ar; | 3351 | struct ext4_allocation_request ar; |
3306 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3352 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3353 | struct ext4_map_blocks punch_map; | ||
3307 | 3354 | ||
3308 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3355 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3309 | map->m_lblk, map->m_len, inode->i_ino); | 3356 | map->m_lblk, map->m_len, inode->i_ino); |
3310 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | 3357 | trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); |
3311 | 3358 | ||
3312 | /* check in cache */ | 3359 | /* check in cache */ |
3313 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { | 3360 | if (ext4_ext_in_cache(inode, map->m_lblk, &newex) && |
3361 | ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) { | ||
3314 | if (!newex.ee_start_lo && !newex.ee_start_hi) { | 3362 | if (!newex.ee_start_lo && !newex.ee_start_hi) { |
3315 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3363 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
3316 | /* | 3364 | /* |
@@ -3375,16 +3423,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3375 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 3423 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
3376 | ee_block, ee_len, newblock); | 3424 | ee_block, ee_len, newblock); |
3377 | 3425 | ||
3378 | /* Do not put uninitialized extent in the cache */ | 3426 | if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) { |
3379 | if (!ext4_ext_is_uninitialized(ex)) { | 3427 | /* |
3380 | ext4_ext_put_in_cache(inode, ee_block, | 3428 | * Do not put uninitialized extent |
3381 | ee_len, ee_start); | 3429 | * in the cache |
3382 | goto out; | 3430 | */ |
3431 | if (!ext4_ext_is_uninitialized(ex)) { | ||
3432 | ext4_ext_put_in_cache(inode, ee_block, | ||
3433 | ee_len, ee_start); | ||
3434 | goto out; | ||
3435 | } | ||
3436 | ret = ext4_ext_handle_uninitialized_extents( | ||
3437 | handle, inode, map, path, flags, | ||
3438 | allocated, newblock); | ||
3439 | return ret; | ||
3383 | } | 3440 | } |
3384 | ret = ext4_ext_handle_uninitialized_extents(handle, | 3441 | |
3385 | inode, map, path, flags, allocated, | 3442 | /* |
3386 | newblock); | 3443 | * Punch out the map length, but only to the |
3387 | return ret; | 3444 | * end of the extent |
3445 | */ | ||
3446 | punched_out = allocated < map->m_len ? | ||
3447 | allocated : map->m_len; | ||
3448 | |||
3449 | /* | ||
3450 | * Sense extents need to be converted to | ||
3451 | * uninitialized, they must fit in an | ||
3452 | * uninitialized extent | ||
3453 | */ | ||
3454 | if (punched_out > EXT_UNINIT_MAX_LEN) | ||
3455 | punched_out = EXT_UNINIT_MAX_LEN; | ||
3456 | |||
3457 | punch_map.m_lblk = map->m_lblk; | ||
3458 | punch_map.m_pblk = newblock; | ||
3459 | punch_map.m_len = punched_out; | ||
3460 | punch_map.m_flags = 0; | ||
3461 | |||
3462 | /* Check to see if the extent needs to be split */ | ||
3463 | if (punch_map.m_len != ee_len || | ||
3464 | punch_map.m_lblk != ee_block) { | ||
3465 | |||
3466 | ret = ext4_split_extent(handle, inode, | ||
3467 | path, &punch_map, 0, | ||
3468 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT | | ||
3469 | EXT4_GET_BLOCKS_PRE_IO); | ||
3470 | |||
3471 | if (ret < 0) { | ||
3472 | err = ret; | ||
3473 | goto out2; | ||
3474 | } | ||
3475 | /* | ||
3476 | * find extent for the block at | ||
3477 | * the start of the hole | ||
3478 | */ | ||
3479 | ext4_ext_drop_refs(path); | ||
3480 | kfree(path); | ||
3481 | |||
3482 | path = ext4_ext_find_extent(inode, | ||
3483 | map->m_lblk, NULL); | ||
3484 | if (IS_ERR(path)) { | ||
3485 | err = PTR_ERR(path); | ||
3486 | path = NULL; | ||
3487 | goto out2; | ||
3488 | } | ||
3489 | |||
3490 | depth = ext_depth(inode); | ||
3491 | ex = path[depth].p_ext; | ||
3492 | ee_len = ext4_ext_get_actual_len(ex); | ||
3493 | ee_block = le32_to_cpu(ex->ee_block); | ||
3494 | ee_start = ext4_ext_pblock(ex); | ||
3495 | |||
3496 | } | ||
3497 | |||
3498 | ext4_ext_mark_uninitialized(ex); | ||
3499 | |||
3500 | err = ext4_ext_remove_space(inode, map->m_lblk, | ||
3501 | map->m_lblk + punched_out); | ||
3502 | |||
3503 | goto out2; | ||
3388 | } | 3504 | } |
3389 | } | 3505 | } |
3390 | 3506 | ||
@@ -3446,6 +3562,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
3446 | else | 3562 | else |
3447 | /* disable in-core preallocation for non-regular files */ | 3563 | /* disable in-core preallocation for non-regular files */ |
3448 | ar.flags = 0; | 3564 | ar.flags = 0; |
3565 | if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) | ||
3566 | ar.flags |= EXT4_MB_HINT_NOPREALLOC; | ||
3449 | newblock = ext4_mb_new_blocks(handle, &ar, &err); | 3567 | newblock = ext4_mb_new_blocks(handle, &ar, &err); |
3450 | if (!newblock) | 3568 | if (!newblock) |
3451 | goto out2; | 3569 | goto out2; |
@@ -3529,7 +3647,11 @@ out2: | |||
3529 | } | 3647 | } |
3530 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, | 3648 | trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, |
3531 | newblock, map->m_len, err ? err : allocated); | 3649 | newblock, map->m_len, err ? err : allocated); |
3532 | return err ? err : allocated; | 3650 | |
3651 | result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? | ||
3652 | punched_out : allocated; | ||
3653 | |||
3654 | return err ? err : result; | ||
3533 | } | 3655 | } |
3534 | 3656 | ||
3535 | void ext4_ext_truncate(struct inode *inode) | 3657 | void ext4_ext_truncate(struct inode *inode) |
@@ -3577,7 +3699,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
3577 | 3699 | ||
3578 | last_block = (inode->i_size + sb->s_blocksize - 1) | 3700 | last_block = (inode->i_size + sb->s_blocksize - 1) |
3579 | >> EXT4_BLOCK_SIZE_BITS(sb); | 3701 | >> EXT4_BLOCK_SIZE_BITS(sb); |
3580 | err = ext4_ext_remove_space(inode, last_block); | 3702 | err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK); |
3581 | 3703 | ||
3582 | /* In a multi-transaction truncate, we only make the final | 3704 | /* In a multi-transaction truncate, we only make the final |
3583 | * transaction synchronous. | 3705 | * transaction synchronous. |
@@ -3585,8 +3707,9 @@ void ext4_ext_truncate(struct inode *inode) | |||
3585 | if (IS_SYNC(inode)) | 3707 | if (IS_SYNC(inode)) |
3586 | ext4_handle_sync(handle); | 3708 | ext4_handle_sync(handle); |
3587 | 3709 | ||
3588 | out_stop: | ||
3589 | up_write(&EXT4_I(inode)->i_data_sem); | 3710 | up_write(&EXT4_I(inode)->i_data_sem); |
3711 | |||
3712 | out_stop: | ||
3590 | /* | 3713 | /* |
3591 | * If this was a simple ftruncate() and the file will remain alive, | 3714 | * If this was a simple ftruncate() and the file will remain alive, |
3592 | * then we need to clear up the orphan record which we created above. | 3715 | * then we need to clear up the orphan record which we created above. |
@@ -3651,10 +3774,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
3651 | struct ext4_map_blocks map; | 3774 | struct ext4_map_blocks map; |
3652 | unsigned int credits, blkbits = inode->i_blkbits; | 3775 | unsigned int credits, blkbits = inode->i_blkbits; |
3653 | 3776 | ||
3654 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | ||
3655 | if (mode & ~FALLOC_FL_KEEP_SIZE) | ||
3656 | return -EOPNOTSUPP; | ||
3657 | |||
3658 | /* | 3777 | /* |
3659 | * currently supporting (pre)allocate mode for extent-based | 3778 | * currently supporting (pre)allocate mode for extent-based |
3660 | * files _only_ | 3779 | * files _only_ |
@@ -3662,6 +3781,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
3662 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 3781 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3663 | return -EOPNOTSUPP; | 3782 | return -EOPNOTSUPP; |
3664 | 3783 | ||
3784 | /* Return error if mode is not supported */ | ||
3785 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | ||
3786 | return -EOPNOTSUPP; | ||
3787 | |||
3788 | if (mode & FALLOC_FL_PUNCH_HOLE) | ||
3789 | return ext4_punch_hole(file, offset, len); | ||
3790 | |||
3665 | trace_ext4_fallocate_enter(inode, offset, len, mode); | 3791 | trace_ext4_fallocate_enter(inode, offset, len, mode); |
3666 | map.m_lblk = offset >> blkbits; | 3792 | map.m_lblk = offset >> blkbits; |
3667 | /* | 3793 | /* |
@@ -3691,7 +3817,8 @@ retry: | |||
3691 | break; | 3817 | break; |
3692 | } | 3818 | } |
3693 | ret = ext4_map_blocks(handle, inode, &map, | 3819 | ret = ext4_map_blocks(handle, inode, &map, |
3694 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); | 3820 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | |
3821 | EXT4_GET_BLOCKS_NO_NORMALIZE); | ||
3695 | if (ret <= 0) { | 3822 | if (ret <= 0) { |
3696 | #ifdef EXT4FS_DEBUG | 3823 | #ifdef EXT4FS_DEBUG |
3697 | WARN_ON(ret <= 0); | 3824 | WARN_ON(ret <= 0); |
@@ -3822,6 +3949,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | |||
3822 | pgoff_t last_offset; | 3949 | pgoff_t last_offset; |
3823 | pgoff_t offset; | 3950 | pgoff_t offset; |
3824 | pgoff_t index; | 3951 | pgoff_t index; |
3952 | pgoff_t start_index = 0; | ||
3825 | struct page **pages = NULL; | 3953 | struct page **pages = NULL; |
3826 | struct buffer_head *bh = NULL; | 3954 | struct buffer_head *bh = NULL; |
3827 | struct buffer_head *head = NULL; | 3955 | struct buffer_head *head = NULL; |
@@ -3848,39 +3976,57 @@ out: | |||
3848 | kfree(pages); | 3976 | kfree(pages); |
3849 | return EXT_CONTINUE; | 3977 | return EXT_CONTINUE; |
3850 | } | 3978 | } |
3979 | index = 0; | ||
3851 | 3980 | ||
3981 | next_page: | ||
3852 | /* Try to find the 1st mapped buffer. */ | 3982 | /* Try to find the 1st mapped buffer. */ |
3853 | end = ((__u64)pages[0]->index << PAGE_SHIFT) >> | 3983 | end = ((__u64)pages[index]->index << PAGE_SHIFT) >> |
3854 | blksize_bits; | 3984 | blksize_bits; |
3855 | if (!page_has_buffers(pages[0])) | 3985 | if (!page_has_buffers(pages[index])) |
3856 | goto out; | 3986 | goto out; |
3857 | head = page_buffers(pages[0]); | 3987 | head = page_buffers(pages[index]); |
3858 | if (!head) | 3988 | if (!head) |
3859 | goto out; | 3989 | goto out; |
3860 | 3990 | ||
3991 | index++; | ||
3861 | bh = head; | 3992 | bh = head; |
3862 | do { | 3993 | do { |
3863 | if (buffer_mapped(bh)) { | 3994 | if (end >= newex->ec_block + |
3995 | newex->ec_len) | ||
3996 | /* The buffer is out of | ||
3997 | * the request range. | ||
3998 | */ | ||
3999 | goto out; | ||
4000 | |||
4001 | if (buffer_mapped(bh) && | ||
4002 | end >= newex->ec_block) { | ||
4003 | start_index = index - 1; | ||
3864 | /* get the 1st mapped buffer. */ | 4004 | /* get the 1st mapped buffer. */ |
3865 | if (end > newex->ec_block + | ||
3866 | newex->ec_len) | ||
3867 | /* The buffer is out of | ||
3868 | * the request range. | ||
3869 | */ | ||
3870 | goto out; | ||
3871 | goto found_mapped_buffer; | 4005 | goto found_mapped_buffer; |
3872 | } | 4006 | } |
4007 | |||
3873 | bh = bh->b_this_page; | 4008 | bh = bh->b_this_page; |
3874 | end++; | 4009 | end++; |
3875 | } while (bh != head); | 4010 | } while (bh != head); |
3876 | 4011 | ||
3877 | /* No mapped buffer found. */ | 4012 | /* No mapped buffer in the range found in this page, |
3878 | goto out; | 4013 | * We need to look up next page. |
4014 | */ | ||
4015 | if (index >= ret) { | ||
4016 | /* There is no page left, but we need to limit | ||
4017 | * newex->ec_len. | ||
4018 | */ | ||
4019 | newex->ec_len = end - newex->ec_block; | ||
4020 | goto out; | ||
4021 | } | ||
4022 | goto next_page; | ||
3879 | } else { | 4023 | } else { |
3880 | /*Find contiguous delayed buffers. */ | 4024 | /*Find contiguous delayed buffers. */ |
3881 | if (ret > 0 && pages[0]->index == last_offset) | 4025 | if (ret > 0 && pages[0]->index == last_offset) |
3882 | head = page_buffers(pages[0]); | 4026 | head = page_buffers(pages[0]); |
3883 | bh = head; | 4027 | bh = head; |
4028 | index = 1; | ||
4029 | start_index = 0; | ||
3884 | } | 4030 | } |
3885 | 4031 | ||
3886 | found_mapped_buffer: | 4032 | found_mapped_buffer: |
@@ -3903,7 +4049,7 @@ found_mapped_buffer: | |||
3903 | end++; | 4049 | end++; |
3904 | } while (bh != head); | 4050 | } while (bh != head); |
3905 | 4051 | ||
3906 | for (index = 1; index < ret; index++) { | 4052 | for (; index < ret; index++) { |
3907 | if (!page_has_buffers(pages[index])) { | 4053 | if (!page_has_buffers(pages[index])) { |
3908 | bh = NULL; | 4054 | bh = NULL; |
3909 | break; | 4055 | break; |
@@ -3913,8 +4059,10 @@ found_mapped_buffer: | |||
3913 | bh = NULL; | 4059 | bh = NULL; |
3914 | break; | 4060 | break; |
3915 | } | 4061 | } |
4062 | |||
3916 | if (pages[index]->index != | 4063 | if (pages[index]->index != |
3917 | pages[0]->index + index) { | 4064 | pages[start_index]->index + index |
4065 | - start_index) { | ||
3918 | /* Blocks are not contiguous. */ | 4066 | /* Blocks are not contiguous. */ |
3919 | bh = NULL; | 4067 | bh = NULL; |
3920 | break; | 4068 | break; |
@@ -4006,6 +4154,177 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
4006 | return (error < 0 ? error : 0); | 4154 | return (error < 0 ? error : 0); |
4007 | } | 4155 | } |
4008 | 4156 | ||
4157 | /* | ||
4158 | * ext4_ext_punch_hole | ||
4159 | * | ||
4160 | * Punches a hole of "length" bytes in a file starting | ||
4161 | * at byte "offset" | ||
4162 | * | ||
4163 | * @inode: The inode of the file to punch a hole in | ||
4164 | * @offset: The starting byte offset of the hole | ||
4165 | * @length: The length of the hole | ||
4166 | * | ||
4167 | * Returns the number of blocks removed or negative on err | ||
4168 | */ | ||
4169 | int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
4170 | { | ||
4171 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4172 | struct super_block *sb = inode->i_sb; | ||
4173 | struct ext4_ext_cache cache_ex; | ||
4174 | ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks; | ||
4175 | struct address_space *mapping = inode->i_mapping; | ||
4176 | struct ext4_map_blocks map; | ||
4177 | handle_t *handle; | ||
4178 | loff_t first_block_offset, last_block_offset, block_len; | ||
4179 | loff_t first_page, last_page, first_page_offset, last_page_offset; | ||
4180 | int ret, credits, blocks_released, err = 0; | ||
4181 | |||
4182 | first_block = (offset + sb->s_blocksize - 1) >> | ||
4183 | EXT4_BLOCK_SIZE_BITS(sb); | ||
4184 | last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
4185 | |||
4186 | first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
4187 | last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb); | ||
4188 | |||
4189 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
4190 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | ||
4191 | |||
4192 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | ||
4193 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | ||
4194 | |||
4195 | /* | ||
4196 | * Write out all dirty pages to avoid race conditions | ||
4197 | * Then release them. | ||
4198 | */ | ||
4199 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4200 | err = filemap_write_and_wait_range(mapping, | ||
4201 | first_page_offset == 0 ? 0 : first_page_offset-1, | ||
4202 | last_page_offset); | ||
4203 | |||
4204 | if (err) | ||
4205 | return err; | ||
4206 | } | ||
4207 | |||
4208 | /* Now release the pages */ | ||
4209 | if (last_page_offset > first_page_offset) { | ||
4210 | truncate_inode_pages_range(mapping, first_page_offset, | ||
4211 | last_page_offset-1); | ||
4212 | } | ||
4213 | |||
4214 | /* finish any pending end_io work */ | ||
4215 | ext4_flush_completed_IO(inode); | ||
4216 | |||
4217 | credits = ext4_writepage_trans_blocks(inode); | ||
4218 | handle = ext4_journal_start(inode, credits); | ||
4219 | if (IS_ERR(handle)) | ||
4220 | return PTR_ERR(handle); | ||
4221 | |||
4222 | err = ext4_orphan_add(handle, inode); | ||
4223 | if (err) | ||
4224 | goto out; | ||
4225 | |||
4226 | /* | ||
4227 | * Now we need to zero out the un block aligned data. | ||
4228 | * If the file is smaller than a block, just | ||
4229 | * zero out the middle | ||
4230 | */ | ||
4231 | if (first_block > last_block) | ||
4232 | ext4_block_zero_page_range(handle, mapping, offset, length); | ||
4233 | else { | ||
4234 | /* zero out the head of the hole before the first block */ | ||
4235 | block_len = first_block_offset - offset; | ||
4236 | if (block_len > 0) | ||
4237 | ext4_block_zero_page_range(handle, mapping, | ||
4238 | offset, block_len); | ||
4239 | |||
4240 | /* zero out the tail of the hole after the last block */ | ||
4241 | block_len = offset + length - last_block_offset; | ||
4242 | if (block_len > 0) { | ||
4243 | ext4_block_zero_page_range(handle, mapping, | ||
4244 | last_block_offset, block_len); | ||
4245 | } | ||
4246 | } | ||
4247 | |||
4248 | /* If there are no blocks to remove, return now */ | ||
4249 | if (first_block >= last_block) | ||
4250 | goto out; | ||
4251 | |||
4252 | down_write(&EXT4_I(inode)->i_data_sem); | ||
4253 | ext4_ext_invalidate_cache(inode); | ||
4254 | ext4_discard_preallocations(inode); | ||
4255 | |||
4256 | /* | ||
4257 | * Loop over all the blocks and identify blocks | ||
4258 | * that need to be punched out | ||
4259 | */ | ||
4260 | iblock = first_block; | ||
4261 | blocks_released = 0; | ||
4262 | while (iblock < last_block) { | ||
4263 | max_blocks = last_block - iblock; | ||
4264 | num_blocks = 1; | ||
4265 | memset(&map, 0, sizeof(map)); | ||
4266 | map.m_lblk = iblock; | ||
4267 | map.m_len = max_blocks; | ||
4268 | ret = ext4_ext_map_blocks(handle, inode, &map, | ||
4269 | EXT4_GET_BLOCKS_PUNCH_OUT_EXT); | ||
4270 | |||
4271 | if (ret > 0) { | ||
4272 | blocks_released += ret; | ||
4273 | num_blocks = ret; | ||
4274 | } else if (ret == 0) { | ||
4275 | /* | ||
4276 | * If map blocks could not find the block, | ||
4277 | * then it is in a hole. If the hole was | ||
4278 | * not already cached, then map blocks should | ||
4279 | * put it in the cache. So we can get the hole | ||
4280 | * out of the cache | ||
4281 | */ | ||
4282 | memset(&cache_ex, 0, sizeof(cache_ex)); | ||
4283 | if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) && | ||
4284 | !cache_ex.ec_start) { | ||
4285 | |||
4286 | /* The hole is cached */ | ||
4287 | num_blocks = cache_ex.ec_block + | ||
4288 | cache_ex.ec_len - iblock; | ||
4289 | |||
4290 | } else { | ||
4291 | /* The block could not be identified */ | ||
4292 | err = -EIO; | ||
4293 | break; | ||
4294 | } | ||
4295 | } else { | ||
4296 | /* Map blocks error */ | ||
4297 | err = ret; | ||
4298 | break; | ||
4299 | } | ||
4300 | |||
4301 | if (num_blocks == 0) { | ||
4302 | /* This condition should never happen */ | ||
4303 | ext_debug("Block lookup failed"); | ||
4304 | err = -EIO; | ||
4305 | break; | ||
4306 | } | ||
4307 | |||
4308 | iblock += num_blocks; | ||
4309 | } | ||
4310 | |||
4311 | if (blocks_released > 0) { | ||
4312 | ext4_ext_invalidate_cache(inode); | ||
4313 | ext4_discard_preallocations(inode); | ||
4314 | } | ||
4315 | |||
4316 | if (IS_SYNC(inode)) | ||
4317 | ext4_handle_sync(handle); | ||
4318 | |||
4319 | up_write(&EXT4_I(inode)->i_data_sem); | ||
4320 | |||
4321 | out: | ||
4322 | ext4_orphan_del(handle, inode); | ||
4323 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4324 | ext4_mark_inode_dirty(handle, inode); | ||
4325 | ext4_journal_stop(handle); | ||
4326 | return err; | ||
4327 | } | ||
4009 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4328 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
4010 | __u64 start, __u64 len) | 4329 | __u64 start, __u64 len) |
4011 | { | 4330 | { |
@@ -4042,4 +4361,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4042 | 4361 | ||
4043 | return error; | 4362 | return error; |
4044 | } | 4363 | } |
4045 | |||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 7b80d543b89e..2c0972322009 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -272,7 +272,6 @@ const struct file_operations ext4_file_operations = { | |||
272 | }; | 272 | }; |
273 | 273 | ||
274 | const struct inode_operations ext4_file_inode_operations = { | 274 | const struct inode_operations ext4_file_inode_operations = { |
275 | .truncate = ext4_truncate, | ||
276 | .setattr = ext4_setattr, | 275 | .setattr = ext4_setattr, |
277 | .getattr = ext4_getattr, | 276 | .getattr = ext4_getattr, |
278 | #ifdef CONFIG_EXT4_FS_XATTR | 277 | #ifdef CONFIG_EXT4_FS_XATTR |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e9473cbe80df..ce66d2fe826c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -36,7 +36,7 @@ | |||
36 | 36 | ||
37 | static void dump_completed_IO(struct inode * inode) | 37 | static void dump_completed_IO(struct inode * inode) |
38 | { | 38 | { |
39 | #ifdef EXT4_DEBUG | 39 | #ifdef EXT4FS_DEBUG |
40 | struct list_head *cur, *before, *after; | 40 | struct list_head *cur, *before, *after; |
41 | ext4_io_end_t *io, *io0, *io1; | 41 | ext4_io_end_t *io, *io0, *io1; |
42 | unsigned long flags; | 42 | unsigned long flags; |
@@ -172,6 +172,7 @@ int ext4_sync_file(struct file *file, int datasync) | |||
172 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 172 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
173 | int ret; | 173 | int ret; |
174 | tid_t commit_tid; | 174 | tid_t commit_tid; |
175 | bool needs_barrier = false; | ||
175 | 176 | ||
176 | J_ASSERT(ext4_journal_current_handle() == NULL); | 177 | J_ASSERT(ext4_journal_current_handle() == NULL); |
177 | 178 | ||
@@ -211,22 +212,12 @@ int ext4_sync_file(struct file *file, int datasync) | |||
211 | } | 212 | } |
212 | 213 | ||
213 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; | 214 | commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; |
214 | if (jbd2_log_start_commit(journal, commit_tid)) { | 215 | if (journal->j_flags & JBD2_BARRIER && |
215 | /* | 216 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
216 | * When the journal is on a different device than the | 217 | needs_barrier = true; |
217 | * fs data disk, we need to issue the barrier in | 218 | jbd2_log_start_commit(journal, commit_tid); |
218 | * writeback mode. (In ordered mode, the jbd2 layer | 219 | ret = jbd2_log_wait_commit(journal, commit_tid); |
219 | * will take care of issuing the barrier. In | 220 | if (needs_barrier) |
220 | * data=journal, all of the data blocks are written to | ||
221 | * the journal device.) | ||
222 | */ | ||
223 | if (ext4_should_writeback_data(inode) && | ||
224 | (journal->j_fs_dev != journal->j_dev) && | ||
225 | (journal->j_flags & JBD2_BARRIER)) | ||
226 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | ||
227 | NULL); | ||
228 | ret = jbd2_log_wait_commit(journal, commit_tid); | ||
229 | } else if (journal->j_flags & JBD2_BARRIER) | ||
230 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 221 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
231 | out: | 222 | out: |
232 | trace_ext4_sync_file_exit(inode, ret); | 223 | trace_ext4_sync_file_exit(inode, ret); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f2fa5e8a582c..50d0e9c64584 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, | |||
639 | while (target > 0) { | 639 | while (target > 0) { |
640 | count = target; | 640 | count = target; |
641 | /* allocating blocks for indirect blocks and direct blocks */ | 641 | /* allocating blocks for indirect blocks and direct blocks */ |
642 | current_block = ext4_new_meta_blocks(handle, inode, | 642 | current_block = ext4_new_meta_blocks(handle, inode, goal, |
643 | goal, &count, err); | 643 | 0, &count, err); |
644 | if (*err) | 644 | if (*err) |
645 | goto failed_out; | 645 | goto failed_out; |
646 | 646 | ||
@@ -1930,7 +1930,7 @@ repeat: | |||
1930 | * We do still charge estimated metadata to the sb though; | 1930 | * We do still charge estimated metadata to the sb though; |
1931 | * we cannot afford to run out of free blocks. | 1931 | * we cannot afford to run out of free blocks. |
1932 | */ | 1932 | */ |
1933 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1933 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { |
1934 | dquot_release_reservation_block(inode, 1); | 1934 | dquot_release_reservation_block(inode, 1); |
1935 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1935 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1936 | yield(); | 1936 | yield(); |
@@ -2796,9 +2796,7 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2796 | continue; | 2796 | continue; |
2797 | } | 2797 | } |
2798 | 2798 | ||
2799 | if (PageWriteback(page)) | 2799 | wait_on_page_writeback(page); |
2800 | wait_on_page_writeback(page); | ||
2801 | |||
2802 | BUG_ON(PageWriteback(page)); | 2800 | BUG_ON(PageWriteback(page)); |
2803 | 2801 | ||
2804 | if (mpd->next_page != page->index) | 2802 | if (mpd->next_page != page->index) |
@@ -3513,7 +3511,7 @@ retry: | |||
3513 | loff_t end = offset + iov_length(iov, nr_segs); | 3511 | loff_t end = offset + iov_length(iov, nr_segs); |
3514 | 3512 | ||
3515 | if (end > isize) | 3513 | if (end > isize) |
3516 | vmtruncate(inode, isize); | 3514 | ext4_truncate_failed_write(inode); |
3517 | } | 3515 | } |
3518 | } | 3516 | } |
3519 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3517 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -3916,9 +3914,30 @@ void ext4_set_aops(struct inode *inode) | |||
3916 | int ext4_block_truncate_page(handle_t *handle, | 3914 | int ext4_block_truncate_page(handle_t *handle, |
3917 | struct address_space *mapping, loff_t from) | 3915 | struct address_space *mapping, loff_t from) |
3918 | { | 3916 | { |
3917 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3918 | unsigned length; | ||
3919 | unsigned blocksize; | ||
3920 | struct inode *inode = mapping->host; | ||
3921 | |||
3922 | blocksize = inode->i_sb->s_blocksize; | ||
3923 | length = blocksize - (offset & (blocksize - 1)); | ||
3924 | |||
3925 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3926 | } | ||
3927 | |||
3928 | /* | ||
3929 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3930 | * starting from file offset 'from'. The range to be zero'd must | ||
3931 | * be contained within one block. If the specified range exceeds | ||
3932 | * the end of the block it will be shortened to the end of the block | ||
3933 | * that corresponds to 'from' | ||
3934 | */ | ||
3935 | int ext4_block_zero_page_range(handle_t *handle, | ||
3936 | struct address_space *mapping, loff_t from, loff_t length) | ||
3937 | { | ||
3919 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3938 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
3920 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 3939 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
3921 | unsigned blocksize, length, pos; | 3940 | unsigned blocksize, max, pos; |
3922 | ext4_lblk_t iblock; | 3941 | ext4_lblk_t iblock; |
3923 | struct inode *inode = mapping->host; | 3942 | struct inode *inode = mapping->host; |
3924 | struct buffer_head *bh; | 3943 | struct buffer_head *bh; |
@@ -3931,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle, | |||
3931 | return -EINVAL; | 3950 | return -EINVAL; |
3932 | 3951 | ||
3933 | blocksize = inode->i_sb->s_blocksize; | 3952 | blocksize = inode->i_sb->s_blocksize; |
3934 | length = blocksize - (offset & (blocksize - 1)); | 3953 | max = blocksize - (offset & (blocksize - 1)); |
3954 | |||
3955 | /* | ||
3956 | * correct length if it does not fall between | ||
3957 | * 'from' and the end of the block | ||
3958 | */ | ||
3959 | if (length > max || length < 0) | ||
3960 | length = max; | ||
3961 | |||
3935 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | 3962 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); |
3936 | 3963 | ||
3937 | if (!page_has_buffers(page)) | 3964 | if (!page_has_buffers(page)) |
@@ -4380,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4380 | 4407 | ||
4381 | int ext4_can_truncate(struct inode *inode) | 4408 | int ext4_can_truncate(struct inode *inode) |
4382 | { | 4409 | { |
4383 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
4384 | return 0; | ||
4385 | if (S_ISREG(inode->i_mode)) | 4410 | if (S_ISREG(inode->i_mode)) |
4386 | return 1; | 4411 | return 1; |
4387 | if (S_ISDIR(inode->i_mode)) | 4412 | if (S_ISDIR(inode->i_mode)) |
@@ -4392,6 +4417,31 @@ int ext4_can_truncate(struct inode *inode) | |||
4392 | } | 4417 | } |
4393 | 4418 | ||
4394 | /* | 4419 | /* |
4420 | * ext4_punch_hole: punches a hole in a file by releasing the blocks | ||
4421 | * associated with the given offset and length | ||
4422 | * | ||
4423 | * @file: File whose inode the hole is punched in | ||
4424 | * @offset: The offset where the hole will begin | ||
4425 | * @length: The length of the hole | ||
4426 | * | ||
4427 | * Returns: 0 on success or negative on failure | ||
4428 | */ | ||
4429 | |||
4430 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | ||
4431 | { | ||
4432 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4433 | if (!S_ISREG(inode->i_mode)) | ||
4434 | return -ENOTSUPP; | ||
4435 | |||
4436 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
4437 | /* TODO: Add support for non extent hole punching */ | ||
4438 | return -ENOTSUPP; | ||
4439 | } | ||
4440 | |||
4441 | return ext4_ext_punch_hole(file, offset, length); | ||
4442 | } | ||
4443 | |||
4444 | /* | ||
4395 | * ext4_truncate() | 4445 | * ext4_truncate() |
4396 | * | 4446 | * |
4397 | * We block out ext4_get_block() block instantiations across the entire | 4447 | * We block out ext4_get_block() block instantiations across the entire |
@@ -4617,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4617 | /* | 4667 | /* |
4618 | * Figure out the offset within the block group inode table | 4668 | * Figure out the offset within the block group inode table |
4619 | */ | 4669 | */ |
4620 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | 4670 | inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; |
4621 | inode_offset = ((inode->i_ino - 1) % | 4671 | inode_offset = ((inode->i_ino - 1) % |
4622 | EXT4_INODES_PER_GROUP(sb)); | 4672 | EXT4_INODES_PER_GROUP(sb)); |
4623 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | 4673 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); |
@@ -5311,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5311 | 5361 | ||
5312 | if (S_ISREG(inode->i_mode) && | 5362 | if (S_ISREG(inode->i_mode) && |
5313 | attr->ia_valid & ATTR_SIZE && | 5363 | attr->ia_valid & ATTR_SIZE && |
5314 | (attr->ia_size < inode->i_size || | 5364 | (attr->ia_size < inode->i_size)) { |
5315 | (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { | ||
5316 | handle_t *handle; | 5365 | handle_t *handle; |
5317 | 5366 | ||
5318 | handle = ext4_journal_start(inode, 3); | 5367 | handle = ext4_journal_start(inode, 3); |
@@ -5346,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5346 | goto err_out; | 5395 | goto err_out; |
5347 | } | 5396 | } |
5348 | } | 5397 | } |
5349 | /* ext4_truncate will clear the flag */ | ||
5350 | if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) | ||
5351 | ext4_truncate(inode); | ||
5352 | } | 5398 | } |
5353 | 5399 | ||
5354 | if ((attr->ia_valid & ATTR_SIZE) && | 5400 | if (attr->ia_valid & ATTR_SIZE) { |
5355 | attr->ia_size != i_size_read(inode)) | 5401 | if (attr->ia_size != i_size_read(inode)) { |
5356 | rc = vmtruncate(inode, attr->ia_size); | 5402 | truncate_setsize(inode, attr->ia_size); |
5403 | ext4_truncate(inode); | ||
5404 | } else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) | ||
5405 | ext4_truncate(inode); | ||
5406 | } | ||
5357 | 5407 | ||
5358 | if (!rc) { | 5408 | if (!rc) { |
5359 | setattr_copy(inode, attr); | 5409 | setattr_copy(inode, attr); |
@@ -5811,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5811 | goto out_unlock; | 5861 | goto out_unlock; |
5812 | } | 5862 | } |
5813 | ret = 0; | 5863 | ret = 0; |
5814 | if (PageMappedToDisk(page)) | 5864 | |
5815 | goto out_unlock; | 5865 | lock_page(page); |
5866 | wait_on_page_writeback(page); | ||
5867 | if (PageMappedToDisk(page)) { | ||
5868 | up_read(&inode->i_alloc_sem); | ||
5869 | return VM_FAULT_LOCKED; | ||
5870 | } | ||
5816 | 5871 | ||
5817 | if (page->index == size >> PAGE_CACHE_SHIFT) | 5872 | if (page->index == size >> PAGE_CACHE_SHIFT) |
5818 | len = size & ~PAGE_CACHE_MASK; | 5873 | len = size & ~PAGE_CACHE_MASK; |
5819 | else | 5874 | else |
5820 | len = PAGE_CACHE_SIZE; | 5875 | len = PAGE_CACHE_SIZE; |
5821 | 5876 | ||
5822 | lock_page(page); | ||
5823 | /* | 5877 | /* |
5824 | * return if we have all the buffers mapped. This avoid | 5878 | * return if we have all the buffers mapped. This avoid |
5825 | * the need to call write_begin/write_end which does a | 5879 | * the need to call write_begin/write_end which does a |
@@ -5829,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5829 | if (page_has_buffers(page)) { | 5883 | if (page_has_buffers(page)) { |
5830 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, | 5884 | if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, |
5831 | ext4_bh_unmapped)) { | 5885 | ext4_bh_unmapped)) { |
5832 | unlock_page(page); | 5886 | up_read(&inode->i_alloc_sem); |
5833 | goto out_unlock; | 5887 | return VM_FAULT_LOCKED; |
5834 | } | 5888 | } |
5835 | } | 5889 | } |
5836 | unlock_page(page); | 5890 | unlock_page(page); |
@@ -5850,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5850 | if (ret < 0) | 5904 | if (ret < 0) |
5851 | goto out_unlock; | 5905 | goto out_unlock; |
5852 | ret = 0; | 5906 | ret = 0; |
5907 | |||
5908 | /* | ||
5909 | * write_begin/end might have created a dirty page and someone | ||
5910 | * could wander in and start the IO. Make sure that hasn't | ||
5911 | * happened. | ||
5912 | */ | ||
5913 | lock_page(page); | ||
5914 | wait_on_page_writeback(page); | ||
5915 | up_read(&inode->i_alloc_sem); | ||
5916 | return VM_FAULT_LOCKED; | ||
5853 | out_unlock: | 5917 | out_unlock: |
5854 | if (ret) | 5918 | if (ret) |
5855 | ret = VM_FAULT_SIGBUS; | 5919 | ret = VM_FAULT_SIGBUS; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d8a16eecf1d5..859f2ae8864e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -787,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
787 | struct inode *inode; | 787 | struct inode *inode; |
788 | char *data; | 788 | char *data; |
789 | char *bitmap; | 789 | char *bitmap; |
790 | struct ext4_group_info *grinfo; | ||
790 | 791 | ||
791 | mb_debug(1, "init page %lu\n", page->index); | 792 | mb_debug(1, "init page %lu\n", page->index); |
792 | 793 | ||
@@ -819,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
819 | if (first_group + i >= ngroups) | 820 | if (first_group + i >= ngroups) |
820 | break; | 821 | break; |
821 | 822 | ||
823 | grinfo = ext4_get_group_info(sb, first_group + i); | ||
824 | /* | ||
825 | * If page is uptodate then we came here after online resize | ||
826 | * which added some new uninitialized group info structs, so | ||
827 | * we must skip all initialized uptodate buddies on the page, | ||
828 | * which may be currently in use by an allocating task. | ||
829 | */ | ||
830 | if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { | ||
831 | bh[i] = NULL; | ||
832 | continue; | ||
833 | } | ||
834 | |||
822 | err = -EIO; | 835 | err = -EIO; |
823 | desc = ext4_get_group_desc(sb, first_group + i, NULL); | 836 | desc = ext4_get_group_desc(sb, first_group + i, NULL); |
824 | if (desc == NULL) | 837 | if (desc == NULL) |
@@ -871,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
871 | } | 884 | } |
872 | 885 | ||
873 | /* wait for I/O completion */ | 886 | /* wait for I/O completion */ |
874 | for (i = 0; i < groups_per_page && bh[i]; i++) | 887 | for (i = 0; i < groups_per_page; i++) |
875 | wait_on_buffer(bh[i]); | 888 | if (bh[i]) |
889 | wait_on_buffer(bh[i]); | ||
876 | 890 | ||
877 | err = -EIO; | 891 | err = -EIO; |
878 | for (i = 0; i < groups_per_page && bh[i]; i++) | 892 | for (i = 0; i < groups_per_page; i++) |
879 | if (!buffer_uptodate(bh[i])) | 893 | if (bh[i] && !buffer_uptodate(bh[i])) |
880 | goto out; | 894 | goto out; |
881 | 895 | ||
882 | err = 0; | 896 | err = 0; |
883 | first_block = page->index * blocks_per_page; | 897 | first_block = page->index * blocks_per_page; |
884 | /* init the page */ | ||
885 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
886 | for (i = 0; i < blocks_per_page; i++) { | 898 | for (i = 0; i < blocks_per_page; i++) { |
887 | int group; | 899 | int group; |
888 | struct ext4_group_info *grinfo; | ||
889 | 900 | ||
890 | group = (first_block + i) >> 1; | 901 | group = (first_block + i) >> 1; |
891 | if (group >= ngroups) | 902 | if (group >= ngroups) |
892 | break; | 903 | break; |
893 | 904 | ||
905 | if (!bh[group - first_group]) | ||
906 | /* skip initialized uptodate buddy */ | ||
907 | continue; | ||
908 | |||
894 | /* | 909 | /* |
895 | * data carry information regarding this | 910 | * data carry information regarding this |
896 | * particular group in the format specified | 911 | * particular group in the format specified |
@@ -919,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
919 | * incore got set to the group block bitmap below | 934 | * incore got set to the group block bitmap below |
920 | */ | 935 | */ |
921 | ext4_lock_group(sb, group); | 936 | ext4_lock_group(sb, group); |
937 | /* init the buddy */ | ||
938 | memset(data, 0xff, blocksize); | ||
922 | ext4_mb_generate_buddy(sb, data, incore, group); | 939 | ext4_mb_generate_buddy(sb, data, incore, group); |
923 | ext4_unlock_group(sb, group); | 940 | ext4_unlock_group(sb, group); |
924 | incore = NULL; | 941 | incore = NULL; |
@@ -948,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
948 | 965 | ||
949 | out: | 966 | out: |
950 | if (bh) { | 967 | if (bh) { |
951 | for (i = 0; i < groups_per_page && bh[i]; i++) | 968 | for (i = 0; i < groups_per_page; i++) |
952 | brelse(bh[i]); | 969 | brelse(bh[i]); |
953 | if (bh != &bhs) | 970 | if (bh != &bhs) |
954 | kfree(bh); | 971 | kfree(bh); |
@@ -957,22 +974,21 @@ out: | |||
957 | } | 974 | } |
958 | 975 | ||
959 | /* | 976 | /* |
960 | * lock the group_info alloc_sem of all the groups | 977 | * Lock the buddy and bitmap pages. This makes sure other parallel init_group |
961 | * belonging to the same buddy cache page. This | 978 | * on the same buddy page doesn't happen while holding the buddy page lock. |
962 | * make sure other parallel operation on the buddy | 979 | * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap |
963 | * cache doesn't happen whild holding the buddy cache | 980 | * are on the same page e4b->bd_buddy_page is NULL and return value is 0. |
964 | * lock | ||
965 | */ | 981 | */ |
966 | static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | 982 | static int ext4_mb_get_buddy_page_lock(struct super_block *sb, |
967 | ext4_group_t group) | 983 | ext4_group_t group, struct ext4_buddy *e4b) |
968 | { | 984 | { |
969 | int i; | 985 | struct inode *inode = EXT4_SB(sb)->s_buddy_cache; |
970 | int block, pnum; | 986 | int block, pnum, poff; |
971 | int blocks_per_page; | 987 | int blocks_per_page; |
972 | int groups_per_page; | 988 | struct page *page; |
973 | ext4_group_t ngroups = ext4_get_groups_count(sb); | 989 | |
974 | ext4_group_t first_group; | 990 | e4b->bd_buddy_page = NULL; |
975 | struct ext4_group_info *grp; | 991 | e4b->bd_bitmap_page = NULL; |
976 | 992 | ||
977 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 993 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
978 | /* | 994 | /* |
@@ -982,57 +998,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb, | |||
982 | */ | 998 | */ |
983 | block = group * 2; | 999 | block = group * 2; |
984 | pnum = block / blocks_per_page; | 1000 | pnum = block / blocks_per_page; |
985 | first_group = pnum * blocks_per_page / 2; | 1001 | poff = block % blocks_per_page; |
986 | 1002 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | |
987 | groups_per_page = blocks_per_page >> 1; | 1003 | if (!page) |
988 | if (groups_per_page == 0) | 1004 | return -EIO; |
989 | groups_per_page = 1; | 1005 | BUG_ON(page->mapping != inode->i_mapping); |
990 | /* read all groups the page covers into the cache */ | 1006 | e4b->bd_bitmap_page = page; |
991 | for (i = 0; i < groups_per_page; i++) { | 1007 | e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); |
992 | 1008 | ||
993 | if ((first_group + i) >= ngroups) | 1009 | if (blocks_per_page >= 2) { |
994 | break; | 1010 | /* buddy and bitmap are on the same page */ |
995 | grp = ext4_get_group_info(sb, first_group + i); | 1011 | return 0; |
996 | /* take all groups write allocation | ||
997 | * semaphore. This make sure there is | ||
998 | * no block allocation going on in any | ||
999 | * of that groups | ||
1000 | */ | ||
1001 | down_write_nested(&grp->alloc_sem, i); | ||
1002 | } | 1012 | } |
1003 | return i; | 1013 | |
1014 | block++; | ||
1015 | pnum = block / blocks_per_page; | ||
1016 | poff = block % blocks_per_page; | ||
1017 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1018 | if (!page) | ||
1019 | return -EIO; | ||
1020 | BUG_ON(page->mapping != inode->i_mapping); | ||
1021 | e4b->bd_buddy_page = page; | ||
1022 | return 0; | ||
1004 | } | 1023 | } |
1005 | 1024 | ||
1006 | static void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | 1025 | static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) |
1007 | ext4_group_t group, int locked_group) | ||
1008 | { | 1026 | { |
1009 | int i; | 1027 | if (e4b->bd_bitmap_page) { |
1010 | int block, pnum; | 1028 | unlock_page(e4b->bd_bitmap_page); |
1011 | int blocks_per_page; | 1029 | page_cache_release(e4b->bd_bitmap_page); |
1012 | ext4_group_t first_group; | 1030 | } |
1013 | struct ext4_group_info *grp; | 1031 | if (e4b->bd_buddy_page) { |
1014 | 1032 | unlock_page(e4b->bd_buddy_page); | |
1015 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 1033 | page_cache_release(e4b->bd_buddy_page); |
1016 | /* | ||
1017 | * the buddy cache inode stores the block bitmap | ||
1018 | * and buddy information in consecutive blocks. | ||
1019 | * So for each group we need two blocks. | ||
1020 | */ | ||
1021 | block = group * 2; | ||
1022 | pnum = block / blocks_per_page; | ||
1023 | first_group = pnum * blocks_per_page / 2; | ||
1024 | /* release locks on all the groups */ | ||
1025 | for (i = 0; i < locked_group; i++) { | ||
1026 | |||
1027 | grp = ext4_get_group_info(sb, first_group + i); | ||
1028 | /* take all groups write allocation | ||
1029 | * semaphore. This make sure there is | ||
1030 | * no block allocation going on in any | ||
1031 | * of that groups | ||
1032 | */ | ||
1033 | up_write(&grp->alloc_sem); | ||
1034 | } | 1034 | } |
1035 | |||
1036 | } | 1035 | } |
1037 | 1036 | ||
1038 | /* | 1037 | /* |
@@ -1044,93 +1043,60 @@ static noinline_for_stack | |||
1044 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 1043 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
1045 | { | 1044 | { |
1046 | 1045 | ||
1047 | int ret = 0; | ||
1048 | void *bitmap; | ||
1049 | int blocks_per_page; | ||
1050 | int block, pnum, poff; | ||
1051 | int num_grp_locked = 0; | ||
1052 | struct ext4_group_info *this_grp; | 1046 | struct ext4_group_info *this_grp; |
1053 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1047 | struct ext4_buddy e4b; |
1054 | struct inode *inode = sbi->s_buddy_cache; | 1048 | struct page *page; |
1055 | struct page *page = NULL, *bitmap_page = NULL; | 1049 | int ret = 0; |
1056 | 1050 | ||
1057 | mb_debug(1, "init group %u\n", group); | 1051 | mb_debug(1, "init group %u\n", group); |
1058 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1059 | this_grp = ext4_get_group_info(sb, group); | 1052 | this_grp = ext4_get_group_info(sb, group); |
1060 | /* | 1053 | /* |
1061 | * This ensures that we don't reinit the buddy cache | 1054 | * This ensures that we don't reinit the buddy cache |
1062 | * page which map to the group from which we are already | 1055 | * page which map to the group from which we are already |
1063 | * allocating. If we are looking at the buddy cache we would | 1056 | * allocating. If we are looking at the buddy cache we would |
1064 | * have taken a reference using ext4_mb_load_buddy and that | 1057 | * have taken a reference using ext4_mb_load_buddy and that |
1065 | * would have taken the alloc_sem lock. | 1058 | * would have pinned buddy page to page cache. |
1066 | */ | 1059 | */ |
1067 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | 1060 | ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); |
1068 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | 1061 | if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { |
1069 | /* | 1062 | /* |
1070 | * somebody initialized the group | 1063 | * somebody initialized the group |
1071 | * return without doing anything | 1064 | * return without doing anything |
1072 | */ | 1065 | */ |
1073 | ret = 0; | ||
1074 | goto err; | 1066 | goto err; |
1075 | } | 1067 | } |
1076 | /* | 1068 | |
1077 | * the buddy cache inode stores the block bitmap | 1069 | page = e4b.bd_bitmap_page; |
1078 | * and buddy information in consecutive blocks. | 1070 | ret = ext4_mb_init_cache(page, NULL); |
1079 | * So for each group we need two blocks. | 1071 | if (ret) |
1080 | */ | 1072 | goto err; |
1081 | block = group * 2; | 1073 | if (!PageUptodate(page)) { |
1082 | pnum = block / blocks_per_page; | ||
1083 | poff = block % blocks_per_page; | ||
1084 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1085 | if (page) { | ||
1086 | BUG_ON(page->mapping != inode->i_mapping); | ||
1087 | ret = ext4_mb_init_cache(page, NULL); | ||
1088 | if (ret) { | ||
1089 | unlock_page(page); | ||
1090 | goto err; | ||
1091 | } | ||
1092 | unlock_page(page); | ||
1093 | } | ||
1094 | if (page == NULL || !PageUptodate(page)) { | ||
1095 | ret = -EIO; | 1074 | ret = -EIO; |
1096 | goto err; | 1075 | goto err; |
1097 | } | 1076 | } |
1098 | mark_page_accessed(page); | 1077 | mark_page_accessed(page); |
1099 | bitmap_page = page; | ||
1100 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1101 | 1078 | ||
1102 | /* init buddy cache */ | 1079 | if (e4b.bd_buddy_page == NULL) { |
1103 | block++; | ||
1104 | pnum = block / blocks_per_page; | ||
1105 | poff = block % blocks_per_page; | ||
1106 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1107 | if (page == bitmap_page) { | ||
1108 | /* | 1080 | /* |
1109 | * If both the bitmap and buddy are in | 1081 | * If both the bitmap and buddy are in |
1110 | * the same page we don't need to force | 1082 | * the same page we don't need to force |
1111 | * init the buddy | 1083 | * init the buddy |
1112 | */ | 1084 | */ |
1113 | unlock_page(page); | 1085 | ret = 0; |
1114 | } else if (page) { | 1086 | goto err; |
1115 | BUG_ON(page->mapping != inode->i_mapping); | ||
1116 | ret = ext4_mb_init_cache(page, bitmap); | ||
1117 | if (ret) { | ||
1118 | unlock_page(page); | ||
1119 | goto err; | ||
1120 | } | ||
1121 | unlock_page(page); | ||
1122 | } | 1087 | } |
1123 | if (page == NULL || !PageUptodate(page)) { | 1088 | /* init buddy cache */ |
1089 | page = e4b.bd_buddy_page; | ||
1090 | ret = ext4_mb_init_cache(page, e4b.bd_bitmap); | ||
1091 | if (ret) | ||
1092 | goto err; | ||
1093 | if (!PageUptodate(page)) { | ||
1124 | ret = -EIO; | 1094 | ret = -EIO; |
1125 | goto err; | 1095 | goto err; |
1126 | } | 1096 | } |
1127 | mark_page_accessed(page); | 1097 | mark_page_accessed(page); |
1128 | err: | 1098 | err: |
1129 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | 1099 | ext4_mb_put_buddy_page_lock(&e4b); |
1130 | if (bitmap_page) | ||
1131 | page_cache_release(bitmap_page); | ||
1132 | if (page) | ||
1133 | page_cache_release(page); | ||
1134 | return ret; | 1100 | return ret; |
1135 | } | 1101 | } |
1136 | 1102 | ||
@@ -1164,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
1164 | e4b->bd_group = group; | 1130 | e4b->bd_group = group; |
1165 | e4b->bd_buddy_page = NULL; | 1131 | e4b->bd_buddy_page = NULL; |
1166 | e4b->bd_bitmap_page = NULL; | 1132 | e4b->bd_bitmap_page = NULL; |
1167 | e4b->alloc_semp = &grp->alloc_sem; | ||
1168 | |||
1169 | /* Take the read lock on the group alloc | ||
1170 | * sem. This would make sure a parallel | ||
1171 | * ext4_mb_init_group happening on other | ||
1172 | * groups mapped by the page is blocked | ||
1173 | * till we are done with allocation | ||
1174 | */ | ||
1175 | repeat_load_buddy: | ||
1176 | down_read(e4b->alloc_semp); | ||
1177 | 1133 | ||
1178 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | 1134 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
1179 | /* we need to check for group need init flag | ||
1180 | * with alloc_semp held so that we can be sure | ||
1181 | * that new blocks didn't get added to the group | ||
1182 | * when we are loading the buddy cache | ||
1183 | */ | ||
1184 | up_read(e4b->alloc_semp); | ||
1185 | /* | 1135 | /* |
1186 | * we need full data about the group | 1136 | * we need full data about the group |
1187 | * to make a good selection | 1137 | * to make a good selection |
@@ -1189,7 +1139,6 @@ repeat_load_buddy: | |||
1189 | ret = ext4_mb_init_group(sb, group); | 1139 | ret = ext4_mb_init_group(sb, group); |
1190 | if (ret) | 1140 | if (ret) |
1191 | return ret; | 1141 | return ret; |
1192 | goto repeat_load_buddy; | ||
1193 | } | 1142 | } |
1194 | 1143 | ||
1195 | /* | 1144 | /* |
@@ -1273,15 +1222,14 @@ repeat_load_buddy: | |||
1273 | return 0; | 1222 | return 0; |
1274 | 1223 | ||
1275 | err: | 1224 | err: |
1225 | if (page) | ||
1226 | page_cache_release(page); | ||
1276 | if (e4b->bd_bitmap_page) | 1227 | if (e4b->bd_bitmap_page) |
1277 | page_cache_release(e4b->bd_bitmap_page); | 1228 | page_cache_release(e4b->bd_bitmap_page); |
1278 | if (e4b->bd_buddy_page) | 1229 | if (e4b->bd_buddy_page) |
1279 | page_cache_release(e4b->bd_buddy_page); | 1230 | page_cache_release(e4b->bd_buddy_page); |
1280 | e4b->bd_buddy = NULL; | 1231 | e4b->bd_buddy = NULL; |
1281 | e4b->bd_bitmap = NULL; | 1232 | e4b->bd_bitmap = NULL; |
1282 | |||
1283 | /* Done with the buddy cache */ | ||
1284 | up_read(e4b->alloc_semp); | ||
1285 | return ret; | 1233 | return ret; |
1286 | } | 1234 | } |
1287 | 1235 | ||
@@ -1291,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) | |||
1291 | page_cache_release(e4b->bd_bitmap_page); | 1239 | page_cache_release(e4b->bd_bitmap_page); |
1292 | if (e4b->bd_buddy_page) | 1240 | if (e4b->bd_buddy_page) |
1293 | page_cache_release(e4b->bd_buddy_page); | 1241 | page_cache_release(e4b->bd_buddy_page); |
1294 | /* Done with the buddy cache */ | ||
1295 | if (e4b->alloc_semp) | ||
1296 | up_read(e4b->alloc_semp); | ||
1297 | } | 1242 | } |
1298 | 1243 | ||
1299 | 1244 | ||
@@ -1606,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1606 | get_page(ac->ac_bitmap_page); | 1551 | get_page(ac->ac_bitmap_page); |
1607 | ac->ac_buddy_page = e4b->bd_buddy_page; | 1552 | ac->ac_buddy_page = e4b->bd_buddy_page; |
1608 | get_page(ac->ac_buddy_page); | 1553 | get_page(ac->ac_buddy_page); |
1609 | /* on allocation we use ac to track the held semaphore */ | ||
1610 | ac->alloc_semp = e4b->alloc_semp; | ||
1611 | e4b->alloc_semp = NULL; | ||
1612 | /* store last allocated for subsequent stream allocation */ | 1554 | /* store last allocated for subsequent stream allocation */ |
1613 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { | 1555 | if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { |
1614 | spin_lock(&sbi->s_md_lock); | 1556 | spin_lock(&sbi->s_md_lock); |
@@ -2659,7 +2601,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2659 | struct super_block *sb = journal->j_private; | 2601 | struct super_block *sb = journal->j_private; |
2660 | struct ext4_buddy e4b; | 2602 | struct ext4_buddy e4b; |
2661 | struct ext4_group_info *db; | 2603 | struct ext4_group_info *db; |
2662 | int err, ret, count = 0, count2 = 0; | 2604 | int err, count = 0, count2 = 0; |
2663 | struct ext4_free_data *entry; | 2605 | struct ext4_free_data *entry; |
2664 | struct list_head *l, *ltmp; | 2606 | struct list_head *l, *ltmp; |
2665 | 2607 | ||
@@ -2669,15 +2611,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2669 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2611 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2670 | entry->count, entry->group, entry); | 2612 | entry->count, entry->group, entry); |
2671 | 2613 | ||
2672 | if (test_opt(sb, DISCARD)) { | 2614 | if (test_opt(sb, DISCARD)) |
2673 | ret = ext4_issue_discard(sb, entry->group, | 2615 | ext4_issue_discard(sb, entry->group, |
2674 | entry->start_blk, entry->count); | 2616 | entry->start_blk, entry->count); |
2675 | if (unlikely(ret == -EOPNOTSUPP)) { | ||
2676 | ext4_warning(sb, "discard not supported, " | ||
2677 | "disabling"); | ||
2678 | clear_opt(sb, DISCARD); | ||
2679 | } | ||
2680 | } | ||
2681 | 2617 | ||
2682 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2618 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2683 | /* we expect to find existing buddy because it's pinned */ | 2619 | /* we expect to find existing buddy because it's pinned */ |
@@ -4226,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4226 | spin_unlock(&pa->pa_lock); | 4162 | spin_unlock(&pa->pa_lock); |
4227 | } | 4163 | } |
4228 | } | 4164 | } |
4229 | if (ac->alloc_semp) | ||
4230 | up_read(ac->alloc_semp); | ||
4231 | if (pa) { | 4165 | if (pa) { |
4232 | /* | 4166 | /* |
4233 | * We want to add the pa to the right bucket. | 4167 | * We want to add the pa to the right bucket. |
4234 | * Remove it from the list and while adding | 4168 | * Remove it from the list and while adding |
4235 | * make sure the list to which we are adding | 4169 | * make sure the list to which we are adding |
4236 | * doesn't grow big. We need to release | 4170 | * doesn't grow big. |
4237 | * alloc_semp before calling ext4_mb_add_n_trim() | ||
4238 | */ | 4171 | */ |
4239 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { | 4172 | if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) { |
4240 | spin_lock(pa->pa_obj_lock); | 4173 | spin_lock(pa->pa_obj_lock); |
@@ -4303,7 +4236,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4303 | * there is enough free blocks to do block allocation | 4236 | * there is enough free blocks to do block allocation |
4304 | * and verify allocation doesn't exceed the quota limits. | 4237 | * and verify allocation doesn't exceed the quota limits. |
4305 | */ | 4238 | */ |
4306 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { | 4239 | while (ar->len && |
4240 | ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { | ||
4241 | |||
4307 | /* let others to free the space */ | 4242 | /* let others to free the space */ |
4308 | yield(); | 4243 | yield(); |
4309 | ar->len = ar->len >> 1; | 4244 | ar->len = ar->len >> 1; |
@@ -4313,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4313 | return 0; | 4248 | return 0; |
4314 | } | 4249 | } |
4315 | reserv_blks = ar->len; | 4250 | reserv_blks = ar->len; |
4316 | while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { | 4251 | if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { |
4317 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4252 | dquot_alloc_block_nofail(ar->inode, ar->len); |
4318 | ar->len--; | 4253 | } else { |
4254 | while (ar->len && | ||
4255 | dquot_alloc_block(ar->inode, ar->len)) { | ||
4256 | |||
4257 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | ||
4258 | ar->len--; | ||
4259 | } | ||
4319 | } | 4260 | } |
4320 | inquota = ar->len; | 4261 | inquota = ar->len; |
4321 | if (ar->len == 0) { | 4262 | if (ar->len == 0) { |
@@ -4704,6 +4645,127 @@ error_return: | |||
4704 | } | 4645 | } |
4705 | 4646 | ||
4706 | /** | 4647 | /** |
4648 | * ext4_add_groupblocks() -- Add given blocks to an existing group | ||
4649 | * @handle: handle to this transaction | ||
4650 | * @sb: super block | ||
4651 | * @block: start physcial block to add to the block group | ||
4652 | * @count: number of blocks to free | ||
4653 | * | ||
4654 | * This marks the blocks as free in the bitmap and buddy. | ||
4655 | */ | ||
4656 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, | ||
4657 | ext4_fsblk_t block, unsigned long count) | ||
4658 | { | ||
4659 | struct buffer_head *bitmap_bh = NULL; | ||
4660 | struct buffer_head *gd_bh; | ||
4661 | ext4_group_t block_group; | ||
4662 | ext4_grpblk_t bit; | ||
4663 | unsigned int i; | ||
4664 | struct ext4_group_desc *desc; | ||
4665 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4666 | struct ext4_buddy e4b; | ||
4667 | int err = 0, ret, blk_free_count; | ||
4668 | ext4_grpblk_t blocks_freed; | ||
4669 | struct ext4_group_info *grp; | ||
4670 | |||
4671 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); | ||
4672 | |||
4673 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | ||
4674 | grp = ext4_get_group_info(sb, block_group); | ||
4675 | /* | ||
4676 | * Check to see if we are freeing blocks across a group | ||
4677 | * boundary. | ||
4678 | */ | ||
4679 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) | ||
4680 | goto error_return; | ||
4681 | |||
4682 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | ||
4683 | if (!bitmap_bh) | ||
4684 | goto error_return; | ||
4685 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); | ||
4686 | if (!desc) | ||
4687 | goto error_return; | ||
4688 | |||
4689 | if (in_range(ext4_block_bitmap(sb, desc), block, count) || | ||
4690 | in_range(ext4_inode_bitmap(sb, desc), block, count) || | ||
4691 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | ||
4692 | in_range(block + count - 1, ext4_inode_table(sb, desc), | ||
4693 | sbi->s_itb_per_group)) { | ||
4694 | ext4_error(sb, "Adding blocks in system zones - " | ||
4695 | "Block = %llu, count = %lu", | ||
4696 | block, count); | ||
4697 | goto error_return; | ||
4698 | } | ||
4699 | |||
4700 | BUFFER_TRACE(bitmap_bh, "getting write access"); | ||
4701 | err = ext4_journal_get_write_access(handle, bitmap_bh); | ||
4702 | if (err) | ||
4703 | goto error_return; | ||
4704 | |||
4705 | /* | ||
4706 | * We are about to modify some metadata. Call the journal APIs | ||
4707 | * to unshare ->b_data if a currently-committing transaction is | ||
4708 | * using it | ||
4709 | */ | ||
4710 | BUFFER_TRACE(gd_bh, "get_write_access"); | ||
4711 | err = ext4_journal_get_write_access(handle, gd_bh); | ||
4712 | if (err) | ||
4713 | goto error_return; | ||
4714 | |||
4715 | for (i = 0, blocks_freed = 0; i < count; i++) { | ||
4716 | BUFFER_TRACE(bitmap_bh, "clear bit"); | ||
4717 | if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { | ||
4718 | ext4_error(sb, "bit already cleared for block %llu", | ||
4719 | (ext4_fsblk_t)(block + i)); | ||
4720 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | ||
4721 | } else { | ||
4722 | blocks_freed++; | ||
4723 | } | ||
4724 | } | ||
4725 | |||
4726 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4727 | if (err) | ||
4728 | goto error_return; | ||
4729 | |||
4730 | /* | ||
4731 | * need to update group_info->bb_free and bitmap | ||
4732 | * with group lock held. generate_buddy look at | ||
4733 | * them with group lock_held | ||
4734 | */ | ||
4735 | ext4_lock_group(sb, block_group); | ||
4736 | mb_clear_bits(bitmap_bh->b_data, bit, count); | ||
4737 | mb_free_blocks(NULL, &e4b, bit, count); | ||
4738 | blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); | ||
4739 | ext4_free_blks_set(sb, desc, blk_free_count); | ||
4740 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | ||
4741 | ext4_unlock_group(sb, block_group); | ||
4742 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); | ||
4743 | |||
4744 | if (sbi->s_log_groups_per_flex) { | ||
4745 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | ||
4746 | atomic_add(blocks_freed, | ||
4747 | &sbi->s_flex_groups[flex_group].free_blocks); | ||
4748 | } | ||
4749 | |||
4750 | ext4_mb_unload_buddy(&e4b); | ||
4751 | |||
4752 | /* We dirtied the bitmap block */ | ||
4753 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4754 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
4755 | |||
4756 | /* And the group descriptor block */ | ||
4757 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | ||
4758 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | ||
4759 | if (!err) | ||
4760 | err = ret; | ||
4761 | |||
4762 | error_return: | ||
4763 | brelse(bitmap_bh); | ||
4764 | ext4_std_error(sb, err); | ||
4765 | return; | ||
4766 | } | ||
4767 | |||
4768 | /** | ||
4707 | * ext4_trim_extent -- function to TRIM one single free extent in the group | 4769 | * ext4_trim_extent -- function to TRIM one single free extent in the group |
4708 | * @sb: super block for the file system | 4770 | * @sb: super block for the file system |
4709 | * @start: starting block of the free extent in the alloc. group | 4771 | * @start: starting block of the free extent in the alloc. group |
@@ -4715,11 +4777,10 @@ error_return: | |||
4715 | * one will allocate those blocks, mark it as used in buddy bitmap. This must | 4777 | * one will allocate those blocks, mark it as used in buddy bitmap. This must |
4716 | * be called with under the group lock. | 4778 | * be called with under the group lock. |
4717 | */ | 4779 | */ |
4718 | static int ext4_trim_extent(struct super_block *sb, int start, int count, | 4780 | static void ext4_trim_extent(struct super_block *sb, int start, int count, |
4719 | ext4_group_t group, struct ext4_buddy *e4b) | 4781 | ext4_group_t group, struct ext4_buddy *e4b) |
4720 | { | 4782 | { |
4721 | struct ext4_free_extent ex; | 4783 | struct ext4_free_extent ex; |
4722 | int ret = 0; | ||
4723 | 4784 | ||
4724 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); | 4785 | assert_spin_locked(ext4_group_lock_ptr(sb, group)); |
4725 | 4786 | ||
@@ -4733,12 +4794,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4733 | */ | 4794 | */ |
4734 | mb_mark_used(e4b, &ex); | 4795 | mb_mark_used(e4b, &ex); |
4735 | ext4_unlock_group(sb, group); | 4796 | ext4_unlock_group(sb, group); |
4736 | 4797 | ext4_issue_discard(sb, group, start, count); | |
4737 | ret = ext4_issue_discard(sb, group, start, count); | ||
4738 | |||
4739 | ext4_lock_group(sb, group); | 4798 | ext4_lock_group(sb, group); |
4740 | mb_free_blocks(NULL, e4b, start, ex.fe_len); | 4799 | mb_free_blocks(NULL, e4b, start, ex.fe_len); |
4741 | return ret; | ||
4742 | } | 4800 | } |
4743 | 4801 | ||
4744 | /** | 4802 | /** |
@@ -4760,21 +4818,26 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4760 | * the group buddy bitmap. This is done until whole group is scanned. | 4818 | * the group buddy bitmap. This is done until whole group is scanned. |
4761 | */ | 4819 | */ |
4762 | static ext4_grpblk_t | 4820 | static ext4_grpblk_t |
4763 | ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | 4821 | ext4_trim_all_free(struct super_block *sb, ext4_group_t group, |
4764 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | 4822 | ext4_grpblk_t start, ext4_grpblk_t max, |
4823 | ext4_grpblk_t minblocks) | ||
4765 | { | 4824 | { |
4766 | void *bitmap; | 4825 | void *bitmap; |
4767 | ext4_grpblk_t next, count = 0; | 4826 | ext4_grpblk_t next, count = 0; |
4768 | ext4_group_t group; | 4827 | struct ext4_buddy e4b; |
4769 | int ret = 0; | 4828 | int ret; |
4770 | 4829 | ||
4771 | BUG_ON(e4b == NULL); | 4830 | ret = ext4_mb_load_buddy(sb, group, &e4b); |
4831 | if (ret) { | ||
4832 | ext4_error(sb, "Error in loading buddy " | ||
4833 | "information for %u", group); | ||
4834 | return ret; | ||
4835 | } | ||
4836 | bitmap = e4b.bd_bitmap; | ||
4772 | 4837 | ||
4773 | bitmap = e4b->bd_bitmap; | ||
4774 | group = e4b->bd_group; | ||
4775 | start = (e4b->bd_info->bb_first_free > start) ? | ||
4776 | e4b->bd_info->bb_first_free : start; | ||
4777 | ext4_lock_group(sb, group); | 4838 | ext4_lock_group(sb, group); |
4839 | start = (e4b.bd_info->bb_first_free > start) ? | ||
4840 | e4b.bd_info->bb_first_free : start; | ||
4778 | 4841 | ||
4779 | while (start < max) { | 4842 | while (start < max) { |
4780 | start = mb_find_next_zero_bit(bitmap, max, start); | 4843 | start = mb_find_next_zero_bit(bitmap, max, start); |
@@ -4783,10 +4846,8 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | |||
4783 | next = mb_find_next_bit(bitmap, max, start); | 4846 | next = mb_find_next_bit(bitmap, max, start); |
4784 | 4847 | ||
4785 | if ((next - start) >= minblocks) { | 4848 | if ((next - start) >= minblocks) { |
4786 | ret = ext4_trim_extent(sb, start, | 4849 | ext4_trim_extent(sb, start, |
4787 | next - start, group, e4b); | 4850 | next - start, group, &e4b); |
4788 | if (ret < 0) | ||
4789 | break; | ||
4790 | count += next - start; | 4851 | count += next - start; |
4791 | } | 4852 | } |
4792 | start = next + 1; | 4853 | start = next + 1; |
@@ -4802,17 +4863,15 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | |||
4802 | ext4_lock_group(sb, group); | 4863 | ext4_lock_group(sb, group); |
4803 | } | 4864 | } |
4804 | 4865 | ||
4805 | if ((e4b->bd_info->bb_free - count) < minblocks) | 4866 | if ((e4b.bd_info->bb_free - count) < minblocks) |
4806 | break; | 4867 | break; |
4807 | } | 4868 | } |
4808 | ext4_unlock_group(sb, group); | 4869 | ext4_unlock_group(sb, group); |
4870 | ext4_mb_unload_buddy(&e4b); | ||
4809 | 4871 | ||
4810 | ext4_debug("trimmed %d blocks in the group %d\n", | 4872 | ext4_debug("trimmed %d blocks in the group %d\n", |
4811 | count, group); | 4873 | count, group); |
4812 | 4874 | ||
4813 | if (ret < 0) | ||
4814 | count = ret; | ||
4815 | |||
4816 | return count; | 4875 | return count; |
4817 | } | 4876 | } |
4818 | 4877 | ||
@@ -4830,11 +4889,11 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | |||
4830 | */ | 4889 | */ |
4831 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | 4890 | int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) |
4832 | { | 4891 | { |
4833 | struct ext4_buddy e4b; | 4892 | struct ext4_group_info *grp; |
4834 | ext4_group_t first_group, last_group; | 4893 | ext4_group_t first_group, last_group; |
4835 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); | 4894 | ext4_group_t group, ngroups = ext4_get_groups_count(sb); |
4836 | ext4_grpblk_t cnt = 0, first_block, last_block; | 4895 | ext4_grpblk_t cnt = 0, first_block, last_block; |
4837 | uint64_t start, len, minlen, trimmed; | 4896 | uint64_t start, len, minlen, trimmed = 0; |
4838 | ext4_fsblk_t first_data_blk = | 4897 | ext4_fsblk_t first_data_blk = |
4839 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 4898 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
4840 | int ret = 0; | 4899 | int ret = 0; |
@@ -4842,7 +4901,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4842 | start = range->start >> sb->s_blocksize_bits; | 4901 | start = range->start >> sb->s_blocksize_bits; |
4843 | len = range->len >> sb->s_blocksize_bits; | 4902 | len = range->len >> sb->s_blocksize_bits; |
4844 | minlen = range->minlen >> sb->s_blocksize_bits; | 4903 | minlen = range->minlen >> sb->s_blocksize_bits; |
4845 | trimmed = 0; | ||
4846 | 4904 | ||
4847 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) | 4905 | if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) |
4848 | return -EINVAL; | 4906 | return -EINVAL; |
@@ -4863,11 +4921,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4863 | return -EINVAL; | 4921 | return -EINVAL; |
4864 | 4922 | ||
4865 | for (group = first_group; group <= last_group; group++) { | 4923 | for (group = first_group; group <= last_group; group++) { |
4866 | ret = ext4_mb_load_buddy(sb, group, &e4b); | 4924 | grp = ext4_get_group_info(sb, group); |
4867 | if (ret) { | 4925 | /* We only do this if the grp has never been initialized */ |
4868 | ext4_error(sb, "Error in loading buddy " | 4926 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { |
4869 | "information for %u", group); | 4927 | ret = ext4_mb_init_group(sb, group); |
4870 | break; | 4928 | if (ret) |
4929 | break; | ||
4871 | } | 4930 | } |
4872 | 4931 | ||
4873 | /* | 4932 | /* |
@@ -4880,16 +4939,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4880 | last_block = first_block + len; | 4939 | last_block = first_block + len; |
4881 | len -= last_block - first_block; | 4940 | len -= last_block - first_block; |
4882 | 4941 | ||
4883 | if (e4b.bd_info->bb_free >= minlen) { | 4942 | if (grp->bb_free >= minlen) { |
4884 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | 4943 | cnt = ext4_trim_all_free(sb, group, first_block, |
4885 | last_block, minlen); | 4944 | last_block, minlen); |
4886 | if (cnt < 0) { | 4945 | if (cnt < 0) { |
4887 | ret = cnt; | 4946 | ret = cnt; |
4888 | ext4_mb_unload_buddy(&e4b); | ||
4889 | break; | 4947 | break; |
4890 | } | 4948 | } |
4891 | } | 4949 | } |
4892 | ext4_mb_unload_buddy(&e4b); | ||
4893 | trimmed += cnt; | 4950 | trimmed += cnt; |
4894 | first_block = 0; | 4951 | first_block = 0; |
4895 | } | 4952 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 22bd4d7f289b..20b5e7bfebd1 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -193,11 +193,6 @@ struct ext4_allocation_context { | |||
193 | __u8 ac_op; /* operation, for history only */ | 193 | __u8 ac_op; /* operation, for history only */ |
194 | struct page *ac_bitmap_page; | 194 | struct page *ac_bitmap_page; |
195 | struct page *ac_buddy_page; | 195 | struct page *ac_buddy_page; |
196 | /* | ||
197 | * pointer to the held semaphore upon successful | ||
198 | * block allocation | ||
199 | */ | ||
200 | struct rw_semaphore *alloc_semp; | ||
201 | struct ext4_prealloc_space *ac_pa; | 196 | struct ext4_prealloc_space *ac_pa; |
202 | struct ext4_locality_group *ac_lg; | 197 | struct ext4_locality_group *ac_lg; |
203 | }; | 198 | }; |
@@ -215,7 +210,6 @@ struct ext4_buddy { | |||
215 | struct super_block *bd_sb; | 210 | struct super_block *bd_sb; |
216 | __u16 bd_blkbits; | 211 | __u16 bd_blkbits; |
217 | ext4_group_t bd_group; | 212 | ext4_group_t bd_group; |
218 | struct rw_semaphore *alloc_semp; | ||
219 | }; | 213 | }; |
220 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | 214 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) |
221 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | 215 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 92816b4e0f16..b57b98fb44d1 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, | |||
376 | * We have the extent map build with the tmp inode. | 376 | * We have the extent map build with the tmp inode. |
377 | * Now copy the i_data across | 377 | * Now copy the i_data across |
378 | */ | 378 | */ |
379 | ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS); | 379 | ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); |
380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); | 380 | memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); |
381 | 381 | ||
382 | /* | 382 | /* |
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c new file mode 100644 index 000000000000..9bdef3f537c5 --- /dev/null +++ b/fs/ext4/mmp.c | |||
@@ -0,0 +1,351 @@ | |||
1 | #include <linux/fs.h> | ||
2 | #include <linux/random.h> | ||
3 | #include <linux/buffer_head.h> | ||
4 | #include <linux/utsname.h> | ||
5 | #include <linux/kthread.h> | ||
6 | |||
7 | #include "ext4.h" | ||
8 | |||
9 | /* | ||
10 | * Write the MMP block using WRITE_SYNC to try to get the block on-disk | ||
11 | * faster. | ||
12 | */ | ||
13 | static int write_mmp_block(struct buffer_head *bh) | ||
14 | { | ||
15 | mark_buffer_dirty(bh); | ||
16 | lock_buffer(bh); | ||
17 | bh->b_end_io = end_buffer_write_sync; | ||
18 | get_bh(bh); | ||
19 | submit_bh(WRITE_SYNC, bh); | ||
20 | wait_on_buffer(bh); | ||
21 | if (unlikely(!buffer_uptodate(bh))) | ||
22 | return 1; | ||
23 | |||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | /* | ||
28 | * Read the MMP block. It _must_ be read from disk and hence we clear the | ||
29 | * uptodate flag on the buffer. | ||
30 | */ | ||
31 | static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, | ||
32 | ext4_fsblk_t mmp_block) | ||
33 | { | ||
34 | struct mmp_struct *mmp; | ||
35 | |||
36 | if (*bh) | ||
37 | clear_buffer_uptodate(*bh); | ||
38 | |||
39 | /* This would be sb_bread(sb, mmp_block), except we need to be sure | ||
40 | * that the MD RAID device cache has been bypassed, and that the read | ||
41 | * is not blocked in the elevator. */ | ||
42 | if (!*bh) | ||
43 | *bh = sb_getblk(sb, mmp_block); | ||
44 | if (*bh) { | ||
45 | get_bh(*bh); | ||
46 | lock_buffer(*bh); | ||
47 | (*bh)->b_end_io = end_buffer_read_sync; | ||
48 | submit_bh(READ_SYNC, *bh); | ||
49 | wait_on_buffer(*bh); | ||
50 | if (!buffer_uptodate(*bh)) { | ||
51 | brelse(*bh); | ||
52 | *bh = NULL; | ||
53 | } | ||
54 | } | ||
55 | if (!*bh) { | ||
56 | ext4_warning(sb, "Error while reading MMP block %llu", | ||
57 | mmp_block); | ||
58 | return -EIO; | ||
59 | } | ||
60 | |||
61 | mmp = (struct mmp_struct *)((*bh)->b_data); | ||
62 | if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) | ||
63 | return -EINVAL; | ||
64 | |||
65 | return 0; | ||
66 | } | ||
67 | |||
68 | /* | ||
69 | * Dump as much information as possible to help the admin. | ||
70 | */ | ||
71 | void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, | ||
72 | const char *function, unsigned int line, const char *msg) | ||
73 | { | ||
74 | __ext4_warning(sb, function, line, msg); | ||
75 | __ext4_warning(sb, function, line, | ||
76 | "MMP failure info: last update time: %llu, last update " | ||
77 | "node: %s, last update device: %s\n", | ||
78 | (long long unsigned int) le64_to_cpu(mmp->mmp_time), | ||
79 | mmp->mmp_nodename, mmp->mmp_bdevname); | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * kmmpd will update the MMP sequence every s_mmp_update_interval seconds | ||
84 | */ | ||
85 | static int kmmpd(void *data) | ||
86 | { | ||
87 | struct super_block *sb = ((struct mmpd_data *) data)->sb; | ||
88 | struct buffer_head *bh = ((struct mmpd_data *) data)->bh; | ||
89 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
90 | struct mmp_struct *mmp; | ||
91 | ext4_fsblk_t mmp_block; | ||
92 | u32 seq = 0; | ||
93 | unsigned long failed_writes = 0; | ||
94 | int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); | ||
95 | unsigned mmp_check_interval; | ||
96 | unsigned long last_update_time; | ||
97 | unsigned long diff; | ||
98 | int retval; | ||
99 | |||
100 | mmp_block = le64_to_cpu(es->s_mmp_block); | ||
101 | mmp = (struct mmp_struct *)(bh->b_data); | ||
102 | mmp->mmp_time = cpu_to_le64(get_seconds()); | ||
103 | /* | ||
104 | * Start with the higher mmp_check_interval and reduce it if | ||
105 | * the MMP block is being updated on time. | ||
106 | */ | ||
107 | mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, | ||
108 | EXT4_MMP_MIN_CHECK_INTERVAL); | ||
109 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); | ||
110 | bdevname(bh->b_bdev, mmp->mmp_bdevname); | ||
111 | |||
112 | memcpy(mmp->mmp_nodename, init_utsname()->sysname, | ||
113 | sizeof(mmp->mmp_nodename)); | ||
114 | |||
115 | while (!kthread_should_stop()) { | ||
116 | if (++seq > EXT4_MMP_SEQ_MAX) | ||
117 | seq = 1; | ||
118 | |||
119 | mmp->mmp_seq = cpu_to_le32(seq); | ||
120 | mmp->mmp_time = cpu_to_le64(get_seconds()); | ||
121 | last_update_time = jiffies; | ||
122 | |||
123 | retval = write_mmp_block(bh); | ||
124 | /* | ||
125 | * Don't spew too many error messages. Print one every | ||
126 | * (s_mmp_update_interval * 60) seconds. | ||
127 | */ | ||
128 | if (retval && (failed_writes % 60) == 0) { | ||
129 | ext4_error(sb, "Error writing to MMP block"); | ||
130 | failed_writes++; | ||
131 | } | ||
132 | |||
133 | if (!(le32_to_cpu(es->s_feature_incompat) & | ||
134 | EXT4_FEATURE_INCOMPAT_MMP)) { | ||
135 | ext4_warning(sb, "kmmpd being stopped since MMP feature" | ||
136 | " has been disabled."); | ||
137 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
138 | goto failed; | ||
139 | } | ||
140 | |||
141 | if (sb->s_flags & MS_RDONLY) { | ||
142 | ext4_warning(sb, "kmmpd being stopped since filesystem " | ||
143 | "has been remounted as readonly."); | ||
144 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
145 | goto failed; | ||
146 | } | ||
147 | |||
148 | diff = jiffies - last_update_time; | ||
149 | if (diff < mmp_update_interval * HZ) | ||
150 | schedule_timeout_interruptible(mmp_update_interval * | ||
151 | HZ - diff); | ||
152 | |||
153 | /* | ||
154 | * We need to make sure that more than mmp_check_interval | ||
155 | * seconds have not passed since writing. If that has happened | ||
156 | * we need to check if the MMP block is as we left it. | ||
157 | */ | ||
158 | diff = jiffies - last_update_time; | ||
159 | if (diff > mmp_check_interval * HZ) { | ||
160 | struct buffer_head *bh_check = NULL; | ||
161 | struct mmp_struct *mmp_check; | ||
162 | |||
163 | retval = read_mmp_block(sb, &bh_check, mmp_block); | ||
164 | if (retval) { | ||
165 | ext4_error(sb, "error reading MMP data: %d", | ||
166 | retval); | ||
167 | |||
168 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
169 | goto failed; | ||
170 | } | ||
171 | |||
172 | mmp_check = (struct mmp_struct *)(bh_check->b_data); | ||
173 | if (mmp->mmp_seq != mmp_check->mmp_seq || | ||
174 | memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, | ||
175 | sizeof(mmp->mmp_nodename))) { | ||
176 | dump_mmp_msg(sb, mmp_check, | ||
177 | "Error while updating MMP info. " | ||
178 | "The filesystem seems to have been" | ||
179 | " multiply mounted."); | ||
180 | ext4_error(sb, "abort"); | ||
181 | goto failed; | ||
182 | } | ||
183 | put_bh(bh_check); | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Adjust the mmp_check_interval depending on how much time | ||
188 | * it took for the MMP block to be written. | ||
189 | */ | ||
190 | mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, | ||
191 | EXT4_MMP_MAX_CHECK_INTERVAL), | ||
192 | EXT4_MMP_MIN_CHECK_INTERVAL); | ||
193 | mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); | ||
194 | } | ||
195 | |||
196 | /* | ||
197 | * Unmount seems to be clean. | ||
198 | */ | ||
199 | mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); | ||
200 | mmp->mmp_time = cpu_to_le64(get_seconds()); | ||
201 | |||
202 | retval = write_mmp_block(bh); | ||
203 | |||
204 | failed: | ||
205 | kfree(data); | ||
206 | brelse(bh); | ||
207 | return retval; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Get a random new sequence number but make sure it is not greater than | ||
212 | * EXT4_MMP_SEQ_MAX. | ||
213 | */ | ||
214 | static unsigned int mmp_new_seq(void) | ||
215 | { | ||
216 | u32 new_seq; | ||
217 | |||
218 | do { | ||
219 | get_random_bytes(&new_seq, sizeof(u32)); | ||
220 | } while (new_seq > EXT4_MMP_SEQ_MAX); | ||
221 | |||
222 | return new_seq; | ||
223 | } | ||
224 | |||
225 | /* | ||
226 | * Protect the filesystem from being mounted more than once. | ||
227 | */ | ||
228 | int ext4_multi_mount_protect(struct super_block *sb, | ||
229 | ext4_fsblk_t mmp_block) | ||
230 | { | ||
231 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
232 | struct buffer_head *bh = NULL; | ||
233 | struct mmp_struct *mmp = NULL; | ||
234 | struct mmpd_data *mmpd_data; | ||
235 | u32 seq; | ||
236 | unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); | ||
237 | unsigned int wait_time = 0; | ||
238 | int retval; | ||
239 | |||
240 | if (mmp_block < le32_to_cpu(es->s_first_data_block) || | ||
241 | mmp_block >= ext4_blocks_count(es)) { | ||
242 | ext4_warning(sb, "Invalid MMP block in superblock"); | ||
243 | goto failed; | ||
244 | } | ||
245 | |||
246 | retval = read_mmp_block(sb, &bh, mmp_block); | ||
247 | if (retval) | ||
248 | goto failed; | ||
249 | |||
250 | mmp = (struct mmp_struct *)(bh->b_data); | ||
251 | |||
252 | if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) | ||
253 | mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; | ||
254 | |||
255 | /* | ||
256 | * If check_interval in MMP block is larger, use that instead of | ||
257 | * update_interval from the superblock. | ||
258 | */ | ||
259 | if (mmp->mmp_check_interval > mmp_check_interval) | ||
260 | mmp_check_interval = mmp->mmp_check_interval; | ||
261 | |||
262 | seq = le32_to_cpu(mmp->mmp_seq); | ||
263 | if (seq == EXT4_MMP_SEQ_CLEAN) | ||
264 | goto skip; | ||
265 | |||
266 | if (seq == EXT4_MMP_SEQ_FSCK) { | ||
267 | dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); | ||
268 | goto failed; | ||
269 | } | ||
270 | |||
271 | wait_time = min(mmp_check_interval * 2 + 1, | ||
272 | mmp_check_interval + 60); | ||
273 | |||
274 | /* Print MMP interval if more than 20 secs. */ | ||
275 | if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) | ||
276 | ext4_warning(sb, "MMP interval %u higher than expected, please" | ||
277 | " wait.\n", wait_time * 2); | ||
278 | |||
279 | if (schedule_timeout_interruptible(HZ * wait_time) != 0) { | ||
280 | ext4_warning(sb, "MMP startup interrupted, failing mount\n"); | ||
281 | goto failed; | ||
282 | } | ||
283 | |||
284 | retval = read_mmp_block(sb, &bh, mmp_block); | ||
285 | if (retval) | ||
286 | goto failed; | ||
287 | mmp = (struct mmp_struct *)(bh->b_data); | ||
288 | if (seq != le32_to_cpu(mmp->mmp_seq)) { | ||
289 | dump_mmp_msg(sb, mmp, | ||
290 | "Device is already active on another node."); | ||
291 | goto failed; | ||
292 | } | ||
293 | |||
294 | skip: | ||
295 | /* | ||
296 | * write a new random sequence number. | ||
297 | */ | ||
298 | mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); | ||
299 | |||
300 | retval = write_mmp_block(bh); | ||
301 | if (retval) | ||
302 | goto failed; | ||
303 | |||
304 | /* | ||
305 | * wait for MMP interval and check mmp_seq. | ||
306 | */ | ||
307 | if (schedule_timeout_interruptible(HZ * wait_time) != 0) { | ||
308 | ext4_warning(sb, "MMP startup interrupted, failing mount\n"); | ||
309 | goto failed; | ||
310 | } | ||
311 | |||
312 | retval = read_mmp_block(sb, &bh, mmp_block); | ||
313 | if (retval) | ||
314 | goto failed; | ||
315 | mmp = (struct mmp_struct *)(bh->b_data); | ||
316 | if (seq != le32_to_cpu(mmp->mmp_seq)) { | ||
317 | dump_mmp_msg(sb, mmp, | ||
318 | "Device is already active on another node."); | ||
319 | goto failed; | ||
320 | } | ||
321 | |||
322 | mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); | ||
323 | if (!mmpd_data) { | ||
324 | ext4_warning(sb, "not enough memory for mmpd_data"); | ||
325 | goto failed; | ||
326 | } | ||
327 | mmpd_data->sb = sb; | ||
328 | mmpd_data->bh = bh; | ||
329 | |||
330 | /* | ||
331 | * Start a kernel thread to update the MMP block periodically. | ||
332 | */ | ||
333 | EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", | ||
334 | bdevname(bh->b_bdev, | ||
335 | mmp->mmp_bdevname)); | ||
336 | if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { | ||
337 | EXT4_SB(sb)->s_mmp_tsk = NULL; | ||
338 | kfree(mmpd_data); | ||
339 | ext4_warning(sb, "Unable to create kmmpd thread for %s.", | ||
340 | sb->s_id); | ||
341 | goto failed; | ||
342 | } | ||
343 | |||
344 | return 0; | ||
345 | |||
346 | failed: | ||
347 | brelse(bh); | ||
348 | return 1; | ||
349 | } | ||
350 | |||
351 | |||
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index b9f3e7862f13..2b8304bf3c50 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -876,8 +876,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
876 | * It needs to call wait_on_page_writeback() to wait for the | 876 | * It needs to call wait_on_page_writeback() to wait for the |
877 | * writeback of the page. | 877 | * writeback of the page. |
878 | */ | 878 | */ |
879 | if (PageWriteback(page)) | 879 | wait_on_page_writeback(page); |
880 | wait_on_page_writeback(page); | ||
881 | 880 | ||
882 | /* Release old bh and drop refs */ | 881 | /* Release old bh and drop refs */ |
883 | try_to_release_page(page, 0); | 882 | try_to_release_page(page, 0); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67fd0b025858..b754b7721f51 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1413,10 +1413,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1413 | frame->at = entries; | 1413 | frame->at = entries; |
1414 | frame->bh = bh; | 1414 | frame->bh = bh; |
1415 | bh = bh2; | 1415 | bh = bh2; |
1416 | |||
1417 | ext4_handle_dirty_metadata(handle, dir, frame->bh); | ||
1418 | ext4_handle_dirty_metadata(handle, dir, bh); | ||
1419 | |||
1416 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1420 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); |
1417 | dx_release (frames); | 1421 | if (!de) { |
1418 | if (!(de)) | 1422 | /* |
1423 | * Even if the block split failed, we have to properly write | ||
1424 | * out all the changes we did so far. Otherwise we can end up | ||
1425 | * with corrupted filesystem. | ||
1426 | */ | ||
1427 | ext4_mark_inode_dirty(handle, dir); | ||
1428 | dx_release(frames); | ||
1419 | return retval; | 1429 | return retval; |
1430 | } | ||
1431 | dx_release(frames); | ||
1420 | 1432 | ||
1421 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1433 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1422 | brelse(bh); | 1434 | brelse(bh); |
@@ -2240,6 +2252,7 @@ static int ext4_symlink(struct inode *dir, | |||
2240 | handle_t *handle; | 2252 | handle_t *handle; |
2241 | struct inode *inode; | 2253 | struct inode *inode; |
2242 | int l, err, retries = 0; | 2254 | int l, err, retries = 0; |
2255 | int credits; | ||
2243 | 2256 | ||
2244 | l = strlen(symname)+1; | 2257 | l = strlen(symname)+1; |
2245 | if (l > dir->i_sb->s_blocksize) | 2258 | if (l > dir->i_sb->s_blocksize) |
@@ -2247,10 +2260,26 @@ static int ext4_symlink(struct inode *dir, | |||
2247 | 2260 | ||
2248 | dquot_initialize(dir); | 2261 | dquot_initialize(dir); |
2249 | 2262 | ||
2263 | if (l > EXT4_N_BLOCKS * 4) { | ||
2264 | /* | ||
2265 | * For non-fast symlinks, we just allocate inode and put it on | ||
2266 | * orphan list in the first transaction => we need bitmap, | ||
2267 | * group descriptor, sb, inode block, quota blocks. | ||
2268 | */ | ||
2269 | credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
2270 | } else { | ||
2271 | /* | ||
2272 | * Fast symlink. We have to add entry to directory | ||
2273 | * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS), | ||
2274 | * allocate new inode (bitmap, group descriptor, inode block, | ||
2275 | * quota blocks, sb is already counted in previous macros). | ||
2276 | */ | ||
2277 | credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
2278 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + | ||
2279 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
2280 | } | ||
2250 | retry: | 2281 | retry: |
2251 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2282 | handle = ext4_journal_start(dir, credits); |
2252 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + | ||
2253 | EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); | ||
2254 | if (IS_ERR(handle)) | 2283 | if (IS_ERR(handle)) |
2255 | return PTR_ERR(handle); | 2284 | return PTR_ERR(handle); |
2256 | 2285 | ||
@@ -2263,21 +2292,44 @@ retry: | |||
2263 | if (IS_ERR(inode)) | 2292 | if (IS_ERR(inode)) |
2264 | goto out_stop; | 2293 | goto out_stop; |
2265 | 2294 | ||
2266 | if (l > sizeof(EXT4_I(inode)->i_data)) { | 2295 | if (l > EXT4_N_BLOCKS * 4) { |
2267 | inode->i_op = &ext4_symlink_inode_operations; | 2296 | inode->i_op = &ext4_symlink_inode_operations; |
2268 | ext4_set_aops(inode); | 2297 | ext4_set_aops(inode); |
2269 | /* | 2298 | /* |
2270 | * page_symlink() calls into ext4_prepare/commit_write. | 2299 | * We cannot call page_symlink() with transaction started |
2271 | * We have a transaction open. All is sweetness. It also sets | 2300 | * because it calls into ext4_write_begin() which can wait |
2272 | * i_size in generic_commit_write(). | 2301 | * for transaction commit if we are running out of space |
2302 | * and thus we deadlock. So we have to stop transaction now | ||
2303 | * and restart it when symlink contents is written. | ||
2304 | * | ||
2305 | * To keep fs consistent in case of crash, we have to put inode | ||
2306 | * to orphan list in the mean time. | ||
2273 | */ | 2307 | */ |
2308 | drop_nlink(inode); | ||
2309 | err = ext4_orphan_add(handle, inode); | ||
2310 | ext4_journal_stop(handle); | ||
2311 | if (err) | ||
2312 | goto err_drop_inode; | ||
2274 | err = __page_symlink(inode, symname, l, 1); | 2313 | err = __page_symlink(inode, symname, l, 1); |
2314 | if (err) | ||
2315 | goto err_drop_inode; | ||
2316 | /* | ||
2317 | * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS | ||
2318 | * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified | ||
2319 | */ | ||
2320 | handle = ext4_journal_start(dir, | ||
2321 | EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
2322 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); | ||
2323 | if (IS_ERR(handle)) { | ||
2324 | err = PTR_ERR(handle); | ||
2325 | goto err_drop_inode; | ||
2326 | } | ||
2327 | inc_nlink(inode); | ||
2328 | err = ext4_orphan_del(handle, inode); | ||
2275 | if (err) { | 2329 | if (err) { |
2330 | ext4_journal_stop(handle); | ||
2276 | clear_nlink(inode); | 2331 | clear_nlink(inode); |
2277 | unlock_new_inode(inode); | 2332 | goto err_drop_inode; |
2278 | ext4_mark_inode_dirty(handle, inode); | ||
2279 | iput(inode); | ||
2280 | goto out_stop; | ||
2281 | } | 2333 | } |
2282 | } else { | 2334 | } else { |
2283 | /* clear the extent format for fast symlink */ | 2335 | /* clear the extent format for fast symlink */ |
@@ -2293,6 +2345,10 @@ out_stop: | |||
2293 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) | 2345 | if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) |
2294 | goto retry; | 2346 | goto retry; |
2295 | return err; | 2347 | return err; |
2348 | err_drop_inode: | ||
2349 | unlock_new_inode(inode); | ||
2350 | iput(inode); | ||
2351 | return err; | ||
2296 | } | 2352 | } |
2297 | 2353 | ||
2298 | static int ext4_link(struct dentry *old_dentry, | 2354 | static int ext4_link(struct dentry *old_dentry, |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b6dbd056fcb1..7bb8f76d470a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -203,46 +203,29 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
203 | for (i = 0; i < io_end->num_io_pages; i++) { | 203 | for (i = 0; i < io_end->num_io_pages; i++) { |
204 | struct page *page = io_end->pages[i]->p_page; | 204 | struct page *page = io_end->pages[i]->p_page; |
205 | struct buffer_head *bh, *head; | 205 | struct buffer_head *bh, *head; |
206 | int partial_write = 0; | 206 | loff_t offset; |
207 | loff_t io_end_offset; | ||
207 | 208 | ||
208 | head = page_buffers(page); | 209 | if (error) { |
209 | if (error) | ||
210 | SetPageError(page); | 210 | SetPageError(page); |
211 | BUG_ON(!head); | 211 | set_bit(AS_EIO, &page->mapping->flags); |
212 | if (head->b_size != PAGE_CACHE_SIZE) { | 212 | head = page_buffers(page); |
213 | loff_t offset; | 213 | BUG_ON(!head); |
214 | loff_t io_end_offset = io_end->offset + io_end->size; | 214 | |
215 | io_end_offset = io_end->offset + io_end->size; | ||
215 | 216 | ||
216 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; | 217 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; |
217 | bh = head; | 218 | bh = head; |
218 | do { | 219 | do { |
219 | if ((offset >= io_end->offset) && | 220 | if ((offset >= io_end->offset) && |
220 | (offset+bh->b_size <= io_end_offset)) { | 221 | (offset+bh->b_size <= io_end_offset)) |
221 | if (error) | 222 | buffer_io_error(bh); |
222 | buffer_io_error(bh); | 223 | |
223 | |||
224 | } | ||
225 | if (buffer_delay(bh)) | ||
226 | partial_write = 1; | ||
227 | else if (!buffer_mapped(bh)) | ||
228 | clear_buffer_dirty(bh); | ||
229 | else if (buffer_dirty(bh)) | ||
230 | partial_write = 1; | ||
231 | offset += bh->b_size; | 224 | offset += bh->b_size; |
232 | bh = bh->b_this_page; | 225 | bh = bh->b_this_page; |
233 | } while (bh != head); | 226 | } while (bh != head); |
234 | } | 227 | } |
235 | 228 | ||
236 | /* | ||
237 | * If this is a partial write which happened to make | ||
238 | * all buffers uptodate then we can optimize away a | ||
239 | * bogus readpage() for the next read(). Here we | ||
240 | * 'discover' whether the page went uptodate as a | ||
241 | * result of this (potentially partial) write. | ||
242 | */ | ||
243 | if (!partial_write) | ||
244 | SetPageUptodate(page); | ||
245 | |||
246 | put_io_page(io_end->pages[i]); | 229 | put_io_page(io_end->pages[i]); |
247 | } | 230 | } |
248 | io_end->num_io_pages = 0; | 231 | io_end->num_io_pages = 0; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8553dfb310af..cc5c157aa11d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/ctype.h> | 38 | #include <linux/ctype.h> |
39 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
40 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
41 | #include <linux/cleancache.h> | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | 43 | ||
43 | #include <linux/kthread.h> | 44 | #include <linux/kthread.h> |
@@ -75,11 +76,27 @@ static void ext4_write_super(struct super_block *sb); | |||
75 | static int ext4_freeze(struct super_block *sb); | 76 | static int ext4_freeze(struct super_block *sb); |
76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | 77 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
77 | const char *dev_name, void *data); | 78 | const char *dev_name, void *data); |
79 | static inline int ext2_feature_set_ok(struct super_block *sb); | ||
80 | static inline int ext3_feature_set_ok(struct super_block *sb); | ||
78 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); | 81 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); |
79 | static void ext4_destroy_lazyinit_thread(void); | 82 | static void ext4_destroy_lazyinit_thread(void); |
80 | static void ext4_unregister_li_request(struct super_block *sb); | 83 | static void ext4_unregister_li_request(struct super_block *sb); |
81 | static void ext4_clear_request_list(void); | 84 | static void ext4_clear_request_list(void); |
82 | 85 | ||
86 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | ||
87 | static struct file_system_type ext2_fs_type = { | ||
88 | .owner = THIS_MODULE, | ||
89 | .name = "ext2", | ||
90 | .mount = ext4_mount, | ||
91 | .kill_sb = kill_block_super, | ||
92 | .fs_flags = FS_REQUIRES_DEV, | ||
93 | }; | ||
94 | #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) | ||
95 | #else | ||
96 | #define IS_EXT2_SB(sb) (0) | ||
97 | #endif | ||
98 | |||
99 | |||
83 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 100 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
84 | static struct file_system_type ext3_fs_type = { | 101 | static struct file_system_type ext3_fs_type = { |
85 | .owner = THIS_MODULE, | 102 | .owner = THIS_MODULE, |
@@ -806,6 +823,8 @@ static void ext4_put_super(struct super_block *sb) | |||
806 | invalidate_bdev(sbi->journal_bdev); | 823 | invalidate_bdev(sbi->journal_bdev); |
807 | ext4_blkdev_remove(sbi); | 824 | ext4_blkdev_remove(sbi); |
808 | } | 825 | } |
826 | if (sbi->s_mmp_tsk) | ||
827 | kthread_stop(sbi->s_mmp_tsk); | ||
809 | sb->s_fs_info = NULL; | 828 | sb->s_fs_info = NULL; |
810 | /* | 829 | /* |
811 | * Now that we are completely done shutting down the | 830 | * Now that we are completely done shutting down the |
@@ -1096,7 +1115,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1096 | 1115 | ||
1097 | if (!test_opt(sb, INIT_INODE_TABLE)) | 1116 | if (!test_opt(sb, INIT_INODE_TABLE)) |
1098 | seq_puts(seq, ",noinit_inode_table"); | 1117 | seq_puts(seq, ",noinit_inode_table"); |
1099 | else if (sbi->s_li_wait_mult) | 1118 | else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) |
1100 | seq_printf(seq, ",init_inode_table=%u", | 1119 | seq_printf(seq, ",init_inode_table=%u", |
1101 | (unsigned) sbi->s_li_wait_mult); | 1120 | (unsigned) sbi->s_li_wait_mult); |
1102 | 1121 | ||
@@ -1187,9 +1206,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
1187 | const char *data, size_t len, loff_t off); | 1206 | const char *data, size_t len, loff_t off); |
1188 | 1207 | ||
1189 | static const struct dquot_operations ext4_quota_operations = { | 1208 | static const struct dquot_operations ext4_quota_operations = { |
1190 | #ifdef CONFIG_QUOTA | ||
1191 | .get_reserved_space = ext4_get_reserved_space, | 1209 | .get_reserved_space = ext4_get_reserved_space, |
1192 | #endif | ||
1193 | .write_dquot = ext4_write_dquot, | 1210 | .write_dquot = ext4_write_dquot, |
1194 | .acquire_dquot = ext4_acquire_dquot, | 1211 | .acquire_dquot = ext4_acquire_dquot, |
1195 | .release_dquot = ext4_release_dquot, | 1212 | .release_dquot = ext4_release_dquot, |
@@ -1900,7 +1917,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1900 | ext4_msg(sb, KERN_WARNING, | 1917 | ext4_msg(sb, KERN_WARNING, |
1901 | "warning: mounting fs with errors, " | 1918 | "warning: mounting fs with errors, " |
1902 | "running e2fsck is recommended"); | 1919 | "running e2fsck is recommended"); |
1903 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && | 1920 | else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && |
1904 | le16_to_cpu(es->s_mnt_count) >= | 1921 | le16_to_cpu(es->s_mnt_count) >= |
1905 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) | 1922 | (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) |
1906 | ext4_msg(sb, KERN_WARNING, | 1923 | ext4_msg(sb, KERN_WARNING, |
@@ -1932,6 +1949,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
1932 | EXT4_INODES_PER_GROUP(sb), | 1949 | EXT4_INODES_PER_GROUP(sb), |
1933 | sbi->s_mount_opt, sbi->s_mount_opt2); | 1950 | sbi->s_mount_opt, sbi->s_mount_opt2); |
1934 | 1951 | ||
1952 | cleancache_init_fs(sb); | ||
1935 | return res; | 1953 | return res; |
1936 | } | 1954 | } |
1937 | 1955 | ||
@@ -2425,6 +2443,18 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, | |||
2425 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); | 2443 | EXT4_SB(sb)->s_sectors_written_start) >> 1))); |
2426 | } | 2444 | } |
2427 | 2445 | ||
2446 | static ssize_t extent_cache_hits_show(struct ext4_attr *a, | ||
2447 | struct ext4_sb_info *sbi, char *buf) | ||
2448 | { | ||
2449 | return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits); | ||
2450 | } | ||
2451 | |||
2452 | static ssize_t extent_cache_misses_show(struct ext4_attr *a, | ||
2453 | struct ext4_sb_info *sbi, char *buf) | ||
2454 | { | ||
2455 | return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses); | ||
2456 | } | ||
2457 | |||
2428 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | 2458 | static ssize_t inode_readahead_blks_store(struct ext4_attr *a, |
2429 | struct ext4_sb_info *sbi, | 2459 | struct ext4_sb_info *sbi, |
2430 | const char *buf, size_t count) | 2460 | const char *buf, size_t count) |
@@ -2482,6 +2512,8 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | |||
2482 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2512 | EXT4_RO_ATTR(delayed_allocation_blocks); |
2483 | EXT4_RO_ATTR(session_write_kbytes); | 2513 | EXT4_RO_ATTR(session_write_kbytes); |
2484 | EXT4_RO_ATTR(lifetime_write_kbytes); | 2514 | EXT4_RO_ATTR(lifetime_write_kbytes); |
2515 | EXT4_RO_ATTR(extent_cache_hits); | ||
2516 | EXT4_RO_ATTR(extent_cache_misses); | ||
2485 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, | 2517 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, |
2486 | inode_readahead_blks_store, s_inode_readahead_blks); | 2518 | inode_readahead_blks_store, s_inode_readahead_blks); |
2487 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); | 2519 | EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); |
@@ -2497,6 +2529,8 @@ static struct attribute *ext4_attrs[] = { | |||
2497 | ATTR_LIST(delayed_allocation_blocks), | 2529 | ATTR_LIST(delayed_allocation_blocks), |
2498 | ATTR_LIST(session_write_kbytes), | 2530 | ATTR_LIST(session_write_kbytes), |
2499 | ATTR_LIST(lifetime_write_kbytes), | 2531 | ATTR_LIST(lifetime_write_kbytes), |
2532 | ATTR_LIST(extent_cache_hits), | ||
2533 | ATTR_LIST(extent_cache_misses), | ||
2500 | ATTR_LIST(inode_readahead_blks), | 2534 | ATTR_LIST(inode_readahead_blks), |
2501 | ATTR_LIST(inode_goal), | 2535 | ATTR_LIST(inode_goal), |
2502 | ATTR_LIST(mb_stats), | 2536 | ATTR_LIST(mb_stats), |
@@ -2659,12 +2693,6 @@ static void print_daily_error_info(unsigned long arg) | |||
2659 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ | 2693 | mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ |
2660 | } | 2694 | } |
2661 | 2695 | ||
2662 | static void ext4_lazyinode_timeout(unsigned long data) | ||
2663 | { | ||
2664 | struct task_struct *p = (struct task_struct *)data; | ||
2665 | wake_up_process(p); | ||
2666 | } | ||
2667 | |||
2668 | /* Find next suitable group and run ext4_init_inode_table */ | 2696 | /* Find next suitable group and run ext4_init_inode_table */ |
2669 | static int ext4_run_li_request(struct ext4_li_request *elr) | 2697 | static int ext4_run_li_request(struct ext4_li_request *elr) |
2670 | { | 2698 | { |
@@ -2696,11 +2724,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr) | |||
2696 | ret = ext4_init_inode_table(sb, group, | 2724 | ret = ext4_init_inode_table(sb, group, |
2697 | elr->lr_timeout ? 0 : 1); | 2725 | elr->lr_timeout ? 0 : 1); |
2698 | if (elr->lr_timeout == 0) { | 2726 | if (elr->lr_timeout == 0) { |
2699 | timeout = jiffies - timeout; | 2727 | timeout = (jiffies - timeout) * |
2700 | if (elr->lr_sbi->s_li_wait_mult) | 2728 | elr->lr_sbi->s_li_wait_mult; |
2701 | timeout *= elr->lr_sbi->s_li_wait_mult; | ||
2702 | else | ||
2703 | timeout *= 20; | ||
2704 | elr->lr_timeout = timeout; | 2729 | elr->lr_timeout = timeout; |
2705 | } | 2730 | } |
2706 | elr->lr_next_sched = jiffies + elr->lr_timeout; | 2731 | elr->lr_next_sched = jiffies + elr->lr_timeout; |
@@ -2712,7 +2737,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr) | |||
2712 | 2737 | ||
2713 | /* | 2738 | /* |
2714 | * Remove lr_request from the list_request and free the | 2739 | * Remove lr_request from the list_request and free the |
2715 | * request tructure. Should be called with li_list_mtx held | 2740 | * request structure. Should be called with li_list_mtx held |
2716 | */ | 2741 | */ |
2717 | static void ext4_remove_li_request(struct ext4_li_request *elr) | 2742 | static void ext4_remove_li_request(struct ext4_li_request *elr) |
2718 | { | 2743 | { |
@@ -2730,14 +2755,16 @@ static void ext4_remove_li_request(struct ext4_li_request *elr) | |||
2730 | 2755 | ||
2731 | static void ext4_unregister_li_request(struct super_block *sb) | 2756 | static void ext4_unregister_li_request(struct super_block *sb) |
2732 | { | 2757 | { |
2733 | struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; | 2758 | mutex_lock(&ext4_li_mtx); |
2734 | 2759 | if (!ext4_li_info) { | |
2735 | if (!ext4_li_info) | 2760 | mutex_unlock(&ext4_li_mtx); |
2736 | return; | 2761 | return; |
2762 | } | ||
2737 | 2763 | ||
2738 | mutex_lock(&ext4_li_info->li_list_mtx); | 2764 | mutex_lock(&ext4_li_info->li_list_mtx); |
2739 | ext4_remove_li_request(elr); | 2765 | ext4_remove_li_request(EXT4_SB(sb)->s_li_request); |
2740 | mutex_unlock(&ext4_li_info->li_list_mtx); | 2766 | mutex_unlock(&ext4_li_info->li_list_mtx); |
2767 | mutex_unlock(&ext4_li_mtx); | ||
2741 | } | 2768 | } |
2742 | 2769 | ||
2743 | static struct task_struct *ext4_lazyinit_task; | 2770 | static struct task_struct *ext4_lazyinit_task; |
@@ -2756,17 +2783,10 @@ static int ext4_lazyinit_thread(void *arg) | |||
2756 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; | 2783 | struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; |
2757 | struct list_head *pos, *n; | 2784 | struct list_head *pos, *n; |
2758 | struct ext4_li_request *elr; | 2785 | struct ext4_li_request *elr; |
2759 | unsigned long next_wakeup; | 2786 | unsigned long next_wakeup, cur; |
2760 | DEFINE_WAIT(wait); | ||
2761 | 2787 | ||
2762 | BUG_ON(NULL == eli); | 2788 | BUG_ON(NULL == eli); |
2763 | 2789 | ||
2764 | eli->li_timer.data = (unsigned long)current; | ||
2765 | eli->li_timer.function = ext4_lazyinode_timeout; | ||
2766 | |||
2767 | eli->li_task = current; | ||
2768 | wake_up(&eli->li_wait_task); | ||
2769 | |||
2770 | cont_thread: | 2790 | cont_thread: |
2771 | while (true) { | 2791 | while (true) { |
2772 | next_wakeup = MAX_JIFFY_OFFSET; | 2792 | next_wakeup = MAX_JIFFY_OFFSET; |
@@ -2797,19 +2817,15 @@ cont_thread: | |||
2797 | if (freezing(current)) | 2817 | if (freezing(current)) |
2798 | refrigerator(); | 2818 | refrigerator(); |
2799 | 2819 | ||
2800 | if ((time_after_eq(jiffies, next_wakeup)) || | 2820 | cur = jiffies; |
2821 | if ((time_after_eq(cur, next_wakeup)) || | ||
2801 | (MAX_JIFFY_OFFSET == next_wakeup)) { | 2822 | (MAX_JIFFY_OFFSET == next_wakeup)) { |
2802 | cond_resched(); | 2823 | cond_resched(); |
2803 | continue; | 2824 | continue; |
2804 | } | 2825 | } |
2805 | 2826 | ||
2806 | eli->li_timer.expires = next_wakeup; | 2827 | schedule_timeout_interruptible(next_wakeup - cur); |
2807 | add_timer(&eli->li_timer); | 2828 | |
2808 | prepare_to_wait(&eli->li_wait_daemon, &wait, | ||
2809 | TASK_INTERRUPTIBLE); | ||
2810 | if (time_before(jiffies, next_wakeup)) | ||
2811 | schedule(); | ||
2812 | finish_wait(&eli->li_wait_daemon, &wait); | ||
2813 | if (kthread_should_stop()) { | 2829 | if (kthread_should_stop()) { |
2814 | ext4_clear_request_list(); | 2830 | ext4_clear_request_list(); |
2815 | goto exit_thread; | 2831 | goto exit_thread; |
@@ -2833,12 +2849,7 @@ exit_thread: | |||
2833 | goto cont_thread; | 2849 | goto cont_thread; |
2834 | } | 2850 | } |
2835 | mutex_unlock(&eli->li_list_mtx); | 2851 | mutex_unlock(&eli->li_list_mtx); |
2836 | del_timer_sync(&ext4_li_info->li_timer); | ||
2837 | eli->li_task = NULL; | ||
2838 | wake_up(&eli->li_wait_task); | ||
2839 | |||
2840 | kfree(ext4_li_info); | 2852 | kfree(ext4_li_info); |
2841 | ext4_lazyinit_task = NULL; | ||
2842 | ext4_li_info = NULL; | 2853 | ext4_li_info = NULL; |
2843 | mutex_unlock(&ext4_li_mtx); | 2854 | mutex_unlock(&ext4_li_mtx); |
2844 | 2855 | ||
@@ -2866,7 +2877,6 @@ static int ext4_run_lazyinit_thread(void) | |||
2866 | if (IS_ERR(ext4_lazyinit_task)) { | 2877 | if (IS_ERR(ext4_lazyinit_task)) { |
2867 | int err = PTR_ERR(ext4_lazyinit_task); | 2878 | int err = PTR_ERR(ext4_lazyinit_task); |
2868 | ext4_clear_request_list(); | 2879 | ext4_clear_request_list(); |
2869 | del_timer_sync(&ext4_li_info->li_timer); | ||
2870 | kfree(ext4_li_info); | 2880 | kfree(ext4_li_info); |
2871 | ext4_li_info = NULL; | 2881 | ext4_li_info = NULL; |
2872 | printk(KERN_CRIT "EXT4: error %d creating inode table " | 2882 | printk(KERN_CRIT "EXT4: error %d creating inode table " |
@@ -2875,8 +2885,6 @@ static int ext4_run_lazyinit_thread(void) | |||
2875 | return err; | 2885 | return err; |
2876 | } | 2886 | } |
2877 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; | 2887 | ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; |
2878 | |||
2879 | wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL); | ||
2880 | return 0; | 2888 | return 0; |
2881 | } | 2889 | } |
2882 | 2890 | ||
@@ -2911,13 +2919,9 @@ static int ext4_li_info_new(void) | |||
2911 | if (!eli) | 2919 | if (!eli) |
2912 | return -ENOMEM; | 2920 | return -ENOMEM; |
2913 | 2921 | ||
2914 | eli->li_task = NULL; | ||
2915 | INIT_LIST_HEAD(&eli->li_request_list); | 2922 | INIT_LIST_HEAD(&eli->li_request_list); |
2916 | mutex_init(&eli->li_list_mtx); | 2923 | mutex_init(&eli->li_list_mtx); |
2917 | 2924 | ||
2918 | init_waitqueue_head(&eli->li_wait_daemon); | ||
2919 | init_waitqueue_head(&eli->li_wait_task); | ||
2920 | init_timer(&eli->li_timer); | ||
2921 | eli->li_state |= EXT4_LAZYINIT_QUIT; | 2925 | eli->li_state |= EXT4_LAZYINIT_QUIT; |
2922 | 2926 | ||
2923 | ext4_li_info = eli; | 2927 | ext4_li_info = eli; |
@@ -2960,20 +2964,19 @@ static int ext4_register_li_request(struct super_block *sb, | |||
2960 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; | 2964 | ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; |
2961 | int ret = 0; | 2965 | int ret = 0; |
2962 | 2966 | ||
2963 | if (sbi->s_li_request != NULL) | 2967 | if (sbi->s_li_request != NULL) { |
2968 | /* | ||
2969 | * Reset timeout so it can be computed again, because | ||
2970 | * s_li_wait_mult might have changed. | ||
2971 | */ | ||
2972 | sbi->s_li_request->lr_timeout = 0; | ||
2964 | return 0; | 2973 | return 0; |
2974 | } | ||
2965 | 2975 | ||
2966 | if (first_not_zeroed == ngroups || | 2976 | if (first_not_zeroed == ngroups || |
2967 | (sb->s_flags & MS_RDONLY) || | 2977 | (sb->s_flags & MS_RDONLY) || |
2968 | !test_opt(sb, INIT_INODE_TABLE)) { | 2978 | !test_opt(sb, INIT_INODE_TABLE)) |
2969 | sbi->s_li_request = NULL; | ||
2970 | return 0; | 2979 | return 0; |
2971 | } | ||
2972 | |||
2973 | if (first_not_zeroed == ngroups) { | ||
2974 | sbi->s_li_request = NULL; | ||
2975 | return 0; | ||
2976 | } | ||
2977 | 2980 | ||
2978 | elr = ext4_li_request_new(sb, first_not_zeroed); | 2981 | elr = ext4_li_request_new(sb, first_not_zeroed); |
2979 | if (!elr) | 2982 | if (!elr) |
@@ -3166,6 +3169,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3166 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) | 3169 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) |
3167 | set_opt(sb, DELALLOC); | 3170 | set_opt(sb, DELALLOC); |
3168 | 3171 | ||
3172 | /* | ||
3173 | * set default s_li_wait_mult for lazyinit, for the case there is | ||
3174 | * no mount option specified. | ||
3175 | */ | ||
3176 | sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; | ||
3177 | |||
3169 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, | 3178 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, |
3170 | &journal_devnum, &journal_ioprio, NULL, 0)) { | 3179 | &journal_devnum, &journal_ioprio, NULL, 0)) { |
3171 | ext4_msg(sb, KERN_WARNING, | 3180 | ext4_msg(sb, KERN_WARNING, |
@@ -3187,6 +3196,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3187 | "feature flags set on rev 0 fs, " | 3196 | "feature flags set on rev 0 fs, " |
3188 | "running e2fsck is recommended"); | 3197 | "running e2fsck is recommended"); |
3189 | 3198 | ||
3199 | if (IS_EXT2_SB(sb)) { | ||
3200 | if (ext2_feature_set_ok(sb)) | ||
3201 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " | ||
3202 | "using the ext4 subsystem"); | ||
3203 | else { | ||
3204 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " | ||
3205 | "to feature incompatibilities"); | ||
3206 | goto failed_mount; | ||
3207 | } | ||
3208 | } | ||
3209 | |||
3210 | if (IS_EXT3_SB(sb)) { | ||
3211 | if (ext3_feature_set_ok(sb)) | ||
3212 | ext4_msg(sb, KERN_INFO, "mounting ext3 file system " | ||
3213 | "using the ext4 subsystem"); | ||
3214 | else { | ||
3215 | ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " | ||
3216 | "to feature incompatibilities"); | ||
3217 | goto failed_mount; | ||
3218 | } | ||
3219 | } | ||
3220 | |||
3190 | /* | 3221 | /* |
3191 | * Check feature flags regardless of the revision level, since we | 3222 | * Check feature flags regardless of the revision level, since we |
3192 | * previously didn't change the revision level when setting the flags, | 3223 | * previously didn't change the revision level when setting the flags, |
@@ -3459,6 +3490,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3459 | EXT4_HAS_INCOMPAT_FEATURE(sb, | 3490 | EXT4_HAS_INCOMPAT_FEATURE(sb, |
3460 | EXT4_FEATURE_INCOMPAT_RECOVER)); | 3491 | EXT4_FEATURE_INCOMPAT_RECOVER)); |
3461 | 3492 | ||
3493 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && | ||
3494 | !(sb->s_flags & MS_RDONLY)) | ||
3495 | if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) | ||
3496 | goto failed_mount3; | ||
3497 | |||
3462 | /* | 3498 | /* |
3463 | * The first inode we look at is the journal inode. Don't try | 3499 | * The first inode we look at is the journal inode. Don't try |
3464 | * root first: it may be modified in the journal! | 3500 | * root first: it may be modified in the journal! |
@@ -3474,7 +3510,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3474 | goto failed_mount_wq; | 3510 | goto failed_mount_wq; |
3475 | } else { | 3511 | } else { |
3476 | clear_opt(sb, DATA_FLAGS); | 3512 | clear_opt(sb, DATA_FLAGS); |
3477 | set_opt(sb, WRITEBACK_DATA); | ||
3478 | sbi->s_journal = NULL; | 3513 | sbi->s_journal = NULL; |
3479 | needs_recovery = 0; | 3514 | needs_recovery = 0; |
3480 | goto no_journal; | 3515 | goto no_journal; |
@@ -3707,6 +3742,8 @@ failed_mount3: | |||
3707 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 3742 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
3708 | percpu_counter_destroy(&sbi->s_dirs_counter); | 3743 | percpu_counter_destroy(&sbi->s_dirs_counter); |
3709 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | 3744 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); |
3745 | if (sbi->s_mmp_tsk) | ||
3746 | kthread_stop(sbi->s_mmp_tsk); | ||
3710 | failed_mount2: | 3747 | failed_mount2: |
3711 | for (i = 0; i < db_count; i++) | 3748 | for (i = 0; i < db_count; i++) |
3712 | brelse(sbi->s_group_desc[i]); | 3749 | brelse(sbi->s_group_desc[i]); |
@@ -4242,7 +4279,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4242 | int enable_quota = 0; | 4279 | int enable_quota = 0; |
4243 | ext4_group_t g; | 4280 | ext4_group_t g; |
4244 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; | 4281 | unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; |
4245 | int err; | 4282 | int err = 0; |
4246 | #ifdef CONFIG_QUOTA | 4283 | #ifdef CONFIG_QUOTA |
4247 | int i; | 4284 | int i; |
4248 | #endif | 4285 | #endif |
@@ -4368,6 +4405,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4368 | goto restore_opts; | 4405 | goto restore_opts; |
4369 | if (!ext4_setup_super(sb, es, 0)) | 4406 | if (!ext4_setup_super(sb, es, 0)) |
4370 | sb->s_flags &= ~MS_RDONLY; | 4407 | sb->s_flags &= ~MS_RDONLY; |
4408 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
4409 | EXT4_FEATURE_INCOMPAT_MMP)) | ||
4410 | if (ext4_multi_mount_protect(sb, | ||
4411 | le64_to_cpu(es->s_mmp_block))) { | ||
4412 | err = -EROFS; | ||
4413 | goto restore_opts; | ||
4414 | } | ||
4371 | enable_quota = 1; | 4415 | enable_quota = 1; |
4372 | } | 4416 | } |
4373 | } | 4417 | } |
@@ -4432,6 +4476,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
4432 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4476 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4433 | struct ext4_super_block *es = sbi->s_es; | 4477 | struct ext4_super_block *es = sbi->s_es; |
4434 | u64 fsid; | 4478 | u64 fsid; |
4479 | s64 bfree; | ||
4435 | 4480 | ||
4436 | if (test_opt(sb, MINIX_DF)) { | 4481 | if (test_opt(sb, MINIX_DF)) { |
4437 | sbi->s_overhead_last = 0; | 4482 | sbi->s_overhead_last = 0; |
@@ -4475,8 +4520,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
4475 | buf->f_type = EXT4_SUPER_MAGIC; | 4520 | buf->f_type = EXT4_SUPER_MAGIC; |
4476 | buf->f_bsize = sb->s_blocksize; | 4521 | buf->f_bsize = sb->s_blocksize; |
4477 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 4522 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
4478 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - | 4523 | bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
4479 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | 4524 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); |
4525 | /* prevent underflow in case that few free space is available */ | ||
4526 | buf->f_bfree = max_t(s64, bfree, 0); | ||
4480 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 4527 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
4481 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 4528 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
4482 | buf->f_bavail = 0; | 4529 | buf->f_bavail = 0; |
@@ -4652,6 +4699,9 @@ static int ext4_quota_off(struct super_block *sb, int type) | |||
4652 | if (test_opt(sb, DELALLOC)) | 4699 | if (test_opt(sb, DELALLOC)) |
4653 | sync_filesystem(sb); | 4700 | sync_filesystem(sb); |
4654 | 4701 | ||
4702 | if (!inode) | ||
4703 | goto out; | ||
4704 | |||
4655 | /* Update modification times of quota files when userspace can | 4705 | /* Update modification times of quota files when userspace can |
4656 | * start looking at them */ | 4706 | * start looking at them */ |
4657 | handle = ext4_journal_start(inode, 1); | 4707 | handle = ext4_journal_start(inode, 1); |
@@ -4772,14 +4822,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | |||
4772 | } | 4822 | } |
4773 | 4823 | ||
4774 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 4824 | #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
4775 | static struct file_system_type ext2_fs_type = { | ||
4776 | .owner = THIS_MODULE, | ||
4777 | .name = "ext2", | ||
4778 | .mount = ext4_mount, | ||
4779 | .kill_sb = kill_block_super, | ||
4780 | .fs_flags = FS_REQUIRES_DEV, | ||
4781 | }; | ||
4782 | |||
4783 | static inline void register_as_ext2(void) | 4825 | static inline void register_as_ext2(void) |
4784 | { | 4826 | { |
4785 | int err = register_filesystem(&ext2_fs_type); | 4827 | int err = register_filesystem(&ext2_fs_type); |
@@ -4792,10 +4834,22 @@ static inline void unregister_as_ext2(void) | |||
4792 | { | 4834 | { |
4793 | unregister_filesystem(&ext2_fs_type); | 4835 | unregister_filesystem(&ext2_fs_type); |
4794 | } | 4836 | } |
4837 | |||
4838 | static inline int ext2_feature_set_ok(struct super_block *sb) | ||
4839 | { | ||
4840 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) | ||
4841 | return 0; | ||
4842 | if (sb->s_flags & MS_RDONLY) | ||
4843 | return 1; | ||
4844 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) | ||
4845 | return 0; | ||
4846 | return 1; | ||
4847 | } | ||
4795 | MODULE_ALIAS("ext2"); | 4848 | MODULE_ALIAS("ext2"); |
4796 | #else | 4849 | #else |
4797 | static inline void register_as_ext2(void) { } | 4850 | static inline void register_as_ext2(void) { } |
4798 | static inline void unregister_as_ext2(void) { } | 4851 | static inline void unregister_as_ext2(void) { } |
4852 | static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } | ||
4799 | #endif | 4853 | #endif |
4800 | 4854 | ||
4801 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) | 4855 | #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) |
@@ -4811,10 +4865,24 @@ static inline void unregister_as_ext3(void) | |||
4811 | { | 4865 | { |
4812 | unregister_filesystem(&ext3_fs_type); | 4866 | unregister_filesystem(&ext3_fs_type); |
4813 | } | 4867 | } |
4868 | |||
4869 | static inline int ext3_feature_set_ok(struct super_block *sb) | ||
4870 | { | ||
4871 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) | ||
4872 | return 0; | ||
4873 | if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) | ||
4874 | return 0; | ||
4875 | if (sb->s_flags & MS_RDONLY) | ||
4876 | return 1; | ||
4877 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) | ||
4878 | return 0; | ||
4879 | return 1; | ||
4880 | } | ||
4814 | MODULE_ALIAS("ext3"); | 4881 | MODULE_ALIAS("ext3"); |
4815 | #else | 4882 | #else |
4816 | static inline void register_as_ext3(void) { } | 4883 | static inline void register_as_ext3(void) { } |
4817 | static inline void unregister_as_ext3(void) { } | 4884 | static inline void unregister_as_ext3(void) { } |
4885 | static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } | ||
4818 | #endif | 4886 | #endif |
4819 | 4887 | ||
4820 | static struct file_system_type ext4_fs_type = { | 4888 | static struct file_system_type ext4_fs_type = { |
@@ -4898,8 +4966,8 @@ static int __init ext4_init_fs(void) | |||
4898 | err = init_inodecache(); | 4966 | err = init_inodecache(); |
4899 | if (err) | 4967 | if (err) |
4900 | goto out1; | 4968 | goto out1; |
4901 | register_as_ext2(); | ||
4902 | register_as_ext3(); | 4969 | register_as_ext3(); |
4970 | register_as_ext2(); | ||
4903 | err = register_filesystem(&ext4_fs_type); | 4971 | err = register_filesystem(&ext4_fs_type); |
4904 | if (err) | 4972 | if (err) |
4905 | goto out; | 4973 | goto out; |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b545ca1c459c..c757adc97250 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -820,8 +820,8 @@ inserted: | |||
820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 820 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 821 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
822 | 822 | ||
823 | block = ext4_new_meta_blocks(handle, inode, | 823 | block = ext4_new_meta_blocks(handle, inode, goal, 0, |
824 | goal, NULL, &error); | 824 | NULL, &error); |
825 | if (error) | 825 | if (error) |
826 | goto cleanup; | 826 | goto cleanup; |
827 | 827 | ||
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 3b222dafd15b..be15437c272e 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -326,6 +326,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | |||
326 | struct fat_slot_info sinfo; | 326 | struct fat_slot_info sinfo; |
327 | int err; | 327 | int err; |
328 | 328 | ||
329 | dentry_unhash(dentry); | ||
330 | |||
329 | lock_super(sb); | 331 | lock_super(sb); |
330 | /* | 332 | /* |
331 | * Check whether the directory is not in use, then check | 333 | * Check whether the directory is not in use, then check |
@@ -457,6 +459,9 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, | |||
457 | old_inode = old_dentry->d_inode; | 459 | old_inode = old_dentry->d_inode; |
458 | new_inode = new_dentry->d_inode; | 460 | new_inode = new_dentry->d_inode; |
459 | 461 | ||
462 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
463 | dentry_unhash(new_dentry); | ||
464 | |||
460 | err = fat_scan(old_dir, old_name, &old_sinfo); | 465 | err = fat_scan(old_dir, old_name, &old_sinfo); |
461 | if (err) { | 466 | if (err) { |
462 | err = -EIO; | 467 | err = -EIO; |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 20b4ea53fdc4..c61a6789f36c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -824,6 +824,8 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
824 | struct fat_slot_info sinfo; | 824 | struct fat_slot_info sinfo; |
825 | int err; | 825 | int err; |
826 | 826 | ||
827 | dentry_unhash(dentry); | ||
828 | |||
827 | lock_super(sb); | 829 | lock_super(sb); |
828 | 830 | ||
829 | err = fat_dir_empty(inode); | 831 | err = fat_dir_empty(inode); |
@@ -931,6 +933,9 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
931 | int err, is_dir, update_dotdot, corrupt = 0; | 933 | int err, is_dir, update_dotdot, corrupt = 0; |
932 | struct super_block *sb = old_dir->i_sb; | 934 | struct super_block *sb = old_dir->i_sb; |
933 | 935 | ||
936 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
937 | dentry_unhash(new_dentry); | ||
938 | |||
934 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; | 939 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; |
935 | old_inode = old_dentry->d_inode; | 940 | old_inode = old_dentry->d_inode; |
936 | new_inode = new_dentry->d_inode; | 941 | new_inode = new_dentry->d_inode; |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 48a18f184d50..30afdfa7aec7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op) | |||
33 | _enter("{OBJ%x OP%x,%u}", | 33 | _enter("{OBJ%x OP%x,%u}", |
34 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); | 34 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); |
35 | 35 | ||
36 | fscache_set_op_state(op, "EnQ"); | ||
37 | |||
38 | ASSERT(list_empty(&op->pend_link)); | 36 | ASSERT(list_empty(&op->pend_link)); |
39 | ASSERT(op->processor != NULL); | 37 | ASSERT(op->processor != NULL); |
40 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); | 38 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); |
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation); | |||
66 | static void fscache_run_op(struct fscache_object *object, | 64 | static void fscache_run_op(struct fscache_object *object, |
67 | struct fscache_operation *op) | 65 | struct fscache_operation *op) |
68 | { | 66 | { |
69 | fscache_set_op_state(op, "Run"); | ||
70 | |||
71 | object->n_in_progress++; | 67 | object->n_in_progress++; |
72 | if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) | 68 | if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) |
73 | wake_up_bit(&op->flags, FSCACHE_OP_WAITING); | 69 | wake_up_bit(&op->flags, FSCACHE_OP_WAITING); |
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
88 | 84 | ||
89 | _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); | 85 | _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); |
90 | 86 | ||
91 | fscache_set_op_state(op, "SubmitX"); | ||
92 | |||
93 | spin_lock(&object->lock); | 87 | spin_lock(&object->lock); |
94 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | 88 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); |
95 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | 89 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); |
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object, | |||
194 | 188 | ||
195 | ASSERTCMP(atomic_read(&op->usage), >, 0); | 189 | ASSERTCMP(atomic_read(&op->usage), >, 0); |
196 | 190 | ||
197 | fscache_set_op_state(op, "Submit"); | ||
198 | |||
199 | spin_lock(&object->lock); | 191 | spin_lock(&object->lock); |
200 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | 192 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); |
201 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | 193 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); |
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op) | |||
335 | if (!atomic_dec_and_test(&op->usage)) | 327 | if (!atomic_dec_and_test(&op->usage)) |
336 | return; | 328 | return; |
337 | 329 | ||
338 | fscache_set_op_state(op, "Put"); | ||
339 | |||
340 | _debug("PUT OP"); | 330 | _debug("PUT OP"); |
341 | if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) | 331 | if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) |
342 | BUG(); | 332 | BUG(); |
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 41c441c2058d..a2a5d19ece6a 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op) | |||
155 | fscache_stat(&fscache_n_attr_changed_calls); | 155 | fscache_stat(&fscache_n_attr_changed_calls); |
156 | 156 | ||
157 | if (fscache_object_is_active(object)) { | 157 | if (fscache_object_is_active(object)) { |
158 | fscache_set_op_state(op, "CallFS"); | ||
159 | fscache_stat(&fscache_n_cop_attr_changed); | 158 | fscache_stat(&fscache_n_cop_attr_changed); |
160 | ret = object->cache->ops->attr_changed(object); | 159 | ret = object->cache->ops->attr_changed(object); |
161 | fscache_stat_d(&fscache_n_cop_attr_changed); | 160 | fscache_stat_d(&fscache_n_cop_attr_changed); |
162 | fscache_set_op_state(op, "Done"); | ||
163 | if (ret < 0) | 161 | if (ret < 0) |
164 | fscache_abort_object(object); | 162 | fscache_abort_object(object); |
165 | } | 163 | } |
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) | |||
190 | 188 | ||
191 | fscache_operation_init(op, fscache_attr_changed_op, NULL); | 189 | fscache_operation_init(op, fscache_attr_changed_op, NULL); |
192 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); | 190 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); |
193 | fscache_set_op_name(op, "Attr"); | ||
194 | 191 | ||
195 | spin_lock(&cookie->lock); | 192 | spin_lock(&cookie->lock); |
196 | 193 | ||
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval( | |||
257 | op->context = context; | 254 | op->context = context; |
258 | op->start_time = jiffies; | 255 | op->start_time = jiffies; |
259 | INIT_LIST_HEAD(&op->to_do); | 256 | INIT_LIST_HEAD(&op->to_do); |
260 | fscache_set_op_name(&op->op, "Retr"); | ||
261 | return op; | 257 | return op; |
262 | } | 258 | } |
263 | 259 | ||
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | |||
368 | _leave(" = -ENOMEM"); | 364 | _leave(" = -ENOMEM"); |
369 | return -ENOMEM; | 365 | return -ENOMEM; |
370 | } | 366 | } |
371 | fscache_set_op_name(&op->op, "RetrRA1"); | ||
372 | 367 | ||
373 | spin_lock(&cookie->lock); | 368 | spin_lock(&cookie->lock); |
374 | 369 | ||
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | |||
487 | op = fscache_alloc_retrieval(mapping, end_io_func, context); | 482 | op = fscache_alloc_retrieval(mapping, end_io_func, context); |
488 | if (!op) | 483 | if (!op) |
489 | return -ENOMEM; | 484 | return -ENOMEM; |
490 | fscache_set_op_name(&op->op, "RetrRAN"); | ||
491 | 485 | ||
492 | spin_lock(&cookie->lock); | 486 | spin_lock(&cookie->lock); |
493 | 487 | ||
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, | |||
589 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); | 583 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); |
590 | if (!op) | 584 | if (!op) |
591 | return -ENOMEM; | 585 | return -ENOMEM; |
592 | fscache_set_op_name(&op->op, "RetrAL1"); | ||
593 | 586 | ||
594 | spin_lock(&cookie->lock); | 587 | spin_lock(&cookie->lock); |
595 | 588 | ||
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op) | |||
662 | 655 | ||
663 | _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); | 656 | _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); |
664 | 657 | ||
665 | fscache_set_op_state(&op->op, "GetPage"); | ||
666 | |||
667 | spin_lock(&object->lock); | 658 | spin_lock(&object->lock); |
668 | cookie = object->cookie; | 659 | cookie = object->cookie; |
669 | 660 | ||
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op) | |||
698 | spin_unlock(&cookie->stores_lock); | 689 | spin_unlock(&cookie->stores_lock); |
699 | spin_unlock(&object->lock); | 690 | spin_unlock(&object->lock); |
700 | 691 | ||
701 | fscache_set_op_state(&op->op, "Store"); | ||
702 | fscache_stat(&fscache_n_store_pages); | 692 | fscache_stat(&fscache_n_store_pages); |
703 | fscache_stat(&fscache_n_cop_write_page); | 693 | fscache_stat(&fscache_n_cop_write_page); |
704 | ret = object->cache->ops->write_page(op, page); | 694 | ret = object->cache->ops->write_page(op, page); |
705 | fscache_stat_d(&fscache_n_cop_write_page); | 695 | fscache_stat_d(&fscache_n_cop_write_page); |
706 | fscache_set_op_state(&op->op, "EndWrite"); | ||
707 | fscache_end_page_write(object, page); | 696 | fscache_end_page_write(object, page); |
708 | if (ret < 0) { | 697 | if (ret < 0) { |
709 | fscache_set_op_state(&op->op, "Abort"); | ||
710 | fscache_abort_object(object); | 698 | fscache_abort_object(object); |
711 | } else { | 699 | } else { |
712 | fscache_enqueue_operation(&op->op); | 700 | fscache_enqueue_operation(&op->op); |
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
778 | fscache_operation_init(&op->op, fscache_write_op, | 766 | fscache_operation_init(&op->op, fscache_write_op, |
779 | fscache_release_write_op); | 767 | fscache_release_write_op); |
780 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); | 768 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); |
781 | fscache_set_op_name(&op->op, "Write1"); | ||
782 | 769 | ||
783 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 770 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); |
784 | if (ret < 0) | 771 | if (ret < 0) |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b32eb29a4e6f..0d0e3faddcfa 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -667,6 +667,8 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) | |||
667 | if (IS_ERR(req)) | 667 | if (IS_ERR(req)) |
668 | return PTR_ERR(req); | 668 | return PTR_ERR(req); |
669 | 669 | ||
670 | dentry_unhash(entry); | ||
671 | |||
670 | req->in.h.opcode = FUSE_RMDIR; | 672 | req->in.h.opcode = FUSE_RMDIR; |
671 | req->in.h.nodeid = get_node_id(dir); | 673 | req->in.h.nodeid = get_node_id(dir); |
672 | req->in.numargs = 1; | 674 | req->in.numargs = 1; |
@@ -691,6 +693,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, | |||
691 | struct fuse_rename_in inarg; | 693 | struct fuse_rename_in inarg; |
692 | struct fuse_conn *fc = get_fuse_conn(olddir); | 694 | struct fuse_conn *fc = get_fuse_conn(olddir); |
693 | struct fuse_req *req = fuse_get_req(fc); | 695 | struct fuse_req *req = fuse_get_req(fc); |
696 | |||
697 | if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) | ||
698 | dentry_unhash(newent); | ||
699 | |||
694 | if (IS_ERR(req)) | 700 | if (IS_ERR(req)) |
695 | return PTR_ERR(req); | 701 | return PTR_ERR(req); |
696 | 702 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a2a6abbccc07..2792a790e50b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -1346,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) | |||
1346 | } | 1346 | } |
1347 | 1347 | ||
1348 | 1348 | ||
1349 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 1349 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, |
1350 | struct shrink_control *sc) | ||
1350 | { | 1351 | { |
1351 | struct gfs2_glock *gl; | 1352 | struct gfs2_glock *gl; |
1352 | int may_demote; | 1353 | int may_demote; |
1353 | int nr_skipped = 0; | 1354 | int nr_skipped = 0; |
1355 | int nr = sc->nr_to_scan; | ||
1356 | gfp_t gfp_mask = sc->gfp_mask; | ||
1354 | LIST_HEAD(skipped); | 1357 | LIST_HEAD(skipped); |
1355 | 1358 | ||
1356 | if (nr == 0) | 1359 | if (nr == 0) |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e23d9864c418..42e8d23bc047 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -38,6 +38,7 @@ | |||
38 | 38 | ||
39 | #include <linux/sched.h> | 39 | #include <linux/sched.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/mm.h> | ||
41 | #include <linux/spinlock.h> | 42 | #include <linux/spinlock.h> |
42 | #include <linux/completion.h> | 43 | #include <linux/completion.h> |
43 | #include <linux/buffer_head.h> | 44 | #include <linux/buffer_head.h> |
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list); | |||
77 | static atomic_t qd_lru_count = ATOMIC_INIT(0); | 78 | static atomic_t qd_lru_count = ATOMIC_INIT(0); |
78 | static DEFINE_SPINLOCK(qd_lru_lock); | 79 | static DEFINE_SPINLOCK(qd_lru_lock); |
79 | 80 | ||
80 | int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 81 | int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) |
81 | { | 82 | { |
82 | struct gfs2_quota_data *qd; | 83 | struct gfs2_quota_data *qd; |
83 | struct gfs2_sbd *sdp; | 84 | struct gfs2_sbd *sdp; |
85 | int nr_to_scan = sc->nr_to_scan; | ||
84 | 86 | ||
85 | if (nr == 0) | 87 | if (nr_to_scan == 0) |
86 | goto out; | 88 | goto out; |
87 | 89 | ||
88 | if (!(gfp_mask & __GFP_FS)) | 90 | if (!(sc->gfp_mask & __GFP_FS)) |
89 | return -1; | 91 | return -1; |
90 | 92 | ||
91 | spin_lock(&qd_lru_lock); | 93 | spin_lock(&qd_lru_lock); |
92 | while (nr && !list_empty(&qd_lru_list)) { | 94 | while (nr_to_scan && !list_empty(&qd_lru_list)) { |
93 | qd = list_entry(qd_lru_list.next, | 95 | qd = list_entry(qd_lru_list.next, |
94 | struct gfs2_quota_data, qd_reclaim); | 96 | struct gfs2_quota_data, qd_reclaim); |
95 | sdp = qd->qd_gl->gl_sbd; | 97 | sdp = qd->qd_gl->gl_sbd; |
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | |||
110 | spin_unlock(&qd_lru_lock); | 112 | spin_unlock(&qd_lru_lock); |
111 | kmem_cache_free(gfs2_quotad_cachep, qd); | 113 | kmem_cache_free(gfs2_quotad_cachep, qd); |
112 | spin_lock(&qd_lru_lock); | 114 | spin_lock(&qd_lru_lock); |
113 | nr--; | 115 | nr_to_scan--; |
114 | } | 116 | } |
115 | spin_unlock(&qd_lru_lock); | 117 | spin_unlock(&qd_lru_lock); |
116 | 118 | ||
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e7d236ca48bd..90bf1c302a98 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | struct gfs2_inode; | 13 | struct gfs2_inode; |
14 | struct gfs2_sbd; | 14 | struct gfs2_sbd; |
15 | struct shrink_control; | ||
15 | 16 | ||
16 | #define NO_QUOTA_CHANGE ((u32)-1) | 17 | #define NO_QUOTA_CHANGE ((u32)-1) |
17 | 18 | ||
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
51 | return ret; | 52 | return ret; |
52 | } | 53 | } |
53 | 54 | ||
54 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); | 55 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, |
56 | struct shrink_control *sc); | ||
55 | extern const struct quotactl_ops gfs2_quotactl_ops; | 57 | extern const struct quotactl_ops gfs2_quotactl_ops; |
56 | 58 | ||
57 | #endif /* __QUOTA_DOT_H__ */ | 59 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index b4d70b13be92..1cb70cdba2c1 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c | |||
@@ -253,6 +253,9 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) | |||
253 | struct inode *inode = dentry->d_inode; | 253 | struct inode *inode = dentry->d_inode; |
254 | int res; | 254 | int res; |
255 | 255 | ||
256 | if (S_ISDIR(inode->i_mode)) | ||
257 | dentry_unhash(dentry); | ||
258 | |||
256 | if (S_ISDIR(inode->i_mode) && inode->i_size != 2) | 259 | if (S_ISDIR(inode->i_mode) && inode->i_size != 2) |
257 | return -ENOTEMPTY; | 260 | return -ENOTEMPTY; |
258 | res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); | 261 | res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); |
@@ -283,6 +286,9 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
283 | 286 | ||
284 | /* Unlink destination if it already exists */ | 287 | /* Unlink destination if it already exists */ |
285 | if (new_dentry->d_inode) { | 288 | if (new_dentry->d_inode) { |
289 | if (S_ISDIR(new_dentry->d_inode->i_mode)) | ||
290 | dentry_unhash(new_dentry); | ||
291 | |||
286 | res = hfs_remove(new_dir, new_dentry); | 292 | res = hfs_remove(new_dir, new_dentry); |
287 | if (res) | 293 | if (res) |
288 | return res; | 294 | return res; |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 4df5059c25da..b28835091dd0 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -370,6 +370,8 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) | |||
370 | struct inode *inode = dentry->d_inode; | 370 | struct inode *inode = dentry->d_inode; |
371 | int res; | 371 | int res; |
372 | 372 | ||
373 | dentry_unhash(dentry); | ||
374 | |||
373 | if (inode->i_size != 2) | 375 | if (inode->i_size != 2) |
374 | return -ENOTEMPTY; | 376 | return -ENOTEMPTY; |
375 | 377 | ||
@@ -467,10 +469,12 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
467 | 469 | ||
468 | /* Unlink destination if it already exists */ | 470 | /* Unlink destination if it already exists */ |
469 | if (new_dentry->d_inode) { | 471 | if (new_dentry->d_inode) { |
470 | if (S_ISDIR(new_dentry->d_inode->i_mode)) | 472 | if (S_ISDIR(new_dentry->d_inode->i_mode)) { |
473 | dentry_unhash(new_dentry); | ||
471 | res = hfsplus_rmdir(new_dir, new_dentry); | 474 | res = hfsplus_rmdir(new_dir, new_dentry); |
472 | else | 475 | } else { |
473 | res = hfsplus_unlink(new_dir, new_dentry); | 476 | res = hfsplus_unlink(new_dir, new_dentry); |
477 | } | ||
474 | if (res) | 478 | if (res) |
475 | return res; | 479 | return res; |
476 | } | 480 | } |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2638c834ed28..e6816b9e6903 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -683,6 +683,8 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) | |||
683 | char *file; | 683 | char *file; |
684 | int err; | 684 | int err; |
685 | 685 | ||
686 | dentry_unhash(dentry); | ||
687 | |||
686 | if ((file = dentry_name(dentry)) == NULL) | 688 | if ((file = dentry_name(dentry)) == NULL) |
687 | return -ENOMEM; | 689 | return -ENOMEM; |
688 | err = do_rmdir(file); | 690 | err = do_rmdir(file); |
@@ -736,6 +738,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, | |||
736 | char *from_name, *to_name; | 738 | char *from_name, *to_name; |
737 | int err; | 739 | int err; |
738 | 740 | ||
741 | if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) | ||
742 | dentry_unhash(to); | ||
743 | |||
739 | if ((from_name = dentry_name(from)) == NULL) | 744 | if ((from_name = dentry_name(from)) == NULL) |
740 | return -ENOMEM; | 745 | return -ENOMEM; |
741 | if ((to_name = dentry_name(to)) == NULL) { | 746 | if ((to_name = dentry_name(to)) == NULL) { |
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 1f05839c27a7..ff0ce21c0867 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -395,7 +395,6 @@ again: | |||
395 | 395 | ||
396 | dentry_unhash(dentry); | 396 | dentry_unhash(dentry); |
397 | if (!d_unhashed(dentry)) { | 397 | if (!d_unhashed(dentry)) { |
398 | dput(dentry); | ||
399 | hpfs_unlock(dir->i_sb); | 398 | hpfs_unlock(dir->i_sb); |
400 | return -ENOSPC; | 399 | return -ENOSPC; |
401 | } | 400 | } |
@@ -403,7 +402,6 @@ again: | |||
403 | !S_ISREG(inode->i_mode) || | 402 | !S_ISREG(inode->i_mode) || |
404 | get_write_access(inode)) { | 403 | get_write_access(inode)) { |
405 | d_rehash(dentry); | 404 | d_rehash(dentry); |
406 | dput(dentry); | ||
407 | } else { | 405 | } else { |
408 | struct iattr newattrs; | 406 | struct iattr newattrs; |
409 | /*printk("HPFS: truncating file before delete.\n");*/ | 407 | /*printk("HPFS: truncating file before delete.\n");*/ |
@@ -411,7 +409,6 @@ again: | |||
411 | newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; | 409 | newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; |
412 | err = notify_change(dentry, &newattrs); | 410 | err = notify_change(dentry, &newattrs); |
413 | put_write_access(inode); | 411 | put_write_access(inode); |
414 | dput(dentry); | ||
415 | if (!err) | 412 | if (!err) |
416 | goto again; | 413 | goto again; |
417 | } | 414 | } |
@@ -442,6 +439,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
442 | int err; | 439 | int err; |
443 | int r; | 440 | int r; |
444 | 441 | ||
442 | dentry_unhash(dentry); | ||
443 | |||
445 | hpfs_adjust_length(name, &len); | 444 | hpfs_adjust_length(name, &len); |
446 | hpfs_lock(dir->i_sb); | 445 | hpfs_lock(dir->i_sb); |
447 | err = -ENOENT; | 446 | err = -ENOENT; |
@@ -535,6 +534,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
535 | struct buffer_head *bh; | 534 | struct buffer_head *bh; |
536 | struct fnode *fnode; | 535 | struct fnode *fnode; |
537 | int err; | 536 | int err; |
537 | |||
538 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
539 | dentry_unhash(new_dentry); | ||
540 | |||
538 | if ((err = hpfs_chk_name(new_name, &new_len))) return err; | 541 | if ((err = hpfs_chk_name(new_name, &new_len))) return err; |
539 | err = 0; | 542 | err = 0; |
540 | hpfs_adjust_length(old_name, &old_len); | 543 | hpfs_adjust_length(old_name, &old_len); |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9eeb1cd03ff..7aafeb8fa300 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) | |||
412 | pgoff = offset >> PAGE_SHIFT; | 412 | pgoff = offset >> PAGE_SHIFT; |
413 | 413 | ||
414 | i_size_write(inode, offset); | 414 | i_size_write(inode, offset); |
415 | spin_lock(&mapping->i_mmap_lock); | 415 | mutex_lock(&mapping->i_mmap_mutex); |
416 | if (!prio_tree_empty(&mapping->i_mmap)) | 416 | if (!prio_tree_empty(&mapping->i_mmap)) |
417 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); | 417 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); |
418 | spin_unlock(&mapping->i_mmap_lock); | 418 | mutex_unlock(&mapping->i_mmap_mutex); |
419 | truncate_hugepages(inode, offset); | 419 | truncate_hugepages(inode, offset); |
420 | return 0; | 420 | return 0; |
421 | } | 421 | } |
@@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void) | |||
921 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); | 921 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); |
922 | } | 922 | } |
923 | 923 | ||
924 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | 924 | struct file *hugetlb_file_setup(const char *name, size_t size, |
925 | vm_flags_t acctflag, | ||
925 | struct user_struct **user, int creat_flags) | 926 | struct user_struct **user, int creat_flags) |
926 | { | 927 | { |
927 | int error = -ENOMEM; | 928 | int error = -ENOMEM; |
diff --git a/fs/inode.c b/fs/inode.c index 05f4fa521325..990d284877a1 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -326,12 +326,11 @@ void address_space_init_once(struct address_space *mapping) | |||
326 | memset(mapping, 0, sizeof(*mapping)); | 326 | memset(mapping, 0, sizeof(*mapping)); |
327 | INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); | 327 | INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); |
328 | spin_lock_init(&mapping->tree_lock); | 328 | spin_lock_init(&mapping->tree_lock); |
329 | spin_lock_init(&mapping->i_mmap_lock); | 329 | mutex_init(&mapping->i_mmap_mutex); |
330 | INIT_LIST_HEAD(&mapping->private_list); | 330 | INIT_LIST_HEAD(&mapping->private_list); |
331 | spin_lock_init(&mapping->private_lock); | 331 | spin_lock_init(&mapping->private_lock); |
332 | INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); | 332 | INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); |
333 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); | 333 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); |
334 | mutex_init(&mapping->unmap_mutex); | ||
335 | } | 334 | } |
336 | EXPORT_SYMBOL(address_space_init_once); | 335 | EXPORT_SYMBOL(address_space_init_once); |
337 | 336 | ||
@@ -752,8 +751,12 @@ static void prune_icache(int nr_to_scan) | |||
752 | * This function is passed the number of inodes to scan, and it returns the | 751 | * This function is passed the number of inodes to scan, and it returns the |
753 | * total number of remaining possibly-reclaimable inodes. | 752 | * total number of remaining possibly-reclaimable inodes. |
754 | */ | 753 | */ |
755 | static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 754 | static int shrink_icache_memory(struct shrinker *shrink, |
755 | struct shrink_control *sc) | ||
756 | { | 756 | { |
757 | int nr = sc->nr_to_scan; | ||
758 | gfp_t gfp_mask = sc->gfp_mask; | ||
759 | |||
757 | if (nr) { | 760 | if (nr) { |
758 | /* | 761 | /* |
759 | * Nasty deadlock avoidance. We may hold various FS locks, | 762 | * Nasty deadlock avoidance. We may hold various FS locks, |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 29148a81c783..7f21cf3aaf92 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal, | |||
219 | ret = err; | 219 | ret = err; |
220 | spin_lock(&journal->j_list_lock); | 220 | spin_lock(&journal->j_list_lock); |
221 | J_ASSERT(jinode->i_transaction == commit_transaction); | 221 | J_ASSERT(jinode->i_transaction == commit_transaction); |
222 | commit_transaction->t_flushed_data_blocks = 1; | ||
223 | clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); | 222 | clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); |
224 | smp_mb__after_clear_bit(); | 223 | smp_mb__after_clear_bit(); |
225 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | 224 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); |
@@ -672,12 +671,16 @@ start_journal_io: | |||
672 | err = 0; | 671 | err = 0; |
673 | } | 672 | } |
674 | 673 | ||
674 | write_lock(&journal->j_state_lock); | ||
675 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | ||
676 | commit_transaction->t_state = T_COMMIT_DFLUSH; | ||
677 | write_unlock(&journal->j_state_lock); | ||
675 | /* | 678 | /* |
676 | * If the journal is not located on the file system device, | 679 | * If the journal is not located on the file system device, |
677 | * then we must flush the file system device before we issue | 680 | * then we must flush the file system device before we issue |
678 | * the commit record | 681 | * the commit record |
679 | */ | 682 | */ |
680 | if (commit_transaction->t_flushed_data_blocks && | 683 | if (commit_transaction->t_need_data_flush && |
681 | (journal->j_fs_dev != journal->j_dev) && | 684 | (journal->j_fs_dev != journal->j_dev) && |
682 | (journal->j_flags & JBD2_BARRIER)) | 685 | (journal->j_flags & JBD2_BARRIER)) |
683 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); | 686 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
@@ -754,8 +757,13 @@ wait_for_iobuf: | |||
754 | required. */ | 757 | required. */ |
755 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 758 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
756 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); | 759 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); |
757 | /* Wake up any transactions which were waiting for this | 760 | /* |
758 | IO to complete */ | 761 | * Wake up any transactions which were waiting for this IO to |
762 | * complete. The barrier must be here so that changes by | ||
763 | * jbd2_journal_file_buffer() take effect before wake_up_bit() | ||
764 | * does the waitqueue check. | ||
765 | */ | ||
766 | smp_mb(); | ||
759 | wake_up_bit(&bh->b_state, BH_Unshadow); | 767 | wake_up_bit(&bh->b_state, BH_Unshadow); |
760 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 768 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
761 | __brelse(bh); | 769 | __brelse(bh); |
@@ -794,6 +802,10 @@ wait_for_iobuf: | |||
794 | jbd2_journal_abort(journal, err); | 802 | jbd2_journal_abort(journal, err); |
795 | 803 | ||
796 | jbd_debug(3, "JBD: commit phase 5\n"); | 804 | jbd_debug(3, "JBD: commit phase 5\n"); |
805 | write_lock(&journal->j_state_lock); | ||
806 | J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); | ||
807 | commit_transaction->t_state = T_COMMIT_JFLUSH; | ||
808 | write_unlock(&journal->j_state_lock); | ||
797 | 809 | ||
798 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 810 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
799 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 811 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
@@ -949,7 +961,7 @@ restart_loop: | |||
949 | 961 | ||
950 | jbd_debug(3, "JBD: commit phase 7\n"); | 962 | jbd_debug(3, "JBD: commit phase 7\n"); |
951 | 963 | ||
952 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 964 | J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); |
953 | 965 | ||
954 | commit_transaction->t_start = jiffies; | 966 | commit_transaction->t_start = jiffies; |
955 | stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, | 967 | stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging, |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e0ec3db1c395..9a7826990304 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -479,9 +479,12 @@ int __jbd2_log_space_left(journal_t *journal) | |||
479 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) | 479 | int __jbd2_log_start_commit(journal_t *journal, tid_t target) |
480 | { | 480 | { |
481 | /* | 481 | /* |
482 | * Are we already doing a recent enough commit? | 482 | * The only transaction we can possibly wait upon is the |
483 | * currently running transaction (if it exists). Otherwise, | ||
484 | * the target tid must be an old one. | ||
483 | */ | 485 | */ |
484 | if (!tid_geq(journal->j_commit_request, target)) { | 486 | if (journal->j_running_transaction && |
487 | journal->j_running_transaction->t_tid == target) { | ||
485 | /* | 488 | /* |
486 | * We want a new commit: OK, mark the request and wakeup the | 489 | * We want a new commit: OK, mark the request and wakeup the |
487 | * commit thread. We do _not_ do the commit ourselves. | 490 | * commit thread. We do _not_ do the commit ourselves. |
@@ -493,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) | |||
493 | journal->j_commit_sequence); | 496 | journal->j_commit_sequence); |
494 | wake_up(&journal->j_wait_commit); | 497 | wake_up(&journal->j_wait_commit); |
495 | return 1; | 498 | return 1; |
496 | } | 499 | } else if (!tid_geq(journal->j_commit_request, target)) |
500 | /* This should never happen, but if it does, preserve | ||
501 | the evidence before kjournald goes into a loop and | ||
502 | increments j_commit_sequence beyond all recognition. */ | ||
503 | WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", | ||
504 | journal->j_commit_request, | ||
505 | journal->j_commit_sequence, | ||
506 | target, journal->j_running_transaction ? | ||
507 | journal->j_running_transaction->t_tid : 0); | ||
497 | return 0; | 508 | return 0; |
498 | } | 509 | } |
499 | 510 | ||
@@ -577,6 +588,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) | |||
577 | } | 588 | } |
578 | 589 | ||
579 | /* | 590 | /* |
591 | * Return 1 if a given transaction has not yet sent the barrier request | ||
592 | * connected with a transaction commit. If 0 is returned, transaction | ||
593 | * may or may not have sent the barrier. Used to avoid sending barrier | ||
594 | * twice in common cases. | ||
595 | */ | ||
596 | int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) | ||
597 | { | ||
598 | int ret = 0; | ||
599 | transaction_t *commit_trans; | ||
600 | |||
601 | if (!(journal->j_flags & JBD2_BARRIER)) | ||
602 | return 0; | ||
603 | read_lock(&journal->j_state_lock); | ||
604 | /* Transaction already committed? */ | ||
605 | if (tid_geq(journal->j_commit_sequence, tid)) | ||
606 | goto out; | ||
607 | commit_trans = journal->j_committing_transaction; | ||
608 | if (!commit_trans || commit_trans->t_tid != tid) { | ||
609 | ret = 1; | ||
610 | goto out; | ||
611 | } | ||
612 | /* | ||
613 | * Transaction is being committed and we already proceeded to | ||
614 | * submitting a flush to fs partition? | ||
615 | */ | ||
616 | if (journal->j_fs_dev != journal->j_dev) { | ||
617 | if (!commit_trans->t_need_data_flush || | ||
618 | commit_trans->t_state >= T_COMMIT_DFLUSH) | ||
619 | goto out; | ||
620 | } else { | ||
621 | if (commit_trans->t_state >= T_COMMIT_JFLUSH) | ||
622 | goto out; | ||
623 | } | ||
624 | ret = 1; | ||
625 | out: | ||
626 | read_unlock(&journal->j_state_lock); | ||
627 | return ret; | ||
628 | } | ||
629 | EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); | ||
630 | |||
631 | /* | ||
580 | * Wait for a specified commit to complete. | 632 | * Wait for a specified commit to complete. |
581 | * The caller may not hold the journal lock. | 633 | * The caller may not hold the journal lock. |
582 | */ | 634 | */ |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 05fa77a23711..3eec82d32fd4 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
82 | */ | 82 | */ |
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Update transiaction's maximum wait time, if debugging is enabled. | 85 | * Update transaction's maximum wait time, if debugging is enabled. |
86 | * | 86 | * |
87 | * In order for t_max_wait to be reliable, it must be protected by a | 87 | * In order for t_max_wait to be reliable, it must be protected by a |
88 | * lock. But doing so will mean that start_this_handle() can not be | 88 | * lock. But doing so will mean that start_this_handle() can not be |
@@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
91 | * means that maximum wait time reported by the jbd2_run_stats | 91 | * means that maximum wait time reported by the jbd2_run_stats |
92 | * tracepoint will always be zero. | 92 | * tracepoint will always be zero. |
93 | */ | 93 | */ |
94 | static inline void update_t_max_wait(transaction_t *transaction) | 94 | static inline void update_t_max_wait(transaction_t *transaction, |
95 | unsigned long ts) | ||
95 | { | 96 | { |
96 | #ifdef CONFIG_JBD2_DEBUG | 97 | #ifdef CONFIG_JBD2_DEBUG |
97 | unsigned long ts = jiffies; | ||
98 | |||
99 | if (jbd2_journal_enable_debug && | 98 | if (jbd2_journal_enable_debug && |
100 | time_after(transaction->t_start, ts)) { | 99 | time_after(transaction->t_start, ts)) { |
101 | ts = jbd2_time_diff(ts, transaction->t_start); | 100 | ts = jbd2_time_diff(ts, transaction->t_start); |
@@ -121,6 +120,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
121 | tid_t tid; | 120 | tid_t tid; |
122 | int needed, need_to_start; | 121 | int needed, need_to_start; |
123 | int nblocks = handle->h_buffer_credits; | 122 | int nblocks = handle->h_buffer_credits; |
123 | unsigned long ts = jiffies; | ||
124 | 124 | ||
125 | if (nblocks > journal->j_max_transaction_buffers) { | 125 | if (nblocks > journal->j_max_transaction_buffers) { |
126 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", | 126 | printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", |
@@ -271,7 +271,7 @@ repeat: | |||
271 | /* OK, account for the buffers that this operation expects to | 271 | /* OK, account for the buffers that this operation expects to |
272 | * use and add the handle to the running transaction. | 272 | * use and add the handle to the running transaction. |
273 | */ | 273 | */ |
274 | update_t_max_wait(transaction); | 274 | update_t_max_wait(transaction, ts); |
275 | handle->h_transaction = transaction; | 275 | handle->h_transaction = transaction; |
276 | atomic_inc(&transaction->t_updates); | 276 | atomic_inc(&transaction->t_updates); |
277 | atomic_inc(&transaction->t_handle_count); | 277 | atomic_inc(&transaction->t_handle_count); |
@@ -316,7 +316,8 @@ static handle_t *new_handle(int nblocks) | |||
316 | * This function is visible to journal users (like ext3fs), so is not | 316 | * This function is visible to journal users (like ext3fs), so is not |
317 | * called with the journal already locked. | 317 | * called with the journal already locked. |
318 | * | 318 | * |
319 | * Return a pointer to a newly allocated handle, or NULL on failure | 319 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
320 | * on failure. | ||
320 | */ | 321 | */ |
321 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) | 322 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) |
322 | { | 323 | { |
@@ -921,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
921 | */ | 922 | */ |
922 | JBUFFER_TRACE(jh, "cancelling revoke"); | 923 | JBUFFER_TRACE(jh, "cancelling revoke"); |
923 | jbd2_journal_cancel_revoke(handle, jh); | 924 | jbd2_journal_cancel_revoke(handle, jh); |
924 | jbd2_journal_put_journal_head(jh); | ||
925 | out: | 925 | out: |
926 | jbd2_journal_put_journal_head(jh); | ||
926 | return err; | 927 | return err; |
927 | } | 928 | } |
928 | 929 | ||
@@ -2147,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | |||
2147 | jinode->i_next_transaction == transaction) | 2148 | jinode->i_next_transaction == transaction) |
2148 | goto done; | 2149 | goto done; |
2149 | 2150 | ||
2151 | /* | ||
2152 | * We only ever set this variable to 1 so the test is safe. Since | ||
2153 | * t_need_data_flush is likely to be set, we do the test to save some | ||
2154 | * cacheline bouncing | ||
2155 | */ | ||
2156 | if (!transaction->t_need_data_flush) | ||
2157 | transaction->t_need_data_flush = 1; | ||
2150 | /* On some different transaction's list - should be | 2158 | /* On some different transaction's list - should be |
2151 | * the committing one */ | 2159 | * the committing one */ |
2152 | if (jinode->i_transaction) { | 2160 | if (jinode->i_transaction) { |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 82faddd1f321..05f73328b28b 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -609,6 +609,8 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) | |||
609 | int ret; | 609 | int ret; |
610 | uint32_t now = get_seconds(); | 610 | uint32_t now = get_seconds(); |
611 | 611 | ||
612 | dentry_unhash(dentry); | ||
613 | |||
612 | for (fd = f->dents ; fd; fd = fd->next) { | 614 | for (fd = f->dents ; fd; fd = fd->next) { |
613 | if (fd->ino) | 615 | if (fd->ino) |
614 | return -ENOTEMPTY; | 616 | return -ENOTEMPTY; |
@@ -784,6 +786,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, | |||
784 | uint8_t type; | 786 | uint8_t type; |
785 | uint32_t now; | 787 | uint32_t now; |
786 | 788 | ||
789 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
790 | dentry_unhash(new_dentry); | ||
791 | |||
787 | /* The VFS will check for us and prevent trying to rename a | 792 | /* The VFS will check for us and prevent trying to rename a |
788 | * file over a directory and vice versa, but if it's a directory, | 793 | * file over a directory and vice versa, but if it's a directory, |
789 | * the VFS can't check whether the victim is empty. The filesystem | 794 | * the VFS can't check whether the victim is empty. The filesystem |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index eaaf2b511e89..865df16a6cf3 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -360,6 +360,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | |||
360 | 360 | ||
361 | jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); | 361 | jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); |
362 | 362 | ||
363 | dentry_unhash(dentry); | ||
364 | |||
363 | /* Init inode for quota operations. */ | 365 | /* Init inode for quota operations. */ |
364 | dquot_initialize(dip); | 366 | dquot_initialize(dip); |
365 | dquot_initialize(ip); | 367 | dquot_initialize(ip); |
@@ -1095,6 +1097,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1095 | jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, | 1097 | jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, |
1096 | new_dentry->d_name.name); | 1098 | new_dentry->d_name.name); |
1097 | 1099 | ||
1100 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1101 | dentry_unhash(new_dentry); | ||
1102 | |||
1098 | dquot_initialize(old_dir); | 1103 | dquot_initialize(old_dir); |
1099 | dquot_initialize(new_dir); | 1104 | dquot_initialize(new_dir); |
1100 | 1105 | ||
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9ed89d1663f8..f34c9cde9e94 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -273,6 +273,8 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
273 | { | 273 | { |
274 | struct inode *inode = dentry->d_inode; | 274 | struct inode *inode = dentry->d_inode; |
275 | 275 | ||
276 | dentry_unhash(dentry); | ||
277 | |||
276 | if (!logfs_empty_dir(inode)) | 278 | if (!logfs_empty_dir(inode)) |
277 | return -ENOTEMPTY; | 279 | return -ENOTEMPTY; |
278 | 280 | ||
@@ -622,6 +624,9 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, | |||
622 | loff_t pos; | 624 | loff_t pos; |
623 | int err; | 625 | int err; |
624 | 626 | ||
627 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
628 | dentry_unhash(new_dentry); | ||
629 | |||
625 | /* 1. locate source dd */ | 630 | /* 1. locate source dd */ |
626 | err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); | 631 | err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); |
627 | if (err) | 632 | if (err) |
diff --git a/fs/mbcache.c b/fs/mbcache.c index 2f174be06555..8c32ef3ba88e 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock); | |||
90 | * What the mbcache registers as to get shrunk dynamically. | 90 | * What the mbcache registers as to get shrunk dynamically. |
91 | */ | 91 | */ |
92 | 92 | ||
93 | static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); | 93 | static int mb_cache_shrink_fn(struct shrinker *shrink, |
94 | struct shrink_control *sc); | ||
94 | 95 | ||
95 | static struct shrinker mb_cache_shrinker = { | 96 | static struct shrinker mb_cache_shrinker = { |
96 | .shrink = mb_cache_shrink_fn, | 97 | .shrink = mb_cache_shrink_fn, |
@@ -156,18 +157,19 @@ forget: | |||
156 | * gets low. | 157 | * gets low. |
157 | * | 158 | * |
158 | * @shrink: (ignored) | 159 | * @shrink: (ignored) |
159 | * @nr_to_scan: Number of objects to scan | 160 | * @sc: shrink_control passed from reclaim |
160 | * @gfp_mask: (ignored) | ||
161 | * | 161 | * |
162 | * Returns the number of objects which are present in the cache. | 162 | * Returns the number of objects which are present in the cache. |
163 | */ | 163 | */ |
164 | static int | 164 | static int |
165 | mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 165 | mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) |
166 | { | 166 | { |
167 | LIST_HEAD(free_list); | 167 | LIST_HEAD(free_list); |
168 | struct mb_cache *cache; | 168 | struct mb_cache *cache; |
169 | struct mb_cache_entry *entry, *tmp; | 169 | struct mb_cache_entry *entry, *tmp; |
170 | int count = 0; | 170 | int count = 0; |
171 | int nr_to_scan = sc->nr_to_scan; | ||
172 | gfp_t gfp_mask = sc->gfp_mask; | ||
171 | 173 | ||
172 | mb_debug("trying to free %d entries", nr_to_scan); | 174 | mb_debug("trying to free %d entries", nr_to_scan); |
173 | spin_lock(&mb_cache_spinlock); | 175 | spin_lock(&mb_cache_spinlock); |
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 6e6777f1b4b2..f60aed8db9c4 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -168,6 +168,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) | |||
168 | struct inode * inode = dentry->d_inode; | 168 | struct inode * inode = dentry->d_inode; |
169 | int err = -ENOTEMPTY; | 169 | int err = -ENOTEMPTY; |
170 | 170 | ||
171 | dentry_unhash(dentry); | ||
172 | |||
171 | if (minix_empty_dir(inode)) { | 173 | if (minix_empty_dir(inode)) { |
172 | err = minix_unlink(dir, dentry); | 174 | err = minix_unlink(dir, dentry); |
173 | if (!err) { | 175 | if (!err) { |
@@ -190,6 +192,9 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, | |||
190 | struct minix_dir_entry * old_de; | 192 | struct minix_dir_entry * old_de; |
191 | int err = -ENOENT; | 193 | int err = -ENOENT; |
192 | 194 | ||
195 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
196 | dentry_unhash(new_dentry); | ||
197 | |||
193 | old_de = minix_find_entry(old_dentry, &old_page); | 198 | old_de = minix_find_entry(old_dentry, &old_page); |
194 | if (!old_de) | 199 | if (!old_de) |
195 | goto out; | 200 | goto out; |
diff --git a/fs/mpage.c b/fs/mpage.c index 0afc809e46e0..fdfae9fa98cd 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/backing-dev.h> | 28 | #include <linux/backing-dev.h> |
29 | #include <linux/pagevec.h> | 29 | #include <linux/pagevec.h> |
30 | #include <linux/cleancache.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * I/O completion handler for multipage BIOs. | 33 | * I/O completion handler for multipage BIOs. |
@@ -271,6 +272,12 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, | |||
271 | SetPageMappedToDisk(page); | 272 | SetPageMappedToDisk(page); |
272 | } | 273 | } |
273 | 274 | ||
275 | if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && | ||
276 | cleancache_get_page(page) == 0) { | ||
277 | SetPageUptodate(page); | ||
278 | goto confused; | ||
279 | } | ||
280 | |||
274 | /* | 281 | /* |
275 | * This page will go to BIO. Do we need to send this BIO off first? | 282 | * This page will go to BIO. Do we need to send this BIO off first? |
276 | */ | 283 | */ |
diff --git a/fs/namei.c b/fs/namei.c index 6ff858c049c0..2358b326b221 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -391,79 +391,28 @@ void path_put(struct path *path) | |||
391 | } | 391 | } |
392 | EXPORT_SYMBOL(path_put); | 392 | EXPORT_SYMBOL(path_put); |
393 | 393 | ||
394 | /** | 394 | /* |
395 | * nameidata_drop_rcu - drop this nameidata out of rcu-walk | ||
396 | * @nd: nameidata pathwalk data to drop | ||
397 | * Returns: 0 on success, -ECHILD on failure | ||
398 | * | ||
399 | * Path walking has 2 modes, rcu-walk and ref-walk (see | 395 | * Path walking has 2 modes, rcu-walk and ref-walk (see |
400 | * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt | 396 | * Documentation/filesystems/path-lookup.txt). In situations when we can't |
401 | * to drop out of rcu-walk mode and take normal reference counts on dentries | 397 | * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab |
402 | * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take | 398 | * normal reference counts on dentries and vfsmounts to transition to rcu-walk |
403 | * refcounts at the last known good point before rcu-walk got stuck, so | 399 | * mode. Refcounts are grabbed at the last known good point before rcu-walk |
404 | * ref-walk may continue from there. If this is not successful (eg. a seqcount | 400 | * got stuck, so ref-walk may continue from there. If this is not successful |
405 | * has changed), then failure is returned and path walk restarts from the | 401 | * (eg. a seqcount has changed), then failure is returned and it's up to caller |
406 | * beginning in ref-walk mode. | 402 | * to restart the path walk from the beginning in ref-walk mode. |
407 | * | ||
408 | * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into | ||
409 | * ref-walk. Must be called from rcu-walk context. | ||
410 | */ | 403 | */ |
411 | static int nameidata_drop_rcu(struct nameidata *nd) | ||
412 | { | ||
413 | struct fs_struct *fs = current->fs; | ||
414 | struct dentry *dentry = nd->path.dentry; | ||
415 | int want_root = 0; | ||
416 | |||
417 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
418 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { | ||
419 | want_root = 1; | ||
420 | spin_lock(&fs->lock); | ||
421 | if (nd->root.mnt != fs->root.mnt || | ||
422 | nd->root.dentry != fs->root.dentry) | ||
423 | goto err_root; | ||
424 | } | ||
425 | spin_lock(&dentry->d_lock); | ||
426 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
427 | goto err; | ||
428 | BUG_ON(nd->inode != dentry->d_inode); | ||
429 | spin_unlock(&dentry->d_lock); | ||
430 | if (want_root) { | ||
431 | path_get(&nd->root); | ||
432 | spin_unlock(&fs->lock); | ||
433 | } | ||
434 | mntget(nd->path.mnt); | ||
435 | |||
436 | rcu_read_unlock(); | ||
437 | br_read_unlock(vfsmount_lock); | ||
438 | nd->flags &= ~LOOKUP_RCU; | ||
439 | return 0; | ||
440 | err: | ||
441 | spin_unlock(&dentry->d_lock); | ||
442 | err_root: | ||
443 | if (want_root) | ||
444 | spin_unlock(&fs->lock); | ||
445 | return -ECHILD; | ||
446 | } | ||
447 | |||
448 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
449 | static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) | ||
450 | { | ||
451 | if (nd->flags & LOOKUP_RCU) | ||
452 | return nameidata_drop_rcu(nd); | ||
453 | return 0; | ||
454 | } | ||
455 | 404 | ||
456 | /** | 405 | /** |
457 | * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk | 406 | * unlazy_walk - try to switch to ref-walk mode. |
458 | * @nd: nameidata pathwalk data to drop | 407 | * @nd: nameidata pathwalk data |
459 | * @dentry: dentry to drop | 408 | * @dentry: child of nd->path.dentry or NULL |
460 | * Returns: 0 on success, -ECHILD on failure | 409 | * Returns: 0 on success, -ECHILD on failure |
461 | * | 410 | * |
462 | * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, | 411 | * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry |
463 | * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on | 412 | * for ref-walk mode. @dentry must be a path found by a do_lookup call on |
464 | * @nd. Must be called from rcu-walk context. | 413 | * @nd or NULL. Must be called from rcu-walk context. |
465 | */ | 414 | */ |
466 | static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) | 415 | static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) |
467 | { | 416 | { |
468 | struct fs_struct *fs = current->fs; | 417 | struct fs_struct *fs = current->fs; |
469 | struct dentry *parent = nd->path.dentry; | 418 | struct dentry *parent = nd->path.dentry; |
@@ -478,18 +427,25 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
478 | goto err_root; | 427 | goto err_root; |
479 | } | 428 | } |
480 | spin_lock(&parent->d_lock); | 429 | spin_lock(&parent->d_lock); |
481 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 430 | if (!dentry) { |
482 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | 431 | if (!__d_rcu_to_refcount(parent, nd->seq)) |
483 | goto err; | 432 | goto err_parent; |
484 | /* | 433 | BUG_ON(nd->inode != parent->d_inode); |
485 | * If the sequence check on the child dentry passed, then the child has | 434 | } else { |
486 | * not been removed from its parent. This means the parent dentry must | 435 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
487 | * be valid and able to take a reference at this point. | 436 | if (!__d_rcu_to_refcount(dentry, nd->seq)) |
488 | */ | 437 | goto err_child; |
489 | BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); | 438 | /* |
490 | BUG_ON(!parent->d_count); | 439 | * If the sequence check on the child dentry passed, then |
491 | parent->d_count++; | 440 | * the child has not been removed from its parent. This |
492 | spin_unlock(&dentry->d_lock); | 441 | * means the parent dentry must be valid and able to take |
442 | * a reference at this point. | ||
443 | */ | ||
444 | BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); | ||
445 | BUG_ON(!parent->d_count); | ||
446 | parent->d_count++; | ||
447 | spin_unlock(&dentry->d_lock); | ||
448 | } | ||
493 | spin_unlock(&parent->d_lock); | 449 | spin_unlock(&parent->d_lock); |
494 | if (want_root) { | 450 | if (want_root) { |
495 | path_get(&nd->root); | 451 | path_get(&nd->root); |
@@ -501,8 +457,10 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
501 | br_read_unlock(vfsmount_lock); | 457 | br_read_unlock(vfsmount_lock); |
502 | nd->flags &= ~LOOKUP_RCU; | 458 | nd->flags &= ~LOOKUP_RCU; |
503 | return 0; | 459 | return 0; |
504 | err: | 460 | |
461 | err_child: | ||
505 | spin_unlock(&dentry->d_lock); | 462 | spin_unlock(&dentry->d_lock); |
463 | err_parent: | ||
506 | spin_unlock(&parent->d_lock); | 464 | spin_unlock(&parent->d_lock); |
507 | err_root: | 465 | err_root: |
508 | if (want_root) | 466 | if (want_root) |
@@ -510,59 +468,6 @@ err_root: | |||
510 | return -ECHILD; | 468 | return -ECHILD; |
511 | } | 469 | } |
512 | 470 | ||
513 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
514 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) | ||
515 | { | ||
516 | if (nd->flags & LOOKUP_RCU) { | ||
517 | if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { | ||
518 | nd->flags &= ~LOOKUP_RCU; | ||
519 | if (!(nd->flags & LOOKUP_ROOT)) | ||
520 | nd->root.mnt = NULL; | ||
521 | rcu_read_unlock(); | ||
522 | br_read_unlock(vfsmount_lock); | ||
523 | return -ECHILD; | ||
524 | } | ||
525 | } | ||
526 | return 0; | ||
527 | } | ||
528 | |||
529 | /** | ||
530 | * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk | ||
531 | * @nd: nameidata pathwalk data to drop | ||
532 | * Returns: 0 on success, -ECHILD on failure | ||
533 | * | ||
534 | * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. | ||
535 | * nd->path should be the final element of the lookup, so nd->root is discarded. | ||
536 | * Must be called from rcu-walk context. | ||
537 | */ | ||
538 | static int nameidata_drop_rcu_last(struct nameidata *nd) | ||
539 | { | ||
540 | struct dentry *dentry = nd->path.dentry; | ||
541 | |||
542 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
543 | nd->flags &= ~LOOKUP_RCU; | ||
544 | if (!(nd->flags & LOOKUP_ROOT)) | ||
545 | nd->root.mnt = NULL; | ||
546 | spin_lock(&dentry->d_lock); | ||
547 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
548 | goto err_unlock; | ||
549 | BUG_ON(nd->inode != dentry->d_inode); | ||
550 | spin_unlock(&dentry->d_lock); | ||
551 | |||
552 | mntget(nd->path.mnt); | ||
553 | |||
554 | rcu_read_unlock(); | ||
555 | br_read_unlock(vfsmount_lock); | ||
556 | |||
557 | return 0; | ||
558 | |||
559 | err_unlock: | ||
560 | spin_unlock(&dentry->d_lock); | ||
561 | rcu_read_unlock(); | ||
562 | br_read_unlock(vfsmount_lock); | ||
563 | return -ECHILD; | ||
564 | } | ||
565 | |||
566 | /** | 471 | /** |
567 | * release_open_intent - free up open intent resources | 472 | * release_open_intent - free up open intent resources |
568 | * @nd: pointer to nameidata | 473 | * @nd: pointer to nameidata |
@@ -606,26 +511,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
606 | return dentry; | 511 | return dentry; |
607 | } | 512 | } |
608 | 513 | ||
609 | /* | 514 | /** |
610 | * handle_reval_path - force revalidation of a dentry | 515 | * complete_walk - successful completion of path walk |
611 | * | 516 | * @nd: pointer nameidata |
612 | * In some situations the path walking code will trust dentries without | ||
613 | * revalidating them. This causes problems for filesystems that depend on | ||
614 | * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set | ||
615 | * (which indicates that it's possible for the dentry to go stale), force | ||
616 | * a d_revalidate call before proceeding. | ||
617 | * | 517 | * |
618 | * Returns 0 if the revalidation was successful. If the revalidation fails, | 518 | * If we had been in RCU mode, drop out of it and legitimize nd->path. |
619 | * either return the error returned by d_revalidate or -ESTALE if the | 519 | * Revalidate the final result, unless we'd already done that during |
620 | * revalidation it just returned 0. If d_revalidate returns 0, we attempt to | 520 | * the path walk or the filesystem doesn't ask for it. Return 0 on |
621 | * invalidate the dentry. It's up to the caller to handle putting references | 521 | * success, -error on failure. In case of failure caller does not |
622 | * to the path if necessary. | 522 | * need to drop nd->path. |
623 | */ | 523 | */ |
624 | static inline int handle_reval_path(struct nameidata *nd) | 524 | static int complete_walk(struct nameidata *nd) |
625 | { | 525 | { |
626 | struct dentry *dentry = nd->path.dentry; | 526 | struct dentry *dentry = nd->path.dentry; |
627 | int status; | 527 | int status; |
628 | 528 | ||
529 | if (nd->flags & LOOKUP_RCU) { | ||
530 | nd->flags &= ~LOOKUP_RCU; | ||
531 | if (!(nd->flags & LOOKUP_ROOT)) | ||
532 | nd->root.mnt = NULL; | ||
533 | spin_lock(&dentry->d_lock); | ||
534 | if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { | ||
535 | spin_unlock(&dentry->d_lock); | ||
536 | rcu_read_unlock(); | ||
537 | br_read_unlock(vfsmount_lock); | ||
538 | return -ECHILD; | ||
539 | } | ||
540 | BUG_ON(nd->inode != dentry->d_inode); | ||
541 | spin_unlock(&dentry->d_lock); | ||
542 | mntget(nd->path.mnt); | ||
543 | rcu_read_unlock(); | ||
544 | br_read_unlock(vfsmount_lock); | ||
545 | } | ||
546 | |||
629 | if (likely(!(nd->flags & LOOKUP_JUMPED))) | 547 | if (likely(!(nd->flags & LOOKUP_JUMPED))) |
630 | return 0; | 548 | return 0; |
631 | 549 | ||
@@ -643,6 +561,7 @@ static inline int handle_reval_path(struct nameidata *nd) | |||
643 | if (!status) | 561 | if (!status) |
644 | status = -ESTALE; | 562 | status = -ESTALE; |
645 | 563 | ||
564 | path_put(&nd->path); | ||
646 | return status; | 565 | return status; |
647 | } | 566 | } |
648 | 567 | ||
@@ -1241,13 +1160,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1241 | if (likely(__follow_mount_rcu(nd, path, inode, false))) | 1160 | if (likely(__follow_mount_rcu(nd, path, inode, false))) |
1242 | return 0; | 1161 | return 0; |
1243 | unlazy: | 1162 | unlazy: |
1244 | if (dentry) { | 1163 | if (unlazy_walk(nd, dentry)) |
1245 | if (nameidata_dentry_drop_rcu(nd, dentry)) | 1164 | return -ECHILD; |
1246 | return -ECHILD; | ||
1247 | } else { | ||
1248 | if (nameidata_drop_rcu(nd)) | ||
1249 | return -ECHILD; | ||
1250 | } | ||
1251 | } else { | 1165 | } else { |
1252 | dentry = __d_lookup(parent, name); | 1166 | dentry = __d_lookup(parent, name); |
1253 | } | 1167 | } |
@@ -1303,7 +1217,7 @@ static inline int may_lookup(struct nameidata *nd) | |||
1303 | int err = exec_permission(nd->inode, IPERM_FLAG_RCU); | 1217 | int err = exec_permission(nd->inode, IPERM_FLAG_RCU); |
1304 | if (err != -ECHILD) | 1218 | if (err != -ECHILD) |
1305 | return err; | 1219 | return err; |
1306 | if (nameidata_drop_rcu(nd)) | 1220 | if (unlazy_walk(nd, NULL)) |
1307 | return -ECHILD; | 1221 | return -ECHILD; |
1308 | } | 1222 | } |
1309 | return exec_permission(nd->inode, 0); | 1223 | return exec_permission(nd->inode, 0); |
@@ -1357,8 +1271,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path, | |||
1357 | return -ENOENT; | 1271 | return -ENOENT; |
1358 | } | 1272 | } |
1359 | if (unlikely(inode->i_op->follow_link) && follow) { | 1273 | if (unlikely(inode->i_op->follow_link) && follow) { |
1360 | if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) | 1274 | if (nd->flags & LOOKUP_RCU) { |
1361 | return -ECHILD; | 1275 | if (unlikely(unlazy_walk(nd, path->dentry))) { |
1276 | terminate_walk(nd); | ||
1277 | return -ECHILD; | ||
1278 | } | ||
1279 | } | ||
1362 | BUG_ON(inode != path->dentry->d_inode); | 1280 | BUG_ON(inode != path->dentry->d_inode); |
1363 | return 1; | 1281 | return 1; |
1364 | } | 1282 | } |
@@ -1657,18 +1575,8 @@ static int path_lookupat(int dfd, const char *name, | |||
1657 | } | 1575 | } |
1658 | } | 1576 | } |
1659 | 1577 | ||
1660 | if (nd->flags & LOOKUP_RCU) { | 1578 | if (!err) |
1661 | /* went all way through without dropping RCU */ | 1579 | err = complete_walk(nd); |
1662 | BUG_ON(err); | ||
1663 | if (nameidata_drop_rcu_last(nd)) | ||
1664 | err = -ECHILD; | ||
1665 | } | ||
1666 | |||
1667 | if (!err) { | ||
1668 | err = handle_reval_path(nd); | ||
1669 | if (err) | ||
1670 | path_put(&nd->path); | ||
1671 | } | ||
1672 | 1580 | ||
1673 | if (!err && nd->flags & LOOKUP_DIRECTORY) { | 1581 | if (!err && nd->flags & LOOKUP_DIRECTORY) { |
1674 | if (!nd->inode->i_op->lookup) { | 1582 | if (!nd->inode->i_op->lookup) { |
@@ -2134,13 +2042,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2134 | return ERR_PTR(error); | 2042 | return ERR_PTR(error); |
2135 | /* fallthrough */ | 2043 | /* fallthrough */ |
2136 | case LAST_ROOT: | 2044 | case LAST_ROOT: |
2137 | if (nd->flags & LOOKUP_RCU) { | 2045 | error = complete_walk(nd); |
2138 | if (nameidata_drop_rcu_last(nd)) | ||
2139 | return ERR_PTR(-ECHILD); | ||
2140 | } | ||
2141 | error = handle_reval_path(nd); | ||
2142 | if (error) | 2046 | if (error) |
2143 | goto exit; | 2047 | return ERR_PTR(error); |
2144 | audit_inode(pathname, nd->path.dentry); | 2048 | audit_inode(pathname, nd->path.dentry); |
2145 | if (open_flag & O_CREAT) { | 2049 | if (open_flag & O_CREAT) { |
2146 | error = -EISDIR; | 2050 | error = -EISDIR; |
@@ -2148,10 +2052,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2148 | } | 2052 | } |
2149 | goto ok; | 2053 | goto ok; |
2150 | case LAST_BIND: | 2054 | case LAST_BIND: |
2151 | /* can't be RCU mode here */ | 2055 | error = complete_walk(nd); |
2152 | error = handle_reval_path(nd); | ||
2153 | if (error) | 2056 | if (error) |
2154 | goto exit; | 2057 | return ERR_PTR(error); |
2155 | audit_inode(pathname, dir); | 2058 | audit_inode(pathname, dir); |
2156 | goto ok; | 2059 | goto ok; |
2157 | } | 2060 | } |
@@ -2170,10 +2073,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2170 | if (error) /* symlink */ | 2073 | if (error) /* symlink */ |
2171 | return NULL; | 2074 | return NULL; |
2172 | /* sayonara */ | 2075 | /* sayonara */ |
2173 | if (nd->flags & LOOKUP_RCU) { | 2076 | error = complete_walk(nd); |
2174 | if (nameidata_drop_rcu_last(nd)) | 2077 | if (error) |
2175 | return ERR_PTR(-ECHILD); | 2078 | return ERR_PTR(-ECHILD); |
2176 | } | ||
2177 | 2079 | ||
2178 | error = -ENOTDIR; | 2080 | error = -ENOTDIR; |
2179 | if (nd->flags & LOOKUP_DIRECTORY) { | 2081 | if (nd->flags & LOOKUP_DIRECTORY) { |
@@ -2185,11 +2087,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2185 | } | 2087 | } |
2186 | 2088 | ||
2187 | /* create side of things */ | 2089 | /* create side of things */ |
2188 | 2090 | error = complete_walk(nd); | |
2189 | if (nd->flags & LOOKUP_RCU) { | 2091 | if (error) |
2190 | if (nameidata_drop_rcu_last(nd)) | 2092 | return ERR_PTR(error); |
2191 | return ERR_PTR(-ECHILD); | ||
2192 | } | ||
2193 | 2093 | ||
2194 | audit_inode(pathname, dir); | 2094 | audit_inode(pathname, dir); |
2195 | error = -EISDIR; | 2095 | error = -EISDIR; |
@@ -2629,10 +2529,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) | |||
2629 | } | 2529 | } |
2630 | 2530 | ||
2631 | /* | 2531 | /* |
2632 | * We try to drop the dentry early: we should have | 2532 | * The dentry_unhash() helper will try to drop the dentry early: we |
2633 | * a usage count of 2 if we're the only user of this | 2533 | * should have a usage count of 2 if we're the only user of this |
2634 | * dentry, and if that is true (possibly after pruning | 2534 | * dentry, and if that is true (possibly after pruning the dcache), |
2635 | * the dcache), then we drop the dentry now. | 2535 | * then we drop the dentry now. |
2636 | * | 2536 | * |
2637 | * A low-level filesystem can, if it choses, legally | 2537 | * A low-level filesystem can, if it choses, legally |
2638 | * do a | 2538 | * do a |
@@ -2645,10 +2545,9 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) | |||
2645 | */ | 2545 | */ |
2646 | void dentry_unhash(struct dentry *dentry) | 2546 | void dentry_unhash(struct dentry *dentry) |
2647 | { | 2547 | { |
2648 | dget(dentry); | ||
2649 | shrink_dcache_parent(dentry); | 2548 | shrink_dcache_parent(dentry); |
2650 | spin_lock(&dentry->d_lock); | 2549 | spin_lock(&dentry->d_lock); |
2651 | if (dentry->d_count == 2) | 2550 | if (dentry->d_count == 1) |
2652 | __d_drop(dentry); | 2551 | __d_drop(dentry); |
2653 | spin_unlock(&dentry->d_lock); | 2552 | spin_unlock(&dentry->d_lock); |
2654 | } | 2553 | } |
@@ -2664,25 +2563,26 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2664 | return -EPERM; | 2563 | return -EPERM; |
2665 | 2564 | ||
2666 | mutex_lock(&dentry->d_inode->i_mutex); | 2565 | mutex_lock(&dentry->d_inode->i_mutex); |
2667 | dentry_unhash(dentry); | 2566 | |
2567 | error = -EBUSY; | ||
2668 | if (d_mountpoint(dentry)) | 2568 | if (d_mountpoint(dentry)) |
2669 | error = -EBUSY; | 2569 | goto out; |
2670 | else { | 2570 | |
2671 | error = security_inode_rmdir(dir, dentry); | 2571 | error = security_inode_rmdir(dir, dentry); |
2672 | if (!error) { | 2572 | if (error) |
2673 | error = dir->i_op->rmdir(dir, dentry); | 2573 | goto out; |
2674 | if (!error) { | 2574 | |
2675 | dentry->d_inode->i_flags |= S_DEAD; | 2575 | error = dir->i_op->rmdir(dir, dentry); |
2676 | dont_mount(dentry); | 2576 | if (error) |
2677 | } | 2577 | goto out; |
2678 | } | 2578 | |
2679 | } | 2579 | dentry->d_inode->i_flags |= S_DEAD; |
2580 | dont_mount(dentry); | ||
2581 | |||
2582 | out: | ||
2680 | mutex_unlock(&dentry->d_inode->i_mutex); | 2583 | mutex_unlock(&dentry->d_inode->i_mutex); |
2681 | if (!error) { | 2584 | if (!error) |
2682 | d_delete(dentry); | 2585 | d_delete(dentry); |
2683 | } | ||
2684 | dput(dentry); | ||
2685 | |||
2686 | return error; | 2586 | return error; |
2687 | } | 2587 | } |
2688 | 2588 | ||
@@ -3053,12 +2953,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname | |||
3053 | * HOWEVER, it relies on the assumption that any object with ->lookup() | 2953 | * HOWEVER, it relies on the assumption that any object with ->lookup() |
3054 | * has no more than 1 dentry. If "hybrid" objects will ever appear, | 2954 | * has no more than 1 dentry. If "hybrid" objects will ever appear, |
3055 | * we'd better make sure that there's no link(2) for them. | 2955 | * we'd better make sure that there's no link(2) for them. |
3056 | * d) some filesystems don't support opened-but-unlinked directories, | 2956 | * d) conversion from fhandle to dentry may come in the wrong moment - when |
3057 | * either because of layout or because they are not ready to deal with | ||
3058 | * all cases correctly. The latter will be fixed (taking this sort of | ||
3059 | * stuff into VFS), but the former is not going away. Solution: the same | ||
3060 | * trick as in rmdir(). | ||
3061 | * e) conversion from fhandle to dentry may come in the wrong moment - when | ||
3062 | * we are removing the target. Solution: we will have to grab ->i_mutex | 2957 | * we are removing the target. Solution: we will have to grab ->i_mutex |
3063 | * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on | 2958 | * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on |
3064 | * ->i_mutex on parents, which works but leads to some truly excessive | 2959 | * ->i_mutex on parents, which works but leads to some truly excessive |
@@ -3068,7 +2963,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3068 | struct inode *new_dir, struct dentry *new_dentry) | 2963 | struct inode *new_dir, struct dentry *new_dentry) |
3069 | { | 2964 | { |
3070 | int error = 0; | 2965 | int error = 0; |
3071 | struct inode *target; | 2966 | struct inode *target = new_dentry->d_inode; |
3072 | 2967 | ||
3073 | /* | 2968 | /* |
3074 | * If we are going to change the parent - check write permissions, | 2969 | * If we are going to change the parent - check write permissions, |
@@ -3084,26 +2979,24 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3084 | if (error) | 2979 | if (error) |
3085 | return error; | 2980 | return error; |
3086 | 2981 | ||
3087 | target = new_dentry->d_inode; | ||
3088 | if (target) | 2982 | if (target) |
3089 | mutex_lock(&target->i_mutex); | 2983 | mutex_lock(&target->i_mutex); |
3090 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | 2984 | |
3091 | error = -EBUSY; | 2985 | error = -EBUSY; |
3092 | else { | 2986 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) |
3093 | if (target) | 2987 | goto out; |
3094 | dentry_unhash(new_dentry); | 2988 | |
3095 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2989 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
3096 | } | 2990 | if (error) |
2991 | goto out; | ||
2992 | |||
3097 | if (target) { | 2993 | if (target) { |
3098 | if (!error) { | 2994 | target->i_flags |= S_DEAD; |
3099 | target->i_flags |= S_DEAD; | 2995 | dont_mount(new_dentry); |
3100 | dont_mount(new_dentry); | ||
3101 | } | ||
3102 | mutex_unlock(&target->i_mutex); | ||
3103 | if (d_unhashed(new_dentry)) | ||
3104 | d_rehash(new_dentry); | ||
3105 | dput(new_dentry); | ||
3106 | } | 2996 | } |
2997 | out: | ||
2998 | if (target) | ||
2999 | mutex_unlock(&target->i_mutex); | ||
3107 | if (!error) | 3000 | if (!error) |
3108 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 3001 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
3109 | d_move(old_dentry,new_dentry); | 3002 | d_move(old_dentry,new_dentry); |
@@ -3113,7 +3006,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
3113 | static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | 3006 | static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, |
3114 | struct inode *new_dir, struct dentry *new_dentry) | 3007 | struct inode *new_dir, struct dentry *new_dentry) |
3115 | { | 3008 | { |
3116 | struct inode *target; | 3009 | struct inode *target = new_dentry->d_inode; |
3117 | int error; | 3010 | int error; |
3118 | 3011 | ||
3119 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | 3012 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); |
@@ -3121,19 +3014,22 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | |||
3121 | return error; | 3014 | return error; |
3122 | 3015 | ||
3123 | dget(new_dentry); | 3016 | dget(new_dentry); |
3124 | target = new_dentry->d_inode; | ||
3125 | if (target) | 3017 | if (target) |
3126 | mutex_lock(&target->i_mutex); | 3018 | mutex_lock(&target->i_mutex); |
3019 | |||
3020 | error = -EBUSY; | ||
3127 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | 3021 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) |
3128 | error = -EBUSY; | 3022 | goto out; |
3129 | else | 3023 | |
3130 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 3024 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
3131 | if (!error) { | 3025 | if (error) |
3132 | if (target) | 3026 | goto out; |
3133 | dont_mount(new_dentry); | 3027 | |
3134 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 3028 | if (target) |
3135 | d_move(old_dentry, new_dentry); | 3029 | dont_mount(new_dentry); |
3136 | } | 3030 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) |
3031 | d_move(old_dentry, new_dentry); | ||
3032 | out: | ||
3137 | if (target) | 3033 | if (target) |
3138 | mutex_unlock(&target->i_mutex); | 3034 | mutex_unlock(&target->i_mutex); |
3139 | dput(new_dentry); | 3035 | dput(new_dentry); |
diff --git a/fs/namespace.c b/fs/namespace.c index d99bcf59e4c2..fe59bd145d21 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1695,7 +1695,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path) | |||
1695 | 1695 | ||
1696 | static int flags_to_propagation_type(int flags) | 1696 | static int flags_to_propagation_type(int flags) |
1697 | { | 1697 | { |
1698 | int type = flags & ~MS_REC; | 1698 | int type = flags & ~(MS_REC | MS_SILENT); |
1699 | 1699 | ||
1700 | /* Fail if any non-propagation flags are set */ | 1700 | /* Fail if any non-propagation flags are set */ |
1701 | if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) | 1701 | if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index f6946bb5cb55..e3e646b06404 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -1033,6 +1033,8 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) | |||
1033 | DPRINTK("ncp_rmdir: removing %s/%s\n", | 1033 | DPRINTK("ncp_rmdir: removing %s/%s\n", |
1034 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1034 | dentry->d_parent->d_name.name, dentry->d_name.name); |
1035 | 1035 | ||
1036 | dentry_unhash(dentry); | ||
1037 | |||
1036 | error = -EBUSY; | 1038 | error = -EBUSY; |
1037 | if (!d_unhashed(dentry)) | 1039 | if (!d_unhashed(dentry)) |
1038 | goto out; | 1040 | goto out; |
@@ -1139,6 +1141,9 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1139 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1141 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
1140 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); | 1142 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); |
1141 | 1143 | ||
1144 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1145 | dentry_unhash(new_dentry); | ||
1146 | |||
1142 | ncp_age_dentry(server, old_dentry); | 1147 | ncp_age_dentry(server, old_dentry); |
1143 | ncp_age_dentry(server, new_dentry); | 1148 | ncp_age_dentry(server, new_dentry); |
1144 | 1149 | ||
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 0250e4ce4893..202f370526a7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
461 | #endif | 461 | #endif |
462 | struct ncp_entry_info finfo; | 462 | struct ncp_entry_info finfo; |
463 | 463 | ||
464 | data.wdog_pid = NULL; | 464 | memset(&data, 0, sizeof(data)); |
465 | server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); | 465 | server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); |
466 | if (!server) | 466 | if (!server) |
467 | return -ENOMEM; | 467 | return -ENOMEM; |
@@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
496 | struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; | 496 | struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; |
497 | 497 | ||
498 | data.flags = md->flags; | 498 | data.flags = md->flags; |
499 | data.int_flags = 0; | ||
500 | data.mounted_uid = md->mounted_uid; | 499 | data.mounted_uid = md->mounted_uid; |
501 | data.wdog_pid = find_get_pid(md->wdog_pid); | 500 | data.wdog_pid = find_get_pid(md->wdog_pid); |
502 | data.ncp_fd = md->ncp_fd; | 501 | data.ncp_fd = md->ncp_fd; |
@@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
507 | data.file_mode = md->file_mode; | 506 | data.file_mode = md->file_mode; |
508 | data.dir_mode = md->dir_mode; | 507 | data.dir_mode = md->dir_mode; |
509 | data.info_fd = -1; | 508 | data.info_fd = -1; |
510 | data.mounted_vol[0] = 0; | ||
511 | } | 509 | } |
512 | break; | 510 | break; |
513 | default: | 511 | default: |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c8..424e47773a84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head) | |||
2042 | } | 2042 | } |
2043 | } | 2043 | } |
2044 | 2044 | ||
2045 | int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 2045 | int nfs_access_cache_shrinker(struct shrinker *shrink, |
2046 | struct shrink_control *sc) | ||
2046 | { | 2047 | { |
2047 | LIST_HEAD(head); | 2048 | LIST_HEAD(head); |
2048 | struct nfs_inode *nfsi, *next; | 2049 | struct nfs_inode *nfsi, *next; |
2049 | struct nfs_access_entry *cache; | 2050 | struct nfs_access_entry *cache; |
2051 | int nr_to_scan = sc->nr_to_scan; | ||
2052 | gfp_t gfp_mask = sc->gfp_mask; | ||
2050 | 2053 | ||
2051 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 2054 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
2052 | return (nr_to_scan == 0) ? 0 : -1; | 2055 | return (nr_to_scan == 0) ? 0 : -1; |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885dd..2df6ca7b5898 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp, | |||
234 | 234 | ||
235 | /* dir.c */ | 235 | /* dir.c */ |
236 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, | 236 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, |
237 | int nr_to_scan, gfp_t gfp_mask); | 237 | struct shrink_control *sc); |
238 | 238 | ||
239 | /* inode.c */ | 239 | /* inode.c */ |
240 | extern struct workqueue_struct *nfsiod_workqueue; | 240 | extern struct workqueue_struct *nfsiod_workqueue; |
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 546849b3e88f..1102a5fbb744 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c | |||
@@ -334,6 +334,8 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
334 | struct nilfs_transaction_info ti; | 334 | struct nilfs_transaction_info ti; |
335 | int err; | 335 | int err; |
336 | 336 | ||
337 | dentry_unhash(dentry); | ||
338 | |||
337 | err = nilfs_transaction_begin(dir->i_sb, &ti, 0); | 339 | err = nilfs_transaction_begin(dir->i_sb, &ti, 0); |
338 | if (err) | 340 | if (err) |
339 | return err; | 341 | return err; |
@@ -369,6 +371,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
369 | struct nilfs_transaction_info ti; | 371 | struct nilfs_transaction_info ti; |
370 | int err; | 372 | int err; |
371 | 373 | ||
374 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
375 | dentry_unhash(new_dentry); | ||
376 | |||
372 | err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); | 377 | err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); |
373 | if (unlikely(err)) | 378 | if (unlikely(err)) |
374 | return err; | 379 | return err; |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 823bc35334e0..cdbaf5e97308 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
42 | #include <linux/seq_file.h> | 42 | #include <linux/seq_file.h> |
43 | #include <linux/quotaops.h> | 43 | #include <linux/quotaops.h> |
44 | #include <linux/cleancache.h> | ||
44 | 45 | ||
45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
46 | #include "ocfs2_trace.h" | 47 | #include "ocfs2_trace.h" |
@@ -2352,6 +2353,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2352 | mlog_errno(status); | 2353 | mlog_errno(status); |
2353 | goto bail; | 2354 | goto bail; |
2354 | } | 2355 | } |
2356 | cleancache_init_shared_fs((char *)&uuid_net_key, sb); | ||
2355 | 2357 | ||
2356 | bail: | 2358 | bail: |
2357 | return status; | 2359 | return status; |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index de4ff29f1e05..c368360c35a1 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -240,8 +240,12 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) | |||
240 | struct inode *inode = dentry->d_inode; | 240 | struct inode *inode = dentry->d_inode; |
241 | int ret; | 241 | int ret; |
242 | 242 | ||
243 | if (S_ISDIR(inode->i_mode) && !omfs_dir_is_empty(inode)) | 243 | |
244 | return -ENOTEMPTY; | 244 | if (S_ISDIR(inode->i_mode)) { |
245 | dentry_unhash(dentry); | ||
246 | if (!omfs_dir_is_empty(inode)) | ||
247 | return -ENOTEMPTY; | ||
248 | } | ||
245 | 249 | ||
246 | ret = omfs_delete_entry(dentry); | 250 | ret = omfs_delete_entry(dentry); |
247 | if (ret) | 251 | if (ret) |
@@ -378,6 +382,9 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
378 | int err; | 382 | int err; |
379 | 383 | ||
380 | if (new_inode) { | 384 | if (new_inode) { |
385 | if (S_ISDIR(new_inode->i_mode)) | ||
386 | dentry_unhash(new_dentry); | ||
387 | |||
381 | /* overwriting existing file/dir */ | 388 | /* overwriting existing file/dir */ |
382 | err = omfs_remove(new_dir, new_dentry); | 389 | err = omfs_remove(new_dir, new_dentry); |
383 | if (err) | 390 | if (err) |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index d545e97d99c3..8ed4d3433199 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev, | |||
255 | struct device_attribute *attr, char *buf) | 255 | struct device_attribute *attr, char *buf) |
256 | { | 256 | { |
257 | struct hd_struct *p = dev_to_part(dev); | 257 | struct hd_struct *p = dev_to_part(dev); |
258 | return sprintf(buf, "%u\n", p->discard_alignment); | 258 | struct gendisk *disk = dev_to_disk(dev); |
259 | |||
260 | return sprintf(buf, "%u\n", | ||
261 | queue_limit_discard_alignment(&disk->queue->limits, | ||
262 | p->start_sect)); | ||
259 | } | 263 | } |
260 | 264 | ||
261 | ssize_t part_stat_show(struct device *dev, | 265 | ssize_t part_stat_show(struct device *dev, |
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
449 | p->start_sect = start; | 453 | p->start_sect = start; |
450 | p->alignment_offset = | 454 | p->alignment_offset = |
451 | queue_limit_alignment_offset(&disk->queue->limits, start); | 455 | queue_limit_alignment_offset(&disk->queue->limits, start); |
452 | p->discard_alignment = | ||
453 | queue_limit_discard_alignment(&disk->queue->limits, start); | ||
454 | p->nr_sects = len; | 456 | p->nr_sects = len; |
455 | p->partno = partno; | 457 | p->partno = partno; |
456 | p->policy = get_disk_ro(disk); | 458 | p->policy = get_disk_ro(disk); |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index df434c5f28fb..c1c729335924 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -20,6 +20,7 @@ proc-y += stat.o | |||
20 | proc-y += uptime.o | 20 | proc-y += uptime.o |
21 | proc-y += version.o | 21 | proc-y += version.o |
22 | proc-y += softirqs.o | 22 | proc-y += softirqs.o |
23 | proc-y += namespaces.o | ||
23 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | 24 | proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o |
24 | proc-$(CONFIG_NET) += proc_net.o | 25 | proc-$(CONFIG_NET) += proc_net.o |
25 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 26 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
diff --git a/fs/proc/base.c b/fs/proc/base.c index dfa532730e55..dc8bca72b002 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode) | |||
600 | return allowed; | 600 | return allowed; |
601 | } | 601 | } |
602 | 602 | ||
603 | static int proc_setattr(struct dentry *dentry, struct iattr *attr) | 603 | int proc_setattr(struct dentry *dentry, struct iattr *attr) |
604 | { | 604 | { |
605 | int error; | 605 | int error; |
606 | struct inode *inode = dentry->d_inode; | 606 | struct inode *inode = dentry->d_inode; |
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task) | |||
1736 | return 0; | 1736 | return 0; |
1737 | } | 1737 | } |
1738 | 1738 | ||
1739 | 1739 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | |
1740 | static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) | ||
1741 | { | 1740 | { |
1742 | struct inode * inode; | 1741 | struct inode * inode; |
1743 | struct proc_inode *ei; | 1742 | struct proc_inode *ei; |
@@ -1779,7 +1778,7 @@ out_unlock: | |||
1779 | return NULL; | 1778 | return NULL; |
1780 | } | 1779 | } |
1781 | 1780 | ||
1782 | static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 1781 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
1783 | { | 1782 | { |
1784 | struct inode *inode = dentry->d_inode; | 1783 | struct inode *inode = dentry->d_inode; |
1785 | struct task_struct *task; | 1784 | struct task_struct *task; |
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat | |||
1820 | * made this apply to all per process world readable and executable | 1819 | * made this apply to all per process world readable and executable |
1821 | * directories. | 1820 | * directories. |
1822 | */ | 1821 | */ |
1823 | static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) | 1822 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd) |
1824 | { | 1823 | { |
1825 | struct inode *inode; | 1824 | struct inode *inode; |
1826 | struct task_struct *task; | 1825 | struct task_struct *task; |
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry) | |||
1862 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | 1861 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; |
1863 | } | 1862 | } |
1864 | 1863 | ||
1865 | static const struct dentry_operations pid_dentry_operations = | 1864 | const struct dentry_operations pid_dentry_operations = |
1866 | { | 1865 | { |
1867 | .d_revalidate = pid_revalidate, | 1866 | .d_revalidate = pid_revalidate, |
1868 | .d_delete = pid_delete_dentry, | 1867 | .d_delete = pid_delete_dentry, |
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations = | |||
1870 | 1869 | ||
1871 | /* Lookups */ | 1870 | /* Lookups */ |
1872 | 1871 | ||
1873 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | ||
1874 | struct task_struct *, const void *); | ||
1875 | |||
1876 | /* | 1872 | /* |
1877 | * Fill a directory entry. | 1873 | * Fill a directory entry. |
1878 | * | 1874 | * |
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | |||
1885 | * reported by readdir in sync with the inode numbers reported | 1881 | * reported by readdir in sync with the inode numbers reported |
1886 | * by stat. | 1882 | * by stat. |
1887 | */ | 1883 | */ |
1888 | static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 1884 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
1889 | char *name, int len, | 1885 | const char *name, int len, |
1890 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1886 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
1891 | { | 1887 | { |
1892 | struct dentry *child, *dir = filp->f_path.dentry; | 1888 | struct dentry *child, *dir = filp->f_path.dentry; |
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2820 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), | 2816 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), |
2821 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 2817 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
2822 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 2818 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
2819 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | ||
2823 | #ifdef CONFIG_NET | 2820 | #ifdef CONFIG_NET |
2824 | DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), | 2821 | DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), |
2825 | #endif | 2822 | #endif |
@@ -3168,6 +3165,7 @@ out_no_task: | |||
3168 | static const struct pid_entry tid_base_stuff[] = { | 3165 | static const struct pid_entry tid_base_stuff[] = { |
3169 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), | 3166 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), |
3170 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), | 3167 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), |
3168 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), | ||
3171 | REG("environ", S_IRUSR, proc_environ_operations), | 3169 | REG("environ", S_IRUSR, proc_environ_operations), |
3172 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3170 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3173 | ONE("status", S_IRUGO, proc_pid_status), | 3171 | ONE("status", S_IRUGO, proc_pid_status), |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f1281339b6fa..f1637f17c37c 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -674,6 +674,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, | |||
674 | } | 674 | } |
675 | return ent; | 675 | return ent; |
676 | } | 676 | } |
677 | EXPORT_SYMBOL(proc_mkdir_mode); | ||
677 | 678 | ||
678 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, | 679 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, |
679 | struct proc_dir_entry *parent) | 680 | struct proc_dir_entry *parent) |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d15aa1b1cc8f..74b48cfa1bb2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode) | |||
28 | { | 28 | { |
29 | struct proc_dir_entry *de; | 29 | struct proc_dir_entry *de; |
30 | struct ctl_table_header *head; | 30 | struct ctl_table_header *head; |
31 | const struct proc_ns_operations *ns_ops; | ||
31 | 32 | ||
32 | truncate_inode_pages(&inode->i_data, 0); | 33 | truncate_inode_pages(&inode->i_data, 0); |
33 | end_writeback(inode); | 34 | end_writeback(inode); |
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode) | |||
44 | rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); | 45 | rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); |
45 | sysctl_head_put(head); | 46 | sysctl_head_put(head); |
46 | } | 47 | } |
48 | /* Release any associated namespace */ | ||
49 | ns_ops = PROC_I(inode)->ns_ops; | ||
50 | if (ns_ops && ns_ops->put) | ||
51 | ns_ops->put(PROC_I(inode)->ns); | ||
47 | } | 52 | } |
48 | 53 | ||
49 | static struct kmem_cache * proc_inode_cachep; | 54 | static struct kmem_cache * proc_inode_cachep; |
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) | |||
62 | ei->pde = NULL; | 67 | ei->pde = NULL; |
63 | ei->sysctl = NULL; | 68 | ei->sysctl = NULL; |
64 | ei->sysctl_entry = NULL; | 69 | ei->sysctl_entry = NULL; |
70 | ei->ns = NULL; | ||
71 | ei->ns_ops = NULL; | ||
65 | inode = &ei->vfs_inode; | 72 | inode = &ei->vfs_inode; |
66 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 73 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
67 | return inode; | 74 | return inode; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..7838e5cfec14 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations; | |||
61 | extern const struct file_operations proc_net_operations; | 61 | extern const struct file_operations proc_net_operations; |
62 | extern const struct inode_operations proc_net_inode_operations; | 62 | extern const struct inode_operations proc_net_inode_operations; |
63 | 63 | ||
/*
 * Per-open state for the /proc/<pid> maps-style seq files.
 */
struct proc_maps_private {
	/* pid of the task whose address space is being shown */
	struct pid *pid;
	/* task the reader is currently working on */
	struct task_struct *task;
#ifdef CONFIG_MMU
	/* compared against the vma being shown to decide when to reset m->version */
	struct vm_area_struct *tail_vma;
#endif
};
71 | |||
64 | void proc_init_inodecache(void); | 72 | void proc_init_inodecache(void); |
65 | 73 | ||
66 | static inline struct pid *proc_pid(struct inode *inode) | 74 | static inline struct pid *proc_pid(struct inode *inode) |
@@ -119,3 +127,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | |||
119 | */ | 127 | */ |
120 | int proc_readdir(struct file *, void *, filldir_t); | 128 | int proc_readdir(struct file *, void *, filldir_t); |
121 | struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); | 129 | struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); |
130 | |||
131 | |||
132 | |||
133 | /* Lookups */ | ||
134 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | ||
135 | struct task_struct *, const void *); | ||
136 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
137 | const char *name, int len, | ||
138 | instantiate_t instantiate, struct task_struct *task, const void *ptr); | ||
139 | int pid_revalidate(struct dentry *dentry, struct nameidata *nd); | ||
140 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); | ||
141 | extern const struct dentry_operations pid_dentry_operations; | ||
142 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
143 | int proc_setattr(struct dentry *dentry, struct iattr *attr); | ||
144 | |||
145 | extern const struct inode_operations proc_ns_dir_inode_operations; | ||
146 | extern const struct file_operations proc_ns_dir_operations; | ||
147 | |||
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c new file mode 100644 index 000000000000..781dec5bd682 --- /dev/null +++ b/fs/proc/namespaces.c | |||
@@ -0,0 +1,198 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/nsproxy.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/ptrace.h> | ||
5 | #include <linux/fs_struct.h> | ||
6 | #include <linux/mount.h> | ||
7 | #include <linux/path.h> | ||
8 | #include <linux/namei.h> | ||
9 | #include <linux/file.h> | ||
10 | #include <linux/utsname.h> | ||
11 | #include <net/net_namespace.h> | ||
12 | #include <linux/mnt_namespace.h> | ||
13 | #include <linux/ipc_namespace.h> | ||
14 | #include <linux/pid_namespace.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | |||
/*
 * Namespace kinds listed in the per-task "ns" directory.
 *
 * Every entry is compiled in only when its namespace option is configured,
 * so this table can legitimately be empty.
 */
static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_NET_NS
	&netns_operations,
#endif
#ifdef CONFIG_UTS_NS
	&utsns_operations,
#endif
#ifdef CONFIG_IPC_NS
	&ipcns_operations,
#endif
};
29 | |||
30 | static const struct file_operations ns_file_operations = { | ||
31 | .llseek = no_llseek, | ||
32 | }; | ||
33 | |||
34 | static struct dentry *proc_ns_instantiate(struct inode *dir, | ||
35 | struct dentry *dentry, struct task_struct *task, const void *ptr) | ||
36 | { | ||
37 | const struct proc_ns_operations *ns_ops = ptr; | ||
38 | struct inode *inode; | ||
39 | struct proc_inode *ei; | ||
40 | struct dentry *error = ERR_PTR(-ENOENT); | ||
41 | |||
42 | inode = proc_pid_make_inode(dir->i_sb, task); | ||
43 | if (!inode) | ||
44 | goto out; | ||
45 | |||
46 | ei = PROC_I(inode); | ||
47 | inode->i_mode = S_IFREG|S_IRUSR; | ||
48 | inode->i_fop = &ns_file_operations; | ||
49 | ei->ns_ops = ns_ops; | ||
50 | ei->ns = ns_ops->get(task); | ||
51 | if (!ei->ns) | ||
52 | goto out_iput; | ||
53 | |||
54 | dentry->d_op = &pid_dentry_operations; | ||
55 | d_add(dentry, inode); | ||
56 | /* Close the race of the process dying before we return the dentry */ | ||
57 | if (pid_revalidate(dentry, NULL)) | ||
58 | error = NULL; | ||
59 | out: | ||
60 | return error; | ||
61 | out_iput: | ||
62 | iput(inode); | ||
63 | goto out; | ||
64 | } | ||
65 | |||
66 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | ||
67 | filldir_t filldir, struct task_struct *task, | ||
68 | const struct proc_ns_operations *ops) | ||
69 | { | ||
70 | return proc_fill_cache(filp, dirent, filldir, | ||
71 | ops->name, strlen(ops->name), | ||
72 | proc_ns_instantiate, task, ops); | ||
73 | } | ||
74 | |||
75 | static int proc_ns_dir_readdir(struct file *filp, void *dirent, | ||
76 | filldir_t filldir) | ||
77 | { | ||
78 | int i; | ||
79 | struct dentry *dentry = filp->f_path.dentry; | ||
80 | struct inode *inode = dentry->d_inode; | ||
81 | struct task_struct *task = get_proc_task(inode); | ||
82 | const struct proc_ns_operations **entry, **last; | ||
83 | ino_t ino; | ||
84 | int ret; | ||
85 | |||
86 | ret = -ENOENT; | ||
87 | if (!task) | ||
88 | goto out_no_task; | ||
89 | |||
90 | ret = -EPERM; | ||
91 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
92 | goto out; | ||
93 | |||
94 | ret = 0; | ||
95 | i = filp->f_pos; | ||
96 | switch (i) { | ||
97 | case 0: | ||
98 | ino = inode->i_ino; | ||
99 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
100 | goto out; | ||
101 | i++; | ||
102 | filp->f_pos++; | ||
103 | /* fall through */ | ||
104 | case 1: | ||
105 | ino = parent_ino(dentry); | ||
106 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
107 | goto out; | ||
108 | i++; | ||
109 | filp->f_pos++; | ||
110 | /* fall through */ | ||
111 | default: | ||
112 | i -= 2; | ||
113 | if (i >= ARRAY_SIZE(ns_entries)) { | ||
114 | ret = 1; | ||
115 | goto out; | ||
116 | } | ||
117 | entry = ns_entries + i; | ||
118 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
119 | while (entry <= last) { | ||
120 | if (proc_ns_fill_cache(filp, dirent, filldir, | ||
121 | task, *entry) < 0) | ||
122 | goto out; | ||
123 | filp->f_pos++; | ||
124 | entry++; | ||
125 | } | ||
126 | } | ||
127 | |||
128 | ret = 1; | ||
129 | out: | ||
130 | put_task_struct(task); | ||
131 | out_no_task: | ||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | const struct file_operations proc_ns_dir_operations = { | ||
136 | .read = generic_read_dir, | ||
137 | .readdir = proc_ns_dir_readdir, | ||
138 | }; | ||
139 | |||
140 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | ||
141 | struct dentry *dentry, struct nameidata *nd) | ||
142 | { | ||
143 | struct dentry *error; | ||
144 | struct task_struct *task = get_proc_task(dir); | ||
145 | const struct proc_ns_operations **entry, **last; | ||
146 | unsigned int len = dentry->d_name.len; | ||
147 | |||
148 | error = ERR_PTR(-ENOENT); | ||
149 | |||
150 | if (!task) | ||
151 | goto out_no_task; | ||
152 | |||
153 | error = ERR_PTR(-EPERM); | ||
154 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
155 | goto out; | ||
156 | |||
157 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
158 | for (entry = ns_entries; entry <= last; entry++) { | ||
159 | if (strlen((*entry)->name) != len) | ||
160 | continue; | ||
161 | if (!memcmp(dentry->d_name.name, (*entry)->name, len)) | ||
162 | break; | ||
163 | } | ||
164 | error = ERR_PTR(-ENOENT); | ||
165 | if (entry > last) | ||
166 | goto out; | ||
167 | |||
168 | error = proc_ns_instantiate(dir, dentry, task, *entry); | ||
169 | out: | ||
170 | put_task_struct(task); | ||
171 | out_no_task: | ||
172 | return error; | ||
173 | } | ||
174 | |||
175 | const struct inode_operations proc_ns_dir_inode_operations = { | ||
176 | .lookup = proc_ns_dir_lookup, | ||
177 | .getattr = pid_getattr, | ||
178 | .setattr = proc_setattr, | ||
179 | }; | ||
180 | |||
181 | struct file *proc_ns_fget(int fd) | ||
182 | { | ||
183 | struct file *file; | ||
184 | |||
185 | file = fget(fd); | ||
186 | if (!file) | ||
187 | return ERR_PTR(-EBADF); | ||
188 | |||
189 | if (file->f_op != &ns_file_operations) | ||
190 | goto out_invalid; | ||
191 | |||
192 | return file; | ||
193 | |||
194 | out_invalid: | ||
195 | fput(file); | ||
196 | return ERR_PTR(-EINVAL); | ||
197 | } | ||
198 | |||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 318d8654989b..db15935fa757 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -211,7 +211,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) | |||
211 | { | 211 | { |
212 | struct mm_struct *mm = vma->vm_mm; | 212 | struct mm_struct *mm = vma->vm_mm; |
213 | struct file *file = vma->vm_file; | 213 | struct file *file = vma->vm_file; |
214 | int flags = vma->vm_flags; | 214 | vm_flags_t flags = vma->vm_flags; |
215 | unsigned long ino = 0; | 215 | unsigned long ino = 0; |
216 | unsigned long long pgoff = 0; | 216 | unsigned long long pgoff = 0; |
217 | unsigned long start, end; | 217 | unsigned long start, end; |
@@ -858,7 +858,192 @@ const struct file_operations proc_pagemap_operations = { | |||
858 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 858 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
859 | 859 | ||
860 | #ifdef CONFIG_NUMA | 860 | #ifdef CONFIG_NUMA |
861 | extern int show_numa_map(struct seq_file *m, void *v); | 861 | |
862 | struct numa_maps { | ||
863 | struct vm_area_struct *vma; | ||
864 | unsigned long pages; | ||
865 | unsigned long anon; | ||
866 | unsigned long active; | ||
867 | unsigned long writeback; | ||
868 | unsigned long mapcount_max; | ||
869 | unsigned long dirty; | ||
870 | unsigned long swapcache; | ||
871 | unsigned long node[MAX_NUMNODES]; | ||
872 | }; | ||
873 | |||
874 | struct numa_maps_private { | ||
875 | struct proc_maps_private proc_maps; | ||
876 | struct numa_maps md; | ||
877 | }; | ||
878 | |||
879 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) | ||
880 | { | ||
881 | int count = page_mapcount(page); | ||
882 | |||
883 | md->pages++; | ||
884 | if (pte_dirty || PageDirty(page)) | ||
885 | md->dirty++; | ||
886 | |||
887 | if (PageSwapCache(page)) | ||
888 | md->swapcache++; | ||
889 | |||
890 | if (PageActive(page) || PageUnevictable(page)) | ||
891 | md->active++; | ||
892 | |||
893 | if (PageWriteback(page)) | ||
894 | md->writeback++; | ||
895 | |||
896 | if (PageAnon(page)) | ||
897 | md->anon++; | ||
898 | |||
899 | if (count > md->mapcount_max) | ||
900 | md->mapcount_max = count; | ||
901 | |||
902 | md->node[page_to_nid(page)]++; | ||
903 | } | ||
904 | |||
905 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | ||
906 | unsigned long end, struct mm_walk *walk) | ||
907 | { | ||
908 | struct numa_maps *md; | ||
909 | spinlock_t *ptl; | ||
910 | pte_t *orig_pte; | ||
911 | pte_t *pte; | ||
912 | |||
913 | md = walk->private; | ||
914 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | ||
915 | do { | ||
916 | struct page *page; | ||
917 | int nid; | ||
918 | |||
919 | if (!pte_present(*pte)) | ||
920 | continue; | ||
921 | |||
922 | page = vm_normal_page(md->vma, addr, *pte); | ||
923 | if (!page) | ||
924 | continue; | ||
925 | |||
926 | if (PageReserved(page)) | ||
927 | continue; | ||
928 | |||
929 | nid = page_to_nid(page); | ||
930 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | ||
931 | continue; | ||
932 | |||
933 | gather_stats(page, md, pte_dirty(*pte)); | ||
934 | |||
935 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
936 | pte_unmap_unlock(orig_pte, ptl); | ||
937 | return 0; | ||
938 | } | ||
939 | #ifdef CONFIG_HUGETLB_PAGE | ||
940 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | ||
941 | unsigned long addr, unsigned long end, struct mm_walk *walk) | ||
942 | { | ||
943 | struct numa_maps *md; | ||
944 | struct page *page; | ||
945 | |||
946 | if (pte_none(*pte)) | ||
947 | return 0; | ||
948 | |||
949 | page = pte_page(*pte); | ||
950 | if (!page) | ||
951 | return 0; | ||
952 | |||
953 | md = walk->private; | ||
954 | gather_stats(page, md, pte_dirty(*pte)); | ||
955 | return 0; | ||
956 | } | ||
957 | |||
958 | #else | ||
959 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | ||
960 | unsigned long addr, unsigned long end, struct mm_walk *walk) | ||
961 | { | ||
962 | return 0; | ||
963 | } | ||
964 | #endif | ||
965 | |||
966 | /* | ||
967 | * Display pages allocated per node and memory policy via /proc. | ||
968 | */ | ||
969 | static int show_numa_map(struct seq_file *m, void *v) | ||
970 | { | ||
971 | struct numa_maps_private *numa_priv = m->private; | ||
972 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; | ||
973 | struct vm_area_struct *vma = v; | ||
974 | struct numa_maps *md = &numa_priv->md; | ||
975 | struct file *file = vma->vm_file; | ||
976 | struct mm_struct *mm = vma->vm_mm; | ||
977 | struct mm_walk walk = {}; | ||
978 | struct mempolicy *pol; | ||
979 | int n; | ||
980 | char buffer[50]; | ||
981 | |||
982 | if (!mm) | ||
983 | return 0; | ||
984 | |||
985 | /* Ensure we start with an empty set of numa_maps statistics. */ | ||
986 | memset(md, 0, sizeof(*md)); | ||
987 | |||
988 | md->vma = vma; | ||
989 | |||
990 | walk.hugetlb_entry = gather_hugetbl_stats; | ||
991 | walk.pmd_entry = gather_pte_stats; | ||
992 | walk.private = md; | ||
993 | walk.mm = mm; | ||
994 | |||
995 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); | ||
996 | mpol_to_str(buffer, sizeof(buffer), pol, 0); | ||
997 | mpol_cond_put(pol); | ||
998 | |||
999 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | ||
1000 | |||
1001 | if (file) { | ||
1002 | seq_printf(m, " file="); | ||
1003 | seq_path(m, &file->f_path, "\n\t= "); | ||
1004 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | ||
1005 | seq_printf(m, " heap"); | ||
1006 | } else if (vma->vm_start <= mm->start_stack && | ||
1007 | vma->vm_end >= mm->start_stack) { | ||
1008 | seq_printf(m, " stack"); | ||
1009 | } | ||
1010 | |||
1011 | walk_page_range(vma->vm_start, vma->vm_end, &walk); | ||
1012 | |||
1013 | if (!md->pages) | ||
1014 | goto out; | ||
1015 | |||
1016 | if (md->anon) | ||
1017 | seq_printf(m, " anon=%lu", md->anon); | ||
1018 | |||
1019 | if (md->dirty) | ||
1020 | seq_printf(m, " dirty=%lu", md->dirty); | ||
1021 | |||
1022 | if (md->pages != md->anon && md->pages != md->dirty) | ||
1023 | seq_printf(m, " mapped=%lu", md->pages); | ||
1024 | |||
1025 | if (md->mapcount_max > 1) | ||
1026 | seq_printf(m, " mapmax=%lu", md->mapcount_max); | ||
1027 | |||
1028 | if (md->swapcache) | ||
1029 | seq_printf(m, " swapcache=%lu", md->swapcache); | ||
1030 | |||
1031 | if (md->active < md->pages && !is_vm_hugetlb_page(vma)) | ||
1032 | seq_printf(m, " active=%lu", md->active); | ||
1033 | |||
1034 | if (md->writeback) | ||
1035 | seq_printf(m, " writeback=%lu", md->writeback); | ||
1036 | |||
1037 | for_each_node_state(n, N_HIGH_MEMORY) | ||
1038 | if (md->node[n]) | ||
1039 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
1040 | out: | ||
1041 | seq_putc(m, '\n'); | ||
1042 | |||
1043 | if (m->count < m->size) | ||
1044 | m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; | ||
1045 | return 0; | ||
1046 | } | ||
862 | 1047 | ||
863 | static const struct seq_operations proc_pid_numa_maps_op = { | 1048 | static const struct seq_operations proc_pid_numa_maps_op = { |
864 | .start = m_start, | 1049 | .start = m_start, |
@@ -869,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = { | |||
869 | 1054 | ||
870 | static int numa_maps_open(struct inode *inode, struct file *file) | 1055 | static int numa_maps_open(struct inode *inode, struct file *file) |
871 | { | 1056 | { |
872 | return do_maps_open(inode, file, &proc_pid_numa_maps_op); | 1057 | struct numa_maps_private *priv; |
1058 | int ret = -ENOMEM; | ||
1059 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
1060 | if (priv) { | ||
1061 | priv->proc_maps.pid = proc_pid(inode); | ||
1062 | ret = seq_open(file, &proc_pid_numa_maps_op); | ||
1063 | if (!ret) { | ||
1064 | struct seq_file *m = file->private_data; | ||
1065 | m->private = priv; | ||
1066 | } else { | ||
1067 | kfree(priv); | ||
1068 | } | ||
1069 | } | ||
1070 | return ret; | ||
873 | } | 1071 | } |
874 | 1072 | ||
875 | const struct file_operations proc_numa_maps_operations = { | 1073 | const struct file_operations proc_numa_maps_operations = { |
@@ -878,4 +1076,4 @@ const struct file_operations proc_numa_maps_operations = { | |||
878 | .llseek = seq_lseek, | 1076 | .llseek = seq_lseek, |
879 | .release = seq_release_private, | 1077 | .release = seq_release_private, |
880 | }; | 1078 | }; |
881 | #endif | 1079 | #endif /* CONFIG_NUMA */ |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d3c032f5fa0a..5b572c89e6c4 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -691,8 +691,11 @@ static void prune_dqcache(int count) | |||
691 | * This is called from kswapd when we think we need some | 691 | * This is called from kswapd when we think we need some |
692 | * more memory | 692 | * more memory |
693 | */ | 693 | */ |
694 | static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 694 | static int shrink_dqcache_memory(struct shrinker *shrink, |
695 | struct shrink_control *sc) | ||
695 | { | 696 | { |
697 | int nr = sc->nr_to_scan; | ||
698 | |||
696 | if (nr) { | 699 | if (nr) { |
697 | spin_lock(&dq_list_lock); | 700 | spin_lock(&dq_list_lock); |
698 | prune_dqcache(nr); | 701 | prune_dqcache(nr); |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 118662690cdf..76c8164d5651 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -831,6 +831,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
831 | INITIALIZE_PATH(path); | 831 | INITIALIZE_PATH(path); |
832 | struct reiserfs_dir_entry de; | 832 | struct reiserfs_dir_entry de; |
833 | 833 | ||
834 | dentry_unhash(dentry); | ||
835 | |||
834 | /* we will be doing 2 balancings and update 2 stat data, we change quotas | 836 | /* we will be doing 2 balancings and update 2 stat data, we change quotas |
835 | * of the owner of the directory and of the owner of the parent directory. | 837 | * of the owner of the directory and of the owner of the parent directory. |
836 | * The quota structure is possibly deleted only on last iput => outside | 838 | * The quota structure is possibly deleted only on last iput => outside |
@@ -1225,6 +1227,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1225 | unsigned long savelink = 1; | 1227 | unsigned long savelink = 1; |
1226 | struct timespec ctime; | 1228 | struct timespec ctime; |
1227 | 1229 | ||
1230 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1231 | dentry_unhash(new_dentry); | ||
1232 | |||
1228 | /* three balancings: (1) old name removal, (2) new name insertion | 1233 | /* three balancings: (1) old name removal, (2) new name insertion |
1229 | and (3) maybe "save" link insertion | 1234 | and (3) maybe "save" link insertion |
1230 | stat data updates: (1) old directory, | 1235 | stat data updates: (1) old directory, |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 47d2a4498b03..50f1abccd1cd 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -105,7 +105,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) | |||
105 | mutex_unlock(&dentry->d_inode->i_mutex); | 105 | mutex_unlock(&dentry->d_inode->i_mutex); |
106 | if (!error) | 106 | if (!error) |
107 | d_delete(dentry); | 107 | d_delete(dentry); |
108 | dput(dentry); | ||
109 | 108 | ||
110 | return error; | 109 | return error; |
111 | } | 110 | } |
diff --git a/fs/splice.c b/fs/splice.c index 50a5d978da16..aa866d309695 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { | |||
162 | .get = generic_pipe_buf_get, | 162 | .get = generic_pipe_buf_get, |
163 | }; | 163 | }; |
164 | 164 | ||
165 | static void wakeup_pipe_readers(struct pipe_inode_info *pipe) | ||
166 | { | ||
167 | smp_mb(); | ||
168 | if (waitqueue_active(&pipe->wait)) | ||
169 | wake_up_interruptible(&pipe->wait); | ||
170 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
171 | } | ||
172 | |||
165 | /** | 173 | /** |
166 | * splice_to_pipe - fill passed data into a pipe | 174 | * splice_to_pipe - fill passed data into a pipe |
167 | * @pipe: pipe to fill | 175 | * @pipe: pipe to fill |
@@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
247 | 255 | ||
248 | pipe_unlock(pipe); | 256 | pipe_unlock(pipe); |
249 | 257 | ||
250 | if (do_wakeup) { | 258 | if (do_wakeup) |
251 | smp_mb(); | 259 | wakeup_pipe_readers(pipe); |
252 | if (waitqueue_active(&pipe->wait)) | ||
253 | wake_up_interruptible(&pipe->wait); | ||
254 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
255 | } | ||
256 | 260 | ||
257 | while (page_nr < spd_pages) | 261 | while (page_nr < spd_pages) |
258 | spd->spd_release(spd, page_nr++); | 262 | spd->spd_release(spd, page_nr++); |
@@ -1892,12 +1896,9 @@ retry: | |||
1892 | /* | 1896 | /* |
1893 | * If we put data in the output pipe, wakeup any potential readers. | 1897 | * If we put data in the output pipe, wakeup any potential readers. |
1894 | */ | 1898 | */ |
1895 | if (ret > 0) { | 1899 | if (ret > 0) |
1896 | smp_mb(); | 1900 | wakeup_pipe_readers(opipe); |
1897 | if (waitqueue_active(&opipe->wait)) | 1901 | |
1898 | wake_up_interruptible(&opipe->wait); | ||
1899 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1900 | } | ||
1901 | if (input_wakeup) | 1902 | if (input_wakeup) |
1902 | wakeup_pipe_writers(ipipe); | 1903 | wakeup_pipe_writers(ipipe); |
1903 | 1904 | ||
@@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1976 | /* | 1977 | /* |
1977 | * If we put data in the output pipe, wakeup any potential readers. | 1978 | * If we put data in the output pipe, wakeup any potential readers. |
1978 | */ | 1979 | */ |
1979 | if (ret > 0) { | 1980 | if (ret > 0) |
1980 | smp_mb(); | 1981 | wakeup_pipe_readers(opipe); |
1981 | if (waitqueue_active(&opipe->wait)) | ||
1982 | wake_up_interruptible(&opipe->wait); | ||
1983 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
1984 | } | ||
1985 | 1982 | ||
1986 | return ret; | 1983 | return ret; |
1987 | } | 1984 | } |
diff --git a/fs/super.c b/fs/super.c index c04f7e0b7ed2..c75593953c52 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
32 | #include <linux/backing-dev.h> | 32 | #include <linux/backing-dev.h> |
33 | #include <linux/rculist_bl.h> | 33 | #include <linux/rculist_bl.h> |
34 | #include <linux/cleancache.h> | ||
34 | #include "internal.h" | 35 | #include "internal.h" |
35 | 36 | ||
36 | 37 | ||
@@ -112,6 +113,7 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
112 | s->s_maxbytes = MAX_NON_LFS; | 113 | s->s_maxbytes = MAX_NON_LFS; |
113 | s->s_op = &default_op; | 114 | s->s_op = &default_op; |
114 | s->s_time_gran = 1000000000; | 115 | s->s_time_gran = 1000000000; |
116 | s->cleancache_poolid = -1; | ||
115 | } | 117 | } |
116 | out: | 118 | out: |
117 | return s; | 119 | return s; |
@@ -177,6 +179,7 @@ void deactivate_locked_super(struct super_block *s) | |||
177 | { | 179 | { |
178 | struct file_system_type *fs = s->s_type; | 180 | struct file_system_type *fs = s->s_type; |
179 | if (atomic_dec_and_test(&s->s_active)) { | 181 | if (atomic_dec_and_test(&s->s_active)) { |
182 | cleancache_flush_fs(s); | ||
180 | fs->kill_sb(s); | 183 | fs->kill_sb(s); |
181 | /* | 184 | /* |
182 | * We need to call rcu_barrier so all the delayed rcu free | 185 | * We need to call rcu_barrier so all the delayed rcu free |
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e474fbcf8bde..e2cc6756f3b1 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c | |||
@@ -196,6 +196,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) | |||
196 | struct inode *inode = dentry->d_inode; | 196 | struct inode *inode = dentry->d_inode; |
197 | int err = -ENOTEMPTY; | 197 | int err = -ENOTEMPTY; |
198 | 198 | ||
199 | dentry_unhash(dentry); | ||
200 | |||
199 | if (sysv_empty_dir(inode)) { | 201 | if (sysv_empty_dir(inode)) { |
200 | err = sysv_unlink(dir, dentry); | 202 | err = sysv_unlink(dir, dentry); |
201 | if (!err) { | 203 | if (!err) { |
@@ -222,6 +224,9 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, | |||
222 | struct sysv_dir_entry * old_de; | 224 | struct sysv_dir_entry * old_de; |
223 | int err = -ENOENT; | 225 | int err = -ENOENT; |
224 | 226 | ||
227 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
228 | dentry_unhash(new_dentry); | ||
229 | |||
225 | old_de = sysv_find_entry(old_dentry, &old_page); | 230 | old_de = sysv_find_entry(old_dentry, &old_page); |
226 | if (!old_de) | 231 | if (!old_de) |
227 | goto out; | 232 | goto out; |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ef5abd38f0bf..c2b80943560d 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -656,6 +656,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) | |||
656 | struct ubifs_inode *dir_ui = ubifs_inode(dir); | 656 | struct ubifs_inode *dir_ui = ubifs_inode(dir); |
657 | struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; | 657 | struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; |
658 | 658 | ||
659 | dentry_unhash(dentry); | ||
660 | |||
659 | /* | 661 | /* |
660 | * Budget request settings: deletion direntry, deletion inode and | 662 | * Budget request settings: deletion direntry, deletion inode and |
661 | * changing the parent inode. If budgeting fails, go ahead anyway | 663 | * changing the parent inode. If budgeting fails, go ahead anyway |
@@ -976,6 +978,9 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
976 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; | 978 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; |
977 | struct timespec time; | 979 | struct timespec time; |
978 | 980 | ||
981 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
982 | dentry_unhash(new_dentry); | ||
983 | |||
979 | /* | 984 | /* |
980 | * Budget request settings: deletion direntry, new direntry, removing | 985 | * Budget request settings: deletion direntry, new direntry, removing |
981 | * the old inode, and changing old and new parent directory inodes. | 986 | * the old inode, and changing old and new parent directory inodes. |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f1dce848ef96..4d76594c2a8f 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -783,6 +783,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) | |||
783 | struct fileIdentDesc *fi, cfi; | 783 | struct fileIdentDesc *fi, cfi; |
784 | struct kernel_lb_addr tloc; | 784 | struct kernel_lb_addr tloc; |
785 | 785 | ||
786 | dentry_unhash(dentry); | ||
787 | |||
786 | retval = -ENOENT; | 788 | retval = -ENOENT; |
787 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); | 789 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); |
788 | if (!fi) | 790 | if (!fi) |
@@ -1081,6 +1083,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1081 | struct kernel_lb_addr tloc; | 1083 | struct kernel_lb_addr tloc; |
1082 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); | 1084 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); |
1083 | 1085 | ||
1086 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
1087 | dentry_unhash(new_dentry); | ||
1088 | |||
1084 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); | 1089 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); |
1085 | if (ofi) { | 1090 | if (ofi) { |
1086 | if (ofibh.sbh != ofibh.ebh) | 1091 | if (ofibh.sbh != ofibh.ebh) |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 29309e25417f..953ebdfc5bf7 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -258,6 +258,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) | |||
258 | struct inode * inode = dentry->d_inode; | 258 | struct inode * inode = dentry->d_inode; |
259 | int err= -ENOTEMPTY; | 259 | int err= -ENOTEMPTY; |
260 | 260 | ||
261 | dentry_unhash(dentry); | ||
262 | |||
261 | lock_ufs(dir->i_sb); | 263 | lock_ufs(dir->i_sb); |
262 | if (ufs_empty_dir (inode)) { | 264 | if (ufs_empty_dir (inode)) { |
263 | err = ufs_unlink(dir, dentry); | 265 | err = ufs_unlink(dir, dentry); |
@@ -282,6 +284,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
282 | struct ufs_dir_entry *old_de; | 284 | struct ufs_dir_entry *old_de; |
283 | int err = -ENOENT; | 285 | int err = -ENOENT; |
284 | 286 | ||
287 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
288 | dentry_unhash(new_dentry); | ||
289 | |||
285 | old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); | 290 | old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); |
286 | if (!old_de) | 291 | if (!old_de) |
287 | goto out; | 292 | goto out; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 52b2b5da566e..5e68099db2a5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -1422,12 +1422,12 @@ restart: | |||
1422 | int | 1422 | int |
1423 | xfs_buftarg_shrink( | 1423 | xfs_buftarg_shrink( |
1424 | struct shrinker *shrink, | 1424 | struct shrinker *shrink, |
1425 | int nr_to_scan, | 1425 | struct shrink_control *sc) |
1426 | gfp_t mask) | ||
1427 | { | 1426 | { |
1428 | struct xfs_buftarg *btp = container_of(shrink, | 1427 | struct xfs_buftarg *btp = container_of(shrink, |
1429 | struct xfs_buftarg, bt_shrinker); | 1428 | struct xfs_buftarg, bt_shrinker); |
1430 | struct xfs_buf *bp; | 1429 | struct xfs_buf *bp; |
1430 | int nr_to_scan = sc->nr_to_scan; | ||
1431 | LIST_HEAD(dispose); | 1431 | LIST_HEAD(dispose); |
1432 | 1432 | ||
1433 | if (!nr_to_scan) | 1433 | if (!nr_to_scan) |
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c index d61611c88012..244e797dae32 100644 --- a/fs/xfs/linux-2.6/xfs_discard.c +++ b/fs/xfs/linux-2.6/xfs_discard.c | |||
@@ -191,3 +191,32 @@ xfs_ioc_trim( | |||
191 | return -XFS_ERROR(EFAULT); | 191 | return -XFS_ERROR(EFAULT); |
192 | return 0; | 192 | return 0; |
193 | } | 193 | } |
194 | |||
195 | int | ||
196 | xfs_discard_extents( | ||
197 | struct xfs_mount *mp, | ||
198 | struct list_head *list) | ||
199 | { | ||
200 | struct xfs_busy_extent *busyp; | ||
201 | int error = 0; | ||
202 | |||
203 | list_for_each_entry(busyp, list, list) { | ||
204 | trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, | ||
205 | busyp->length); | ||
206 | |||
207 | error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, | ||
208 | XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), | ||
209 | XFS_FSB_TO_BB(mp, busyp->length), | ||
210 | GFP_NOFS, 0); | ||
211 | if (error && error != EOPNOTSUPP) { | ||
212 | xfs_info(mp, | ||
213 | "discard failed for extent [0x%llu,%u], error %d", | ||
214 | (unsigned long long)busyp->bno, | ||
215 | busyp->length, | ||
216 | error); | ||
217 | return error; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | return 0; | ||
222 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h index e82b6dd3e127..344879aea646 100644 --- a/fs/xfs/linux-2.6/xfs_discard.h +++ b/fs/xfs/linux-2.6/xfs_discard.h | |||
@@ -2,7 +2,9 @@ | |||
2 | #define XFS_DISCARD_H 1 | 2 | #define XFS_DISCARD_H 1 |
3 | 3 | ||
4 | struct fstrim_range; | 4 | struct fstrim_range; |
5 | struct list_head; | ||
5 | 6 | ||
6 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); | 7 | extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); |
8 | extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); | ||
7 | 9 | ||
8 | #endif /* XFS_DISCARD_H */ | 10 | #endif /* XFS_DISCARD_H */ |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b0aa59e51fd0..98b9c91fcdf1 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -110,8 +110,10 @@ mempool_t *xfs_ioend_pool; | |||
110 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ | 110 | #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ |
111 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ | 111 | #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ |
112 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ | 112 | #define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ |
113 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ | 113 | #define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ |
114 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ | 114 | #define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ |
115 | #define MNTOPT_DISCARD "discard" /* Discard unused blocks */ | ||
116 | #define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ | ||
115 | 117 | ||
116 | /* | 118 | /* |
117 | * Table driven mount option parser. | 119 | * Table driven mount option parser. |
@@ -355,6 +357,10 @@ xfs_parseargs( | |||
355 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | 357 | mp->m_flags |= XFS_MOUNT_DELAYLOG; |
356 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | 358 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { |
357 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | 359 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; |
360 | } else if (!strcmp(this_char, MNTOPT_DISCARD)) { | ||
361 | mp->m_flags |= XFS_MOUNT_DISCARD; | ||
362 | } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { | ||
363 | mp->m_flags &= ~XFS_MOUNT_DISCARD; | ||
358 | } else if (!strcmp(this_char, "ihashsize")) { | 364 | } else if (!strcmp(this_char, "ihashsize")) { |
359 | xfs_warn(mp, | 365 | xfs_warn(mp, |
360 | "ihashsize no longer used, option is deprecated."); | 366 | "ihashsize no longer used, option is deprecated."); |
@@ -388,6 +394,13 @@ xfs_parseargs( | |||
388 | return EINVAL; | 394 | return EINVAL; |
389 | } | 395 | } |
390 | 396 | ||
397 | if ((mp->m_flags & XFS_MOUNT_DISCARD) && | ||
398 | !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { | ||
399 | xfs_warn(mp, | ||
400 | "the discard option is incompatible with the nodelaylog option"); | ||
401 | return EINVAL; | ||
402 | } | ||
403 | |||
391 | #ifndef CONFIG_XFS_QUOTA | 404 | #ifndef CONFIG_XFS_QUOTA |
392 | if (XFS_IS_QUOTA_RUNNING(mp)) { | 405 | if (XFS_IS_QUOTA_RUNNING(mp)) { |
393 | xfs_warn(mp, "quota support not available in this kernel."); | 406 | xfs_warn(mp, "quota support not available in this kernel."); |
@@ -488,6 +501,7 @@ xfs_showargs( | |||
488 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, | 501 | { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, |
489 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, | 502 | { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, |
490 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, | 503 | { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, |
504 | { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, | ||
491 | { 0, NULL } | 505 | { 0, NULL } |
492 | }; | 506 | }; |
493 | static struct proc_xfs_info xfs_info_unset[] = { | 507 | static struct proc_xfs_info xfs_info_unset[] = { |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cb1bb2080e44..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -1032,13 +1032,14 @@ xfs_reclaim_inodes( | |||
1032 | static int | 1032 | static int |
1033 | xfs_reclaim_inode_shrink( | 1033 | xfs_reclaim_inode_shrink( |
1034 | struct shrinker *shrink, | 1034 | struct shrinker *shrink, |
1035 | int nr_to_scan, | 1035 | struct shrink_control *sc) |
1036 | gfp_t gfp_mask) | ||
1037 | { | 1036 | { |
1038 | struct xfs_mount *mp; | 1037 | struct xfs_mount *mp; |
1039 | struct xfs_perag *pag; | 1038 | struct xfs_perag *pag; |
1040 | xfs_agnumber_t ag; | 1039 | xfs_agnumber_t ag; |
1041 | int reclaimable; | 1040 | int reclaimable; |
1041 | int nr_to_scan = sc->nr_to_scan; | ||
1042 | gfp_t gfp_mask = sc->gfp_mask; | ||
1042 | 1043 | ||
1043 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | 1044 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); |
1044 | if (nr_to_scan) { | 1045 | if (nr_to_scan) { |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 69228aa8605a..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | |||
60 | 60 | ||
61 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 61 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
62 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 62 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
63 | STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); | 63 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); |
64 | 64 | ||
65 | static struct shrinker xfs_qm_shaker = { | 65 | static struct shrinker xfs_qm_shaker = { |
66 | .shrink = xfs_qm_shake, | 66 | .shrink = xfs_qm_shake, |
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist( | |||
2009 | STATIC int | 2009 | STATIC int |
2010 | xfs_qm_shake( | 2010 | xfs_qm_shake( |
2011 | struct shrinker *shrink, | 2011 | struct shrinker *shrink, |
2012 | int nr_to_scan, | 2012 | struct shrink_control *sc) |
2013 | gfp_t gfp_mask) | ||
2014 | { | 2013 | { |
2015 | int ndqused, nfree, n; | 2014 | int ndqused, nfree, n; |
2015 | gfp_t gfp_mask = sc->gfp_mask; | ||
2016 | 2016 | ||
2017 | if (!kmem_shake_allow(gfp_mask)) | 2017 | if (!kmem_shake_allow(gfp_mask)) |
2018 | return 0; | 2018 | return 0; |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index da0a561ffba2..6530769a999b 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -187,6 +187,9 @@ struct xfs_busy_extent { | |||
187 | xfs_agnumber_t agno; | 187 | xfs_agnumber_t agno; |
188 | xfs_agblock_t bno; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | 189 | xfs_extlen_t length; |
190 | unsigned int flags; | ||
191 | #define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ | ||
192 | #define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */ | ||
190 | }; | 193 | }; |
191 | 194 | ||
192 | /* | 195 | /* |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index acdced86413c..95862bbff56b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -2469,7 +2469,7 @@ xfs_free_extent( | |||
2469 | 2469 | ||
2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2471 | if (!error) | 2471 | if (!error) |
2472 | xfs_alloc_busy_insert(tp, args.agno, args.agbno, len); | 2472 | xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0); |
2473 | error0: | 2473 | error0: |
2474 | xfs_perag_put(args.pag); | 2474 | xfs_perag_put(args.pag); |
2475 | return error; | 2475 | return error; |
@@ -2480,7 +2480,8 @@ xfs_alloc_busy_insert( | |||
2480 | struct xfs_trans *tp, | 2480 | struct xfs_trans *tp, |
2481 | xfs_agnumber_t agno, | 2481 | xfs_agnumber_t agno, |
2482 | xfs_agblock_t bno, | 2482 | xfs_agblock_t bno, |
2483 | xfs_extlen_t len) | 2483 | xfs_extlen_t len, |
2484 | unsigned int flags) | ||
2484 | { | 2485 | { |
2485 | struct xfs_busy_extent *new; | 2486 | struct xfs_busy_extent *new; |
2486 | struct xfs_busy_extent *busyp; | 2487 | struct xfs_busy_extent *busyp; |
@@ -2504,6 +2505,7 @@ xfs_alloc_busy_insert( | |||
2504 | new->bno = bno; | 2505 | new->bno = bno; |
2505 | new->length = len; | 2506 | new->length = len; |
2506 | INIT_LIST_HEAD(&new->list); | 2507 | INIT_LIST_HEAD(&new->list); |
2508 | new->flags = flags; | ||
2507 | 2509 | ||
2508 | /* trace before insert to be able to see failed inserts */ | 2510 | /* trace before insert to be able to see failed inserts */ |
2509 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); | 2511 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); |
@@ -2609,6 +2611,18 @@ xfs_alloc_busy_update_extent( | |||
2609 | xfs_agblock_t bend = bbno + busyp->length; | 2611 | xfs_agblock_t bend = bbno + busyp->length; |
2610 | 2612 | ||
2611 | /* | 2613 | /* |
2614 | * This extent is currently being discarded. Give the thread | ||
2615 | * performing the discard a chance to mark the extent unbusy | ||
2616 | * and retry. | ||
2617 | */ | ||
2618 | if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) { | ||
2619 | spin_unlock(&pag->pagb_lock); | ||
2620 | delay(1); | ||
2621 | spin_lock(&pag->pagb_lock); | ||
2622 | return false; | ||
2623 | } | ||
2624 | |||
2625 | /* | ||
2612 | * If there is a busy extent overlapping a user allocation, we have | 2626 | * If there is a busy extent overlapping a user allocation, we have |
2613 | * no choice but to force the log and retry the search. | 2627 | * no choice but to force the log and retry the search. |
2614 | * | 2628 | * |
@@ -2813,7 +2827,8 @@ restart: | |||
2813 | * If this is a metadata allocation, try to reuse the busy | 2827 | * If this is a metadata allocation, try to reuse the busy |
2814 | * extent instead of trimming the allocation. | 2828 | * extent instead of trimming the allocation. |
2815 | */ | 2829 | */ |
2816 | if (!args->userdata) { | 2830 | if (!args->userdata && |
2831 | !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) { | ||
2817 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, | 2832 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, |
2818 | busyp, fbno, flen, | 2833 | busyp, fbno, flen, |
2819 | false)) | 2834 | false)) |
@@ -2979,10 +2994,16 @@ xfs_alloc_busy_clear_one( | |||
2979 | kmem_free(busyp); | 2994 | kmem_free(busyp); |
2980 | } | 2995 | } |
2981 | 2996 | ||
2997 | /* | ||
2998 | * Remove all extents on the passed in list from the busy extents tree. | ||
2999 | * If do_discard is set skip extents that need to be discarded, and mark | ||
3000 | * these as undergoing a discard operation instead. | ||
3001 | */ | ||
2982 | void | 3002 | void |
2983 | xfs_alloc_busy_clear( | 3003 | xfs_alloc_busy_clear( |
2984 | struct xfs_mount *mp, | 3004 | struct xfs_mount *mp, |
2985 | struct list_head *list) | 3005 | struct list_head *list, |
3006 | bool do_discard) | ||
2986 | { | 3007 | { |
2987 | struct xfs_busy_extent *busyp, *n; | 3008 | struct xfs_busy_extent *busyp, *n; |
2988 | struct xfs_perag *pag = NULL; | 3009 | struct xfs_perag *pag = NULL; |
@@ -2999,7 +3020,11 @@ xfs_alloc_busy_clear( | |||
2999 | agno = busyp->agno; | 3020 | agno = busyp->agno; |
3000 | } | 3021 | } |
3001 | 3022 | ||
3002 | xfs_alloc_busy_clear_one(mp, pag, busyp); | 3023 | if (do_discard && busyp->length && |
3024 | !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD)) | ||
3025 | busyp->flags = XFS_ALLOC_BUSY_DISCARDED; | ||
3026 | else | ||
3027 | xfs_alloc_busy_clear_one(mp, pag, busyp); | ||
3003 | } | 3028 | } |
3004 | 3029 | ||
3005 | if (pag) { | 3030 | if (pag) { |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 240ad288f2f9..2f52b924be79 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -137,10 +137,11 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp, | |||
137 | #ifdef __KERNEL__ | 137 | #ifdef __KERNEL__ |
138 | void | 138 | void |
139 | xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, | 139 | xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, |
140 | xfs_agblock_t bno, xfs_extlen_t len); | 140 | xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags); |
141 | 141 | ||
142 | void | 142 | void |
143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list); | 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list, |
144 | bool do_discard); | ||
144 | 145 | ||
145 | int | 146 | int |
146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | 147 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 8b469d53599f..2b3518826a69 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -120,7 +120,8 @@ xfs_allocbt_free_block( | |||
120 | if (error) | 120 | if (error) |
121 | return error; | 121 | return error; |
122 | 122 | ||
123 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 123 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, |
124 | XFS_ALLOC_BUSY_SKIP_DISCARD); | ||
124 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 125 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
125 | return 0; | 126 | return 0; |
126 | } | 127 | } |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index fa00788de2f5..e546a33214c9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -89,36 +89,19 @@ xfs_bmap_add_attrfork_local( | |||
89 | int *flags); /* inode logging flags */ | 89 | int *flags); /* inode logging flags */ |
90 | 90 | ||
91 | /* | 91 | /* |
92 | * Called by xfs_bmapi to update file extent records and the btree | ||
93 | * after allocating space (or doing a delayed allocation). | ||
94 | */ | ||
95 | STATIC int /* error */ | ||
96 | xfs_bmap_add_extent( | ||
97 | xfs_inode_t *ip, /* incore inode pointer */ | ||
98 | xfs_extnum_t idx, /* extent number to update/insert */ | ||
99 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | ||
100 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
101 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | ||
102 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
103 | int *logflagsp, /* inode logging flags */ | ||
104 | int whichfork, /* data or attr fork */ | ||
105 | int rsvd); /* OK to allocate reserved blocks */ | ||
106 | |||
107 | /* | ||
108 | * Called by xfs_bmap_add_extent to handle cases converting a delayed | 92 | * Called by xfs_bmap_add_extent to handle cases converting a delayed |
109 | * allocation to a real allocation. | 93 | * allocation to a real allocation. |
110 | */ | 94 | */ |
111 | STATIC int /* error */ | 95 | STATIC int /* error */ |
112 | xfs_bmap_add_extent_delay_real( | 96 | xfs_bmap_add_extent_delay_real( |
113 | xfs_inode_t *ip, /* incore inode pointer */ | 97 | xfs_inode_t *ip, /* incore inode pointer */ |
114 | xfs_extnum_t idx, /* extent number to update/insert */ | 98 | xfs_extnum_t *idx, /* extent number to update/insert */ |
115 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 99 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
116 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 100 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
117 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ | 101 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ |
118 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | 102 | xfs_fsblock_t *first, /* pointer to firstblock variable */ |
119 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 103 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
120 | int *logflagsp, /* inode logging flags */ | 104 | int *logflagsp); /* inode logging flags */ |
121 | int rsvd); /* OK to allocate reserved blocks */ | ||
122 | 105 | ||
123 | /* | 106 | /* |
124 | * Called by xfs_bmap_add_extent to handle cases converting a hole | 107 | * Called by xfs_bmap_add_extent to handle cases converting a hole |
@@ -127,10 +110,9 @@ xfs_bmap_add_extent_delay_real( | |||
127 | STATIC int /* error */ | 110 | STATIC int /* error */ |
128 | xfs_bmap_add_extent_hole_delay( | 111 | xfs_bmap_add_extent_hole_delay( |
129 | xfs_inode_t *ip, /* incore inode pointer */ | 112 | xfs_inode_t *ip, /* incore inode pointer */ |
130 | xfs_extnum_t idx, /* extent number to update/insert */ | 113 | xfs_extnum_t *idx, /* extent number to update/insert */ |
131 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 114 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
132 | int *logflagsp,/* inode logging flags */ | 115 | int *logflagsp); /* inode logging flags */ |
133 | int rsvd); /* OK to allocate reserved blocks */ | ||
134 | 116 | ||
135 | /* | 117 | /* |
136 | * Called by xfs_bmap_add_extent to handle cases converting a hole | 118 | * Called by xfs_bmap_add_extent to handle cases converting a hole |
@@ -139,7 +121,7 @@ xfs_bmap_add_extent_hole_delay( | |||
139 | STATIC int /* error */ | 121 | STATIC int /* error */ |
140 | xfs_bmap_add_extent_hole_real( | 122 | xfs_bmap_add_extent_hole_real( |
141 | xfs_inode_t *ip, /* incore inode pointer */ | 123 | xfs_inode_t *ip, /* incore inode pointer */ |
142 | xfs_extnum_t idx, /* extent number to update/insert */ | 124 | xfs_extnum_t *idx, /* extent number to update/insert */ |
143 | xfs_btree_cur_t *cur, /* if null, not a btree */ | 125 | xfs_btree_cur_t *cur, /* if null, not a btree */ |
144 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 126 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
145 | int *logflagsp, /* inode logging flags */ | 127 | int *logflagsp, /* inode logging flags */ |
@@ -152,7 +134,7 @@ xfs_bmap_add_extent_hole_real( | |||
152 | STATIC int /* error */ | 134 | STATIC int /* error */ |
153 | xfs_bmap_add_extent_unwritten_real( | 135 | xfs_bmap_add_extent_unwritten_real( |
154 | xfs_inode_t *ip, /* incore inode pointer */ | 136 | xfs_inode_t *ip, /* incore inode pointer */ |
155 | xfs_extnum_t idx, /* extent number to update/insert */ | 137 | xfs_extnum_t *idx, /* extent number to update/insert */ |
156 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 138 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
157 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 139 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
158 | int *logflagsp); /* inode logging flags */ | 140 | int *logflagsp); /* inode logging flags */ |
@@ -180,22 +162,6 @@ xfs_bmap_btree_to_extents( | |||
180 | int whichfork); /* data or attr fork */ | 162 | int whichfork); /* data or attr fork */ |
181 | 163 | ||
182 | /* | 164 | /* |
183 | * Called by xfs_bmapi to update file extent records and the btree | ||
184 | * after removing space (or undoing a delayed allocation). | ||
185 | */ | ||
186 | STATIC int /* error */ | ||
187 | xfs_bmap_del_extent( | ||
188 | xfs_inode_t *ip, /* incore inode pointer */ | ||
189 | xfs_trans_t *tp, /* current trans pointer */ | ||
190 | xfs_extnum_t idx, /* extent number to update/insert */ | ||
191 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | ||
192 | xfs_btree_cur_t *cur, /* if null, not a btree */ | ||
193 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | ||
194 | int *logflagsp,/* inode logging flags */ | ||
195 | int whichfork, /* data or attr fork */ | ||
196 | int rsvd); /* OK to allocate reserved blocks */ | ||
197 | |||
198 | /* | ||
199 | * Remove the entry "free" from the free item list. Prev points to the | 165 | * Remove the entry "free" from the free item list. Prev points to the |
200 | * previous entry, unless "free" is the head of the list. | 166 | * previous entry, unless "free" is the head of the list. |
201 | */ | 167 | */ |
@@ -474,14 +440,13 @@ xfs_bmap_add_attrfork_local( | |||
474 | STATIC int /* error */ | 440 | STATIC int /* error */ |
475 | xfs_bmap_add_extent( | 441 | xfs_bmap_add_extent( |
476 | xfs_inode_t *ip, /* incore inode pointer */ | 442 | xfs_inode_t *ip, /* incore inode pointer */ |
477 | xfs_extnum_t idx, /* extent number to update/insert */ | 443 | xfs_extnum_t *idx, /* extent number to update/insert */ |
478 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 444 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
479 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 445 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
480 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | 446 | xfs_fsblock_t *first, /* pointer to firstblock variable */ |
481 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 447 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
482 | int *logflagsp, /* inode logging flags */ | 448 | int *logflagsp, /* inode logging flags */ |
483 | int whichfork, /* data or attr fork */ | 449 | int whichfork) /* data or attr fork */ |
484 | int rsvd) /* OK to use reserved data blocks */ | ||
485 | { | 450 | { |
486 | xfs_btree_cur_t *cur; /* btree cursor or null */ | 451 | xfs_btree_cur_t *cur; /* btree cursor or null */ |
487 | xfs_filblks_t da_new; /* new count del alloc blocks used */ | 452 | xfs_filblks_t da_new; /* new count del alloc blocks used */ |
@@ -492,23 +457,27 @@ xfs_bmap_add_extent( | |||
492 | xfs_extnum_t nextents; /* number of extents in file now */ | 457 | xfs_extnum_t nextents; /* number of extents in file now */ |
493 | 458 | ||
494 | XFS_STATS_INC(xs_add_exlist); | 459 | XFS_STATS_INC(xs_add_exlist); |
460 | |||
495 | cur = *curp; | 461 | cur = *curp; |
496 | ifp = XFS_IFORK_PTR(ip, whichfork); | 462 | ifp = XFS_IFORK_PTR(ip, whichfork); |
497 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 463 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
498 | ASSERT(idx <= nextents); | ||
499 | da_old = da_new = 0; | 464 | da_old = da_new = 0; |
500 | error = 0; | 465 | error = 0; |
466 | |||
467 | ASSERT(*idx >= 0); | ||
468 | ASSERT(*idx <= nextents); | ||
469 | |||
501 | /* | 470 | /* |
502 | * This is the first extent added to a new/empty file. | 471 | * This is the first extent added to a new/empty file. |
503 | * Special case this one, so other routines get to assume there are | 472 | * Special case this one, so other routines get to assume there are |
504 | * already extents in the list. | 473 | * already extents in the list. |
505 | */ | 474 | */ |
506 | if (nextents == 0) { | 475 | if (nextents == 0) { |
507 | xfs_iext_insert(ip, 0, 1, new, | 476 | xfs_iext_insert(ip, *idx, 1, new, |
508 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); | 477 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); |
509 | 478 | ||
510 | ASSERT(cur == NULL); | 479 | ASSERT(cur == NULL); |
511 | ifp->if_lastex = 0; | 480 | |
512 | if (!isnullstartblock(new->br_startblock)) { | 481 | if (!isnullstartblock(new->br_startblock)) { |
513 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); | 482 | XFS_IFORK_NEXT_SET(ip, whichfork, 1); |
514 | logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); | 483 | logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); |
@@ -522,27 +491,25 @@ xfs_bmap_add_extent( | |||
522 | if (cur) | 491 | if (cur) |
523 | ASSERT((cur->bc_private.b.flags & | 492 | ASSERT((cur->bc_private.b.flags & |
524 | XFS_BTCUR_BPRV_WASDEL) == 0); | 493 | XFS_BTCUR_BPRV_WASDEL) == 0); |
525 | if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, new, | 494 | error = xfs_bmap_add_extent_hole_delay(ip, idx, new, |
526 | &logflags, rsvd))) | 495 | &logflags); |
527 | goto done; | ||
528 | } | 496 | } |
529 | /* | 497 | /* |
530 | * Real allocation off the end of the file. | 498 | * Real allocation off the end of the file. |
531 | */ | 499 | */ |
532 | else if (idx == nextents) { | 500 | else if (*idx == nextents) { |
533 | if (cur) | 501 | if (cur) |
534 | ASSERT((cur->bc_private.b.flags & | 502 | ASSERT((cur->bc_private.b.flags & |
535 | XFS_BTCUR_BPRV_WASDEL) == 0); | 503 | XFS_BTCUR_BPRV_WASDEL) == 0); |
536 | if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, | 504 | error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new, |
537 | &logflags, whichfork))) | 505 | &logflags, whichfork); |
538 | goto done; | ||
539 | } else { | 506 | } else { |
540 | xfs_bmbt_irec_t prev; /* old extent at offset idx */ | 507 | xfs_bmbt_irec_t prev; /* old extent at offset idx */ |
541 | 508 | ||
542 | /* | 509 | /* |
543 | * Get the record referred to by idx. | 510 | * Get the record referred to by idx. |
544 | */ | 511 | */ |
545 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &prev); | 512 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev); |
546 | /* | 513 | /* |
547 | * If it's a real allocation record, and the new allocation ends | 514 | * If it's a real allocation record, and the new allocation ends |
548 | * after the start of the referred to record, then we're filling | 515 | * after the start of the referred to record, then we're filling |
@@ -557,22 +524,18 @@ xfs_bmap_add_extent( | |||
557 | if (cur) | 524 | if (cur) |
558 | ASSERT(cur->bc_private.b.flags & | 525 | ASSERT(cur->bc_private.b.flags & |
559 | XFS_BTCUR_BPRV_WASDEL); | 526 | XFS_BTCUR_BPRV_WASDEL); |
560 | if ((error = xfs_bmap_add_extent_delay_real(ip, | 527 | error = xfs_bmap_add_extent_delay_real(ip, |
561 | idx, &cur, new, &da_new, first, flist, | 528 | idx, &cur, new, &da_new, |
562 | &logflags, rsvd))) | 529 | first, flist, &logflags); |
563 | goto done; | ||
564 | } else if (new->br_state == XFS_EXT_NORM) { | ||
565 | ASSERT(new->br_state == XFS_EXT_NORM); | ||
566 | if ((error = xfs_bmap_add_extent_unwritten_real( | ||
567 | ip, idx, &cur, new, &logflags))) | ||
568 | goto done; | ||
569 | } else { | 530 | } else { |
570 | ASSERT(new->br_state == XFS_EXT_UNWRITTEN); | 531 | ASSERT(new->br_state == XFS_EXT_NORM || |
571 | if ((error = xfs_bmap_add_extent_unwritten_real( | 532 | new->br_state == XFS_EXT_UNWRITTEN); |
572 | ip, idx, &cur, new, &logflags))) | 533 | |
534 | error = xfs_bmap_add_extent_unwritten_real(ip, | ||
535 | idx, &cur, new, &logflags); | ||
536 | if (error) | ||
573 | goto done; | 537 | goto done; |
574 | } | 538 | } |
575 | ASSERT(*curp == cur || *curp == NULL); | ||
576 | } | 539 | } |
577 | /* | 540 | /* |
578 | * Otherwise we're filling in a hole with an allocation. | 541 | * Otherwise we're filling in a hole with an allocation. |
@@ -581,13 +544,15 @@ xfs_bmap_add_extent( | |||
581 | if (cur) | 544 | if (cur) |
582 | ASSERT((cur->bc_private.b.flags & | 545 | ASSERT((cur->bc_private.b.flags & |
583 | XFS_BTCUR_BPRV_WASDEL) == 0); | 546 | XFS_BTCUR_BPRV_WASDEL) == 0); |
584 | if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, | 547 | error = xfs_bmap_add_extent_hole_real(ip, idx, cur, |
585 | new, &logflags, whichfork))) | 548 | new, &logflags, whichfork); |
586 | goto done; | ||
587 | } | 549 | } |
588 | } | 550 | } |
589 | 551 | ||
552 | if (error) | ||
553 | goto done; | ||
590 | ASSERT(*curp == cur || *curp == NULL); | 554 | ASSERT(*curp == cur || *curp == NULL); |
555 | |||
591 | /* | 556 | /* |
592 | * Convert to a btree if necessary. | 557 | * Convert to a btree if necessary. |
593 | */ | 558 | */ |
@@ -615,7 +580,7 @@ xfs_bmap_add_extent( | |||
615 | ASSERT(nblks <= da_old); | 580 | ASSERT(nblks <= da_old); |
616 | if (nblks < da_old) | 581 | if (nblks < da_old) |
617 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | 582 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
618 | (int64_t)(da_old - nblks), rsvd); | 583 | (int64_t)(da_old - nblks), 0); |
619 | } | 584 | } |
620 | /* | 585 | /* |
621 | * Clear out the allocated field, done with it now in any case. | 586 | * Clear out the allocated field, done with it now in any case. |
@@ -640,14 +605,13 @@ done: | |||
640 | STATIC int /* error */ | 605 | STATIC int /* error */ |
641 | xfs_bmap_add_extent_delay_real( | 606 | xfs_bmap_add_extent_delay_real( |
642 | xfs_inode_t *ip, /* incore inode pointer */ | 607 | xfs_inode_t *ip, /* incore inode pointer */ |
643 | xfs_extnum_t idx, /* extent number to update/insert */ | 608 | xfs_extnum_t *idx, /* extent number to update/insert */ |
644 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 609 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
645 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 610 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
646 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ | 611 | xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */ |
647 | xfs_fsblock_t *first, /* pointer to firstblock variable */ | 612 | xfs_fsblock_t *first, /* pointer to firstblock variable */ |
648 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 613 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
649 | int *logflagsp, /* inode logging flags */ | 614 | int *logflagsp) /* inode logging flags */ |
650 | int rsvd) /* OK to use reserved data block allocation */ | ||
651 | { | 615 | { |
652 | xfs_btree_cur_t *cur; /* btree cursor */ | 616 | xfs_btree_cur_t *cur; /* btree cursor */ |
653 | int diff; /* temp value */ | 617 | int diff; /* temp value */ |
@@ -673,7 +637,7 @@ xfs_bmap_add_extent_delay_real( | |||
673 | */ | 637 | */ |
674 | cur = *curp; | 638 | cur = *curp; |
675 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 639 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
676 | ep = xfs_iext_get_ext(ifp, idx); | 640 | ep = xfs_iext_get_ext(ifp, *idx); |
677 | xfs_bmbt_get_all(ep, &PREV); | 641 | xfs_bmbt_get_all(ep, &PREV); |
678 | new_endoff = new->br_startoff + new->br_blockcount; | 642 | new_endoff = new->br_startoff + new->br_blockcount; |
679 | ASSERT(PREV.br_startoff <= new->br_startoff); | 643 | ASSERT(PREV.br_startoff <= new->br_startoff); |
@@ -692,9 +656,9 @@ xfs_bmap_add_extent_delay_real( | |||
692 | * Check and set flags if this segment has a left neighbor. | 656 | * Check and set flags if this segment has a left neighbor. |
693 | * Don't set contiguous if the combined extent would be too large. | 657 | * Don't set contiguous if the combined extent would be too large. |
694 | */ | 658 | */ |
695 | if (idx > 0) { | 659 | if (*idx > 0) { |
696 | state |= BMAP_LEFT_VALID; | 660 | state |= BMAP_LEFT_VALID; |
697 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); | 661 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); |
698 | 662 | ||
699 | if (isnullstartblock(LEFT.br_startblock)) | 663 | if (isnullstartblock(LEFT.br_startblock)) |
700 | state |= BMAP_LEFT_DELAY; | 664 | state |= BMAP_LEFT_DELAY; |
@@ -712,9 +676,9 @@ xfs_bmap_add_extent_delay_real( | |||
712 | * Don't set contiguous if the combined extent would be too large. | 676 | * Don't set contiguous if the combined extent would be too large. |
713 | * Also check for all-three-contiguous being too large. | 677 | * Also check for all-three-contiguous being too large. |
714 | */ | 678 | */ |
715 | if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | 679 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { |
716 | state |= BMAP_RIGHT_VALID; | 680 | state |= BMAP_RIGHT_VALID; |
717 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); | 681 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); |
718 | 682 | ||
719 | if (isnullstartblock(RIGHT.br_startblock)) | 683 | if (isnullstartblock(RIGHT.br_startblock)) |
720 | state |= BMAP_RIGHT_DELAY; | 684 | state |= BMAP_RIGHT_DELAY; |
@@ -745,14 +709,14 @@ xfs_bmap_add_extent_delay_real( | |||
745 | * Filling in all of a previously delayed allocation extent. | 709 | * Filling in all of a previously delayed allocation extent. |
746 | * The left and right neighbors are both contiguous with new. | 710 | * The left and right neighbors are both contiguous with new. |
747 | */ | 711 | */ |
748 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 712 | --*idx; |
749 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 713 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
714 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
750 | LEFT.br_blockcount + PREV.br_blockcount + | 715 | LEFT.br_blockcount + PREV.br_blockcount + |
751 | RIGHT.br_blockcount); | 716 | RIGHT.br_blockcount); |
752 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 717 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
753 | 718 | ||
754 | xfs_iext_remove(ip, idx, 2, state); | 719 | xfs_iext_remove(ip, *idx + 1, 2, state); |
755 | ip->i_df.if_lastex = idx - 1; | ||
756 | ip->i_d.di_nextents--; | 720 | ip->i_d.di_nextents--; |
757 | if (cur == NULL) | 721 | if (cur == NULL) |
758 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 722 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -784,13 +748,14 @@ xfs_bmap_add_extent_delay_real( | |||
784 | * Filling in all of a previously delayed allocation extent. | 748 | * Filling in all of a previously delayed allocation extent. |
785 | * The left neighbor is contiguous, the right is not. | 749 | * The left neighbor is contiguous, the right is not. |
786 | */ | 750 | */ |
787 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 751 | --*idx; |
788 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 752 | |
753 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
754 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
789 | LEFT.br_blockcount + PREV.br_blockcount); | 755 | LEFT.br_blockcount + PREV.br_blockcount); |
790 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 756 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
791 | 757 | ||
792 | ip->i_df.if_lastex = idx - 1; | 758 | xfs_iext_remove(ip, *idx + 1, 1, state); |
793 | xfs_iext_remove(ip, idx, 1, state); | ||
794 | if (cur == NULL) | 759 | if (cur == NULL) |
795 | rval = XFS_ILOG_DEXT; | 760 | rval = XFS_ILOG_DEXT; |
796 | else { | 761 | else { |
@@ -814,14 +779,13 @@ xfs_bmap_add_extent_delay_real( | |||
814 | * Filling in all of a previously delayed allocation extent. | 779 | * Filling in all of a previously delayed allocation extent. |
815 | * The right neighbor is contiguous, the left is not. | 780 | * The right neighbor is contiguous, the left is not. |
816 | */ | 781 | */ |
817 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 782 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
818 | xfs_bmbt_set_startblock(ep, new->br_startblock); | 783 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
819 | xfs_bmbt_set_blockcount(ep, | 784 | xfs_bmbt_set_blockcount(ep, |
820 | PREV.br_blockcount + RIGHT.br_blockcount); | 785 | PREV.br_blockcount + RIGHT.br_blockcount); |
821 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 786 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
822 | 787 | ||
823 | ip->i_df.if_lastex = idx; | 788 | xfs_iext_remove(ip, *idx + 1, 1, state); |
824 | xfs_iext_remove(ip, idx + 1, 1, state); | ||
825 | if (cur == NULL) | 789 | if (cur == NULL) |
826 | rval = XFS_ILOG_DEXT; | 790 | rval = XFS_ILOG_DEXT; |
827 | else { | 791 | else { |
@@ -837,6 +801,7 @@ xfs_bmap_add_extent_delay_real( | |||
837 | RIGHT.br_blockcount, PREV.br_state))) | 801 | RIGHT.br_blockcount, PREV.br_state))) |
838 | goto done; | 802 | goto done; |
839 | } | 803 | } |
804 | |||
840 | *dnew = 0; | 805 | *dnew = 0; |
841 | break; | 806 | break; |
842 | 807 | ||
@@ -846,11 +811,10 @@ xfs_bmap_add_extent_delay_real( | |||
846 | * Neither the left nor right neighbors are contiguous with | 811 | * Neither the left nor right neighbors are contiguous with |
847 | * the new one. | 812 | * the new one. |
848 | */ | 813 | */ |
849 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 814 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
850 | xfs_bmbt_set_startblock(ep, new->br_startblock); | 815 | xfs_bmbt_set_startblock(ep, new->br_startblock); |
851 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 816 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
852 | 817 | ||
853 | ip->i_df.if_lastex = idx; | ||
854 | ip->i_d.di_nextents++; | 818 | ip->i_d.di_nextents++; |
855 | if (cur == NULL) | 819 | if (cur == NULL) |
856 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 820 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -866,6 +830,7 @@ xfs_bmap_add_extent_delay_real( | |||
866 | goto done; | 830 | goto done; |
867 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); | 831 | XFS_WANT_CORRUPTED_GOTO(i == 1, done); |
868 | } | 832 | } |
833 | |||
869 | *dnew = 0; | 834 | *dnew = 0; |
870 | break; | 835 | break; |
871 | 836 | ||
@@ -874,17 +839,16 @@ xfs_bmap_add_extent_delay_real( | |||
874 | * Filling in the first part of a previous delayed allocation. | 839 | * Filling in the first part of a previous delayed allocation. |
875 | * The left neighbor is contiguous. | 840 | * The left neighbor is contiguous. |
876 | */ | 841 | */ |
877 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 842 | trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); |
878 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 843 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), |
879 | LEFT.br_blockcount + new->br_blockcount); | 844 | LEFT.br_blockcount + new->br_blockcount); |
880 | xfs_bmbt_set_startoff(ep, | 845 | xfs_bmbt_set_startoff(ep, |
881 | PREV.br_startoff + new->br_blockcount); | 846 | PREV.br_startoff + new->br_blockcount); |
882 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 847 | trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); |
883 | 848 | ||
884 | temp = PREV.br_blockcount - new->br_blockcount; | 849 | temp = PREV.br_blockcount - new->br_blockcount; |
885 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 850 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
886 | xfs_bmbt_set_blockcount(ep, temp); | 851 | xfs_bmbt_set_blockcount(ep, temp); |
887 | ip->i_df.if_lastex = idx - 1; | ||
888 | if (cur == NULL) | 852 | if (cur == NULL) |
889 | rval = XFS_ILOG_DEXT; | 853 | rval = XFS_ILOG_DEXT; |
890 | else { | 854 | else { |
@@ -904,7 +868,9 @@ xfs_bmap_add_extent_delay_real( | |||
904 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 868 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
905 | startblockval(PREV.br_startblock)); | 869 | startblockval(PREV.br_startblock)); |
906 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 870 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
907 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 871 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
872 | |||
873 | --*idx; | ||
908 | *dnew = temp; | 874 | *dnew = temp; |
909 | break; | 875 | break; |
910 | 876 | ||
@@ -913,12 +879,11 @@ xfs_bmap_add_extent_delay_real( | |||
913 | * Filling in the first part of a previous delayed allocation. | 879 | * Filling in the first part of a previous delayed allocation. |
914 | * The left neighbor is not contiguous. | 880 | * The left neighbor is not contiguous. |
915 | */ | 881 | */ |
916 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 882 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
917 | xfs_bmbt_set_startoff(ep, new_endoff); | 883 | xfs_bmbt_set_startoff(ep, new_endoff); |
918 | temp = PREV.br_blockcount - new->br_blockcount; | 884 | temp = PREV.br_blockcount - new->br_blockcount; |
919 | xfs_bmbt_set_blockcount(ep, temp); | 885 | xfs_bmbt_set_blockcount(ep, temp); |
920 | xfs_iext_insert(ip, idx, 1, new, state); | 886 | xfs_iext_insert(ip, *idx, 1, new, state); |
921 | ip->i_df.if_lastex = idx; | ||
922 | ip->i_d.di_nextents++; | 887 | ip->i_d.di_nextents++; |
923 | if (cur == NULL) | 888 | if (cur == NULL) |
924 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 889 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -946,9 +911,10 @@ xfs_bmap_add_extent_delay_real( | |||
946 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 911 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
947 | startblockval(PREV.br_startblock) - | 912 | startblockval(PREV.br_startblock) - |
948 | (cur ? cur->bc_private.b.allocated : 0)); | 913 | (cur ? cur->bc_private.b.allocated : 0)); |
949 | ep = xfs_iext_get_ext(ifp, idx + 1); | 914 | ep = xfs_iext_get_ext(ifp, *idx + 1); |
950 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 915 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
951 | trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); | 916 | trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); |
917 | |||
952 | *dnew = temp; | 918 | *dnew = temp; |
953 | break; | 919 | break; |
954 | 920 | ||
@@ -958,15 +924,13 @@ xfs_bmap_add_extent_delay_real( | |||
958 | * The right neighbor is contiguous with the new allocation. | 924 | * The right neighbor is contiguous with the new allocation. |
959 | */ | 925 | */ |
960 | temp = PREV.br_blockcount - new->br_blockcount; | 926 | temp = PREV.br_blockcount - new->br_blockcount; |
961 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 927 | trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_); |
962 | trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); | ||
963 | xfs_bmbt_set_blockcount(ep, temp); | 928 | xfs_bmbt_set_blockcount(ep, temp); |
964 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), | 929 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1), |
965 | new->br_startoff, new->br_startblock, | 930 | new->br_startoff, new->br_startblock, |
966 | new->br_blockcount + RIGHT.br_blockcount, | 931 | new->br_blockcount + RIGHT.br_blockcount, |
967 | RIGHT.br_state); | 932 | RIGHT.br_state); |
968 | trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); | 933 | trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_); |
969 | ip->i_df.if_lastex = idx + 1; | ||
970 | if (cur == NULL) | 934 | if (cur == NULL) |
971 | rval = XFS_ILOG_DEXT; | 935 | rval = XFS_ILOG_DEXT; |
972 | else { | 936 | else { |
@@ -983,10 +947,14 @@ xfs_bmap_add_extent_delay_real( | |||
983 | RIGHT.br_state))) | 947 | RIGHT.br_state))) |
984 | goto done; | 948 | goto done; |
985 | } | 949 | } |
950 | |||
986 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 951 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
987 | startblockval(PREV.br_startblock)); | 952 | startblockval(PREV.br_startblock)); |
953 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
988 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 954 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
989 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 955 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
956 | |||
957 | ++*idx; | ||
990 | *dnew = temp; | 958 | *dnew = temp; |
991 | break; | 959 | break; |
992 | 960 | ||
@@ -996,10 +964,9 @@ xfs_bmap_add_extent_delay_real( | |||
996 | * The right neighbor is not contiguous. | 964 | * The right neighbor is not contiguous. |
997 | */ | 965 | */ |
998 | temp = PREV.br_blockcount - new->br_blockcount; | 966 | temp = PREV.br_blockcount - new->br_blockcount; |
999 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 967 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1000 | xfs_bmbt_set_blockcount(ep, temp); | 968 | xfs_bmbt_set_blockcount(ep, temp); |
1001 | xfs_iext_insert(ip, idx + 1, 1, new, state); | 969 | xfs_iext_insert(ip, *idx + 1, 1, new, state); |
1002 | ip->i_df.if_lastex = idx + 1; | ||
1003 | ip->i_d.di_nextents++; | 970 | ip->i_d.di_nextents++; |
1004 | if (cur == NULL) | 971 | if (cur == NULL) |
1005 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 972 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1027,9 +994,11 @@ xfs_bmap_add_extent_delay_real( | |||
1027 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 994 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
1028 | startblockval(PREV.br_startblock) - | 995 | startblockval(PREV.br_startblock) - |
1029 | (cur ? cur->bc_private.b.allocated : 0)); | 996 | (cur ? cur->bc_private.b.allocated : 0)); |
1030 | ep = xfs_iext_get_ext(ifp, idx); | 997 | ep = xfs_iext_get_ext(ifp, *idx); |
1031 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 998 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
1032 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 999 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1000 | |||
1001 | ++*idx; | ||
1033 | *dnew = temp; | 1002 | *dnew = temp; |
1034 | break; | 1003 | break; |
1035 | 1004 | ||
@@ -1056,7 +1025,7 @@ xfs_bmap_add_extent_delay_real( | |||
1056 | */ | 1025 | */ |
1057 | temp = new->br_startoff - PREV.br_startoff; | 1026 | temp = new->br_startoff - PREV.br_startoff; |
1058 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; | 1027 | temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1059 | trace_xfs_bmap_pre_update(ip, idx, 0, _THIS_IP_); | 1028 | trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_); |
1060 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ | 1029 | xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */ |
1061 | LEFT = *new; | 1030 | LEFT = *new; |
1062 | RIGHT.br_state = PREV.br_state; | 1031 | RIGHT.br_state = PREV.br_state; |
@@ -1065,8 +1034,7 @@ xfs_bmap_add_extent_delay_real( | |||
1065 | RIGHT.br_startoff = new_endoff; | 1034 | RIGHT.br_startoff = new_endoff; |
1066 | RIGHT.br_blockcount = temp2; | 1035 | RIGHT.br_blockcount = temp2; |
1067 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ | 1036 | /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */ |
1068 | xfs_iext_insert(ip, idx + 1, 2, &LEFT, state); | 1037 | xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state); |
1069 | ip->i_df.if_lastex = idx + 1; | ||
1070 | ip->i_d.di_nextents++; | 1038 | ip->i_d.di_nextents++; |
1071 | if (cur == NULL) | 1039 | if (cur == NULL) |
1072 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1040 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1097,7 +1065,7 @@ xfs_bmap_add_extent_delay_real( | |||
1097 | (cur ? cur->bc_private.b.allocated : 0)); | 1065 | (cur ? cur->bc_private.b.allocated : 0)); |
1098 | if (diff > 0 && | 1066 | if (diff > 0 && |
1099 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | 1067 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
1100 | -((int64_t)diff), rsvd)) { | 1068 | -((int64_t)diff), 0)) { |
1101 | /* | 1069 | /* |
1102 | * Ick gross gag me with a spoon. | 1070 | * Ick gross gag me with a spoon. |
1103 | */ | 1071 | */ |
@@ -1109,7 +1077,7 @@ xfs_bmap_add_extent_delay_real( | |||
1109 | if (!diff || | 1077 | if (!diff || |
1110 | !xfs_icsb_modify_counters(ip->i_mount, | 1078 | !xfs_icsb_modify_counters(ip->i_mount, |
1111 | XFS_SBS_FDBLOCKS, | 1079 | XFS_SBS_FDBLOCKS, |
1112 | -((int64_t)diff), rsvd)) | 1080 | -((int64_t)diff), 0)) |
1113 | break; | 1081 | break; |
1114 | } | 1082 | } |
1115 | if (temp2) { | 1083 | if (temp2) { |
@@ -1118,18 +1086,20 @@ xfs_bmap_add_extent_delay_real( | |||
1118 | if (!diff || | 1086 | if (!diff || |
1119 | !xfs_icsb_modify_counters(ip->i_mount, | 1087 | !xfs_icsb_modify_counters(ip->i_mount, |
1120 | XFS_SBS_FDBLOCKS, | 1088 | XFS_SBS_FDBLOCKS, |
1121 | -((int64_t)diff), rsvd)) | 1089 | -((int64_t)diff), 0)) |
1122 | break; | 1090 | break; |
1123 | } | 1091 | } |
1124 | } | 1092 | } |
1125 | } | 1093 | } |
1126 | ep = xfs_iext_get_ext(ifp, idx); | 1094 | ep = xfs_iext_get_ext(ifp, *idx); |
1127 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 1095 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
1128 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1096 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1129 | trace_xfs_bmap_pre_update(ip, idx + 2, state, _THIS_IP_); | 1097 | trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_); |
1130 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx + 2), | 1098 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2), |
1131 | nullstartblock((int)temp2)); | 1099 | nullstartblock((int)temp2)); |
1132 | trace_xfs_bmap_post_update(ip, idx + 2, state, _THIS_IP_); | 1100 | trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_); |
1101 | |||
1102 | ++*idx; | ||
1133 | *dnew = temp + temp2; | 1103 | *dnew = temp + temp2; |
1134 | break; | 1104 | break; |
1135 | 1105 | ||
@@ -1161,7 +1131,7 @@ done: | |||
1161 | STATIC int /* error */ | 1131 | STATIC int /* error */ |
1162 | xfs_bmap_add_extent_unwritten_real( | 1132 | xfs_bmap_add_extent_unwritten_real( |
1163 | xfs_inode_t *ip, /* incore inode pointer */ | 1133 | xfs_inode_t *ip, /* incore inode pointer */ |
1164 | xfs_extnum_t idx, /* extent number to update/insert */ | 1134 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1165 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ | 1135 | xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ |
1166 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1136 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
1167 | int *logflagsp) /* inode logging flags */ | 1137 | int *logflagsp) /* inode logging flags */ |
@@ -1188,7 +1158,7 @@ xfs_bmap_add_extent_unwritten_real( | |||
1188 | error = 0; | 1158 | error = 0; |
1189 | cur = *curp; | 1159 | cur = *curp; |
1190 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 1160 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
1191 | ep = xfs_iext_get_ext(ifp, idx); | 1161 | ep = xfs_iext_get_ext(ifp, *idx); |
1192 | xfs_bmbt_get_all(ep, &PREV); | 1162 | xfs_bmbt_get_all(ep, &PREV); |
1193 | newext = new->br_state; | 1163 | newext = new->br_state; |
1194 | oldext = (newext == XFS_EXT_UNWRITTEN) ? | 1164 | oldext = (newext == XFS_EXT_UNWRITTEN) ? |
@@ -1211,9 +1181,9 @@ xfs_bmap_add_extent_unwritten_real( | |||
1211 | * Check and set flags if this segment has a left neighbor. | 1181 | * Check and set flags if this segment has a left neighbor. |
1212 | * Don't set contiguous if the combined extent would be too large. | 1182 | * Don't set contiguous if the combined extent would be too large. |
1213 | */ | 1183 | */ |
1214 | if (idx > 0) { | 1184 | if (*idx > 0) { |
1215 | state |= BMAP_LEFT_VALID; | 1185 | state |= BMAP_LEFT_VALID; |
1216 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &LEFT); | 1186 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT); |
1217 | 1187 | ||
1218 | if (isnullstartblock(LEFT.br_startblock)) | 1188 | if (isnullstartblock(LEFT.br_startblock)) |
1219 | state |= BMAP_LEFT_DELAY; | 1189 | state |= BMAP_LEFT_DELAY; |
@@ -1231,9 +1201,9 @@ xfs_bmap_add_extent_unwritten_real( | |||
1231 | * Don't set contiguous if the combined extent would be too large. | 1201 | * Don't set contiguous if the combined extent would be too large. |
1232 | * Also check for all-three-contiguous being too large. | 1202 | * Also check for all-three-contiguous being too large. |
1233 | */ | 1203 | */ |
1234 | if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { | 1204 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { |
1235 | state |= BMAP_RIGHT_VALID; | 1205 | state |= BMAP_RIGHT_VALID; |
1236 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx + 1), &RIGHT); | 1206 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); |
1237 | if (isnullstartblock(RIGHT.br_startblock)) | 1207 | if (isnullstartblock(RIGHT.br_startblock)) |
1238 | state |= BMAP_RIGHT_DELAY; | 1208 | state |= BMAP_RIGHT_DELAY; |
1239 | } | 1209 | } |
@@ -1262,14 +1232,15 @@ xfs_bmap_add_extent_unwritten_real( | |||
1262 | * Setting all of a previous oldext extent to newext. | 1232 | * Setting all of a previous oldext extent to newext. |
1263 | * The left and right neighbors are both contiguous with new. | 1233 | * The left and right neighbors are both contiguous with new. |
1264 | */ | 1234 | */ |
1265 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1235 | --*idx; |
1266 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1236 | |
1237 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
1238 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1267 | LEFT.br_blockcount + PREV.br_blockcount + | 1239 | LEFT.br_blockcount + PREV.br_blockcount + |
1268 | RIGHT.br_blockcount); | 1240 | RIGHT.br_blockcount); |
1269 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1241 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1270 | 1242 | ||
1271 | xfs_iext_remove(ip, idx, 2, state); | 1243 | xfs_iext_remove(ip, *idx + 1, 2, state); |
1272 | ip->i_df.if_lastex = idx - 1; | ||
1273 | ip->i_d.di_nextents -= 2; | 1244 | ip->i_d.di_nextents -= 2; |
1274 | if (cur == NULL) | 1245 | if (cur == NULL) |
1275 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1246 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1305,13 +1276,14 @@ xfs_bmap_add_extent_unwritten_real( | |||
1305 | * Setting all of a previous oldext extent to newext. | 1276 | * Setting all of a previous oldext extent to newext. |
1306 | * The left neighbor is contiguous, the right is not. | 1277 | * The left neighbor is contiguous, the right is not. |
1307 | */ | 1278 | */ |
1308 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1279 | --*idx; |
1309 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1280 | |
1281 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
1282 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1310 | LEFT.br_blockcount + PREV.br_blockcount); | 1283 | LEFT.br_blockcount + PREV.br_blockcount); |
1311 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1284 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1312 | 1285 | ||
1313 | ip->i_df.if_lastex = idx - 1; | 1286 | xfs_iext_remove(ip, *idx + 1, 1, state); |
1314 | xfs_iext_remove(ip, idx, 1, state); | ||
1315 | ip->i_d.di_nextents--; | 1287 | ip->i_d.di_nextents--; |
1316 | if (cur == NULL) | 1288 | if (cur == NULL) |
1317 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1289 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1341,13 +1313,12 @@ xfs_bmap_add_extent_unwritten_real( | |||
1341 | * Setting all of a previous oldext extent to newext. | 1313 | * Setting all of a previous oldext extent to newext. |
1342 | * The right neighbor is contiguous, the left is not. | 1314 | * The right neighbor is contiguous, the left is not. |
1343 | */ | 1315 | */ |
1344 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1316 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1345 | xfs_bmbt_set_blockcount(ep, | 1317 | xfs_bmbt_set_blockcount(ep, |
1346 | PREV.br_blockcount + RIGHT.br_blockcount); | 1318 | PREV.br_blockcount + RIGHT.br_blockcount); |
1347 | xfs_bmbt_set_state(ep, newext); | 1319 | xfs_bmbt_set_state(ep, newext); |
1348 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1320 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1349 | ip->i_df.if_lastex = idx; | 1321 | xfs_iext_remove(ip, *idx + 1, 1, state); |
1350 | xfs_iext_remove(ip, idx + 1, 1, state); | ||
1351 | ip->i_d.di_nextents--; | 1322 | ip->i_d.di_nextents--; |
1352 | if (cur == NULL) | 1323 | if (cur == NULL) |
1353 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1324 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1378,11 +1349,10 @@ xfs_bmap_add_extent_unwritten_real( | |||
1378 | * Neither the left nor right neighbors are contiguous with | 1349 | * Neither the left nor right neighbors are contiguous with |
1379 | * the new one. | 1350 | * the new one. |
1380 | */ | 1351 | */ |
1381 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1352 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1382 | xfs_bmbt_set_state(ep, newext); | 1353 | xfs_bmbt_set_state(ep, newext); |
1383 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1354 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1384 | 1355 | ||
1385 | ip->i_df.if_lastex = idx; | ||
1386 | if (cur == NULL) | 1356 | if (cur == NULL) |
1387 | rval = XFS_ILOG_DEXT; | 1357 | rval = XFS_ILOG_DEXT; |
1388 | else { | 1358 | else { |
@@ -1404,21 +1374,22 @@ xfs_bmap_add_extent_unwritten_real( | |||
1404 | * Setting the first part of a previous oldext extent to newext. | 1374 | * Setting the first part of a previous oldext extent to newext. |
1405 | * The left neighbor is contiguous. | 1375 | * The left neighbor is contiguous. |
1406 | */ | 1376 | */ |
1407 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1377 | trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_); |
1408 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1378 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1), |
1409 | LEFT.br_blockcount + new->br_blockcount); | 1379 | LEFT.br_blockcount + new->br_blockcount); |
1410 | xfs_bmbt_set_startoff(ep, | 1380 | xfs_bmbt_set_startoff(ep, |
1411 | PREV.br_startoff + new->br_blockcount); | 1381 | PREV.br_startoff + new->br_blockcount); |
1412 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1382 | trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_); |
1413 | 1383 | ||
1414 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1384 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1415 | xfs_bmbt_set_startblock(ep, | 1385 | xfs_bmbt_set_startblock(ep, |
1416 | new->br_startblock + new->br_blockcount); | 1386 | new->br_startblock + new->br_blockcount); |
1417 | xfs_bmbt_set_blockcount(ep, | 1387 | xfs_bmbt_set_blockcount(ep, |
1418 | PREV.br_blockcount - new->br_blockcount); | 1388 | PREV.br_blockcount - new->br_blockcount); |
1419 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1389 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1390 | |||
1391 | --*idx; | ||
1420 | 1392 | ||
1421 | ip->i_df.if_lastex = idx - 1; | ||
1422 | if (cur == NULL) | 1393 | if (cur == NULL) |
1423 | rval = XFS_ILOG_DEXT; | 1394 | rval = XFS_ILOG_DEXT; |
1424 | else { | 1395 | else { |
@@ -1449,17 +1420,16 @@ xfs_bmap_add_extent_unwritten_real( | |||
1449 | * Setting the first part of a previous oldext extent to newext. | 1420 | * Setting the first part of a previous oldext extent to newext. |
1450 | * The left neighbor is not contiguous. | 1421 | * The left neighbor is not contiguous. |
1451 | */ | 1422 | */ |
1452 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1423 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1453 | ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); | 1424 | ASSERT(ep && xfs_bmbt_get_state(ep) == oldext); |
1454 | xfs_bmbt_set_startoff(ep, new_endoff); | 1425 | xfs_bmbt_set_startoff(ep, new_endoff); |
1455 | xfs_bmbt_set_blockcount(ep, | 1426 | xfs_bmbt_set_blockcount(ep, |
1456 | PREV.br_blockcount - new->br_blockcount); | 1427 | PREV.br_blockcount - new->br_blockcount); |
1457 | xfs_bmbt_set_startblock(ep, | 1428 | xfs_bmbt_set_startblock(ep, |
1458 | new->br_startblock + new->br_blockcount); | 1429 | new->br_startblock + new->br_blockcount); |
1459 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1430 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1460 | 1431 | ||
1461 | xfs_iext_insert(ip, idx, 1, new, state); | 1432 | xfs_iext_insert(ip, *idx, 1, new, state); |
1462 | ip->i_df.if_lastex = idx; | ||
1463 | ip->i_d.di_nextents++; | 1433 | ip->i_d.di_nextents++; |
1464 | if (cur == NULL) | 1434 | if (cur == NULL) |
1465 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1435 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1488,17 +1458,19 @@ xfs_bmap_add_extent_unwritten_real( | |||
1488 | * Setting the last part of a previous oldext extent to newext. | 1458 | * Setting the last part of a previous oldext extent to newext. |
1489 | * The right neighbor is contiguous with the new allocation. | 1459 | * The right neighbor is contiguous with the new allocation. |
1490 | */ | 1460 | */ |
1491 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1461 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1492 | trace_xfs_bmap_pre_update(ip, idx + 1, state, _THIS_IP_); | ||
1493 | xfs_bmbt_set_blockcount(ep, | 1462 | xfs_bmbt_set_blockcount(ep, |
1494 | PREV.br_blockcount - new->br_blockcount); | 1463 | PREV.br_blockcount - new->br_blockcount); |
1495 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1464 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1496 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, idx + 1), | 1465 | |
1466 | ++*idx; | ||
1467 | |||
1468 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); | ||
1469 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), | ||
1497 | new->br_startoff, new->br_startblock, | 1470 | new->br_startoff, new->br_startblock, |
1498 | new->br_blockcount + RIGHT.br_blockcount, newext); | 1471 | new->br_blockcount + RIGHT.br_blockcount, newext); |
1499 | trace_xfs_bmap_post_update(ip, idx + 1, state, _THIS_IP_); | 1472 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1500 | 1473 | ||
1501 | ip->i_df.if_lastex = idx + 1; | ||
1502 | if (cur == NULL) | 1474 | if (cur == NULL) |
1503 | rval = XFS_ILOG_DEXT; | 1475 | rval = XFS_ILOG_DEXT; |
1504 | else { | 1476 | else { |
@@ -1528,13 +1500,14 @@ xfs_bmap_add_extent_unwritten_real( | |||
1528 | * Setting the last part of a previous oldext extent to newext. | 1500 | * Setting the last part of a previous oldext extent to newext. |
1529 | * The right neighbor is not contiguous. | 1501 | * The right neighbor is not contiguous. |
1530 | */ | 1502 | */ |
1531 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1503 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1532 | xfs_bmbt_set_blockcount(ep, | 1504 | xfs_bmbt_set_blockcount(ep, |
1533 | PREV.br_blockcount - new->br_blockcount); | 1505 | PREV.br_blockcount - new->br_blockcount); |
1534 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1506 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1507 | |||
1508 | ++*idx; | ||
1509 | xfs_iext_insert(ip, *idx, 1, new, state); | ||
1535 | 1510 | ||
1536 | xfs_iext_insert(ip, idx + 1, 1, new, state); | ||
1537 | ip->i_df.if_lastex = idx + 1; | ||
1538 | ip->i_d.di_nextents++; | 1511 | ip->i_d.di_nextents++; |
1539 | if (cur == NULL) | 1512 | if (cur == NULL) |
1540 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1513 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1568,10 +1541,10 @@ xfs_bmap_add_extent_unwritten_real( | |||
1568 | * newext. Contiguity is impossible here. | 1541 | * newext. Contiguity is impossible here. |
1569 | * One extent becomes three extents. | 1542 | * One extent becomes three extents. |
1570 | */ | 1543 | */ |
1571 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1544 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1572 | xfs_bmbt_set_blockcount(ep, | 1545 | xfs_bmbt_set_blockcount(ep, |
1573 | new->br_startoff - PREV.br_startoff); | 1546 | new->br_startoff - PREV.br_startoff); |
1574 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1547 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1575 | 1548 | ||
1576 | r[0] = *new; | 1549 | r[0] = *new; |
1577 | r[1].br_startoff = new_endoff; | 1550 | r[1].br_startoff = new_endoff; |
@@ -1579,8 +1552,10 @@ xfs_bmap_add_extent_unwritten_real( | |||
1579 | PREV.br_startoff + PREV.br_blockcount - new_endoff; | 1552 | PREV.br_startoff + PREV.br_blockcount - new_endoff; |
1580 | r[1].br_startblock = new->br_startblock + new->br_blockcount; | 1553 | r[1].br_startblock = new->br_startblock + new->br_blockcount; |
1581 | r[1].br_state = oldext; | 1554 | r[1].br_state = oldext; |
1582 | xfs_iext_insert(ip, idx + 1, 2, &r[0], state); | 1555 | |
1583 | ip->i_df.if_lastex = idx + 1; | 1556 | ++*idx; |
1557 | xfs_iext_insert(ip, *idx, 2, &r[0], state); | ||
1558 | |||
1584 | ip->i_d.di_nextents += 2; | 1559 | ip->i_d.di_nextents += 2; |
1585 | if (cur == NULL) | 1560 | if (cur == NULL) |
1586 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; | 1561 | rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; |
@@ -1650,12 +1625,10 @@ done: | |||
1650 | STATIC int /* error */ | 1625 | STATIC int /* error */ |
1651 | xfs_bmap_add_extent_hole_delay( | 1626 | xfs_bmap_add_extent_hole_delay( |
1652 | xfs_inode_t *ip, /* incore inode pointer */ | 1627 | xfs_inode_t *ip, /* incore inode pointer */ |
1653 | xfs_extnum_t idx, /* extent number to update/insert */ | 1628 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1654 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1629 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
1655 | int *logflagsp, /* inode logging flags */ | 1630 | int *logflagsp) /* inode logging flags */ |
1656 | int rsvd) /* OK to allocate reserved blocks */ | ||
1657 | { | 1631 | { |
1658 | xfs_bmbt_rec_host_t *ep; /* extent record for idx */ | ||
1659 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1632 | xfs_ifork_t *ifp; /* inode fork pointer */ |
1660 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ | 1633 | xfs_bmbt_irec_t left; /* left neighbor extent entry */ |
1661 | xfs_filblks_t newlen=0; /* new indirect size */ | 1634 | xfs_filblks_t newlen=0; /* new indirect size */ |
@@ -1665,16 +1638,15 @@ xfs_bmap_add_extent_hole_delay( | |||
1665 | xfs_filblks_t temp=0; /* temp for indirect calculations */ | 1638 | xfs_filblks_t temp=0; /* temp for indirect calculations */ |
1666 | 1639 | ||
1667 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | 1640 | ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); |
1668 | ep = xfs_iext_get_ext(ifp, idx); | ||
1669 | state = 0; | 1641 | state = 0; |
1670 | ASSERT(isnullstartblock(new->br_startblock)); | 1642 | ASSERT(isnullstartblock(new->br_startblock)); |
1671 | 1643 | ||
1672 | /* | 1644 | /* |
1673 | * Check and set flags if this segment has a left neighbor | 1645 | * Check and set flags if this segment has a left neighbor |
1674 | */ | 1646 | */ |
1675 | if (idx > 0) { | 1647 | if (*idx > 0) { |
1676 | state |= BMAP_LEFT_VALID; | 1648 | state |= BMAP_LEFT_VALID; |
1677 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); | 1649 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); |
1678 | 1650 | ||
1679 | if (isnullstartblock(left.br_startblock)) | 1651 | if (isnullstartblock(left.br_startblock)) |
1680 | state |= BMAP_LEFT_DELAY; | 1652 | state |= BMAP_LEFT_DELAY; |
@@ -1684,9 +1656,9 @@ xfs_bmap_add_extent_hole_delay( | |||
1684 | * Check and set flags if the current (right) segment exists. | 1656 | * Check and set flags if the current (right) segment exists. |
1685 | * If it doesn't exist, we're converting the hole at end-of-file. | 1657 | * If it doesn't exist, we're converting the hole at end-of-file. |
1686 | */ | 1658 | */ |
1687 | if (idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | 1659 | if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { |
1688 | state |= BMAP_RIGHT_VALID; | 1660 | state |= BMAP_RIGHT_VALID; |
1689 | xfs_bmbt_get_all(ep, &right); | 1661 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); |
1690 | 1662 | ||
1691 | if (isnullstartblock(right.br_startblock)) | 1663 | if (isnullstartblock(right.br_startblock)) |
1692 | state |= BMAP_RIGHT_DELAY; | 1664 | state |= BMAP_RIGHT_DELAY; |
@@ -1719,21 +1691,21 @@ xfs_bmap_add_extent_hole_delay( | |||
1719 | * on the left and on the right. | 1691 | * on the left and on the right. |
1720 | * Merge all three into a single extent record. | 1692 | * Merge all three into a single extent record. |
1721 | */ | 1693 | */ |
1694 | --*idx; | ||
1722 | temp = left.br_blockcount + new->br_blockcount + | 1695 | temp = left.br_blockcount + new->br_blockcount + |
1723 | right.br_blockcount; | 1696 | right.br_blockcount; |
1724 | 1697 | ||
1725 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1698 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1726 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); | 1699 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); |
1727 | oldlen = startblockval(left.br_startblock) + | 1700 | oldlen = startblockval(left.br_startblock) + |
1728 | startblockval(new->br_startblock) + | 1701 | startblockval(new->br_startblock) + |
1729 | startblockval(right.br_startblock); | 1702 | startblockval(right.br_startblock); |
1730 | newlen = xfs_bmap_worst_indlen(ip, temp); | 1703 | newlen = xfs_bmap_worst_indlen(ip, temp); |
1731 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), | 1704 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), |
1732 | nullstartblock((int)newlen)); | 1705 | nullstartblock((int)newlen)); |
1733 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1706 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1734 | 1707 | ||
1735 | xfs_iext_remove(ip, idx, 1, state); | 1708 | xfs_iext_remove(ip, *idx + 1, 1, state); |
1736 | ip->i_df.if_lastex = idx - 1; | ||
1737 | break; | 1709 | break; |
1738 | 1710 | ||
1739 | case BMAP_LEFT_CONTIG: | 1711 | case BMAP_LEFT_CONTIG: |
@@ -1742,17 +1714,17 @@ xfs_bmap_add_extent_hole_delay( | |||
1742 | * on the left. | 1714 | * on the left. |
1743 | * Merge the new allocation with the left neighbor. | 1715 | * Merge the new allocation with the left neighbor. |
1744 | */ | 1716 | */ |
1717 | --*idx; | ||
1745 | temp = left.br_blockcount + new->br_blockcount; | 1718 | temp = left.br_blockcount + new->br_blockcount; |
1746 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1719 | |
1747 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), temp); | 1720 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1721 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp); | ||
1748 | oldlen = startblockval(left.br_startblock) + | 1722 | oldlen = startblockval(left.br_startblock) + |
1749 | startblockval(new->br_startblock); | 1723 | startblockval(new->br_startblock); |
1750 | newlen = xfs_bmap_worst_indlen(ip, temp); | 1724 | newlen = xfs_bmap_worst_indlen(ip, temp); |
1751 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, idx - 1), | 1725 | xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx), |
1752 | nullstartblock((int)newlen)); | 1726 | nullstartblock((int)newlen)); |
1753 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1727 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1754 | |||
1755 | ip->i_df.if_lastex = idx - 1; | ||
1756 | break; | 1728 | break; |
1757 | 1729 | ||
1758 | case BMAP_RIGHT_CONTIG: | 1730 | case BMAP_RIGHT_CONTIG: |
@@ -1761,16 +1733,15 @@ xfs_bmap_add_extent_hole_delay( | |||
1761 | * on the right. | 1733 | * on the right. |
1762 | * Merge the new allocation with the right neighbor. | 1734 | * Merge the new allocation with the right neighbor. |
1763 | */ | 1735 | */ |
1764 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1736 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1765 | temp = new->br_blockcount + right.br_blockcount; | 1737 | temp = new->br_blockcount + right.br_blockcount; |
1766 | oldlen = startblockval(new->br_startblock) + | 1738 | oldlen = startblockval(new->br_startblock) + |
1767 | startblockval(right.br_startblock); | 1739 | startblockval(right.br_startblock); |
1768 | newlen = xfs_bmap_worst_indlen(ip, temp); | 1740 | newlen = xfs_bmap_worst_indlen(ip, temp); |
1769 | xfs_bmbt_set_allf(ep, new->br_startoff, | 1741 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), |
1742 | new->br_startoff, | ||
1770 | nullstartblock((int)newlen), temp, right.br_state); | 1743 | nullstartblock((int)newlen), temp, right.br_state); |
1771 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1744 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1772 | |||
1773 | ip->i_df.if_lastex = idx; | ||
1774 | break; | 1745 | break; |
1775 | 1746 | ||
1776 | case 0: | 1747 | case 0: |
@@ -1780,14 +1751,13 @@ xfs_bmap_add_extent_hole_delay( | |||
1780 | * Insert a new entry. | 1751 | * Insert a new entry. |
1781 | */ | 1752 | */ |
1782 | oldlen = newlen = 0; | 1753 | oldlen = newlen = 0; |
1783 | xfs_iext_insert(ip, idx, 1, new, state); | 1754 | xfs_iext_insert(ip, *idx, 1, new, state); |
1784 | ip->i_df.if_lastex = idx; | ||
1785 | break; | 1755 | break; |
1786 | } | 1756 | } |
1787 | if (oldlen != newlen) { | 1757 | if (oldlen != newlen) { |
1788 | ASSERT(oldlen > newlen); | 1758 | ASSERT(oldlen > newlen); |
1789 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, | 1759 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
1790 | (int64_t)(oldlen - newlen), rsvd); | 1760 | (int64_t)(oldlen - newlen), 0); |
1791 | /* | 1761 | /* |
1792 | * Nothing to do for disk quota accounting here. | 1762 | * Nothing to do for disk quota accounting here. |
1793 | */ | 1763 | */ |
@@ -1803,13 +1773,12 @@ xfs_bmap_add_extent_hole_delay( | |||
1803 | STATIC int /* error */ | 1773 | STATIC int /* error */ |
1804 | xfs_bmap_add_extent_hole_real( | 1774 | xfs_bmap_add_extent_hole_real( |
1805 | xfs_inode_t *ip, /* incore inode pointer */ | 1775 | xfs_inode_t *ip, /* incore inode pointer */ |
1806 | xfs_extnum_t idx, /* extent number to update/insert */ | 1776 | xfs_extnum_t *idx, /* extent number to update/insert */ |
1807 | xfs_btree_cur_t *cur, /* if null, not a btree */ | 1777 | xfs_btree_cur_t *cur, /* if null, not a btree */ |
1808 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ | 1778 | xfs_bmbt_irec_t *new, /* new data to add to file extents */ |
1809 | int *logflagsp, /* inode logging flags */ | 1779 | int *logflagsp, /* inode logging flags */ |
1810 | int whichfork) /* data or attr fork */ | 1780 | int whichfork) /* data or attr fork */ |
1811 | { | 1781 | { |
1812 | xfs_bmbt_rec_host_t *ep; /* pointer to extent entry ins. point */ | ||
1813 | int error; /* error return value */ | 1782 | int error; /* error return value */ |
1814 | int i; /* temp state */ | 1783 | int i; /* temp state */ |
1815 | xfs_ifork_t *ifp; /* inode fork pointer */ | 1784 | xfs_ifork_t *ifp; /* inode fork pointer */ |
@@ -1819,8 +1788,7 @@ xfs_bmap_add_extent_hole_real( | |||
1819 | int state; /* state bits, accessed thru macros */ | 1788 | int state; /* state bits, accessed thru macros */ |
1820 | 1789 | ||
1821 | ifp = XFS_IFORK_PTR(ip, whichfork); | 1790 | ifp = XFS_IFORK_PTR(ip, whichfork); |
1822 | ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); | 1791 | ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); |
1823 | ep = xfs_iext_get_ext(ifp, idx); | ||
1824 | state = 0; | 1792 | state = 0; |
1825 | 1793 | ||
1826 | if (whichfork == XFS_ATTR_FORK) | 1794 | if (whichfork == XFS_ATTR_FORK) |
@@ -1829,9 +1797,9 @@ xfs_bmap_add_extent_hole_real( | |||
1829 | /* | 1797 | /* |
1830 | * Check and set flags if this segment has a left neighbor. | 1798 | * Check and set flags if this segment has a left neighbor. |
1831 | */ | 1799 | */ |
1832 | if (idx > 0) { | 1800 | if (*idx > 0) { |
1833 | state |= BMAP_LEFT_VALID; | 1801 | state |= BMAP_LEFT_VALID; |
1834 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx - 1), &left); | 1802 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left); |
1835 | if (isnullstartblock(left.br_startblock)) | 1803 | if (isnullstartblock(left.br_startblock)) |
1836 | state |= BMAP_LEFT_DELAY; | 1804 | state |= BMAP_LEFT_DELAY; |
1837 | } | 1805 | } |
@@ -1840,9 +1808,9 @@ xfs_bmap_add_extent_hole_real( | |||
1840 | * Check and set flags if this segment has a current value. | 1808 | * Check and set flags if this segment has a current value. |
1841 | * Not true if we're inserting into the "hole" at eof. | 1809 | * Not true if we're inserting into the "hole" at eof. |
1842 | */ | 1810 | */ |
1843 | if (idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { | 1811 | if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) { |
1844 | state |= BMAP_RIGHT_VALID; | 1812 | state |= BMAP_RIGHT_VALID; |
1845 | xfs_bmbt_get_all(ep, &right); | 1813 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right); |
1846 | if (isnullstartblock(right.br_startblock)) | 1814 | if (isnullstartblock(right.br_startblock)) |
1847 | state |= BMAP_RIGHT_DELAY; | 1815 | state |= BMAP_RIGHT_DELAY; |
1848 | } | 1816 | } |
@@ -1879,14 +1847,15 @@ xfs_bmap_add_extent_hole_real( | |||
1879 | * left and on the right. | 1847 | * left and on the right. |
1880 | * Merge all three into a single extent record. | 1848 | * Merge all three into a single extent record. |
1881 | */ | 1849 | */ |
1882 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1850 | --*idx; |
1883 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1851 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1852 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1884 | left.br_blockcount + new->br_blockcount + | 1853 | left.br_blockcount + new->br_blockcount + |
1885 | right.br_blockcount); | 1854 | right.br_blockcount); |
1886 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1855 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1856 | |||
1857 | xfs_iext_remove(ip, *idx + 1, 1, state); | ||
1887 | 1858 | ||
1888 | xfs_iext_remove(ip, idx, 1, state); | ||
1889 | ifp->if_lastex = idx - 1; | ||
1890 | XFS_IFORK_NEXT_SET(ip, whichfork, | 1859 | XFS_IFORK_NEXT_SET(ip, whichfork, |
1891 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | 1860 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
1892 | if (cur == NULL) { | 1861 | if (cur == NULL) { |
@@ -1921,12 +1890,12 @@ xfs_bmap_add_extent_hole_real( | |||
1921 | * on the left. | 1890 | * on the left. |
1922 | * Merge the new allocation with the left neighbor. | 1891 | * Merge the new allocation with the left neighbor. |
1923 | */ | 1892 | */ |
1924 | trace_xfs_bmap_pre_update(ip, idx - 1, state, _THIS_IP_); | 1893 | --*idx; |
1925 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, idx - 1), | 1894 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1895 | xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), | ||
1926 | left.br_blockcount + new->br_blockcount); | 1896 | left.br_blockcount + new->br_blockcount); |
1927 | trace_xfs_bmap_post_update(ip, idx - 1, state, _THIS_IP_); | 1897 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1928 | 1898 | ||
1929 | ifp->if_lastex = idx - 1; | ||
1930 | if (cur == NULL) { | 1899 | if (cur == NULL) { |
1931 | rval = xfs_ilog_fext(whichfork); | 1900 | rval = xfs_ilog_fext(whichfork); |
1932 | } else { | 1901 | } else { |
@@ -1952,13 +1921,13 @@ xfs_bmap_add_extent_hole_real( | |||
1952 | * on the right. | 1921 | * on the right. |
1953 | * Merge the new allocation with the right neighbor. | 1922 | * Merge the new allocation with the right neighbor. |
1954 | */ | 1923 | */ |
1955 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 1924 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
1956 | xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock, | 1925 | xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx), |
1926 | new->br_startoff, new->br_startblock, | ||
1957 | new->br_blockcount + right.br_blockcount, | 1927 | new->br_blockcount + right.br_blockcount, |
1958 | right.br_state); | 1928 | right.br_state); |
1959 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 1929 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
1960 | 1930 | ||
1961 | ifp->if_lastex = idx; | ||
1962 | if (cur == NULL) { | 1931 | if (cur == NULL) { |
1963 | rval = xfs_ilog_fext(whichfork); | 1932 | rval = xfs_ilog_fext(whichfork); |
1964 | } else { | 1933 | } else { |
@@ -1984,8 +1953,7 @@ xfs_bmap_add_extent_hole_real( | |||
1984 | * real allocation. | 1953 | * real allocation. |
1985 | * Insert a new entry. | 1954 | * Insert a new entry. |
1986 | */ | 1955 | */ |
1987 | xfs_iext_insert(ip, idx, 1, new, state); | 1956 | xfs_iext_insert(ip, *idx, 1, new, state); |
1988 | ifp->if_lastex = idx; | ||
1989 | XFS_IFORK_NEXT_SET(ip, whichfork, | 1957 | XFS_IFORK_NEXT_SET(ip, whichfork, |
1990 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); | 1958 | XFS_IFORK_NEXTENTS(ip, whichfork) + 1); |
1991 | if (cur == NULL) { | 1959 | if (cur == NULL) { |
@@ -2833,13 +2801,12 @@ STATIC int /* error */ | |||
2833 | xfs_bmap_del_extent( | 2801 | xfs_bmap_del_extent( |
2834 | xfs_inode_t *ip, /* incore inode pointer */ | 2802 | xfs_inode_t *ip, /* incore inode pointer */ |
2835 | xfs_trans_t *tp, /* current transaction pointer */ | 2803 | xfs_trans_t *tp, /* current transaction pointer */ |
2836 | xfs_extnum_t idx, /* extent number to update/delete */ | 2804 | xfs_extnum_t *idx, /* extent number to update/delete */ |
2837 | xfs_bmap_free_t *flist, /* list of extents to be freed */ | 2805 | xfs_bmap_free_t *flist, /* list of extents to be freed */ |
2838 | xfs_btree_cur_t *cur, /* if null, not a btree */ | 2806 | xfs_btree_cur_t *cur, /* if null, not a btree */ |
2839 | xfs_bmbt_irec_t *del, /* data to remove from extents */ | 2807 | xfs_bmbt_irec_t *del, /* data to remove from extents */ |
2840 | int *logflagsp, /* inode logging flags */ | 2808 | int *logflagsp, /* inode logging flags */ |
2841 | int whichfork, /* data or attr fork */ | 2809 | int whichfork) /* data or attr fork */ |
2842 | int rsvd) /* OK to allocate reserved blocks */ | ||
2843 | { | 2810 | { |
2844 | xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ | 2811 | xfs_filblks_t da_new; /* new delay-alloc indirect blocks */ |
2845 | xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ | 2812 | xfs_filblks_t da_old; /* old delay-alloc indirect blocks */ |
@@ -2870,10 +2837,10 @@ xfs_bmap_del_extent( | |||
2870 | 2837 | ||
2871 | mp = ip->i_mount; | 2838 | mp = ip->i_mount; |
2872 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2839 | ifp = XFS_IFORK_PTR(ip, whichfork); |
2873 | ASSERT((idx >= 0) && (idx < ifp->if_bytes / | 2840 | ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / |
2874 | (uint)sizeof(xfs_bmbt_rec_t))); | 2841 | (uint)sizeof(xfs_bmbt_rec_t))); |
2875 | ASSERT(del->br_blockcount > 0); | 2842 | ASSERT(del->br_blockcount > 0); |
2876 | ep = xfs_iext_get_ext(ifp, idx); | 2843 | ep = xfs_iext_get_ext(ifp, *idx); |
2877 | xfs_bmbt_get_all(ep, &got); | 2844 | xfs_bmbt_get_all(ep, &got); |
2878 | ASSERT(got.br_startoff <= del->br_startoff); | 2845 | ASSERT(got.br_startoff <= del->br_startoff); |
2879 | del_endoff = del->br_startoff + del->br_blockcount; | 2846 | del_endoff = del->br_startoff + del->br_blockcount; |
@@ -2947,11 +2914,12 @@ xfs_bmap_del_extent( | |||
2947 | /* | 2914 | /* |
2948 | * Matches the whole extent. Delete the entry. | 2915 | * Matches the whole extent. Delete the entry. |
2949 | */ | 2916 | */ |
2950 | xfs_iext_remove(ip, idx, 1, | 2917 | xfs_iext_remove(ip, *idx, 1, |
2951 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); | 2918 | whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0); |
2952 | ifp->if_lastex = idx; | 2919 | --*idx; |
2953 | if (delay) | 2920 | if (delay) |
2954 | break; | 2921 | break; |
2922 | |||
2955 | XFS_IFORK_NEXT_SET(ip, whichfork, | 2923 | XFS_IFORK_NEXT_SET(ip, whichfork, |
2956 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | 2924 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); |
2957 | flags |= XFS_ILOG_CORE; | 2925 | flags |= XFS_ILOG_CORE; |
@@ -2968,21 +2936,20 @@ xfs_bmap_del_extent( | |||
2968 | /* | 2936 | /* |
2969 | * Deleting the first part of the extent. | 2937 | * Deleting the first part of the extent. |
2970 | */ | 2938 | */ |
2971 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 2939 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
2972 | xfs_bmbt_set_startoff(ep, del_endoff); | 2940 | xfs_bmbt_set_startoff(ep, del_endoff); |
2973 | temp = got.br_blockcount - del->br_blockcount; | 2941 | temp = got.br_blockcount - del->br_blockcount; |
2974 | xfs_bmbt_set_blockcount(ep, temp); | 2942 | xfs_bmbt_set_blockcount(ep, temp); |
2975 | ifp->if_lastex = idx; | ||
2976 | if (delay) { | 2943 | if (delay) { |
2977 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 2944 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
2978 | da_old); | 2945 | da_old); |
2979 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 2946 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
2980 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2947 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2981 | da_new = temp; | 2948 | da_new = temp; |
2982 | break; | 2949 | break; |
2983 | } | 2950 | } |
2984 | xfs_bmbt_set_startblock(ep, del_endblock); | 2951 | xfs_bmbt_set_startblock(ep, del_endblock); |
2985 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2952 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
2986 | if (!cur) { | 2953 | if (!cur) { |
2987 | flags |= xfs_ilog_fext(whichfork); | 2954 | flags |= xfs_ilog_fext(whichfork); |
2988 | break; | 2955 | break; |
@@ -2998,18 +2965,17 @@ xfs_bmap_del_extent( | |||
2998 | * Deleting the last part of the extent. | 2965 | * Deleting the last part of the extent. |
2999 | */ | 2966 | */ |
3000 | temp = got.br_blockcount - del->br_blockcount; | 2967 | temp = got.br_blockcount - del->br_blockcount; |
3001 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 2968 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
3002 | xfs_bmbt_set_blockcount(ep, temp); | 2969 | xfs_bmbt_set_blockcount(ep, temp); |
3003 | ifp->if_lastex = idx; | ||
3004 | if (delay) { | 2970 | if (delay) { |
3005 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), | 2971 | temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), |
3006 | da_old); | 2972 | da_old); |
3007 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); | 2973 | xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); |
3008 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2974 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
3009 | da_new = temp; | 2975 | da_new = temp; |
3010 | break; | 2976 | break; |
3011 | } | 2977 | } |
3012 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 2978 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
3013 | if (!cur) { | 2979 | if (!cur) { |
3014 | flags |= xfs_ilog_fext(whichfork); | 2980 | flags |= xfs_ilog_fext(whichfork); |
3015 | break; | 2981 | break; |
@@ -3026,7 +2992,7 @@ xfs_bmap_del_extent( | |||
3026 | * Deleting the middle of the extent. | 2992 | * Deleting the middle of the extent. |
3027 | */ | 2993 | */ |
3028 | temp = del->br_startoff - got.br_startoff; | 2994 | temp = del->br_startoff - got.br_startoff; |
3029 | trace_xfs_bmap_pre_update(ip, idx, state, _THIS_IP_); | 2995 | trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); |
3030 | xfs_bmbt_set_blockcount(ep, temp); | 2996 | xfs_bmbt_set_blockcount(ep, temp); |
3031 | new.br_startoff = del_endoff; | 2997 | new.br_startoff = del_endoff; |
3032 | temp2 = got_endoff - del_endoff; | 2998 | temp2 = got_endoff - del_endoff; |
@@ -3113,9 +3079,9 @@ xfs_bmap_del_extent( | |||
3113 | } | 3079 | } |
3114 | } | 3080 | } |
3115 | } | 3081 | } |
3116 | trace_xfs_bmap_post_update(ip, idx, state, _THIS_IP_); | 3082 | trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); |
3117 | xfs_iext_insert(ip, idx + 1, 1, &new, state); | 3083 | xfs_iext_insert(ip, *idx + 1, 1, &new, state); |
3118 | ifp->if_lastex = idx + 1; | 3084 | ++*idx; |
3119 | break; | 3085 | break; |
3120 | } | 3086 | } |
3121 | /* | 3087 | /* |
@@ -3142,7 +3108,7 @@ xfs_bmap_del_extent( | |||
3142 | ASSERT(da_old >= da_new); | 3108 | ASSERT(da_old >= da_new); |
3143 | if (da_old > da_new) { | 3109 | if (da_old > da_new) { |
3144 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 3110 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
3145 | (int64_t)(da_old - da_new), rsvd); | 3111 | (int64_t)(da_old - da_new), 0); |
3146 | } | 3112 | } |
3147 | done: | 3113 | done: |
3148 | *logflagsp = flags; | 3114 | *logflagsp = flags; |
@@ -4562,29 +4528,24 @@ xfs_bmapi( | |||
4562 | if (rt) { | 4528 | if (rt) { |
4563 | error = xfs_mod_incore_sb(mp, | 4529 | error = xfs_mod_incore_sb(mp, |
4564 | XFS_SBS_FREXTENTS, | 4530 | XFS_SBS_FREXTENTS, |
4565 | -((int64_t)extsz), (flags & | 4531 | -((int64_t)extsz), 0); |
4566 | XFS_BMAPI_RSVBLOCKS)); | ||
4567 | } else { | 4532 | } else { |
4568 | error = xfs_icsb_modify_counters(mp, | 4533 | error = xfs_icsb_modify_counters(mp, |
4569 | XFS_SBS_FDBLOCKS, | 4534 | XFS_SBS_FDBLOCKS, |
4570 | -((int64_t)alen), (flags & | 4535 | -((int64_t)alen), 0); |
4571 | XFS_BMAPI_RSVBLOCKS)); | ||
4572 | } | 4536 | } |
4573 | if (!error) { | 4537 | if (!error) { |
4574 | error = xfs_icsb_modify_counters(mp, | 4538 | error = xfs_icsb_modify_counters(mp, |
4575 | XFS_SBS_FDBLOCKS, | 4539 | XFS_SBS_FDBLOCKS, |
4576 | -((int64_t)indlen), (flags & | 4540 | -((int64_t)indlen), 0); |
4577 | XFS_BMAPI_RSVBLOCKS)); | ||
4578 | if (error && rt) | 4541 | if (error && rt) |
4579 | xfs_mod_incore_sb(mp, | 4542 | xfs_mod_incore_sb(mp, |
4580 | XFS_SBS_FREXTENTS, | 4543 | XFS_SBS_FREXTENTS, |
4581 | (int64_t)extsz, (flags & | 4544 | (int64_t)extsz, 0); |
4582 | XFS_BMAPI_RSVBLOCKS)); | ||
4583 | else if (error) | 4545 | else if (error) |
4584 | xfs_icsb_modify_counters(mp, | 4546 | xfs_icsb_modify_counters(mp, |
4585 | XFS_SBS_FDBLOCKS, | 4547 | XFS_SBS_FDBLOCKS, |
4586 | (int64_t)alen, (flags & | 4548 | (int64_t)alen, 0); |
4587 | XFS_BMAPI_RSVBLOCKS)); | ||
4588 | } | 4549 | } |
4589 | 4550 | ||
4590 | if (error) { | 4551 | if (error) { |
@@ -4701,13 +4662,12 @@ xfs_bmapi( | |||
4701 | if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) | 4662 | if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) |
4702 | got.br_state = XFS_EXT_UNWRITTEN; | 4663 | got.br_state = XFS_EXT_UNWRITTEN; |
4703 | } | 4664 | } |
4704 | error = xfs_bmap_add_extent(ip, lastx, &cur, &got, | 4665 | error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, |
4705 | firstblock, flist, &tmp_logflags, | 4666 | firstblock, flist, &tmp_logflags, |
4706 | whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); | 4667 | whichfork); |
4707 | logflags |= tmp_logflags; | 4668 | logflags |= tmp_logflags; |
4708 | if (error) | 4669 | if (error) |
4709 | goto error0; | 4670 | goto error0; |
4710 | lastx = ifp->if_lastex; | ||
4711 | ep = xfs_iext_get_ext(ifp, lastx); | 4671 | ep = xfs_iext_get_ext(ifp, lastx); |
4712 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4672 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4713 | xfs_bmbt_get_all(ep, &got); | 4673 | xfs_bmbt_get_all(ep, &got); |
@@ -4803,13 +4763,12 @@ xfs_bmapi( | |||
4803 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) | 4763 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) |
4804 | ? XFS_EXT_NORM | 4764 | ? XFS_EXT_NORM |
4805 | : XFS_EXT_UNWRITTEN; | 4765 | : XFS_EXT_UNWRITTEN; |
4806 | error = xfs_bmap_add_extent(ip, lastx, &cur, mval, | 4766 | error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, |
4807 | firstblock, flist, &tmp_logflags, | 4767 | firstblock, flist, &tmp_logflags, |
4808 | whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); | 4768 | whichfork); |
4809 | logflags |= tmp_logflags; | 4769 | logflags |= tmp_logflags; |
4810 | if (error) | 4770 | if (error) |
4811 | goto error0; | 4771 | goto error0; |
4812 | lastx = ifp->if_lastex; | ||
4813 | ep = xfs_iext_get_ext(ifp, lastx); | 4772 | ep = xfs_iext_get_ext(ifp, lastx); |
4814 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); | 4773 | nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); |
4815 | xfs_bmbt_get_all(ep, &got); | 4774 | xfs_bmbt_get_all(ep, &got); |
@@ -4868,14 +4827,14 @@ xfs_bmapi( | |||
4868 | /* | 4827 | /* |
4869 | * Else go on to the next record. | 4828 | * Else go on to the next record. |
4870 | */ | 4829 | */ |
4871 | ep = xfs_iext_get_ext(ifp, ++lastx); | ||
4872 | prev = got; | 4830 | prev = got; |
4873 | if (lastx >= nextents) | 4831 | if (++lastx < nextents) { |
4874 | eof = 1; | 4832 | ep = xfs_iext_get_ext(ifp, lastx); |
4875 | else | ||
4876 | xfs_bmbt_get_all(ep, &got); | 4833 | xfs_bmbt_get_all(ep, &got); |
4834 | } else { | ||
4835 | eof = 1; | ||
4836 | } | ||
4877 | } | 4837 | } |
4878 | ifp->if_lastex = lastx; | ||
4879 | *nmap = n; | 4838 | *nmap = n; |
4880 | /* | 4839 | /* |
4881 | * Transform from btree to extents, give it cur. | 4840 | * Transform from btree to extents, give it cur. |
@@ -4984,7 +4943,6 @@ xfs_bmapi_single( | |||
4984 | ASSERT(!isnullstartblock(got.br_startblock)); | 4943 | ASSERT(!isnullstartblock(got.br_startblock)); |
4985 | ASSERT(bno < got.br_startoff + got.br_blockcount); | 4944 | ASSERT(bno < got.br_startoff + got.br_blockcount); |
4986 | *fsb = got.br_startblock + (bno - got.br_startoff); | 4945 | *fsb = got.br_startblock + (bno - got.br_startoff); |
4987 | ifp->if_lastex = lastx; | ||
4988 | return 0; | 4946 | return 0; |
4989 | } | 4947 | } |
4990 | 4948 | ||
@@ -5026,7 +4984,6 @@ xfs_bunmapi( | |||
5026 | int tmp_logflags; /* partial logging flags */ | 4984 | int tmp_logflags; /* partial logging flags */ |
5027 | int wasdel; /* was a delayed alloc extent */ | 4985 | int wasdel; /* was a delayed alloc extent */ |
5028 | int whichfork; /* data or attribute fork */ | 4986 | int whichfork; /* data or attribute fork */ |
5029 | int rsvd; /* OK to allocate reserved blocks */ | ||
5030 | xfs_fsblock_t sum; | 4987 | xfs_fsblock_t sum; |
5031 | 4988 | ||
5032 | trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); | 4989 | trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); |
@@ -5044,7 +5001,7 @@ xfs_bunmapi( | |||
5044 | mp = ip->i_mount; | 5001 | mp = ip->i_mount; |
5045 | if (XFS_FORCED_SHUTDOWN(mp)) | 5002 | if (XFS_FORCED_SHUTDOWN(mp)) |
5046 | return XFS_ERROR(EIO); | 5003 | return XFS_ERROR(EIO); |
5047 | rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0; | 5004 | |
5048 | ASSERT(len > 0); | 5005 | ASSERT(len > 0); |
5049 | ASSERT(nexts >= 0); | 5006 | ASSERT(nexts >= 0); |
5050 | ASSERT(ifp->if_ext_max == | 5007 | ASSERT(ifp->if_ext_max == |
@@ -5160,9 +5117,9 @@ xfs_bunmapi( | |||
5160 | del.br_blockcount = mod; | 5117 | del.br_blockcount = mod; |
5161 | } | 5118 | } |
5162 | del.br_state = XFS_EXT_UNWRITTEN; | 5119 | del.br_state = XFS_EXT_UNWRITTEN; |
5163 | error = xfs_bmap_add_extent(ip, lastx, &cur, &del, | 5120 | error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, |
5164 | firstblock, flist, &logflags, | 5121 | firstblock, flist, &logflags, |
5165 | XFS_DATA_FORK, 0); | 5122 | XFS_DATA_FORK); |
5166 | if (error) | 5123 | if (error) |
5167 | goto error0; | 5124 | goto error0; |
5168 | goto nodelete; | 5125 | goto nodelete; |
@@ -5188,9 +5145,12 @@ xfs_bunmapi( | |||
5188 | */ | 5145 | */ |
5189 | ASSERT(bno >= del.br_blockcount); | 5146 | ASSERT(bno >= del.br_blockcount); |
5190 | bno -= del.br_blockcount; | 5147 | bno -= del.br_blockcount; |
5191 | if (bno < got.br_startoff) { | 5148 | if (got.br_startoff > bno) { |
5192 | if (--lastx >= 0) | 5149 | if (--lastx >= 0) { |
5193 | xfs_bmbt_get_all(--ep, &got); | 5150 | ep = xfs_iext_get_ext(ifp, |
5151 | lastx); | ||
5152 | xfs_bmbt_get_all(ep, &got); | ||
5153 | } | ||
5194 | } | 5154 | } |
5195 | continue; | 5155 | continue; |
5196 | } else if (del.br_state == XFS_EXT_UNWRITTEN) { | 5156 | } else if (del.br_state == XFS_EXT_UNWRITTEN) { |
@@ -5214,18 +5174,19 @@ xfs_bunmapi( | |||
5214 | prev.br_startoff = start; | 5174 | prev.br_startoff = start; |
5215 | } | 5175 | } |
5216 | prev.br_state = XFS_EXT_UNWRITTEN; | 5176 | prev.br_state = XFS_EXT_UNWRITTEN; |
5217 | error = xfs_bmap_add_extent(ip, lastx - 1, &cur, | 5177 | lastx--; |
5178 | error = xfs_bmap_add_extent(ip, &lastx, &cur, | ||
5218 | &prev, firstblock, flist, &logflags, | 5179 | &prev, firstblock, flist, &logflags, |
5219 | XFS_DATA_FORK, 0); | 5180 | XFS_DATA_FORK); |
5220 | if (error) | 5181 | if (error) |
5221 | goto error0; | 5182 | goto error0; |
5222 | goto nodelete; | 5183 | goto nodelete; |
5223 | } else { | 5184 | } else { |
5224 | ASSERT(del.br_state == XFS_EXT_NORM); | 5185 | ASSERT(del.br_state == XFS_EXT_NORM); |
5225 | del.br_state = XFS_EXT_UNWRITTEN; | 5186 | del.br_state = XFS_EXT_UNWRITTEN; |
5226 | error = xfs_bmap_add_extent(ip, lastx, &cur, | 5187 | error = xfs_bmap_add_extent(ip, &lastx, &cur, |
5227 | &del, firstblock, flist, &logflags, | 5188 | &del, firstblock, flist, &logflags, |
5228 | XFS_DATA_FORK, 0); | 5189 | XFS_DATA_FORK); |
5229 | if (error) | 5190 | if (error) |
5230 | goto error0; | 5191 | goto error0; |
5231 | goto nodelete; | 5192 | goto nodelete; |
@@ -5240,13 +5201,13 @@ xfs_bunmapi( | |||
5240 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); | 5201 | rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); |
5241 | do_div(rtexts, mp->m_sb.sb_rextsize); | 5202 | do_div(rtexts, mp->m_sb.sb_rextsize); |
5242 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, | 5203 | xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, |
5243 | (int64_t)rtexts, rsvd); | 5204 | (int64_t)rtexts, 0); |
5244 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5205 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5245 | ip, -((long)del.br_blockcount), 0, | 5206 | ip, -((long)del.br_blockcount), 0, |
5246 | XFS_QMOPT_RES_RTBLKS); | 5207 | XFS_QMOPT_RES_RTBLKS); |
5247 | } else { | 5208 | } else { |
5248 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | 5209 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
5249 | (int64_t)del.br_blockcount, rsvd); | 5210 | (int64_t)del.br_blockcount, 0); |
5250 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5211 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5251 | ip, -((long)del.br_blockcount), 0, | 5212 | ip, -((long)del.br_blockcount), 0, |
5252 | XFS_QMOPT_RES_REGBLKS); | 5213 | XFS_QMOPT_RES_REGBLKS); |
@@ -5277,31 +5238,29 @@ xfs_bunmapi( | |||
5277 | error = XFS_ERROR(ENOSPC); | 5238 | error = XFS_ERROR(ENOSPC); |
5278 | goto error0; | 5239 | goto error0; |
5279 | } | 5240 | } |
5280 | error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del, | 5241 | error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, |
5281 | &tmp_logflags, whichfork, rsvd); | 5242 | &tmp_logflags, whichfork); |
5282 | logflags |= tmp_logflags; | 5243 | logflags |= tmp_logflags; |
5283 | if (error) | 5244 | if (error) |
5284 | goto error0; | 5245 | goto error0; |
5285 | bno = del.br_startoff - 1; | 5246 | bno = del.br_startoff - 1; |
5286 | nodelete: | 5247 | nodelete: |
5287 | lastx = ifp->if_lastex; | ||
5288 | /* | 5248 | /* |
5289 | * If not done go on to the next (previous) record. | 5249 | * If not done go on to the next (previous) record. |
5290 | * Reset ep in case the extents array was re-alloced. | ||
5291 | */ | 5250 | */ |
5292 | ep = xfs_iext_get_ext(ifp, lastx); | ||
5293 | if (bno != (xfs_fileoff_t)-1 && bno >= start) { | 5251 | if (bno != (xfs_fileoff_t)-1 && bno >= start) { |
5294 | if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) || | 5252 | if (lastx >= 0) { |
5295 | xfs_bmbt_get_startoff(ep) > bno) { | 5253 | ep = xfs_iext_get_ext(ifp, lastx); |
5296 | if (--lastx >= 0) | 5254 | if (xfs_bmbt_get_startoff(ep) > bno) { |
5297 | ep = xfs_iext_get_ext(ifp, lastx); | 5255 | if (--lastx >= 0) |
5298 | } | 5256 | ep = xfs_iext_get_ext(ifp, |
5299 | if (lastx >= 0) | 5257 | lastx); |
5258 | } | ||
5300 | xfs_bmbt_get_all(ep, &got); | 5259 | xfs_bmbt_get_all(ep, &got); |
5260 | } | ||
5301 | extno++; | 5261 | extno++; |
5302 | } | 5262 | } |
5303 | } | 5263 | } |
5304 | ifp->if_lastex = lastx; | ||
5305 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; | 5264 | *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0; |
5306 | ASSERT(ifp->if_ext_max == | 5265 | ASSERT(ifp->if_ext_max == |
5307 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); | 5266 | XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t)); |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 3651191daea1..c62234bde053 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -69,7 +69,6 @@ typedef struct xfs_bmap_free | |||
69 | #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ | 69 | #define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */ |
70 | #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ | 70 | #define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */ |
71 | #define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ | 71 | #define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */ |
72 | #define XFS_BMAPI_RSVBLOCKS 0x020 /* OK to alloc. reserved data blocks */ | ||
73 | #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ | 72 | #define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */ |
74 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ | 73 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ |
75 | /* combine contig. space */ | 74 | /* combine contig. space */ |
@@ -87,7 +86,6 @@ typedef struct xfs_bmap_free | |||
87 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ | 86 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ |
88 | { XFS_BMAPI_METADATA, "METADATA" }, \ | 87 | { XFS_BMAPI_METADATA, "METADATA" }, \ |
89 | { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ | 88 | { XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \ |
90 | { XFS_BMAPI_RSVBLOCKS, "RSVBLOCKS" }, \ | ||
91 | { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ | 89 | { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ |
92 | { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ | 90 | { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ |
93 | { XFS_BMAPI_CONTIG, "CONTIG" }, \ | 91 | { XFS_BMAPI_CONTIG, "CONTIG" }, \ |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c8e3349c287c..a098a20ca63e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -920,7 +920,6 @@ xfs_iread_extents( | |||
920 | /* | 920 | /* |
921 | * We know that the size is valid (it's checked in iformat_btree) | 921 | * We know that the size is valid (it's checked in iformat_btree) |
922 | */ | 922 | */ |
923 | ifp->if_lastex = NULLEXTNUM; | ||
924 | ifp->if_bytes = ifp->if_real_bytes = 0; | 923 | ifp->if_bytes = ifp->if_real_bytes = 0; |
925 | ifp->if_flags |= XFS_IFEXTENTS; | 924 | ifp->if_flags |= XFS_IFEXTENTS; |
926 | xfs_iext_add(ifp, 0, nextents); | 925 | xfs_iext_add(ifp, 0, nextents); |
@@ -2558,12 +2557,9 @@ xfs_iflush_fork( | |||
2558 | case XFS_DINODE_FMT_EXTENTS: | 2557 | case XFS_DINODE_FMT_EXTENTS: |
2559 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || | 2558 | ASSERT((ifp->if_flags & XFS_IFEXTENTS) || |
2560 | !(iip->ili_format.ilf_fields & extflag[whichfork])); | 2559 | !(iip->ili_format.ilf_fields & extflag[whichfork])); |
2561 | ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) || | ||
2562 | (ifp->if_bytes == 0)); | ||
2563 | ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) || | ||
2564 | (ifp->if_bytes > 0)); | ||
2565 | if ((iip->ili_format.ilf_fields & extflag[whichfork]) && | 2560 | if ((iip->ili_format.ilf_fields & extflag[whichfork]) && |
2566 | (ifp->if_bytes > 0)) { | 2561 | (ifp->if_bytes > 0)) { |
2562 | ASSERT(xfs_iext_get_ext(ifp, 0)); | ||
2567 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); | 2563 | ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); |
2568 | (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, | 2564 | (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, |
2569 | whichfork); | 2565 | whichfork); |
@@ -3112,6 +3108,8 @@ xfs_iext_get_ext( | |||
3112 | xfs_extnum_t idx) /* index of target extent */ | 3108 | xfs_extnum_t idx) /* index of target extent */ |
3113 | { | 3109 | { |
3114 | ASSERT(idx >= 0); | 3110 | ASSERT(idx >= 0); |
3111 | ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); | ||
3112 | |||
3115 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { | 3113 | if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { |
3116 | return ifp->if_u1.if_ext_irec->er_extbuf; | 3114 | return ifp->if_u1.if_ext_irec->er_extbuf; |
3117 | } else if (ifp->if_flags & XFS_IFEXTIREC) { | 3115 | } else if (ifp->if_flags & XFS_IFEXTIREC) { |
@@ -3191,7 +3189,6 @@ xfs_iext_add( | |||
3191 | } | 3189 | } |
3192 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; | 3190 | ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; |
3193 | ifp->if_real_bytes = 0; | 3191 | ifp->if_real_bytes = 0; |
3194 | ifp->if_lastex = nextents + ext_diff; | ||
3195 | } | 3192 | } |
3196 | /* | 3193 | /* |
3197 | * Otherwise use a linear (direct) extent list. | 3194 | * Otherwise use a linear (direct) extent list. |
@@ -3886,8 +3883,10 @@ xfs_iext_idx_to_irec( | |||
3886 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ | 3883 | xfs_extnum_t page_idx = *idxp; /* extent index in target list */ |
3887 | 3884 | ||
3888 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | 3885 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); |
3889 | ASSERT(page_idx >= 0 && page_idx <= | 3886 | ASSERT(page_idx >= 0); |
3890 | ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)); | 3887 | ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t)); |
3888 | ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc); | ||
3889 | |||
3891 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | 3890 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; |
3892 | erp_idx = 0; | 3891 | erp_idx = 0; |
3893 | low = 0; | 3892 | low = 0; |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ff4e2a30227d..3ae6d58e5473 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -67,7 +67,6 @@ typedef struct xfs_ifork { | |||
67 | short if_broot_bytes; /* bytes allocated for root */ | 67 | short if_broot_bytes; /* bytes allocated for root */ |
68 | unsigned char if_flags; /* per-fork flags */ | 68 | unsigned char if_flags; /* per-fork flags */ |
69 | unsigned char if_ext_max; /* max # of extent records */ | 69 | unsigned char if_ext_max; /* max # of extent records */ |
70 | xfs_extnum_t if_lastex; /* last if_extents used */ | ||
71 | union { | 70 | union { |
72 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ | 71 | xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ |
73 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ | 72 | xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7d56e88a3f0e..c7755d5a5fbe 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "xfs_mount.h" | 29 | #include "xfs_mount.h" |
30 | #include "xfs_error.h" | 30 | #include "xfs_error.h" |
31 | #include "xfs_alloc.h" | 31 | #include "xfs_alloc.h" |
32 | #include "xfs_discard.h" | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * Perform initial CIL structure initialisation. If the CIL is not | 35 | * Perform initial CIL structure initialisation. If the CIL is not |
@@ -361,18 +362,28 @@ xlog_cil_committed( | |||
361 | int abort) | 362 | int abort) |
362 | { | 363 | { |
363 | struct xfs_cil_ctx *ctx = args; | 364 | struct xfs_cil_ctx *ctx = args; |
365 | struct xfs_mount *mp = ctx->cil->xc_log->l_mp; | ||
364 | 366 | ||
365 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, | 367 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
366 | ctx->start_lsn, abort); | 368 | ctx->start_lsn, abort); |
367 | 369 | ||
368 | xfs_alloc_busy_sort(&ctx->busy_extents); | 370 | xfs_alloc_busy_sort(&ctx->busy_extents); |
369 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents); | 371 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, |
372 | (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); | ||
370 | 373 | ||
371 | spin_lock(&ctx->cil->xc_cil_lock); | 374 | spin_lock(&ctx->cil->xc_cil_lock); |
372 | list_del(&ctx->committing); | 375 | list_del(&ctx->committing); |
373 | spin_unlock(&ctx->cil->xc_cil_lock); | 376 | spin_unlock(&ctx->cil->xc_cil_lock); |
374 | 377 | ||
375 | xlog_cil_free_logvec(ctx->lv_chain); | 378 | xlog_cil_free_logvec(ctx->lv_chain); |
379 | |||
380 | if (!list_empty(&ctx->busy_extents)) { | ||
381 | ASSERT(mp->m_flags & XFS_MOUNT_DISCARD); | ||
382 | |||
383 | xfs_discard_extents(mp, &ctx->busy_extents); | ||
384 | xfs_alloc_busy_clear(mp, &ctx->busy_extents, false); | ||
385 | } | ||
386 | |||
376 | kmem_free(ctx); | 387 | kmem_free(ctx); |
377 | } | 388 | } |
378 | 389 | ||
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 19af0ab0d0c6..3d68bb267c5f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -224,6 +224,7 @@ typedef struct xfs_mount { | |||
224 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem | 224 | #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem |
225 | operations, typically for | 225 | operations, typically for |
226 | disk errors in metadata */ | 226 | disk errors in metadata */ |
227 | #define XFS_MOUNT_DISCARD (1ULL << 5) /* discard unused blocks */ | ||
227 | #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to | 228 | #define XFS_MOUNT_RETERR (1ULL << 6) /* return alignment errors to |
228 | user */ | 229 | user */ |
229 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment | 230 | #define XFS_MOUNT_NOALIGN (1ULL << 7) /* turn off stripe alignment |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index d1f24858ccc4..7c7bc2b786bd 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -609,7 +609,7 @@ xfs_trans_free( | |||
609 | struct xfs_trans *tp) | 609 | struct xfs_trans *tp) |
610 | { | 610 | { |
611 | xfs_alloc_busy_sort(&tp->t_busy); | 611 | xfs_alloc_busy_sort(&tp->t_busy); |
612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy); | 612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy, false); |
613 | 613 | ||
614 | atomic_dec(&tp->t_mountp->m_active_trans); | 614 | atomic_dec(&tp->t_mountp->m_active_trans); |
615 | xfs_trans_free_dqinfo(tp); | 615 | xfs_trans_free_dqinfo(tp); |