aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/mm/mmu.c1
-rw-r--r--arch/m68k/mm/init.c1
-rw-r--r--arch/sparc/kernel/sparc_ksyms.c2
-rw-r--r--arch/sparc64/mm/init.c1
-rw-r--r--drivers/edac/pasemi_edac.c1
-rw-r--r--fs/ext3/inode.c14
-rw-r--r--fs/ext4/acl.c12
-rw-r--r--fs/ext4/balloc.c33
-rw-r--r--fs/ext4/bitmap.c2
-rw-r--r--fs/ext4/dir.c4
-rw-r--r--fs/ext4/ext4.h (renamed from include/linux/ext4_fs.h)18
-rw-r--r--fs/ext4/ext4_extents.h (renamed from include/linux/ext4_fs_extents.h)8
-rw-r--r--fs/ext4/ext4_i.h (renamed from include/linux/ext4_fs_i.h)8
-rw-r--r--fs/ext4/ext4_jbd2.c14
-rw-r--r--fs/ext4/ext4_jbd2.h (renamed from include/linux/ext4_jbd2.h)10
-rw-r--r--fs/ext4/ext4_sb.h (renamed from include/linux/ext4_fs_sb.h)8
-rw-r--r--fs/ext4/extents.c354
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/ialloc.c44
-rw-r--r--fs/ext4/inode.c57
-rw-r--r--fs/ext4/ioctl.c16
-rw-r--r--fs/ext4/mballoc.c437
-rw-r--r--fs/ext4/mballoc.h304
-rw-r--r--fs/ext4/migrate.c43
-rw-r--r--fs/ext4/namei.c44
-rw-r--r--fs/ext4/resize.c83
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/ext4/xattr.h7
-rw-r--r--fs/ext4/xattr_security.c4
-rw-r--r--fs/ext4/xattr_trusted.c4
-rw-r--r--fs/ext4/xattr_user.c4
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c38
-rw-r--r--fs/jbd2/revoke.c165
-rw-r--r--fs/jbd2/transaction.c41
-rw-r--r--fs/xfs/Kconfig13
-rw-r--r--fs/xfs/linux-2.6/mrlock.h60
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c75
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h24
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c27
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h5
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c53
-rw-r--r--fs/xfs/xfs_attr.c93
-rw-r--r--fs/xfs/xfs_attr.h6
-rw-r--r--fs/xfs/xfs_bmap.c1
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/xfs_iget.c140
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c83
-rw-r--r--fs/xfs/xfs_mount.h7
-rw-r--r--fs/xfs/xfs_rename.c252
-rw-r--r--fs/xfs/xfs_trans_inode.c12
-rw-r--r--fs/xfs/xfs_utils.c45
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vfsops.c1
-rw-r--r--fs/xfs/xfs_vnodeops.c274
-rw-r--r--fs/xfs/xfs_vnodeops.h8
79 files changed, 1483 insertions, 1776 deletions
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index d41a75ed3dce..2d6d682c206a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -35,6 +35,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
35 * zero-initialized data and COW. 35 * zero-initialized data and COW.
36 */ 36 */
37struct page *empty_zero_page; 37struct page *empty_zero_page;
38EXPORT_SYMBOL(empty_zero_page);
38 39
39/* 40/*
40 * The pmd table for the upper-most set of pages. 41 * The pmd table for the upper-most set of pages.
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index a2bb01f59642..d8fb9c5303cc 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -69,6 +69,7 @@ void __init m68k_setup_node(int node)
69 */ 69 */
70 70
71void *empty_zero_page; 71void *empty_zero_page;
72EXPORT_SYMBOL(empty_zero_page);
72 73
73void show_mem(void) 74void show_mem(void)
74{ 75{
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index 0bcf98a7ef38..aa8ee06cf488 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -282,3 +282,5 @@ EXPORT_SYMBOL(do_BUG);
282 282
283/* Sun Power Management Idle Handler */ 283/* Sun Power Management Idle Handler */
284EXPORT_SYMBOL(pm_idle); 284EXPORT_SYMBOL(pm_idle);
285
286EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 8c2b50e8abc6..4cad0b32b0af 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -160,6 +160,7 @@ extern unsigned int sparc_ramdisk_image;
160extern unsigned int sparc_ramdisk_size; 160extern unsigned int sparc_ramdisk_size;
161 161
162struct page *mem_map_zero __read_mostly; 162struct page *mem_map_zero __read_mostly;
163EXPORT_SYMBOL(mem_map_zero);
163 164
164unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly; 165unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
165 166
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index 3fd65a563848..8e6b91bd2e99 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -26,6 +26,7 @@
26#include <linux/pci.h> 26#include <linux/pci.h>
27#include <linux/pci_ids.h> 27#include <linux/pci_ids.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/edac.h>
29#include "edac_core.h" 30#include "edac_core.h"
30 31
31#define MODULE_NAME "pasemi_edac" 32#define MODULE_NAME "pasemi_edac"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cc47b76091bf..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
1261 new_i_size = pos + copied; 1261 new_i_size = pos + copied;
1262 if (new_i_size > EXT3_I(inode)->i_disksize) 1262 if (new_i_size > EXT3_I(inode)->i_disksize)
1263 EXT3_I(inode)->i_disksize = new_i_size; 1263 EXT3_I(inode)->i_disksize = new_i_size;
1264 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1264 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1265 page, fsdata); 1265 page, fsdata);
1266 if (copied < 0) 1266 copied = ret2;
1267 ret = copied; 1267 if (ret2 < 0)
1268 ret = ret2;
1268 } 1269 }
1269 ret2 = ext3_journal_stop(handle); 1270 ret2 = ext3_journal_stop(handle);
1270 if (!ret) 1271 if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
1289 if (new_i_size > EXT3_I(inode)->i_disksize) 1290 if (new_i_size > EXT3_I(inode)->i_disksize)
1290 EXT3_I(inode)->i_disksize = new_i_size; 1291 EXT3_I(inode)->i_disksize = new_i_size;
1291 1292
1292 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1293 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1293 page, fsdata); 1294 page, fsdata);
1294 if (copied < 0) 1295 copied = ret2;
1295 ret = copied; 1296 if (ret2 < 0)
1297 ret = ret2;
1296 1298
1297 ret2 = ext3_journal_stop(handle); 1299 ret2 = ext3_journal_stop(handle);
1298 if (!ret) 1300 if (!ret)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15#include "acl.h" 15#include "acl.h"
16 16
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext4_acl_size(acl->a_count); 92 *size = ext4_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
94 sizeof(ext4_acl_entry), GFP_KERNEL); 94 sizeof(ext4_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext4_xattr_get(inode, name_index, "", value, retval); 193 retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..da994374ec3b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/quotaops.h> 18#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
22 20#include "ext4.h"
21#include "ext4_jbd2.h"
23#include "group.h" 22#include "group.h"
23
24/* 24/*
25 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
26 */ 26 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 ext4_group_t block_group, struct ext4_group_desc *gdp) 49 ext4_group_t block_group, struct ext4_group_desc *gdp)
50{ 50{
51 unsigned long start;
52 int bit, bit_max; 51 int bit, bit_max;
53 unsigned free_blocks, group_blocks; 52 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb); 53 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
59 /* If checksum is bad mark all blocks used to prevent allocation 58 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */ 59 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 60 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__, 61 ext4_error(sb, __func__,
63 "Checksum bad for group %lu\n", block_group); 62 "Checksum bad for group %lu\n", block_group);
64 gdp->bg_free_blocks_count = 0; 63 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0; 64 gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
106 free_blocks = group_blocks - bit_max; 105 free_blocks = group_blocks - bit_max;
107 106
108 if (bh) { 107 if (bh) {
108 ext4_fsblk_t start;
109
109 for (bit = 0; bit < bit_max; bit++) 110 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data); 111 ext4_set_bit(bit, bh->b_data);
111 112
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + 113 start = ext4_group_first_block_no(sb, block_group);
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114 114
115 /* Set bits for block and inode bitmaps, and inode table */ 115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
235 return 1; 235 return 1;
236 236
237err_out: 237err_out:
238 ext4_error(sb, __FUNCTION__, 238 ext4_error(sb, __func__,
239 "Invalid block bitmap - " 239 "Invalid block bitmap - "
240 "block_group = %d, block = %llu", 240 "block_group = %d, block = %llu",
241 block_group, bitmap_blk); 241 block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
264 bitmap_blk = ext4_block_bitmap(sb, desc); 264 bitmap_blk = ext4_block_bitmap(sb, desc);
265 bh = sb_getblk(sb, bitmap_blk); 265 bh = sb_getblk(sb, bitmap_blk);
266 if (unlikely(!bh)) { 266 if (unlikely(!bh)) {
267 ext4_error(sb, __FUNCTION__, 267 ext4_error(sb, __func__,
268 "Cannot read block bitmap - " 268 "Cannot read block bitmap - "
269 "block_group = %d, block_bitmap = %llu", 269 "block_group = %d, block_bitmap = %llu",
270 (int)block_group, (unsigned long long)bitmap_blk); 270 (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
281 } 281 }
282 if (bh_submit_read(bh) < 0) { 282 if (bh_submit_read(bh) < 0) {
283 put_bh(bh); 283 put_bh(bh);
284 ext4_error(sb, __FUNCTION__, 284 ext4_error(sb, __func__,
285 "Cannot read block bitmap - " 285 "Cannot read block bitmap - "
286 "block_group = %d, block_bitmap = %llu", 286 "block_group = %d, block_bitmap = %llu",
287 (int)block_group, (unsigned long long)bitmap_blk); 287 (int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ restart:
360 BUG(); 360 BUG();
361} 361}
362#define rsv_window_dump(root, verbose) \ 362#define rsv_window_dump(root, verbose) \
363 __rsv_window_dump((root), (verbose), __FUNCTION__) 363 __rsv_window_dump((root), (verbose), __func__)
364#else 364#else
365#define rsv_window_dump(root, verbose) do {} while (0) 365#define rsv_window_dump(root, verbose) do {} while (0)
366#endif 366#endif
@@ -740,7 +740,7 @@ do_more:
740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
741 bit + i, bitmap_bh->b_data)) { 741 bit + i, bitmap_bh->b_data)) {
742 jbd_unlock_bh_state(bitmap_bh); 742 jbd_unlock_bh_state(bitmap_bh);
743 ext4_error(sb, __FUNCTION__, 743 ext4_error(sb, __func__,
744 "bit already cleared for block %llu", 744 "bit already cleared for block %llu",
745 (ext4_fsblk_t)(block + i)); 745 (ext4_fsblk_t)(block + i));
746 jbd_lock_bh_state(bitmap_bh); 746 jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
752 jbd_unlock_bh_state(bitmap_bh); 752 jbd_unlock_bh_state(bitmap_bh);
753 753
754 spin_lock(sb_bgl_lock(sbi, block_group)); 754 spin_lock(sb_bgl_lock(sbi, block_group));
755 desc->bg_free_blocks_count = 755 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
756 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
757 group_freed);
758 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 756 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
759 spin_unlock(sb_bgl_lock(sbi, block_group)); 757 spin_unlock(sb_bgl_lock(sbi, block_group));
760 percpu_counter_add(&sbi->s_freeblocks_counter, count); 758 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1798,7 +1796,7 @@ allocated:
1798 if (ext4_test_bit(grp_alloc_blk+i, 1796 if (ext4_test_bit(grp_alloc_blk+i,
1799 bh2jh(bitmap_bh)->b_committed_data)) { 1797 bh2jh(bitmap_bh)->b_committed_data)) {
1800 printk("%s: block was unexpectedly set in " 1798 printk("%s: block was unexpectedly set in "
1801 "b_committed_data\n", __FUNCTION__); 1799 "b_committed_data\n", __func__);
1802 } 1800 }
1803 } 1801 }
1804 } 1802 }
@@ -1823,8 +1821,7 @@ allocated:
1823 spin_lock(sb_bgl_lock(sbi, group_no)); 1821 spin_lock(sb_bgl_lock(sbi, group_no));
1824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1822 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1823 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1826 gdp->bg_free_blocks_count = 1824 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1827 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1828 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1825 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 spin_unlock(sb_bgl_lock(sbi, group_no)); 1826 spin_unlock(sb_bgl_lock(sbi, group_no));
1830 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1827 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13 13
14#ifdef EXT4FS_DEBUG 14#ifdef EXT4FS_DEBUG
15 15
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h"
30 30
31static unsigned char ext4_filetype_table[] = { 31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .ioctl = ext4_ioctl, /* BKL held */ 45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
diff --git a/include/linux/ext4_fs.h b/fs/ext4/ext4.h
index 250032548597..8158083f7ac0 100644
--- a/include/linux/ext4_fs.h
+++ b/fs/ext4/ext4.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/include/linux/ext4_fs.h 2 * ext4.h
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
@@ -13,14 +13,13 @@
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */ 14 */
15 15
16#ifndef _LINUX_EXT4_FS_H 16#ifndef _EXT4_H
17#define _LINUX_EXT4_FS_H 17#define _EXT4_H
18 18
19#include <linux/types.h> 19#include <linux/types.h>
20#include <linux/blkdev.h> 20#include <linux/blkdev.h>
21#include <linux/magic.h> 21#include <linux/magic.h>
22 22#include "ext4_i.h"
23#include <linux/ext4_fs_i.h>
24 23
25/* 24/*
26 * The second extended filesystem constants/structures 25 * The second extended filesystem constants/structures
@@ -176,8 +175,7 @@ struct ext4_group_desc
176#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ 175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
177 176
178#ifdef __KERNEL__ 177#ifdef __KERNEL__
179#include <linux/ext4_fs_i.h> 178#include "ext4_sb.h"
180#include <linux/ext4_fs_sb.h>
181#endif 179#endif
182/* 180/*
183 * Macro-instructions used to manage group descriptors 181 * Macro-instructions used to manage group descriptors
@@ -231,6 +229,7 @@ struct ext4_group_desc
231#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ 229#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
232#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ 230#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
233#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ 231#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
232#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
234#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ 233#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
235 234
236#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ 235#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
@@ -1049,8 +1048,7 @@ extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
1049 struct address_space *mapping, loff_t from); 1048 struct address_space *mapping, loff_t from);
1050 1049
1051/* ioctl.c */ 1050/* ioctl.c */
1052extern int ext4_ioctl (struct inode *, struct file *, unsigned int, 1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1053 unsigned long);
1054extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); 1052extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
1055 1053
1056/* migrate.c */ 1054/* migrate.c */
@@ -1204,4 +1202,4 @@ extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1204 int extend_disksize); 1202 int extend_disksize);
1205#endif /* __KERNEL__ */ 1203#endif /* __KERNEL__ */
1206 1204
1207#endif /* _LINUX_EXT4_FS_H */ 1205#endif /* _EXT4_H */
diff --git a/include/linux/ext4_fs_extents.h b/fs/ext4/ext4_extents.h
index 1285c583b2d8..75333b595fab 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -16,10 +16,10 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 */ 17 */
18 18
19#ifndef _LINUX_EXT4_EXTENTS 19#ifndef _EXT4_EXTENTS
20#define _LINUX_EXT4_EXTENTS 20#define _EXT4_EXTENTS
21 21
22#include <linux/ext4_fs.h> 22#include "ext4.h"
23 23
24/* 24/*
25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks 25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
@@ -228,5 +228,5 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, 228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
229 ext4_lblk_t *, ext4_fsblk_t *); 229 ext4_lblk_t *, ext4_fsblk_t *);
230extern void ext4_ext_drop_refs(struct ext4_ext_path *); 230extern void ext4_ext_drop_refs(struct ext4_ext_path *);
231#endif /* _LINUX_EXT4_EXTENTS */ 231#endif /* _EXT4_EXTENTS */
232 232
diff --git a/include/linux/ext4_fs_i.h b/fs/ext4/ext4_i.h
index d5508d3cf290..26a4ae255d79 100644
--- a/include/linux/ext4_fs_i.h
+++ b/fs/ext4/ext4_i.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/include/linux/ext4_fs_i.h 2 * ext4_i.h
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
@@ -13,8 +13,8 @@
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */ 14 */
15 15
16#ifndef _LINUX_EXT4_FS_I 16#ifndef _EXT4_I
17#define _LINUX_EXT4_FS_I 17#define _EXT4_I
18 18
19#include <linux/rwsem.h> 19#include <linux/rwsem.h>
20#include <linux/rbtree.h> 20#include <linux/rbtree.h>
@@ -164,4 +164,4 @@ struct ext4_inode_info {
164 spinlock_t i_prealloc_lock; 164 spinlock_t i_prealloc_lock;
165}; 165};
166 166
167#endif /* _LINUX_EXT4_FS_I */ 167#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
2 * Interface between ext4 and JBD 2 * Interface between ext4 and JBD
3 */ 3 */
4 4
5#include <linux/ext4_jbd2.h> 5#include "ext4_jbd2.h"
6 6
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = jbd2_journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext4_journal_abort_handle(where, __func__, bh, handle, err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = jbd2_journal_get_write_access(handle, bh); 19 int err = jbd2_journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext4_journal_abort_handle(where, __func__, bh, handle, err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = jbd2_journal_forget(handle, bh); 28 int err = jbd2_journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext4_journal_abort_handle(where, __func__, bh, handle, err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 37 int err = jbd2_journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext4_journal_abort_handle(where, __func__, bh, handle, err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
45{ 45{
46 int err = jbd2_journal_get_create_access(handle, bh); 46 int err = jbd2_journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext4_journal_abort_handle(where, __func__, bh, handle, err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 55 int err = jbd2_journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext4_journal_abort_handle(where, __func__, bh, handle, err);
58 return err; 58 return err;
59} 59}
diff --git a/include/linux/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 38c71d3c8dbf..9255a7d28b24 100644
--- a/include/linux/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/include/linux/ext4_jbd2.h 2 * ext4_jbd2.h
3 * 3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 * 5 *
@@ -12,12 +12,12 @@
12 * Ext4-specific journaling extensions. 12 * Ext4-specific journaling extensions.
13 */ 13 */
14 14
15#ifndef _LINUX_EXT4_JBD2_H 15#ifndef _EXT4_JBD2_H
16#define _LINUX_EXT4_JBD2_H 16#define _EXT4_JBD2_H
17 17
18#include <linux/fs.h> 18#include <linux/fs.h>
19#include <linux/jbd2.h> 19#include <linux/jbd2.h>
20#include <linux/ext4_fs.h> 20#include "ext4.h"
21 21
22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) 22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
23 23
@@ -228,4 +228,4 @@ static inline int ext4_should_writeback_data(struct inode *inode)
228 return 0; 228 return 0;
229} 229}
230 230
231#endif /* _LINUX_EXT4_JBD2_H */ 231#endif /* _EXT4_JBD2_H */
diff --git a/include/linux/ext4_fs_sb.h b/fs/ext4/ext4_sb.h
index abaae2c8cccf..5802e69f2191 100644
--- a/include/linux/ext4_fs_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * linux/include/linux/ext4_fs_sb.h 2 * ext4_sb.h
3 * 3 *
4 * Copyright (C) 1992, 1993, 1994, 1995 4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr) 5 * Remy Card (card@masi.ibp.fr)
@@ -13,8 +13,8 @@
13 * Copyright (C) 1991, 1992 Linus Torvalds 13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */ 14 */
15 15
16#ifndef _LINUX_EXT4_FS_SB 16#ifndef _EXT4_SB
17#define _LINUX_EXT4_FS_SB 17#define _EXT4_SB
18 18
19#ifdef __KERNEL__ 19#ifdef __KERNEL__
20#include <linux/timer.h> 20#include <linux/timer.h>
@@ -145,4 +145,4 @@ struct ext4_sb_info {
145 struct ext4_locality_group *s_locality_groups; 145 struct ext4_locality_group *s_locality_groups;
146}; 146};
147 147
148#endif /* _LINUX_EXT4_FS_SB */ 148#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h>
36#include <linux/jbd2.h> 35#include <linux/jbd2.h>
37#include <linux/highuid.h> 36#include <linux/highuid.h>
38#include <linux/pagemap.h> 37#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
42#include <linux/falloc.h> 41#include <linux/falloc.h>
43#include <linux/ext4_fs_extents.h>
44#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45 45
46 46
47/* 47/*
@@ -308,7 +308,7 @@ corrupted:
308} 308}
309 309
310#define ext4_ext_check_header(inode, eh, depth) \ 310#define ext4_ext_check_header(inode, eh, depth) \
311 __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) 311 __ext4_ext_check_header(__func__, inode, eh, depth)
312 312
313#ifdef EXT_DEBUG 313#ifdef EXT_DEBUG
314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
614 614
615 ix->ei_block = cpu_to_le32(logical); 615 ix->ei_block = cpu_to_le32(logical);
616 ext4_idx_store_pblock(ix, ptr); 616 ext4_idx_store_pblock(ix, ptr);
617 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 617 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
618 618
619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
620 > le16_to_cpu(curp->p_hdr->eh_max)); 620 > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
736 } 736 }
737 if (m) { 737 if (m) {
738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
739 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 739 le16_add_cpu(&neh->eh_entries, m);
740 } 740 }
741 741
742 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
753 err = ext4_ext_get_access(handle, inode, path + depth); 753 err = ext4_ext_get_access(handle, inode, path + depth);
754 if (err) 754 if (err)
755 goto cleanup; 755 goto cleanup;
756 path[depth].p_hdr->eh_entries = 756 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
757 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758 err = ext4_ext_dirty(handle, inode, path + depth); 757 err = ext4_ext_dirty(handle, inode, path + depth);
759 if (err) 758 if (err)
760 goto cleanup; 759 goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
817 if (m) { 816 if (m) {
818 memmove(++fidx, path[i].p_idx - m, 817 memmove(++fidx, path[i].p_idx - m,
819 sizeof(struct ext4_extent_idx) * m); 818 sizeof(struct ext4_extent_idx) * m);
820 neh->eh_entries = 819 le16_add_cpu(&neh->eh_entries, m);
821 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
822 } 820 }
823 set_buffer_uptodate(bh); 821 set_buffer_uptodate(bh);
824 unlock_buffer(bh); 822 unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
834 err = ext4_ext_get_access(handle, inode, path + i); 832 err = ext4_ext_get_access(handle, inode, path + i);
835 if (err) 833 if (err)
836 goto cleanup; 834 goto cleanup;
837 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 835 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
838 err = ext4_ext_dirty(handle, inode, path + i); 836 err = ext4_ext_dirty(handle, inode, path + i);
839 if (err) 837 if (err)
840 goto cleanup; 838 goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
1369 * sizeof(struct ext4_extent); 1367 * sizeof(struct ext4_extent);
1370 memmove(ex + 1, ex + 2, len); 1368 memmove(ex + 1, ex + 2, len);
1371 } 1369 }
1372 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); 1370 le16_add_cpu(&eh->eh_entries, -1);
1373 merge_done = 1; 1371 merge_done = 1;
1374 WARN_ON(eh->eh_entries == 0); 1372 WARN_ON(eh->eh_entries == 0);
1375 if (!eh->eh_entries) 1373 if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
1560 path[depth].p_ext = nearex; 1558 path[depth].p_ext = nearex;
1561 } 1559 }
1562 1560
1563 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1561 le16_add_cpu(&eh->eh_entries, 1);
1564 nearex = path[depth].p_ext; 1562 nearex = path[depth].p_ext;
1565 nearex->ee_block = newext->ee_block; 1563 nearex->ee_block = newext->ee_block;
1566 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1564 ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1699 err = ext4_ext_get_access(handle, inode, path); 1697 err = ext4_ext_get_access(handle, inode, path);
1700 if (err) 1698 if (err)
1701 return err; 1699 return err;
1702 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1700 le16_add_cpu(&path->p_hdr->eh_entries, -1);
1703 err = ext4_ext_dirty(handle, inode, path); 1701 err = ext4_ext_dirty(handle, inode, path);
1704 if (err) 1702 if (err)
1705 return err; 1703 return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1902 if (num == 0) { 1900 if (num == 0) {
1903 /* this extent is removed; mark slot entirely unused */ 1901 /* this extent is removed; mark slot entirely unused */
1904 ext4_ext_store_pblock(ex, 0); 1902 ext4_ext_store_pblock(ex, 0);
1905 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1903 le16_add_cpu(&eh->eh_entries, -1);
1906 } 1904 }
1907 1905
1908 ex->ee_block = cpu_to_le32(block); 1906 ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
1979 * We start scanning from right side, freeing all the blocks 1977 * We start scanning from right side, freeing all the blocks
1980 * after i_size and walking into the tree depth-wise. 1978 * after i_size and walking into the tree depth-wise.
1981 */ 1979 */
1982 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1980 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
1983 if (path == NULL) { 1981 if (path == NULL) {
1984 ext4_journal_stop(handle); 1982 ext4_journal_stop(handle);
1985 return -ENOMEM; 1983 return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
2138#endif 2136#endif
2139} 2137}
2140 2138
2139static void bi_complete(struct bio *bio, int error)
2140{
2141 complete((struct completion *)bio->bi_private);
2142}
2143
2144/* FIXME!! we need to try to merge to left or right after zero-out */
2145static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2146{
2147 int ret = -EIO;
2148 struct bio *bio;
2149 int blkbits, blocksize;
2150 sector_t ee_pblock;
2151 struct completion event;
2152 unsigned int ee_len, len, done, offset;
2153
2154
2155 blkbits = inode->i_blkbits;
2156 blocksize = inode->i_sb->s_blocksize;
2157 ee_len = ext4_ext_get_actual_len(ex);
2158 ee_pblock = ext_pblock(ex);
2159
2160 /* convert ee_pblock to 512 byte sectors */
2161 ee_pblock = ee_pblock << (blkbits - 9);
2162
2163 while (ee_len > 0) {
2164
2165 if (ee_len > BIO_MAX_PAGES)
2166 len = BIO_MAX_PAGES;
2167 else
2168 len = ee_len;
2169
2170 bio = bio_alloc(GFP_NOIO, len);
2171 if (!bio)
2172 return -ENOMEM;
2173 bio->bi_sector = ee_pblock;
2174 bio->bi_bdev = inode->i_sb->s_bdev;
2175
2176 done = 0;
2177 offset = 0;
2178 while (done < len) {
2179 ret = bio_add_page(bio, ZERO_PAGE(0),
2180 blocksize, offset);
2181 if (ret != blocksize) {
2182 /*
2183 * We can't add any more pages because of
2184 * hardware limitations. Start a new bio.
2185 */
2186 break;
2187 }
2188 done++;
2189 offset += blocksize;
2190 if (offset >= PAGE_CACHE_SIZE)
2191 offset = 0;
2192 }
2193
2194 init_completion(&event);
2195 bio->bi_private = &event;
2196 bio->bi_end_io = bi_complete;
2197 submit_bio(WRITE, bio);
2198 wait_for_completion(&event);
2199
2200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2201 ret = 0;
2202 else {
2203 ret = -EIO;
2204 break;
2205 }
2206 bio_put(bio);
2207 ee_len -= done;
2208 ee_pblock += done << (blkbits - 9);
2209 }
2210 return ret;
2211}
2212
2213#define EXT4_EXT_ZERO_LEN 7
2214
2141/* 2215/*
2142 * This function is called by ext4_ext_get_blocks() if someone tries to write 2216 * This function is called by ext4_ext_get_blocks() if someone tries to write
2143 * to an uninitialized extent. It may result in splitting the uninitialized 2217 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2154 ext4_lblk_t iblock, 2228 ext4_lblk_t iblock,
2155 unsigned long max_blocks) 2229 unsigned long max_blocks)
2156{ 2230{
2157 struct ext4_extent *ex, newex; 2231 struct ext4_extent *ex, newex, orig_ex;
2158 struct ext4_extent *ex1 = NULL; 2232 struct ext4_extent *ex1 = NULL;
2159 struct ext4_extent *ex2 = NULL; 2233 struct ext4_extent *ex2 = NULL;
2160 struct ext4_extent *ex3 = NULL; 2234 struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2173 allocated = ee_len - (iblock - ee_block); 2247 allocated = ee_len - (iblock - ee_block);
2174 newblock = iblock - ee_block + ext_pblock(ex); 2248 newblock = iblock - ee_block + ext_pblock(ex);
2175 ex2 = ex; 2249 ex2 = ex;
2250 orig_ex.ee_block = ex->ee_block;
2251 orig_ex.ee_len = cpu_to_le16(ee_len);
2252 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2176 2253
2177 err = ext4_ext_get_access(handle, inode, path + depth); 2254 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err) 2255 if (err)
2179 goto out; 2256 goto out;
2257 /* If the extent is at most 2*EXT4_EXT_ZERO_LEN blocks long, zero it out directly */
2258 if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2259 err = ext4_ext_zeroout(inode, &orig_ex);
2260 if (err)
2261 goto fix_extent_len;
2262 /* update the extent length and mark as initialized */
2263 ex->ee_block = orig_ex.ee_block;
2264 ex->ee_len = orig_ex.ee_len;
2265 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2266 ext4_ext_dirty(handle, inode, path + depth);
2267 /* zeroed the full extent */
2268 return allocated;
2269 }
2180 2270
2181 /* ex1: ee_block to iblock - 1 : uninitialized */ 2271 /* ex1: ee_block to iblock - 1 : uninitialized */
2182 if (iblock > ee_block) { 2272 if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2195 /* ex3: to ee_block + ee_len : uninitialised */ 2285 /* ex3: to ee_block + ee_len : uninitialised */
2196 if (allocated > max_blocks) { 2286 if (allocated > max_blocks) {
2197 unsigned int newdepth; 2287 unsigned int newdepth;
2288 /* If at most EXT4_EXT_ZERO_LEN blocks remain from iblock on, zero out directly */
2289 if (allocated <= EXT4_EXT_ZERO_LEN) {
2290 /* Mark first half uninitialized.
2291 * Mark second half initialized and zero out the
2292 * initialized extent
2293 */
2294 ex->ee_block = orig_ex.ee_block;
2295 ex->ee_len = cpu_to_le16(ee_len - allocated);
2296 ext4_ext_mark_uninitialized(ex);
2297 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2298 ext4_ext_dirty(handle, inode, path + depth);
2299
2300 ex3 = &newex;
2301 ex3->ee_block = cpu_to_le32(iblock);
2302 ext4_ext_store_pblock(ex3, newblock);
2303 ex3->ee_len = cpu_to_le16(allocated);
2304 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2305 if (err == -ENOSPC) {
2306 err = ext4_ext_zeroout(inode, &orig_ex);
2307 if (err)
2308 goto fix_extent_len;
2309 ex->ee_block = orig_ex.ee_block;
2310 ex->ee_len = orig_ex.ee_len;
2311 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2312 ext4_ext_dirty(handle, inode, path + depth);
2313 /* zeroed the full extent */
2314 return allocated;
2315
2316 } else if (err)
2317 goto fix_extent_len;
2318
2319 /*
2320 * We need to zero out the second half because
2321 * an fallocate request can update file size and
2322 * converting the second half to initialized extent
2323 * implies that we can leak some junk data to user
2324 * space.
2325 */
2326 err = ext4_ext_zeroout(inode, ex3);
2327 if (err) {
2328 /*
2329 * We should actually mark the
2330 * second half as uninit and return error
2331 * Insert would have changed the extent
2332 */
2333 depth = ext_depth(inode);
2334 ext4_ext_drop_refs(path);
2335 path = ext4_ext_find_extent(inode,
2336 iblock, path);
2337 if (IS_ERR(path)) {
2338 err = PTR_ERR(path);
2339 return err;
2340 }
2341 ex = path[depth].p_ext;
2342 err = ext4_ext_get_access(handle, inode,
2343 path + depth);
2344 if (err)
2345 return err;
2346 ext4_ext_mark_uninitialized(ex);
2347 ext4_ext_dirty(handle, inode, path + depth);
2348 return err;
2349 }
2350
2351 /* zeroed the second half */
2352 return allocated;
2353 }
2198 ex3 = &newex; 2354 ex3 = &newex;
2199 ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2355 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2200 ext4_ext_store_pblock(ex3, newblock + max_blocks); 2356 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2201 ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2357 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2202 ext4_ext_mark_uninitialized(ex3); 2358 ext4_ext_mark_uninitialized(ex3);
2203 err = ext4_ext_insert_extent(handle, inode, path, ex3); 2359 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2204 if (err) 2360 if (err == -ENOSPC) {
2205 goto out; 2361 err = ext4_ext_zeroout(inode, &orig_ex);
2362 if (err)
2363 goto fix_extent_len;
2364 /* update the extent length and mark as initialized */
2365 ex->ee_block = orig_ex.ee_block;
2366 ex->ee_len = orig_ex.ee_len;
2367 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2368 ext4_ext_dirty(handle, inode, path + depth);
2369 /* zeroed the full extent */
2370 return allocated;
2371
2372 } else if (err)
2373 goto fix_extent_len;
2206 /* 2374 /*
2207 * The depth, and hence eh & ex might change 2375 * The depth, and hence eh & ex might change
2208 * as part of the insert above. 2376 * as part of the insert above.
2209 */ 2377 */
2210 newdepth = ext_depth(inode); 2378 newdepth = ext_depth(inode);
2379 /*
2380 * update the extent length after successful insert of the
2381 * split extent
2382 */
2383 orig_ex.ee_len = cpu_to_le16(ee_len -
2384 ext4_ext_get_actual_len(ex3));
2211 if (newdepth != depth) { 2385 if (newdepth != depth) {
2212 depth = newdepth; 2386 depth = newdepth;
2213 ext4_ext_drop_refs(path); 2387 ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2226 goto out; 2400 goto out;
2227 } 2401 }
2228 allocated = max_blocks; 2402 allocated = max_blocks;
2403
2404 /* If the extent is at most EXT4_EXT_ZERO_LEN blocks long and we are
2405 * trying to insert an extent in the middle, zero it out directly;
2406 * otherwise give the extent a chance to merge to the left
2407 */
2408 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2409 iblock != ee_block) {
2410 err = ext4_ext_zeroout(inode, &orig_ex);
2411 if (err)
2412 goto fix_extent_len;
2413 /* update the extent length and mark as initialized */
2414 ex->ee_block = orig_ex.ee_block;
2415 ex->ee_len = orig_ex.ee_len;
2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417 ext4_ext_dirty(handle, inode, path + depth);
2418 /* zero out the first half */
2419 return allocated;
2420 }
2229 } 2421 }
2230 /* 2422 /*
2231 * If there was a change of depth as part of the 2423 * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2282 goto out; 2474 goto out;
2283insert: 2475insert:
2284 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2476 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2477 if (err == -ENOSPC) {
2478 err = ext4_ext_zeroout(inode, &orig_ex);
2479 if (err)
2480 goto fix_extent_len;
2481 /* update the extent length and mark as initialized */
2482 ex->ee_block = orig_ex.ee_block;
2483 ex->ee_len = orig_ex.ee_len;
2484 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2485 ext4_ext_dirty(handle, inode, path + depth);
2486 /* zero out the first half */
2487 return allocated;
2488 } else if (err)
2489 goto fix_extent_len;
2285out: 2490out:
2286 return err ? err : allocated; 2491 return err ? err : allocated;
2492
2493fix_extent_len:
2494 ex->ee_block = orig_ex.ee_block;
2495 ex->ee_len = orig_ex.ee_len;
2496 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2497 ext4_ext_mark_uninitialized(ex);
2498 ext4_ext_dirty(handle, inode, path + depth);
2499 return err;
2287} 2500}
2288 2501
2289/* 2502/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2393 } 2606 }
2394 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2607 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2395 goto out; 2608 goto out;
2396 if (!create) 2609 if (!create) {
2610 /*
2611 * We have blocks reserved already. We
2612 * return allocated blocks so that delalloc
2613 * won't do block reservation for us. But
2614 * the buffer head will be unmapped so that
2615 * a read from the block returns 0s.
2616 */
2617 if (allocated > max_blocks)
2618 allocated = max_blocks;
2619 /* mark the buffer unwritten */
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2397 goto out2; 2621 goto out2;
2622 }
2398 2623
2399 ret = ext4_ext_convert_to_initialized(handle, inode, 2624 ret = ext4_ext_convert_to_initialized(handle, inode,
2400 path, iblock, 2625 path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
2584 ext4_orphan_del(handle, inode); 2809 ext4_orphan_del(handle, inode);
2585 2810
2586 up_write(&EXT4_I(inode)->i_data_sem); 2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode);
2587 ext4_journal_stop(handle); 2814 ext4_journal_stop(handle);
2588} 2815}
2589 2816
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2608 return needed; 2835 return needed;
2609} 2836}
2610 2837
2838static void ext4_falloc_update_inode(struct inode *inode,
2839 int mode, loff_t new_size, int update_ctime)
2840{
2841 struct timespec now;
2842
2843 if (update_ctime) {
2844 now = current_fs_time(inode->i_sb);
2845 if (!timespec_equal(&inode->i_ctime, &now))
2846 inode->i_ctime = now;
2847 }
2848 /*
2849 * Update only when preallocation was requested beyond
2850 * the file size.
2851 */
2852 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2853 new_size > i_size_read(inode)) {
2854 i_size_write(inode, new_size);
2855 EXT4_I(inode)->i_disksize = new_size;
2856 }
2857
2858}
2859
2611/* 2860/*
2612 * preallocate space for a file. This implements ext4's fallocate inode 2861 * preallocate space for a file. This implements ext4's fallocate inode
2613 * operation, which gets called from sys_fallocate system call. 2862 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2619{ 2868{
2620 handle_t *handle; 2869 handle_t *handle;
2621 ext4_lblk_t block; 2870 ext4_lblk_t block;
2871 loff_t new_size;
2622 unsigned long max_blocks; 2872 unsigned long max_blocks;
2623 ext4_fsblk_t nblocks = 0;
2624 int ret = 0; 2873 int ret = 0;
2625 int ret2 = 0; 2874 int ret2 = 0;
2626 int retries = 0; 2875 int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2639 return -ENODEV; 2888 return -ENODEV;
2640 2889
2641 block = offset >> blkbits; 2890 block = offset >> blkbits;
2891 /*
2892 * We can't just convert len to max_blocks: with blocksize = 4096,
2893 * offset = 3072 and len = 2048 the range spans two blocks, not one.
2894 */
2642 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2895 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2643 - block; 2896 - block;
2644
2645 /* 2897 /*
2646 * credits to insert 1 extent into extent tree + buffers to be able to 2898 * credits to insert 1 extent into extent tree + buffers to be able to
2647 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2899 * modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2657,7 +2909,6 @@ retry:
2657 ret = PTR_ERR(handle); 2909 ret = PTR_ERR(handle);
2658 break; 2910 break;
2659 } 2911 }
2660
2661 ret = ext4_get_blocks_wrap(handle, inode, block, 2912 ret = ext4_get_blocks_wrap(handle, inode, block,
2662 max_blocks, &map_bh, 2913 max_blocks, &map_bh,
2663 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2914 EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
2673 ret2 = ext4_journal_stop(handle); 2924 ret2 = ext4_journal_stop(handle);
2674 break; 2925 break;
2675 } 2926 }
2676 if (ret > 0) { 2927 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
2677 /* check wrap through sign-bit/zero here */ 2928 blkbits) >> blkbits))
2678 if ((block + ret) < 0 || (block + ret) < block) { 2929 new_size = offset + len;
2679 ret = -EIO; 2930 else
2680 ext4_mark_inode_dirty(handle, inode); 2931 new_size = (block + ret) << blkbits;
2681 ret2 = ext4_journal_stop(handle);
2682 break;
2683 }
2684 if (buffer_new(&map_bh) && ((block + ret) >
2685 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2686 >> blkbits)))
2687 nblocks = nblocks + ret;
2688 }
2689
2690 /* Update ctime if new blocks get allocated */
2691 if (nblocks) {
2692 struct timespec now;
2693
2694 now = current_fs_time(inode->i_sb);
2695 if (!timespec_equal(&inode->i_ctime, &now))
2696 inode->i_ctime = now;
2697 }
2698 2932
2933 ext4_falloc_update_inode(inode, mode, new_size,
2934 buffer_new(&map_bh));
2699 ext4_mark_inode_dirty(handle, inode); 2935 ext4_mark_inode_dirty(handle, inode);
2700 ret2 = ext4_journal_stop(handle); 2936 ret2 = ext4_journal_stop(handle);
2701 if (ret2) 2937 if (ret2)
2702 break; 2938 break;
2703 } 2939 }
2704 2940 if (ret == -ENOSPC &&
2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2941 ext4_should_retry_alloc(inode->i_sb, &retries)) {
2942 ret = 0;
2706 goto retry; 2943 goto retry;
2707
2708 /*
2709 * Time to update the file size.
2710 * Update only when preallocation was requested beyond the file size.
2711 */
2712 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2713 (offset + len) > i_size_read(inode)) {
2714 if (ret > 0) {
2715 /*
2716 * if no error, we assume preallocation succeeded
2717 * completely
2718 */
2719 i_size_write(inode, offset + len);
2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2721 } else if (ret < 0 && nblocks) {
2722 /* Handle partial allocation scenario */
2723 loff_t newsize;
2724
2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2728 }
2729 } 2944 }
2730
2731 mutex_unlock(&inode->i_mutex); 2945 mutex_unlock(&inode->i_mutex);
2732 return ret > 0 ? ret2 : ret; 2946 return ret > 0 ? ret2 : ret;
2733} 2947}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h> 24#include "ext4.h"
25#include <linux/ext4_jbd2.h> 25#include "ext4_jbd2.h"
26#include "xattr.h" 26#include "xattr.h"
27#include "acl.h" 27#include "acl.h"
28 28
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
129 .write = do_sync_write, 129 .write = do_sync_write,
130 .aio_read = generic_file_aio_read, 130 .aio_read = generic_file_aio_read,
131 .aio_write = ext4_file_write, 131 .aio_write = ext4_file_write,
132 .ioctl = ext4_ioctl, 132 .unlocked_ioctl = ext4_ioctl,
133#ifdef CONFIG_COMPAT 133#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 134 .compat_ioctl = ext4_compat_ioctl,
135#endif 135#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/ext4_fs.h> 30#include "ext4.h"
31#include <linux/ext4_jbd2.h> 31#include "ext4_jbd2.h"
32 32
33/* 33/*
34 * akpm: A new design for ext4_sync_file(). 34 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h> 13#include <linux/jbd2.h>
14#include <linux/ext4_fs.h>
15#include <linux/cryptohash.h> 14#include <linux/cryptohash.h>
15#include "ext4.h"
16 16
17#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
18 18
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/stat.h> 18#include <linux/stat.h>
21#include <linux/string.h> 19#include <linux/string.h>
22#include <linux/quotaops.h> 20#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
25#include <linux/bitops.h> 23#include <linux/bitops.h>
26#include <linux/blkdev.h> 24#include <linux/blkdev.h>
27#include <asm/byteorder.h> 25#include <asm/byteorder.h>
28 26#include "ext4.h"
27#include "ext4_jbd2.h"
29#include "xattr.h" 28#include "xattr.h"
30#include "acl.h" 29#include "acl.h"
31#include "group.h" 30#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
75 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %lu\n",
79 block_group); 78 block_group);
80 gdp->bg_free_blocks_count = 0; 79 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0; 80 gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
223 222
224 if (gdp) { 223 if (gdp) {
225 spin_lock(sb_bgl_lock(sbi, block_group)); 224 spin_lock(sb_bgl_lock(sbi, block_group));
226 gdp->bg_free_inodes_count = cpu_to_le16( 225 le16_add_cpu(&gdp->bg_free_inodes_count, 1);
227 le16_to_cpu(gdp->bg_free_inodes_count) + 1);
228 if (is_directory) 226 if (is_directory)
229 gdp->bg_used_dirs_count = cpu_to_le16( 227 le16_add_cpu(&gdp->bg_used_dirs_count, -1);
230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi, 228 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp); 229 block_group, gdp);
233 spin_unlock(sb_bgl_lock(sbi, block_group)); 230 spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -588,7 +585,7 @@ got:
588 ino++; 585 ino++;
589 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 586 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
590 ino > EXT4_INODES_PER_GROUP(sb)) { 587 ino > EXT4_INODES_PER_GROUP(sb)) {
591 ext4_error(sb, __FUNCTION__, 588 ext4_error(sb, __func__,
592 "reserved inode or inode > inodes count - " 589 "reserved inode or inode > inodes count - "
593 "block_group = %lu, inode=%lu", group, 590 "block_group = %lu, inode=%lu", group,
594 ino + group * EXT4_INODES_PER_GROUP(sb)); 591 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
664 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); 661 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
665 } 662 }
666 663
667 gdp->bg_free_inodes_count = 664 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
668 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
669 if (S_ISDIR(mode)) { 665 if (S_ISDIR(mode)) {
670 gdp->bg_used_dirs_count = 666 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
671 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
672 } 667 }
673 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 668 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
674 spin_unlock(sb_bgl_lock(sbi, group)); 669 spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
744 if (err) 739 if (err)
745 goto fail_free_drop; 740 goto fail_free_drop;
746 741
747 err = ext4_mark_inode_dirty(handle, inode);
748 if (err) {
749 ext4_std_error(sb, err);
750 goto fail_free_drop;
751 }
752 if (test_opt(sb, EXTENTS)) { 742 if (test_opt(sb, EXTENTS)) {
753 /* set extent flag only for directory and file */ 743 /* set extent flag only for diretory, file and normal symlink*/
754 if (S_ISDIR(mode) || S_ISREG(mode)) { 744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
756 ext4_ext_tree_init(handle, inode); 746 ext4_ext_tree_init(handle, inode);
757 err = ext4_update_incompat_feature(handle, sb, 747 err = ext4_update_incompat_feature(handle, sb,
758 EXT4_FEATURE_INCOMPAT_EXTENTS); 748 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err) 749 if (err)
760 goto fail; 750 goto fail_free_drop;
761 } 751 }
762 } 752 }
763 753
754 err = ext4_mark_inode_dirty(handle, inode);
755 if (err) {
756 ext4_std_error(sb, err);
757 goto fail_free_drop;
758 }
759
764 ext4_debug("allocating inode %lu\n", inode->i_ino); 760 ext4_debug("allocating inode %lu\n", inode->i_ino);
765 goto really_out; 761 goto really_out;
766fail: 762fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
796 792
797 /* Error cases - e2fsck has already cleaned up for us */ 793 /* Error cases - e2fsck has already cleaned up for us */
798 if (ino > max_ino) { 794 if (ino > max_ino) {
799 ext4_warning(sb, __FUNCTION__, 795 ext4_warning(sb, __func__,
800 "bad orphan ino %lu! e2fsck was run?", ino); 796 "bad orphan ino %lu! e2fsck was run?", ino);
801 goto error; 797 goto error;
802 } 798 }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
805 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 801 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
806 bitmap_bh = read_inode_bitmap(sb, block_group); 802 bitmap_bh = read_inode_bitmap(sb, block_group);
807 if (!bitmap_bh) { 803 if (!bitmap_bh) {
808 ext4_warning(sb, __FUNCTION__, 804 ext4_warning(sb, __func__,
809 "inode bitmap error for orphan %lu", ino); 805 "inode bitmap error for orphan %lu", ino);
810 goto error; 806 goto error;
811 } 807 }
@@ -830,7 +826,7 @@ iget_failed:
830 err = PTR_ERR(inode); 826 err = PTR_ERR(inode);
831 inode = NULL; 827 inode = NULL;
832bad_orphan: 828bad_orphan:
833 ext4_warning(sb, __FUNCTION__, 829 ext4_warning(sb, __func__,
834 "bad orphan inode %lu! e2fsck was run?", ino); 830 "bad orphan inode %lu! e2fsck was run?", ino);
835 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 831 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
836 bit, (unsigned long long)bitmap_bh->b_blocknr, 832 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 28#include <linux/jbd2.h>
30#include <linux/highuid.h> 29#include <linux/highuid.h>
31#include <linux/pagemap.h> 30#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/uio.h> 36#include <linux/uio.h>
38#include <linux/bio.h> 37#include <linux/bio.h>
38#include "ext4_jbd2.h"
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
93 BUFFER_TRACE(bh, "call ext4_journal_revoke"); 93 BUFFER_TRACE(bh, "call ext4_journal_revoke");
94 err = ext4_journal_revoke(handle, blocknr, bh); 94 err = ext4_journal_revoke(handle, blocknr, bh);
95 if (err) 95 if (err)
96 ext4_abort(inode->i_sb, __FUNCTION__, 96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err); 97 "error %d when attempting revoke", err);
98 BUFFER_TRACE(bh, "exit"); 98 BUFFER_TRACE(bh, "exit");
99 return err; 99 return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
985 } else { 985 } else {
986 retval = ext4_get_blocks_handle(handle, inode, block, 986 retval = ext4_get_blocks_handle(handle, inode, block,
987 max_blocks, bh, create, extend_disksize); 987 max_blocks, bh, create, extend_disksize);
988
989 if (retval > 0 && buffer_new(bh)) {
990 /*
991 * We allocated new blocks which will result in
992 * i_data's format changing. Force the migrate
993 * to fail by clearing migrate flags
994 */
995 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
996 ~EXT4_EXT_MIGRATE;
997 }
988 } 998 }
989 up_write((&EXT4_I(inode)->i_data_sem)); 999 up_write((&EXT4_I(inode)->i_data_sem));
990 return retval; 1000 return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1230{ 1240{
1231 int err = jbd2_journal_dirty_data(handle, bh); 1241 int err = jbd2_journal_dirty_data(handle, bh);
1232 if (err) 1242 if (err)
1233 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1243 ext4_journal_abort_handle(__func__, __func__,
1234 bh, handle, err); 1244 bh, handle, err);
1235 return err; 1245 return err;
1236} 1246}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
1301 new_i_size = pos + copied; 1311 new_i_size = pos + copied;
1302 if (new_i_size > EXT4_I(inode)->i_disksize) 1312 if (new_i_size > EXT4_I(inode)->i_disksize)
1303 EXT4_I(inode)->i_disksize = new_i_size; 1313 EXT4_I(inode)->i_disksize = new_i_size;
1304 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1305 page, fsdata); 1315 page, fsdata);
1306 if (copied < 0) 1316 copied = ret2;
1307 ret = copied; 1317 if (ret2 < 0)
1318 ret = ret2;
1308 } 1319 }
1309 ret2 = ext4_journal_stop(handle); 1320 ret2 = ext4_journal_stop(handle);
1310 if (!ret) 1321 if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
1329 if (new_i_size > EXT4_I(inode)->i_disksize) 1340 if (new_i_size > EXT4_I(inode)->i_disksize)
1330 EXT4_I(inode)->i_disksize = new_i_size; 1341 EXT4_I(inode)->i_disksize = new_i_size;
1331 1342
1332 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1333 page, fsdata); 1344 page, fsdata);
1334 if (copied < 0) 1345 copied = ret2;
1335 ret = copied; 1346 if (ret2 < 0)
1347 ret = ret2;
1336 1348
1337 ret2 = ext4_journal_stop(handle); 1349 ret2 = ext4_journal_stop(handle);
1338 if (!ret) 1350 if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
2501static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, 2513static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2502 unsigned long ino, struct ext4_iloc *iloc) 2514 unsigned long ino, struct ext4_iloc *iloc)
2503{ 2515{
2504 unsigned long desc, group_desc;
2505 ext4_group_t block_group; 2516 ext4_group_t block_group;
2506 unsigned long offset; 2517 unsigned long offset;
2507 ext4_fsblk_t block; 2518 ext4_fsblk_t block;
2508 struct buffer_head *bh; 2519 struct ext4_group_desc *gdp;
2509 struct ext4_group_desc * gdp;
2510 2520
2511 if (!ext4_valid_inum(sb, ino)) { 2521 if (!ext4_valid_inum(sb, ino)) {
2512 /* 2522 /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2518 } 2528 }
2519 2529
2520 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 2530 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
2521 if (block_group >= EXT4_SB(sb)->s_groups_count) { 2531 gdp = ext4_get_group_desc(sb, block_group, NULL);
2522 ext4_error(sb,"ext4_get_inode_block","group >= groups count"); 2532 if (!gdp)
2523 return 0; 2533 return 0;
2524 }
2525 smp_rmb();
2526 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2527 desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2528 bh = EXT4_SB(sb)->s_group_desc[group_desc];
2529 if (!bh) {
2530 ext4_error (sb, "ext4_get_inode_block",
2531 "Descriptor not loaded");
2532 return 0;
2533 }
2534 2534
2535 gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
2536 desc * EXT4_DESC_SIZE(sb));
2537 /* 2535 /*
2538 * Figure out the offset within the block group inode table 2536 * Figure out the offset within the block group inode table
2539 */ 2537 */
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
2976 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 2974 if (ext4_inode_blocks_set(handle, raw_inode, ei))
2977 goto out_brelse; 2975 goto out_brelse;
2978 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2976 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2979 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2977 /* clear the migrate flag in the raw_inode */
2978 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
2980 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2979 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2981 cpu_to_le32(EXT4_OS_HURD)) 2980 cpu_to_le32(EXT4_OS_HURD))
2982 raw_inode->i_file_acl_high = 2981 raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3373 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3375 if (mnt_count != 3374 if (mnt_count !=
3376 le16_to_cpu(sbi->s_es->s_mnt_count)) { 3375 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3377 ext4_warning(inode->i_sb, __FUNCTION__, 3376 ext4_warning(inode->i_sb, __func__,
3378 "Unable to expand inode %lu. Delete" 3377 "Unable to expand inode %lu. Delete"
3379 " some EAs or run e2fsck.", 3378 " some EAs or run e2fsck.",
3380 inode->i_ino); 3379 inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
3415 current_handle->h_transaction != handle->h_transaction) { 3414 current_handle->h_transaction != handle->h_transaction) {
3416 /* This task has a transaction open against a different fs */ 3415 /* This task has a transaction open against a different fs */
3417 printk(KERN_EMERG "%s: transactions do not match!\n", 3416 printk(KERN_EMERG "%s: transactions do not match!\n",
3418 __FUNCTION__); 3417 __func__);
3419 } else { 3418 } else {
3420 jbd_debug(5, "marking dirty. outer handle=%p\n", 3419 jbd_debug(5, "marking dirty. outer handle=%p\n",
3421 current_handle); 3420 current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext4_fs.h>
14#include <linux/ext4_jbd2.h>
15#include <linux/time.h> 13#include <linux/time.h>
16#include <linux/compat.h> 14#include <linux/compat.h>
17#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h"
19#include "ext4.h"
20 20
21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22 unsigned long arg)
23{ 22{
23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
277#ifdef CONFIG_COMPAT 277#ifdef CONFIG_COMPAT
278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
279{ 279{
280 struct inode *inode = file->f_path.dentry->d_inode;
281 int ret;
282
283 /* These are just misnamed, they actually get/put from/to user an int */ 280 /* These are just misnamed, they actually get/put from/to user an int */
284 switch (cmd) { 281 switch (cmd) {
285 case EXT4_IOC32_GETFLAGS: 282 case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
319 default: 316 default:
320 return -ENOIOCTLCMD; 317 return -ENOIOCTLCMD;
321 } 318 }
322 lock_kernel(); 319 return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
323 ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
324 unlock_kernel();
325 return ret;
326} 320}
327#endif 321#endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9d57695de746..fbec2ef93797 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2465,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2465 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2466} 2172}
2467 2173
2468static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2469{ 2176{
2470 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2471 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2565,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2565 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2566 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2567 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2568 i--;
2569 goto err_freebuddy; 2275 goto err_freebuddy;
2570 } 2276 }
2571 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2572 if (desc == NULL) { 2278 if (desc == NULL) {
2573 printk(KERN_ERR 2279 printk(KERN_ERR
2574 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2575 goto err_freebuddy; 2282 goto err_freebuddy;
2576 } 2283 }
2577 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2611,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2611 return 0; 2318 return 0;
2612 2319
2613err_freebuddy: 2320err_freebuddy:
2614 while (i >= 0) { 2321 while (i-- > 0)
2615 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2616 i--;
2617 }
2618 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2619err_freemeta: 2324err_freemeta:
2620 while (--i >= 0) 2325 while (i-- > 0)
2621 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2622 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2623err_freesgi: 2328err_freesgi:
@@ -2801,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2801 return 0; 2506 return 0;
2802} 2507}
2803 2508
2804static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2805{ 2511{
2806 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2807 int err; 2513 int err;
@@ -3021,7 +2727,8 @@ void exit_ext4_mballoc(void)
3021 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2727 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3022 * Returns 0 if success or error code 2728 * Returns 0 if success or error code
3023 */ 2729 */
3024static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2730static noinline_for_stack int
2731ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3025 handle_t *handle) 2732 handle_t *handle)
3026{ 2733{
3027 struct buffer_head *bitmap_bh = NULL; 2734 struct buffer_head *bitmap_bh = NULL;
@@ -3070,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3070 in_range(block, ext4_inode_table(sb, gdp), 2777 in_range(block, ext4_inode_table(sb, gdp),
3071 EXT4_SB(sb)->s_itb_per_group)) { 2778 EXT4_SB(sb)->s_itb_per_group)) {
3072 2779
3073 ext4_error(sb, __FUNCTION__, 2780 ext4_error(sb, __func__,
3074 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3075 block); 2782 block);
3076 } 2783 }
@@ -3094,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3094 ac->ac_b_ex.fe_group, 2801 ac->ac_b_ex.fe_group,
3095 gdp)); 2802 gdp));
3096 } 2803 }
3097 gdp->bg_free_blocks_count = 2804 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3098 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3099 - ac->ac_b_ex.fe_len);
3100 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2805 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3101 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2806 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3102 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2807 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3130,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3130 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2835 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3131 else 2836 else
3132 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2837 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3133 mb_debug("#%u: goal %lu blocks for locality group\n", 2838 mb_debug("#%u: goal %u blocks for locality group\n",
3134 current->pid, ac->ac_g_ex.fe_len); 2839 current->pid, ac->ac_g_ex.fe_len);
3135} 2840}
3136 2841
@@ -3138,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 * Normalization means making request better in terms of 2843 * Normalization means making request better in terms of
3139 * size and alignment 2844 * size and alignment
3140 */ 2845 */
3141static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2846static noinline_for_stack void
2847ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3142 struct ext4_allocation_request *ar) 2848 struct ext4_allocation_request *ar)
3143{ 2849{
3144 int bsbits, max; 2850 int bsbits, max;
3145 ext4_lblk_t end; 2851 ext4_lblk_t end;
3146 struct list_head *cur;
3147 loff_t size, orig_size, start_off; 2852 loff_t size, orig_size, start_off;
3148 ext4_lblk_t start, orig_start; 2853 ext4_lblk_t start, orig_start;
3149 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2854 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2855 struct ext4_prealloc_space *pa;
3150 2856
3151 /* do normalize only data requests, metadata requests 2857 /* do normalize only data requests, metadata requests
3152 do not need preallocation */ 2858 do not need preallocation */
@@ -3232,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3232 2938
3233 /* check we don't cross already preallocated blocks */ 2939 /* check we don't cross already preallocated blocks */
3234 rcu_read_lock(); 2940 rcu_read_lock();
3235 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2941 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3236 struct ext4_prealloc_space *pa;
3237 unsigned long pa_end; 2942 unsigned long pa_end;
3238 2943
3239 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3240
3241 if (pa->pa_deleted) 2944 if (pa->pa_deleted)
3242 continue; 2945 continue;
3243 spin_lock(&pa->pa_lock); 2946 spin_lock(&pa->pa_lock);
@@ -3279,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3279 2982
3280 /* XXX: extra loop to check we really don't overlap preallocations */ 2983 /* XXX: extra loop to check we really don't overlap preallocations */
3281 rcu_read_lock(); 2984 rcu_read_lock();
3282 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2985 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3283 struct ext4_prealloc_space *pa;
3284 unsigned long pa_end; 2986 unsigned long pa_end;
3285 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3286 spin_lock(&pa->pa_lock); 2987 spin_lock(&pa->pa_lock);
3287 if (pa->pa_deleted == 0) { 2988 if (pa->pa_deleted == 0) {
3288 pa_end = pa->pa_lstart + pa->pa_len; 2989 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3374,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3374 BUG_ON(pa->pa_free < len); 3075 BUG_ON(pa->pa_free < len);
3375 pa->pa_free -= len; 3076 pa->pa_free -= len;
3376 3077
3377 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3078 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3378} 3079}
3379 3080
3380/* 3081/*
@@ -3404,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3404/* 3105/*
3405 * search goal blocks in preallocated space 3106 * search goal blocks in preallocated space
3406 */ 3107 */
3407static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3108static noinline_for_stack int
3109ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3408{ 3110{
3409 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3111 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3410 struct ext4_locality_group *lg; 3112 struct ext4_locality_group *lg;
3411 struct ext4_prealloc_space *pa; 3113 struct ext4_prealloc_space *pa;
3412 struct list_head *cur;
3413 3114
3414 /* only data can be preallocated */ 3115 /* only data can be preallocated */
3415 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3116 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3417,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3417 3118
3418 /* first, try per-file preallocation */ 3119 /* first, try per-file preallocation */
3419 rcu_read_lock(); 3120 rcu_read_lock();
3420 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3121 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3421 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3422 3122
3423 /* all fields in this condition don't change, 3123 /* all fields in this condition don't change,
3424 * so we can skip locking for them */ 3124 * so we can skip locking for them */
@@ -3450,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3450 return 0; 3150 return 0;
3451 3151
3452 rcu_read_lock(); 3152 rcu_read_lock();
3453 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3153 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3454 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3455 spin_lock(&pa->pa_lock); 3154 spin_lock(&pa->pa_lock);
3456 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3155 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3457 atomic_inc(&pa->pa_count); 3156 atomic_inc(&pa->pa_count);
@@ -3571,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3571/* 3270/*
3572 * creates new preallocated space for given inode 3271 * creates new preallocated space for given inode
3573 */ 3272 */
3574static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3273static noinline_for_stack int
3274ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3575{ 3275{
3576 struct super_block *sb = ac->ac_sb; 3276 struct super_block *sb = ac->ac_sb;
3577 struct ext4_prealloc_space *pa; 3277 struct ext4_prealloc_space *pa;
@@ -3658,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3658/* 3358/*
3659 * creates new preallocated space for locality group inodes belongs to 3359 * creates new preallocated space for locality group inodes belongs to
3660 */ 3360 */
3661static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3361static noinline_for_stack int
3362ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3662{ 3363{
3663 struct super_block *sb = ac->ac_sb; 3364 struct super_block *sb = ac->ac_sb;
3664 struct ext4_locality_group *lg; 3365 struct ext4_locality_group *lg;
@@ -3731,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3731 * the caller MUST hold group/inode locks. 3432 * the caller MUST hold group/inode locks.
3732 * TODO: optimize the case when there are no in-core structures yet 3433 * TODO: optimize the case when there are no in-core structures yet
3733 */ 3434 */
3734static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3435static noinline_for_stack int
3735 struct buffer_head *bitmap_bh, 3436ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3736 struct ext4_prealloc_space *pa) 3437 struct ext4_prealloc_space *pa,
3438 struct ext4_allocation_context *ac)
3737{ 3439{
3738 struct ext4_allocation_context *ac;
3739 struct super_block *sb = e4b->bd_sb; 3440 struct super_block *sb = e4b->bd_sb;
3740 struct ext4_sb_info *sbi = EXT4_SB(sb); 3441 struct ext4_sb_info *sbi = EXT4_SB(sb);
3741 unsigned long end; 3442 unsigned long end;
@@ -3751,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3751 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3452 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3752 end = bit + pa->pa_len; 3453 end = bit + pa->pa_len;
3753 3454
3754 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3755
3756 if (ac) { 3455 if (ac) {
3757 ac->ac_sb = sb; 3456 ac->ac_sb = sb;
3758 ac->ac_inode = pa->pa_inode; 3457 ac->ac_inode = pa->pa_inode;
@@ -3789,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3789 pa, (unsigned long) pa->pa_lstart, 3488 pa, (unsigned long) pa->pa_lstart,
3790 (unsigned long) pa->pa_pstart, 3489 (unsigned long) pa->pa_pstart,
3791 (unsigned long) pa->pa_len); 3490 (unsigned long) pa->pa_len);
3792 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3491 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3793 free, pa->pa_free); 3492 free, pa->pa_free);
3794 /* 3493 /*
3795 * pa is already deleted so we use the value obtained 3494 * pa is already deleted so we use the value obtained
@@ -3797,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 */ 3496 */
3798 } 3497 }
3799 atomic_add(free, &sbi->s_mb_discarded); 3498 atomic_add(free, &sbi->s_mb_discarded);
3800 if (ac)
3801 kmem_cache_free(ext4_ac_cachep, ac);
3802 3499
3803 return err; 3500 return err;
3804} 3501}
3805 3502
3806static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3503static noinline_for_stack int
3807 struct ext4_prealloc_space *pa) 3504ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3505 struct ext4_prealloc_space *pa,
3506 struct ext4_allocation_context *ac)
3808{ 3507{
3809 struct ext4_allocation_context *ac;
3810 struct super_block *sb = e4b->bd_sb; 3508 struct super_block *sb = e4b->bd_sb;
3811 ext4_group_t group; 3509 ext4_group_t group;
3812 ext4_grpblk_t bit; 3510 ext4_grpblk_t bit;
3813 3511
3814 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3815
3816 if (ac) 3512 if (ac)
3817 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3513 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3818 3514
@@ -3830,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3830 ac->ac_b_ex.fe_len = pa->pa_len; 3526 ac->ac_b_ex.fe_len = pa->pa_len;
3831 ac->ac_b_ex.fe_logical = 0; 3527 ac->ac_b_ex.fe_logical = 0;
3832 ext4_mb_store_history(ac); 3528 ext4_mb_store_history(ac);
3833 kmem_cache_free(ext4_ac_cachep, ac);
3834 } 3529 }
3835 3530
3836 return 0; 3531 return 0;
@@ -3845,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3845 * - how many do we discard 3540 * - how many do we discard
3846 * 1) how many requested 3541 * 1) how many requested
3847 */ 3542 */
3848static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3543static noinline_for_stack int
3544ext4_mb_discard_group_preallocations(struct super_block *sb,
3849 ext4_group_t group, int needed) 3545 ext4_group_t group, int needed)
3850{ 3546{
3851 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3547 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3852 struct buffer_head *bitmap_bh = NULL; 3548 struct buffer_head *bitmap_bh = NULL;
3853 struct ext4_prealloc_space *pa, *tmp; 3549 struct ext4_prealloc_space *pa, *tmp;
3550 struct ext4_allocation_context *ac;
3854 struct list_head list; 3551 struct list_head list;
3855 struct ext4_buddy e4b; 3552 struct ext4_buddy e4b;
3856 int err; 3553 int err;
@@ -3878,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3878 grp = ext4_get_group_info(sb, group); 3575 grp = ext4_get_group_info(sb, group);
3879 INIT_LIST_HEAD(&list); 3576 INIT_LIST_HEAD(&list);
3880 3577
3578 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3881repeat: 3579repeat:
3882 ext4_lock_group(sb, group); 3580 ext4_lock_group(sb, group);
3883 list_for_each_entry_safe(pa, tmp, 3581 list_for_each_entry_safe(pa, tmp,
@@ -3932,9 +3630,9 @@ repeat:
3932 spin_unlock(pa->pa_obj_lock); 3630 spin_unlock(pa->pa_obj_lock);
3933 3631
3934 if (pa->pa_linear) 3632 if (pa->pa_linear)
3935 ext4_mb_release_group_pa(&e4b, pa); 3633 ext4_mb_release_group_pa(&e4b, pa, ac);
3936 else 3634 else
3937 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3635 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3938 3636
3939 list_del(&pa->u.pa_tmp_list); 3637 list_del(&pa->u.pa_tmp_list);
3940 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3638 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3942,6 +3640,8 @@ repeat:
3942 3640
3943out: 3641out:
3944 ext4_unlock_group(sb, group); 3642 ext4_unlock_group(sb, group);
3643 if (ac)
3644 kmem_cache_free(ext4_ac_cachep, ac);
3945 ext4_mb_release_desc(&e4b); 3645 ext4_mb_release_desc(&e4b);
3946 put_bh(bitmap_bh); 3646 put_bh(bitmap_bh);
3947 return free; 3647 return free;
@@ -3962,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3962 struct super_block *sb = inode->i_sb; 3662 struct super_block *sb = inode->i_sb;
3963 struct buffer_head *bitmap_bh = NULL; 3663 struct buffer_head *bitmap_bh = NULL;
3964 struct ext4_prealloc_space *pa, *tmp; 3664 struct ext4_prealloc_space *pa, *tmp;
3665 struct ext4_allocation_context *ac;
3965 ext4_group_t group = 0; 3666 ext4_group_t group = 0;
3966 struct list_head list; 3667 struct list_head list;
3967 struct ext4_buddy e4b; 3668 struct ext4_buddy e4b;
@@ -3976,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3976 3677
3977 INIT_LIST_HEAD(&list); 3678 INIT_LIST_HEAD(&list);
3978 3679
3680 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3979repeat: 3681repeat:
3980 /* first, collect all pa's in the inode */ 3682 /* first, collect all pa's in the inode */
3981 spin_lock(&ei->i_prealloc_lock); 3683 spin_lock(&ei->i_prealloc_lock);
@@ -4040,7 +3742,7 @@ repeat:
4040 3742
4041 ext4_lock_group(sb, group); 3743 ext4_lock_group(sb, group);
4042 list_del(&pa->pa_group_list); 3744 list_del(&pa->pa_group_list);
4043 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3745 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4044 ext4_unlock_group(sb, group); 3746 ext4_unlock_group(sb, group);
4045 3747
4046 ext4_mb_release_desc(&e4b); 3748 ext4_mb_release_desc(&e4b);
@@ -4049,6 +3751,8 @@ repeat:
4049 list_del(&pa->u.pa_tmp_list); 3751 list_del(&pa->u.pa_tmp_list);
4050 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3752 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4051 } 3753 }
3754 if (ac)
3755 kmem_cache_free(ext4_ac_cachep, ac);
4052} 3756}
4053 3757
4054/* 3758/*
@@ -4108,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4108 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3812 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4109 start, pa->pa_len); 3813 start, pa->pa_len);
4110 } 3814 }
4111 ext4_lock_group(sb, i); 3815 ext4_unlock_group(sb, i);
4112 3816
4113 if (grp->bb_free == 0) 3817 if (grp->bb_free == 0)
4114 continue; 3818 continue;
@@ -4167,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4167 mutex_lock(&ac->ac_lg->lg_mutex); 3871 mutex_lock(&ac->ac_lg->lg_mutex);
4168} 3872}
4169 3873
4170static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3874static noinline_for_stack int
3875ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4171 struct ext4_allocation_request *ar) 3876 struct ext4_allocation_request *ar)
4172{ 3877{
4173 struct super_block *sb = ar->inode->i_sb; 3878 struct super_block *sb = ar->inode->i_sb;
@@ -4398,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4398 ext4_mb_free_committed_blocks(sb); 4103 ext4_mb_free_committed_blocks(sb);
4399} 4104}
4400 4105
4401static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4106static noinline_for_stack int
4107ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4402 ext4_group_t group, ext4_grpblk_t block, int count) 4108 ext4_group_t group, ext4_grpblk_t block, int count)
4403{ 4109{
4404 struct ext4_group_info *db = e4b->bd_info; 4110 struct ext4_group_info *db = e4b->bd_info;
@@ -4489,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4489 if (block < le32_to_cpu(es->s_first_data_block) || 4195 if (block < le32_to_cpu(es->s_first_data_block) ||
4490 block + count < block || 4196 block + count < block ||
4491 block + count > ext4_blocks_count(es)) { 4197 block + count > ext4_blocks_count(es)) {
4492 ext4_error(sb, __FUNCTION__, 4198 ext4_error(sb, __func__,
4493 "Freeing blocks not in datazone - " 4199 "Freeing blocks not in datazone - "
4494 "block = %lu, count = %lu", block, count); 4200 "block = %lu, count = %lu", block, count);
4495 goto error_return; 4201 goto error_return;
@@ -4530,7 +4236,7 @@ do_more:
4530 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4236 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4531 EXT4_SB(sb)->s_itb_per_group)) { 4237 EXT4_SB(sb)->s_itb_per_group)) {
4532 4238
4533 ext4_error(sb, __FUNCTION__, 4239 ext4_error(sb, __func__,
4534 "Freeing blocks in system zone - " 4240 "Freeing blocks in system zone - "
4535 "Block = %lu, count = %lu", block, count); 4241 "Block = %lu, count = %lu", block, count);
4536 } 4242 }
@@ -4588,8 +4294,7 @@ do_more:
4588 } 4294 }
4589 4295
4590 spin_lock(sb_bgl_lock(sbi, block_group)); 4296 spin_lock(sb_bgl_lock(sbi, block_group));
4591 gdp->bg_free_blocks_count = 4297 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4592 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4593 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4298 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4594 spin_unlock(sb_bgl_lock(sbi, block_group)); 4299 spin_unlock(sb_bgl_lock(sbi, block_group));
4595 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4300 percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
1/*
2 * fs/ext4/mballoc.h
3 *
4 * Written by: Alex Tomas <alex@clusterfs.com>
5 *
6 */
7#ifndef _EXT4_MBALLOC_H
8#define _EXT4_MBALLOC_H
9
10#include <linux/time.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/swap.h>
17#include <linux/proc_fs.h>
18#include <linux/pagemap.h>
19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include "ext4_jbd2.h"
22#include "ext4.h"
23#include "group.h"
24
25/*
26 * with AGGRESSIVE_CHECK allocator runs consistency checks over
27 * structures. these checks slow things down a lot
28 */
29#define AGGRESSIVE_CHECK__
30
31/*
32 * with DOUBLE_CHECK defined mballoc creates persistent in-core
33 * bitmaps, maintains and uses them to check for double allocations
34 */
35#define DOUBLE_CHECK__
36
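37/* with MB_DEBUG defined, mb_debug() prints its message via printk(); */
38/* otherwise mb_debug() expands to nothing */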
39#define MB_DEBUG__
40#ifdef MB_DEBUG
41#define mb_debug(fmt, a...) printk(fmt, ##a)
42#else
43#define mb_debug(fmt, a...)
44#endif
45
46/*
47 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
48 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
49 */
50#define EXT4_MB_HISTORY
51#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
52#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
53#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
54#define EXT4_MB_HISTORY_FREE 8 /* free */
55
56#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
57 EXT4_MB_HISTORY_PREALLOC)
58
59/*
60 * How long mballoc can look for a best extent (in found extents)
61 */
62#define MB_DEFAULT_MAX_TO_SCAN 200
63
64/*
65 * How long mballoc must look for a best extent
66 */
67#define MB_DEFAULT_MIN_TO_SCAN 10
68
69/*
70 * How many groups mballoc will scan looking for the best chunk
71 */
72#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
73
74/*
75 * with 'ext4_mb_stats' allocator will collect stats that will be
76 * shown at umount. The collecting costs though!
77 */
78#define MB_DEFAULT_STATS 1
79
80/*
81 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
82 * by the stream allocator, whose purpose is to pack requests
83 * as close to each other as possible to produce smooth I/O traffic.
84 * We use locality group prealloc space for stream requests.
85 * We can tune the same via /proc/fs/ext4/<partition>/stream_req
86 */
87#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
88
89/*
90 * for which requests use 2^N search using buddies
91 */
92#define MB_DEFAULT_ORDER2_REQS 2
93
94/*
95 * default group prealloc size 512 blocks
96 */
97#define MB_DEFAULT_GROUP_PREALLOC 512
98
99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep;
101
102#ifdef EXT4_BB_MAX_BLOCKS
103#undef EXT4_BB_MAX_BLOCKS
104#endif
105#define EXT4_BB_MAX_BLOCKS 30
106
107struct ext4_free_metadata {
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list;
112};
113
114struct ext4_group_info {
115 unsigned long bb_state;
116 unsigned long bb_tid;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free;
119 unsigned short bb_free;
120 unsigned short bb_fragments;
121 struct list_head bb_prealloc_list;
122#ifdef DOUBLE_CHECK
123 void *bb_bitmap;
124#endif
125 unsigned short bb_counters[];
126};
127
128#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
129#define EXT4_GROUP_INFO_LOCKED_BIT 1
130
131#define EXT4_MB_GRP_NEED_INIT(grp) \
132 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
133
134
135struct ext4_prealloc_space {
136 struct list_head pa_inode_list;
137 struct list_head pa_group_list;
138 union {
139 struct list_head pa_tmp_list;
140 struct rcu_head pa_rcu;
141 } u;
142 spinlock_t pa_lock;
143 atomic_t pa_count;
144 unsigned pa_deleted;
145 ext4_fsblk_t pa_pstart; /* phys. block */
146 ext4_lblk_t pa_lstart; /* log. block */
147 unsigned short pa_len; /* len of preallocated chunk */
148 unsigned short pa_free; /* how many blocks are free */
149 unsigned short pa_linear; /* consumed in one direction
150 * strictly, for grp prealloc */
151 spinlock_t *pa_obj_lock;
152 struct inode *pa_inode; /* hack, for history only */
153};
154
155
156struct ext4_free_extent {
157 ext4_lblk_t fe_logical;
158 ext4_grpblk_t fe_start;
159 ext4_group_t fe_group;
160 int fe_len;
161};
162
163/*
164 * Locality group:
165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well
167 */
168struct ext4_locality_group {
169 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */
172 spinlock_t lg_prealloc_lock;
173};
174
175struct ext4_allocation_context {
176 struct inode *ac_inode;
177 struct super_block *ac_sb;
178
179 /* original request */
180 struct ext4_free_extent ac_o_ex;
181
182 /* goal request (after normalization) */
183 struct ext4_free_extent ac_g_ex;
184
185 /* the best found extent */
186 struct ext4_free_extent ac_b_ex;
187
188 /* copy of the best found extent taken before preallocation efforts */
189 struct ext4_free_extent ac_f_ex;
190
191 /* number of iterations done. we have to track to limit searching */
192 unsigned long ac_ex_scanned;
193 __u16 ac_groups_scanned;
194 __u16 ac_found;
195 __u16 ac_tail;
196 __u16 ac_buddy;
197 __u16 ac_flags; /* allocation hints */
198 __u8 ac_status;
199 __u8 ac_criteria;
200 __u8 ac_repeats;
201 __u8 ac_2order; /* if request is to allocate 2^N blocks and
202 * N > 0, the field stores N, otherwise 0 */
203 __u8 ac_op; /* operation, for history only */
204 struct page *ac_bitmap_page;
205 struct page *ac_buddy_page;
206 struct ext4_prealloc_space *ac_pa;
207 struct ext4_locality_group *ac_lg;
208};
209
210#define AC_STATUS_CONTINUE 1
211#define AC_STATUS_FOUND 2
212#define AC_STATUS_BREAK 3
213
214struct ext4_mb_history {
215 struct ext4_free_extent orig; /* orig allocation */
216 struct ext4_free_extent goal; /* goal allocation */
217 struct ext4_free_extent result; /* result allocation */
218 unsigned pid;
219 unsigned ino;
220 __u16 found; /* how many extents have been found */
221 __u16 groups; /* how many groups have been scanned */
222 __u16 tail; /* what tail broke some buddy */
223 __u16 buddy; /* buddy the tail ^^^ broke */
224 __u16 flags;
225 __u8 cr:3; /* which phase the result extent was found at */
226 __u8 op:4;
227 __u8 merged:1;
228};
229
230struct ext4_buddy {
231 struct page *bd_buddy_page;
232 void *bd_buddy;
233 struct page *bd_bitmap_page;
234 void *bd_bitmap;
235 struct ext4_group_info *bd_info;
236 struct super_block *bd_sb;
237 __u16 bd_blkbits;
238 ext4_group_t bd_group;
239};
240#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
241#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
242
243#ifndef EXT4_MB_HISTORY
244static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
245{
246 return;
247}
248#else
249static void ext4_mb_store_history(struct ext4_allocation_context *ac);
250#endif
251
252#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
253
254static struct proc_dir_entry *proc_root_ext4;
255struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
256
257static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
258 ext4_group_t group);
259static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
260static void ext4_mb_free_committed_blocks(struct super_block *);
261static void ext4_mb_return_to_preallocation(struct inode *inode,
262 struct ext4_buddy *e4b, sector_t block,
263 int count);
264static void ext4_mb_put_pa(struct ext4_allocation_context *,
265 struct super_block *, struct ext4_prealloc_space *pa);
266static int ext4_mb_init_per_dev_proc(struct super_block *sb);
267static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
268
269
270static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
271{
272 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
273
274 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
275}
276
277static inline void ext4_unlock_group(struct super_block *sb,
278 ext4_group_t group)
279{
280 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
281
282 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
283}
284
285static inline int ext4_is_group_locked(struct super_block *sb,
286 ext4_group_t group)
287{
288 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
289
290 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
291 &(grinfo->bb_state));
292}
293
294static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
295 struct ext4_free_extent *fex)
296{
297 ext4_fsblk_t block;
298
299 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
300 + fex->fe_start
301 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
302 return block;
303}
304#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/ext4_jbd2.h> 16#include "ext4_jbd2.h"
17#include <linux/ext4_fs_extents.h> 17#include "ext4_extents.h"
18 18
19/* 19/*
20 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
327} 327}
328 328
329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
330 struct inode *tmp_inode) 330 struct inode *tmp_inode)
331{ 331{
332 int retval; 332 int retval;
333 __le32 i_data[3]; 333 __le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
339 * i_data field of the original inode 339 * i_data field of the original inode
340 */ 340 */
341 retval = ext4_journal_extend(handle, 1); 341 retval = ext4_journal_extend(handle, 1);
342 if (retval != 0) { 342 if (retval) {
343 retval = ext4_journal_restart(handle, 1); 343 retval = ext4_journal_restart(handle, 1);
344 if (retval) 344 if (retval)
345 goto err_out; 345 goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
351 351
352 down_write(&EXT4_I(inode)->i_data_sem); 352 down_write(&EXT4_I(inode)->i_data_sem);
353 /* 353 /*
354 * if EXT4_EXT_MIGRATE is cleared a block allocation
355 * happened after we started the migrate. We need to
356 * fail the migrate
357 */
358 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
359 retval = -EAGAIN;
360 up_write(&EXT4_I(inode)->i_data_sem);
361 goto err_out;
362 } else
363 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
364 ~EXT4_EXT_MIGRATE;
365 /*
354 * We have the extent map build with the tmp inode. 366 * We have the extent map build with the tmp inode.
355 * Now copy the i_data across 367 * Now copy the i_data across
356 */ 368 */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
508 * switch the inode format to prevent read. 520 * switch the inode format to prevent read.
509 */ 521 */
510 mutex_lock(&(inode->i_mutex)); 522 mutex_lock(&(inode->i_mutex));
523 /*
524 * Even though we take i_mutex we can still cause block allocation
525 * via mmap write to holes. If we have allocated new blocks we fail
526 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
527 * The flag is updated with i_data_sem held to prevent racing with
528 * block allocation.
529 */
530 down_read((&EXT4_I(inode)->i_data_sem));
531 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
532 up_read((&EXT4_I(inode)->i_data_sem));
533
511 handle = ext4_journal_start(inode, 1); 534 handle = ext4_journal_start(inode, 1);
512 535
513 ei = EXT4_I(inode); 536 ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
559 * tmp_inode 582 * tmp_inode
560 */ 583 */
561 free_ext_block(handle, tmp_inode); 584 free_ext_block(handle, tmp_inode);
562 else 585 else {
563 retval = ext4_ext_swap_inode_data(handle, inode, 586 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
564 tmp_inode); 587 if (retval)
588 /*
589 * if we fail to swap inode data free the extent
590 * details of the tmp inode
591 */
592 free_ext_block(handle, tmp_inode);
593 }
565 594
566 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 595 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
567 if (ext4_journal_extend(handle, 1) != 0) 596 if (ext4_journal_extend(handle, 1) != 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/ext4_fs.h>
32#include <linux/ext4_jbd2.h>
33#include <linux/fcntl.h> 31#include <linux/fcntl.h>
34#include <linux/stat.h> 32#include <linux/stat.h>
35#include <linux/string.h> 33#include <linux/string.h>
36#include <linux/quotaops.h> 34#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
38#include <linux/bio.h> 36#include <linux/bio.h>
37#include "ext4.h"
38#include "ext4_jbd2.h"
39 39
40#include "namei.h" 40#include "namei.h"
41#include "xattr.h" 41#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext4_bread(handle, inode, *block, 1, err))) { 60 bh = ext4_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT4_I(inode)->i_disksize = inode->i_size; 63 EXT4_I(inode)->i_disksize = inode->i_size;
63 ext4_journal_get_write_access(handle,bh); 64 *err = ext4_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
348 if (root->info.hash_version != DX_HASH_TEA && 353 if (root->info.hash_version != DX_HASH_TEA &&
349 root->info.hash_version != DX_HASH_HALF_MD4 && 354 root->info.hash_version != DX_HASH_HALF_MD4 &&
350 root->info.hash_version != DX_HASH_LEGACY) { 355 root->info.hash_version != DX_HASH_LEGACY) {
351 ext4_warning(dir->i_sb, __FUNCTION__, 356 ext4_warning(dir->i_sb, __func__,
352 "Unrecognised inode hash code %d", 357 "Unrecognised inode hash code %d",
353 root->info.hash_version); 358 root->info.hash_version);
354 brelse(bh); 359 brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
362 hash = hinfo->hash; 367 hash = hinfo->hash;
363 368
364 if (root->info.unused_flags & 1) { 369 if (root->info.unused_flags & 1) {
365 ext4_warning(dir->i_sb, __FUNCTION__, 370 ext4_warning(dir->i_sb, __func__,
366 "Unimplemented inode hash flags: %#06x", 371 "Unimplemented inode hash flags: %#06x",
367 root->info.unused_flags); 372 root->info.unused_flags);
368 brelse(bh); 373 brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
371 } 376 }
372 377
373 if ((indirect = root->info.indirect_levels) > 1) { 378 if ((indirect = root->info.indirect_levels) > 1) {
374 ext4_warning(dir->i_sb, __FUNCTION__, 379 ext4_warning(dir->i_sb, __func__,
375 "Unimplemented inode hash depth: %#06x", 380 "Unimplemented inode hash depth: %#06x",
376 root->info.indirect_levels); 381 root->info.indirect_levels);
377 brelse(bh); 382 brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
384 389
385 if (dx_get_limit(entries) != dx_root_limit(dir, 390 if (dx_get_limit(entries) != dx_root_limit(dir,
386 root->info.info_length)) { 391 root->info.info_length)) {
387 ext4_warning(dir->i_sb, __FUNCTION__, 392 ext4_warning(dir->i_sb, __func__,
388 "dx entry: limit != root limit"); 393 "dx entry: limit != root limit");
389 brelse(bh); 394 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 395 *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
396 { 401 {
397 count = dx_get_count(entries); 402 count = dx_get_count(entries);
398 if (!count || count > dx_get_limit(entries)) { 403 if (!count || count > dx_get_limit(entries)) {
399 ext4_warning(dir->i_sb, __FUNCTION__, 404 ext4_warning(dir->i_sb, __func__,
400 "dx entry: no count or count > limit"); 405 "dx entry: no count or count > limit");
401 brelse(bh); 406 brelse(bh);
402 *err = ERR_BAD_DX_DIR; 407 *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
441 goto fail2; 446 goto fail2;
442 at = entries = ((struct dx_node *) bh->b_data)->entries; 447 at = entries = ((struct dx_node *) bh->b_data)->entries;
443 if (dx_get_limit(entries) != dx_node_limit (dir)) { 448 if (dx_get_limit(entries) != dx_node_limit (dir)) {
444 ext4_warning(dir->i_sb, __FUNCTION__, 449 ext4_warning(dir->i_sb, __func__,
445 "dx entry: limit != node limit"); 450 "dx entry: limit != node limit");
446 brelse(bh); 451 brelse(bh);
447 *err = ERR_BAD_DX_DIR; 452 *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
457 } 462 }
458fail: 463fail:
459 if (*err == ERR_BAD_DX_DIR) 464 if (*err == ERR_BAD_DX_DIR)
460 ext4_warning(dir->i_sb, __FUNCTION__, 465 ext4_warning(dir->i_sb, __func__,
461 "Corrupt dir inode %ld, running e2fsck is " 466 "Corrupt dir inode %ld, running e2fsck is "
462 "recommended.", dir->i_ino); 467 "recommended.", dir->i_ino);
463 return NULL; 468 return NULL;
@@ -914,7 +919,7 @@ restart:
914 wait_on_buffer(bh); 919 wait_on_buffer(bh);
915 if (!buffer_uptodate(bh)) { 920 if (!buffer_uptodate(bh)) {
916 /* read error, skip block & hope for the best */ 921 /* read error, skip block & hope for the best */
917 ext4_error(sb, __FUNCTION__, "reading directory #%lu " 922 ext4_error(sb, __func__, "reading directory #%lu "
918 "offset %lu", dir->i_ino, 923 "offset %lu", dir->i_ino,
919 (unsigned long)block); 924 (unsigned long)block);
920 brelse(bh); 925 brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1007 retval = ext4_htree_next_block(dir, hash, frame, 1012 retval = ext4_htree_next_block(dir, hash, frame,
1008 frames, NULL); 1013 frames, NULL);
1009 if (retval < 0) { 1014 if (retval < 0) {
1010 ext4_warning(sb, __FUNCTION__, 1015 ext4_warning(sb, __func__,
1011 "error reading index page in directory #%lu", 1016 "error reading index page in directory #%lu",
1012 dir->i_ino); 1017 dir->i_ino);
1013 *err = retval; 1018 *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1532 1537
1533 if (levels && (dx_get_count(frames->entries) == 1538 if (levels && (dx_get_count(frames->entries) ==
1534 dx_get_limit(frames->entries))) { 1539 dx_get_limit(frames->entries))) {
1535 ext4_warning(sb, __FUNCTION__, 1540 ext4_warning(sb, __func__,
1536 "Directory index full!"); 1541 "Directory index full!");
1537 err = -ENOSPC; 1542 err = -ENOSPC;
1538 goto cleanup; 1543 goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
1860 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1865 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1861 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1866 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
1862 if (err) 1867 if (err)
1863 ext4_error(inode->i_sb, __FUNCTION__, 1868 ext4_error(inode->i_sb, __func__,
1864 "error %d reading directory #%lu offset 0", 1869 "error %d reading directory #%lu offset 0",
1865 err, inode->i_ino); 1870 err, inode->i_ino);
1866 else 1871 else
1867 ext4_warning(inode->i_sb, __FUNCTION__, 1872 ext4_warning(inode->i_sb, __func__,
1868 "bad directory (dir #%lu) - no data block", 1873 "bad directory (dir #%lu) - no data block",
1869 inode->i_ino); 1874 inode->i_ino);
1870 return 1; 1875 return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
1893 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1898 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1894 if (!bh) { 1899 if (!bh) {
1895 if (err) 1900 if (err)
1896 ext4_error(sb, __FUNCTION__, 1901 ext4_error(sb, __func__,
1897 "error %d reading directory" 1902 "error %d reading directory"
1898 " #%lu offset %lu", 1903 " #%lu offset %lu",
1899 err, inode->i_ino, offset); 1904 err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
2217 goto out_stop; 2222 goto out_stop;
2218 } 2223 }
2219 } else { 2224 } else {
2225 /* clear the extent format for fast symlink */
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2220 inode->i_op = &ext4_fast_symlink_inode_operations; 2227 inode->i_op = &ext4_fast_symlink_inode_operations;
2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2228 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2222 inode->i_size = l-1; 2229 inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2347 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2354 EXT4_FEATURE_INCOMPAT_FILETYPE))
2348 new_de->file_type = old_de->file_type; 2355 new_de->file_type = old_de->file_type;
2349 new_dir->i_version++; 2356 new_dir->i_version++;
2357 new_dir->i_ctime = new_dir->i_mtime =
2358 ext4_current_time(new_dir);
2359 ext4_mark_inode_dirty(handle, new_dir);
2350 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2360 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
2351 ext4_journal_dirty_metadata(handle, new_bh); 2361 ext4_journal_dirty_metadata(handle, new_bh);
2352 brelse(new_bh); 2362 brelse(new_bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/ext4_jbd2.h>
15
16#include <linux/errno.h> 14#include <linux/errno.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18 16
17#include "ext4_jbd2.h"
19#include "group.h" 18#include "group.h"
20 19
21#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
50 49
51 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
52 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
53 ext4_warning(sb, __FUNCTION__, 52 ext4_warning(sb, __func__,
54 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %lu groups)",
55 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
56 else if (offset != 0) 55 else if (offset != 0)
57 ext4_warning(sb, __FUNCTION__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
58 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
59 ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
60 input->reserved_blocks); 59 input->reserved_blocks);
61 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
62 ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext4_warning(sb, __func__, "Bad blocks count %u",
63 input->blocks_count); 62 input->blocks_count);
64 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
65 ext4_warning(sb, __FUNCTION__, 64 ext4_warning(sb, __func__,
66 "Cannot read last block (%llu)", 65 "Cannot read last block (%llu)",
67 end - 1); 66 end - 1);
68 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
69 ext4_warning(sb, __FUNCTION__, 68 ext4_warning(sb, __func__,
70 "Block bitmap not in group (block %llu)", 69 "Block bitmap not in group (block %llu)",
71 (unsigned long long)input->block_bitmap); 70 (unsigned long long)input->block_bitmap);
72 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
73 ext4_warning(sb, __FUNCTION__, 72 ext4_warning(sb, __func__,
74 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
75 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
76 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
77 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
78 ext4_warning(sb, __FUNCTION__, 77 ext4_warning(sb, __func__,
79 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
80 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
81 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
82 ext4_warning(sb, __FUNCTION__, 81 ext4_warning(sb, __func__,
83 "Block bitmap same as inode bitmap (%llu)", 82 "Block bitmap same as inode bitmap (%llu)",
84 (unsigned long long)input->block_bitmap); 83 (unsigned long long)input->block_bitmap);
85 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
86 ext4_warning(sb, __FUNCTION__, 85 ext4_warning(sb, __func__,
87 "Block bitmap (%llu) in inode table (%llu-%llu)", 86 "Block bitmap (%llu) in inode table (%llu-%llu)",
88 (unsigned long long)input->block_bitmap, 87 (unsigned long long)input->block_bitmap,
89 (unsigned long long)input->inode_table, itend - 1); 88 (unsigned long long)input->inode_table, itend - 1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext4_warning(sb, __FUNCTION__, 90 ext4_warning(sb, __func__,
92 "Inode bitmap (%llu) in inode table (%llu-%llu)", 91 "Inode bitmap (%llu) in inode table (%llu-%llu)",
93 (unsigned long long)input->inode_bitmap, 92 (unsigned long long)input->inode_bitmap,
94 (unsigned long long)input->inode_table, itend - 1); 93 (unsigned long long)input->inode_table, itend - 1);
95 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
96 ext4_warning(sb, __FUNCTION__, 95 ext4_warning(sb, __func__,
97 "Block bitmap (%llu) in GDT table" 96 "Block bitmap (%llu) in GDT table"
98 " (%llu-%llu)", 97 " (%llu-%llu)",
99 (unsigned long long)input->block_bitmap, 98 (unsigned long long)input->block_bitmap,
100 start, metaend - 1); 99 start, metaend - 1);
101 else if (inside(input->inode_bitmap, start, metaend)) 100 else if (inside(input->inode_bitmap, start, metaend))
102 ext4_warning(sb, __FUNCTION__, 101 ext4_warning(sb, __func__,
103 "Inode bitmap (%llu) in GDT table" 102 "Inode bitmap (%llu) in GDT table"
104 " (%llu-%llu)", 103 " (%llu-%llu)",
105 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
106 start, metaend - 1); 105 start, metaend - 1);
107 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
108 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
109 ext4_warning(sb, __FUNCTION__, 108 ext4_warning(sb, __func__,
110 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
111 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
112 (unsigned long long)input->inode_table, 111 (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
368 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 367 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
369 if (le32_to_cpu(*p++) != 368 if (le32_to_cpu(*p++) !=
370 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 369 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
371 ext4_warning(sb, __FUNCTION__, 370 ext4_warning(sb, __func__,
372 "reserved GDT %llu" 371 "reserved GDT %llu"
373 " missing grp %d (%llu)", 372 " missing grp %d (%llu)",
374 blk, grp, 373 blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
424 */ 423 */
425 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
426 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
427 ext4_warning(sb, __FUNCTION__, 426 ext4_warning(sb, __func__,
428 "won't resize using backup superblock at %llu", 427 "won't resize using backup superblock at %llu",
429 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 428 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
430 return -EPERM; 429 return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
448 447
449 data = (__le32 *)dind->b_data; 448 data = (__le32 *)dind->b_data;
450 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 449 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
451 ext4_warning(sb, __FUNCTION__, 450 ext4_warning(sb, __func__,
452 "new group %u GDT block %llu not reserved", 451 "new group %u GDT block %llu not reserved",
453 input->group, gdblock); 452 input->group, gdblock);
454 err = -EINVAL; 453 err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
469 goto exit_dindj; 468 goto exit_dindj;
470 469
471 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
472 GFP_KERNEL); 471 GFP_NOFS);
473 if (!n_group_desc) { 472 if (!n_group_desc) {
474 err = -ENOMEM; 473 err = -ENOMEM;
475 ext4_warning (sb, __FUNCTION__, 474 ext4_warning(sb, __func__,
476 "not enough memory for %lu groups", gdb_num + 1); 475 "not enough memory for %lu groups", gdb_num + 1);
477 goto exit_inode; 476 goto exit_inode;
478 } 477 }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 EXT4_SB(sb)->s_gdb_count++; 501 EXT4_SB(sb)->s_gdb_count++;
503 kfree(o_group_desc); 502 kfree(o_group_desc);
504 503
505 es->s_reserved_gdt_blocks = 504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
506 cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
507 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
508 506
509 return 0; 507 return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
553 int res, i; 551 int res, i;
554 int err; 552 int err;
555 553
556 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 554 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
557 if (!primary) 555 if (!primary)
558 return -ENOMEM; 556 return -ENOMEM;
559 557
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
571 /* Get each reserved primary GDT block and verify it holds backups */ 569 /* Get each reserved primary GDT block and verify it holds backups */
572 for (res = 0; res < reserved_gdb; res++, blk++) { 570 for (res = 0; res < reserved_gdb; res++, blk++) {
573 if (le32_to_cpu(*data) != blk) { 571 if (le32_to_cpu(*data) != blk) {
574 ext4_warning(sb, __FUNCTION__, 572 ext4_warning(sb, __func__,
575 "reserved block %llu" 573 "reserved block %llu"
576 " not at offset %ld", 574 " not at offset %ld",
577 blk, 575 blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
715 */ 713 */
716exit_err: 714exit_err:
717 if (err) { 715 if (err) {
718 ext4_warning(sb, __FUNCTION__, 716 ext4_warning(sb, __func__,
719 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %lu (err %d), "
720 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
721 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
755 753
756 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
757 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
758 ext4_warning(sb, __FUNCTION__, 756 ext4_warning(sb, __func__,
759 "Can't resize non-sparse filesystem further"); 757 "Can't resize non-sparse filesystem further");
760 return -EPERM; 758 return -EPERM;
761 } 759 }
762 760
763 if (ext4_blocks_count(es) + input->blocks_count < 761 if (ext4_blocks_count(es) + input->blocks_count <
764 ext4_blocks_count(es)) { 762 ext4_blocks_count(es)) {
765 ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 763 ext4_warning(sb, __func__, "blocks_count overflow\n");
766 return -EINVAL; 764 return -EINVAL;
767 } 765 }
768 766
769 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
770 le32_to_cpu(es->s_inodes_count)) { 768 le32_to_cpu(es->s_inodes_count)) {
771 ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 769 ext4_warning(sb, __func__, "inodes_count overflow\n");
772 return -EINVAL; 770 return -EINVAL;
773 } 771 }
774 772
775 if (reserved_gdb || gdb_off == 0) { 773 if (reserved_gdb || gdb_off == 0) {
776 if (!EXT4_HAS_COMPAT_FEATURE(sb, 774 if (!EXT4_HAS_COMPAT_FEATURE(sb,
777 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 775 EXT4_FEATURE_COMPAT_RESIZE_INODE)){
778 ext4_warning(sb, __FUNCTION__, 776 ext4_warning(sb, __func__,
779 "No reserved GDT blocks, can't resize"); 777 "No reserved GDT blocks, can't resize");
780 return -EPERM; 778 return -EPERM;
781 } 779 }
782 inode = ext4_iget(sb, EXT4_RESIZE_INO); 780 inode = ext4_iget(sb, EXT4_RESIZE_INO);
783 if (IS_ERR(inode)) { 781 if (IS_ERR(inode)) {
784 ext4_warning(sb, __FUNCTION__, 782 ext4_warning(sb, __func__,
785 "Error opening resize inode"); 783 "Error opening resize inode");
786 return PTR_ERR(inode); 784 return PTR_ERR(inode);
787 } 785 }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 808
811 lock_super(sb); 809 lock_super(sb);
812 if (input->group != sbi->s_groups_count) { 810 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __FUNCTION__, 811 ext4_warning(sb, __func__,
814 "multiple resizers run on filesystem!"); 812 "multiple resizers run on filesystem!");
815 err = -EBUSY; 813 err = -EBUSY;
816 goto exit_journal; 814 goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
877 */ 875 */
878 ext4_blocks_count_set(es, ext4_blocks_count(es) + 876 ext4_blocks_count_set(es, ext4_blocks_count(es) +
879 input->blocks_count); 877 input->blocks_count);
880 es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + 878 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
881 EXT4_INODES_PER_GROUP(sb));
882 879
883 /* 880 /*
884 * We need to protect s_groups_count against other CPUs seeing 881 * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 " too large to resize to %llu blocks safely\n", 974 " too large to resize to %llu blocks safely\n",
978 sb->s_id, n_blocks_count); 975 sb->s_id, n_blocks_count);
979 if (sizeof(sector_t) < 8) 976 if (sizeof(sector_t) < 8)
980 ext4_warning(sb, __FUNCTION__, 977 ext4_warning(sb, __func__,
981 "CONFIG_LBD not enabled\n"); 978 "CONFIG_LBD not enabled\n");
982 return -EINVAL; 979 return -EINVAL;
983 } 980 }
984 981
985 if (n_blocks_count < o_blocks_count) { 982 if (n_blocks_count < o_blocks_count) {
986 ext4_warning(sb, __FUNCTION__, 983 ext4_warning(sb, __func__,
987 "can't shrink FS - resize aborted"); 984 "can't shrink FS - resize aborted");
988 return -EBUSY; 985 return -EBUSY;
989 } 986 }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
992 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 989 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
993 990
994 if (last == 0) { 991 if (last == 0) {
995 ext4_warning(sb, __FUNCTION__, 992 ext4_warning(sb, __func__,
996 "need to use ext2online to resize further"); 993 "need to use ext2online to resize further");
997 return -EPERM; 994 return -EPERM;
998 } 995 }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1000 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 997 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1001 998
1002 if (o_blocks_count + add < o_blocks_count) { 999 if (o_blocks_count + add < o_blocks_count) {
1003 ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); 1000 ext4_warning(sb, __func__, "blocks_count overflow");
1004 return -EINVAL; 1001 return -EINVAL;
1005 } 1002 }
1006 1003
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1008 add = n_blocks_count - o_blocks_count; 1005 add = n_blocks_count - o_blocks_count;
1009 1006
1010 if (o_blocks_count + add < n_blocks_count) 1007 if (o_blocks_count + add < n_blocks_count)
1011 ext4_warning(sb, __FUNCTION__, 1008 ext4_warning(sb, __func__,
1012 "will only finish group (%llu" 1009 "will only finish group (%llu"
1013 " blocks, %u new)", 1010 " blocks, %u new)",
1014 o_blocks_count + add, add); 1011 o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1016 /* See if the device is actually as big as what was requested */ 1013 /* See if the device is actually as big as what was requested */
1017 bh = sb_bread(sb, o_blocks_count + add -1); 1014 bh = sb_bread(sb, o_blocks_count + add -1);
1018 if (!bh) { 1015 if (!bh) {
1019 ext4_warning(sb, __FUNCTION__, 1016 ext4_warning(sb, __func__,
1020 "can't read last block, resize aborted"); 1017 "can't read last block, resize aborted");
1021 return -ENOSPC; 1018 return -ENOSPC;
1022 } 1019 }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1028 handle = ext4_journal_start_sb(sb, 3); 1025 handle = ext4_journal_start_sb(sb, 3);
1029 if (IS_ERR(handle)) { 1026 if (IS_ERR(handle)) {
1030 err = PTR_ERR(handle); 1027 err = PTR_ERR(handle);
1031 ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); 1028 ext4_warning(sb, __func__, "error %d on journal start", err);
1032 goto exit_put; 1029 goto exit_put;
1033 } 1030 }
1034 1031
1035 lock_super(sb); 1032 lock_super(sb);
1036 if (o_blocks_count != ext4_blocks_count(es)) { 1033 if (o_blocks_count != ext4_blocks_count(es)) {
1037 ext4_warning(sb, __FUNCTION__, 1034 ext4_warning(sb, __func__,
1038 "multiple resizers run on filesystem!"); 1035 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1036 unlock_super(sb);
1040 ext4_journal_stop(handle); 1037 ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1044 1041
1045 if ((err = ext4_journal_get_write_access(handle, 1042 if ((err = ext4_journal_get_write_access(handle,
1046 EXT4_SB(sb)->s_sbh))) { 1043 EXT4_SB(sb)->s_sbh))) {
1047 ext4_warning(sb, __FUNCTION__, 1044 ext4_warning(sb, __func__,
1048 "error %d on journal write access", err); 1045 "error %d on journal write access", err);
1049 unlock_super(sb); 1046 unlock_super(sb);
1050 ext4_journal_stop(handle); 1047 ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c81a8e759bad..52dd0679a4e2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h>
25#include <linux/ext4_jbd2.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/init.h> 25#include <linux/init.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
38#include <linux/seq_file.h> 36#include <linux/seq_file.h>
39#include <linux/log2.h> 37#include <linux/log2.h>
40#include <linux/crc16.h> 38#include <linux/crc16.h>
41
42#include <asm/uaccess.h> 39#include <asm/uaccess.h>
43 40
41#include "ext4.h"
42#include "ext4_jbd2.h"
44#include "xattr.h" 43#include "xattr.h"
45#include "acl.h" 44#include "acl.h"
46#include "namei.h" 45#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
135 * take the FS itself readonly cleanly. */ 134 * take the FS itself readonly cleanly. */
136 journal = EXT4_SB(sb)->s_journal; 135 journal = EXT4_SB(sb)->s_journal;
137 if (is_journal_aborted(journal)) { 136 if (is_journal_aborted(journal)) {
138 ext4_abort(sb, __FUNCTION__, 137 ext4_abort(sb, __func__,
139 "Detected aborted journal"); 138 "Detected aborted journal");
140 return ERR_PTR(-EROFS); 139 return ERR_PTR(-EROFS);
141 } 140 }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
355 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 354 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
356 return; 355 return;
357 356
358 ext4_warning(sb, __FUNCTION__, 357 ext4_warning(sb, __func__,
359 "updating to rev %d because of new feature flag, " 358 "updating to rev %d because of new feature flag, "
360 "running e2fsck is recommended", 359 "running e2fsck is recommended",
361 EXT4_DYNAMIC_REV); 360 EXT4_DYNAMIC_REV);
@@ -945,8 +944,8 @@ static match_table_t tokens = {
945 {Opt_mballoc, "mballoc"}, 944 {Opt_mballoc, "mballoc"},
946 {Opt_nomballoc, "nomballoc"}, 945 {Opt_nomballoc, "nomballoc"},
947 {Opt_stripe, "stripe=%u"}, 946 {Opt_stripe, "stripe=%u"},
948 {Opt_err, NULL},
949 {Opt_resize, "resize"}, 947 {Opt_resize, "resize"},
948 {Opt_err, NULL},
950}; 949};
951 950
952static ext4_fsblk_t get_sb_block(void **data) 951static ext4_fsblk_t get_sb_block(void **data)
@@ -1388,11 +1387,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1388 * a plain journaled filesystem we can keep it set as 1387 * a plain journaled filesystem we can keep it set as
1389 * valid forever! :) 1388 * valid forever! :)
1390 */ 1389 */
1391 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1390 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1392#endif 1391#endif
1393 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1392 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1394 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1393 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1395 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1394 le16_add_cpu(&es->s_mnt_count, 1);
1396 es->s_mtime = cpu_to_le32(get_seconds()); 1395 es->s_mtime = cpu_to_le32(get_seconds());
1397 ext4_update_dynamic_rev(sb); 1396 ext4_update_dynamic_rev(sb);
1398 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1397 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1485,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb)
1485 block_bitmap = ext4_block_bitmap(sb, gdp); 1484 block_bitmap = ext4_block_bitmap(sb, gdp);
1486 if (block_bitmap < first_block || block_bitmap > last_block) 1485 if (block_bitmap < first_block || block_bitmap > last_block)
1487 { 1486 {
1488 ext4_error (sb, "ext4_check_descriptors", 1487 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1489 "Block bitmap for group %lu" 1488 "Block bitmap for group %lu not in group "
1490 " not in group (block %llu)!", 1489 "(block %llu)!", i, block_bitmap);
1491 i, block_bitmap);
1492 return 0; 1490 return 0;
1493 } 1491 }
1494 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1492 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1495 if (inode_bitmap < first_block || inode_bitmap > last_block) 1493 if (inode_bitmap < first_block || inode_bitmap > last_block)
1496 { 1494 {
1497 ext4_error (sb, "ext4_check_descriptors", 1495 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1498 "Inode bitmap for group %lu" 1496 "Inode bitmap for group %lu not in group "
1499 " not in group (block %llu)!", 1497 "(block %llu)!", i, inode_bitmap);
1500 i, inode_bitmap);
1501 return 0; 1498 return 0;
1502 } 1499 }
1503 inode_table = ext4_inode_table(sb, gdp); 1500 inode_table = ext4_inode_table(sb, gdp);
1504 if (inode_table < first_block || 1501 if (inode_table < first_block ||
1505 inode_table + sbi->s_itb_per_group - 1 > last_block) 1502 inode_table + sbi->s_itb_per_group - 1 > last_block)
1506 { 1503 {
1507 ext4_error (sb, "ext4_check_descriptors", 1504 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1508 "Inode table for group %lu" 1505 "Inode table for group %lu not in group "
1509 " not in group (block %llu)!", 1506 "(block %llu)!", i, inode_table);
1510 i, inode_table);
1511 return 0; 1507 return 0;
1512 } 1508 }
1513 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1509 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1514 ext4_error(sb, __FUNCTION__, 1510 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1515 "Checksum for group %lu failed (%u!=%u)\n", 1511 "Checksum for group %lu failed (%u!=%u)\n",
1516 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1512 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1517 gdp)), le16_to_cpu(gdp->bg_checksum)); 1513 gdp)), le16_to_cpu(gdp->bg_checksum));
1518 return 0; 1514 return 0;
1519 } 1515 }
1520 if (!flexbg_flag) 1516 if (!flexbg_flag)
@@ -1594,8 +1590,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1594 while (es->s_last_orphan) { 1590 while (es->s_last_orphan) {
1595 struct inode *inode; 1591 struct inode *inode;
1596 1592
1597 if (!(inode = 1593 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1598 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1594 if (IS_ERR(inode)) {
1599 es->s_last_orphan = 0; 1595 es->s_last_orphan = 0;
1600 break; 1596 break;
1601 } 1597 }
@@ -1605,7 +1601,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1605 if (inode->i_nlink) { 1601 if (inode->i_nlink) {
1606 printk(KERN_DEBUG 1602 printk(KERN_DEBUG
1607 "%s: truncating inode %lu to %Ld bytes\n", 1603 "%s: truncating inode %lu to %Ld bytes\n",
1608 __FUNCTION__, inode->i_ino, inode->i_size); 1604 __func__, inode->i_ino, inode->i_size);
1609 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1605 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1610 inode->i_ino, inode->i_size); 1606 inode->i_ino, inode->i_size);
1611 ext4_truncate(inode); 1607 ext4_truncate(inode);
@@ -1613,7 +1609,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1613 } else { 1609 } else {
1614 printk(KERN_DEBUG 1610 printk(KERN_DEBUG
1615 "%s: deleting unreferenced inode %lu\n", 1611 "%s: deleting unreferenced inode %lu\n",
1616 __FUNCTION__, inode->i_ino); 1612 __func__, inode->i_ino);
1617 jbd_debug(2, "deleting unreferenced inode %lu\n", 1613 jbd_debug(2, "deleting unreferenced inode %lu\n",
1618 inode->i_ino); 1614 inode->i_ino);
1619 nr_orphans++; 1615 nr_orphans++;
@@ -2699,9 +2695,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
2699 char nbuf[16]; 2695 char nbuf[16];
2700 2696
2701 errstr = ext4_decode_error(sb, j_errno, nbuf); 2697 errstr = ext4_decode_error(sb, j_errno, nbuf);
2702 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2698 ext4_warning(sb, __func__, "Filesystem error recorded "
2703 "from previous mount: %s", errstr); 2699 "from previous mount: %s", errstr);
2704 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2700 ext4_warning(sb, __func__, "Marking fs in need of "
2705 "filesystem check."); 2701 "filesystem check.");
2706 2702
2707 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2703 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2828,7 +2824,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2828 } 2824 }
2829 2825
2830 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2826 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
2831 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2827 ext4_abort(sb, __func__, "Abort forced by user");
2832 2828
2833 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2829 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2834 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2830 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3040,8 +3036,14 @@ static int ext4_dquot_drop(struct inode *inode)
3040 3036
3041 /* We may delete quota structure so we need to reserve enough blocks */ 3037 /* We may delete quota structure so we need to reserve enough blocks */
3042 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3038 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3043 if (IS_ERR(handle)) 3039 if (IS_ERR(handle)) {
3040 /*
3041 * We call dquot_drop() anyway to at least release references
3042 * to quota structures so that umount does not hang.
3043 */
3044 dquot_drop(inode);
3044 return PTR_ERR(handle); 3045 return PTR_ERR(handle);
3046 }
3045 ret = dquot_drop(inode); 3047 ret = dquot_drop(inode);
3046 err = ext4_journal_stop(handle); 3048 err = ext4_journal_stop(handle);
3047 if (!ret) 3049 if (!ret)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/ext4_fs.h>
23#include <linux/namei.h> 22#include <linux/namei.h>
23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..3fbc2c6c3d0e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/ext4_jbd2.h>
57#include <linux/ext4_fs.h>
58#include <linux/mbcache.h> 56#include <linux/mbcache.h>
59#include <linux/quotaops.h> 57#include <linux/quotaops.h>
60#include <linux/rwsem.h> 58#include <linux/rwsem.h>
59#include "ext4_jbd2.h"
60#include "ext4.h"
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer,
96 size_t buffer_size);
95 97
96static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
97 99
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
228bad_block: ext4_error(inode->i_sb, __FUNCTION__, 230bad_block: ext4_error(inode->i_sb, __func__,
229 "inode %lu: bad block %llu", inode->i_ino, 231 "inode %lu: bad block %llu", inode->i_ino,
230 EXT4_I(inode)->i_file_acl); 232 EXT4_I(inode)->i_file_acl);
231 error = -EIO; 233 error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
367 ea_bdebug(bh, "b_count=%d, refcount=%d", 369 ea_bdebug(bh, "b_count=%d, refcount=%d",
368 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
369 if (ext4_xattr_check_block(bh)) { 371 if (ext4_xattr_check_block(bh)) {
370 ext4_error(inode->i_sb, __FUNCTION__, 372 ext4_error(inode->i_sb, __func__,
371 "inode %lu: bad block %llu", inode->i_ino, 373 "inode %lu: bad block %llu", inode->i_ino,
372 EXT4_I(inode)->i_file_acl); 374 EXT4_I(inode)->i_file_acl);
373 error = -EIO; 375 error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
420 * Returns a negative error number on failure, or the number of bytes 422 * Returns a negative error number on failure, or the number of bytes
421 * used / required on success. 423 * used / required on success.
422 */ 424 */
423int 425static int
424ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
425{ 427{
426 int i_error, b_error; 428 int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
484 get_bh(bh); 486 get_bh(bh);
485 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
486 } else { 488 } else {
487 BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
488 le32_to_cpu(BHDR(bh)->h_refcount) - 1);
489 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_journal_dirty_metadata(handle, bh);
490 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
491 handle->h_sync = 1; 492 handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
660 atomic_read(&(bs->bh->b_count)), 661 atomic_read(&(bs->bh->b_count)),
661 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 662 le32_to_cpu(BHDR(bs->bh)->h_refcount));
662 if (ext4_xattr_check_block(bs->bh)) { 663 if (ext4_xattr_check_block(bs->bh)) {
663 ext4_error(sb, __FUNCTION__, 664 ext4_error(sb, __func__,
664 "inode %lu: bad block %llu", inode->i_ino, 665 "inode %lu: bad block %llu", inode->i_ino,
665 EXT4_I(inode)->i_file_acl); 666 EXT4_I(inode)->i_file_acl);
666 error = -EIO; 667 error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
738 ce = NULL; 739 ce = NULL;
739 } 740 }
740 ea_bdebug(bs->bh, "cloning"); 741 ea_bdebug(bs->bh, "cloning");
741 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 742 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
742 error = -ENOMEM; 743 error = -ENOMEM;
743 if (s->base == NULL) 744 if (s->base == NULL)
744 goto cleanup; 745 goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 751 }
751 } else { 752 } else {
752 /* Allocate a buffer where we construct the new block. */ 753 /* Allocate a buffer where we construct the new block. */
753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 754 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
754 /* assert(header == s->base) */ 755 /* assert(header == s->base) */
755 error = -ENOMEM; 756 error = -ENOMEM;
756 if (s->base == NULL) 757 if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
789 if (error) 790 if (error)
790 goto cleanup_dquot; 791 goto cleanup_dquot;
791 lock_buffer(new_bh); 792 lock_buffer(new_bh);
792 BHDR(new_bh)->h_refcount = cpu_to_le32(1 + 793 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
793 le32_to_cpu(BHDR(new_bh)->h_refcount));
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 794 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 795 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 796 unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
808 get_bh(new_bh); 808 get_bh(new_bh);
809 } else { 809 } else {
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = le32_to_cpu( 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_SB(sb)->s_es->s_first_data_block) + 812 EXT4_I(inode)->i_block_group);
813 (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
814 EXT4_BLOCKS_PER_GROUP(sb);
815 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_block(handle, inode,
816 goal, &error); 814 goal, &error);
817 if (error) 815 if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
863 goto cleanup; 861 goto cleanup;
864 862
865bad_block: 863bad_block:
866 ext4_error(inode->i_sb, __FUNCTION__, 864 ext4_error(inode->i_sb, __func__,
867 "inode %lu: bad block %llu", inode->i_ino, 865 "inode %lu: bad block %llu", inode->i_ino,
868 EXT4_I(inode)->i_file_acl); 866 EXT4_I(inode)->i_file_acl);
869 goto cleanup; 867 goto cleanup;
@@ -1166,7 +1164,7 @@ retry:
1166 if (!bh) 1164 if (!bh)
1167 goto cleanup; 1165 goto cleanup;
1168 if (ext4_xattr_check_block(bh)) { 1166 if (ext4_xattr_check_block(bh)) {
1169 ext4_error(inode->i_sb, __FUNCTION__, 1167 ext4_error(inode->i_sb, __func__,
1170 "inode %lu: bad block %llu", inode->i_ino, 1168 "inode %lu: bad block %llu", inode->i_ino,
1171 EXT4_I(inode)->i_file_acl); 1169 EXT4_I(inode)->i_file_acl);
1172 error = -EIO; 1170 error = -EIO;
@@ -1341,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1341 goto cleanup; 1339 goto cleanup;
1342 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1340 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1343 if (!bh) { 1341 if (!bh) {
1344 ext4_error(inode->i_sb, __FUNCTION__, 1342 ext4_error(inode->i_sb, __func__,
1345 "inode %lu: block %llu read error", inode->i_ino, 1343 "inode %lu: block %llu read error", inode->i_ino,
1346 EXT4_I(inode)->i_file_acl); 1344 EXT4_I(inode)->i_file_acl);
1347 goto cleanup; 1345 goto cleanup;
1348 } 1346 }
1349 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1347 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1350 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1348 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1351 ext4_error(inode->i_sb, __FUNCTION__, 1349 ext4_error(inode->i_sb, __func__,
1352 "inode %lu: bad block %llu", inode->i_ino, 1350 "inode %lu: bad block %llu", inode->i_ino,
1353 EXT4_I(inode)->i_file_acl); 1351 EXT4_I(inode)->i_file_acl);
1354 goto cleanup; 1352 goto cleanup;
@@ -1475,7 +1473,7 @@ again:
1475 } 1473 }
1476 bh = sb_bread(inode->i_sb, ce->e_block); 1474 bh = sb_bread(inode->i_sb, ce->e_block);
1477 if (!bh) { 1475 if (!bh) {
1478 ext4_error(inode->i_sb, __FUNCTION__, 1476 ext4_error(inode->i_sb, __func__,
1479 "inode %lu: block %lu read error", 1477 "inode %lu: block %lu read error",
1480 inode->i_ino, (unsigned long) ce->e_block); 1478 inode->i_ino, (unsigned long) ce->e_block);
1481 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1479 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
77extern int ext4_xattr_list(struct inode *, char *, size_t);
78extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 77extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
79extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 78extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
80 79
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
99} 98}
100 99
101static inline int 100static inline int
102ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
103{
104 return -EOPNOTSUPP;
105}
106
107static inline int
108ext4_xattr_set(struct inode *inode, int name_index, const char *name, 101ext4_xattr_set(struct inode *inode, int name_index, const char *name,
109 const void *value, size_t size, int flags) 102 const void *value, size_t size, int flags)
110{ 103{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext4_jbd2.h>
10#include <linux/ext4_fs.h>
11#include <linux/security.h> 9#include <linux/security.h>
10#include "ext4_jbd2.h"
11#include "ext4.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted." 16#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/ext4_jbd2.h> 11#include "ext4_jbd2.h"
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user." 15#define XATTR_USER_PREFIX "user."
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..e0139786f717 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 jbd_debug (3, "JBD: commit phase 2\n"); 520 jbd_debug (3, "JBD: commit phase 2\n");
521 521
522 /* 522 /*
523 * First, drop modified flag: all accesses to the buffers
524 * will be tracked for a new trasaction only -bzzz
525 */
526 spin_lock(&journal->j_list_lock);
527 if (commit_transaction->t_buffers) {
528 new_jh = jh = commit_transaction->t_buffers->b_tnext;
529 do {
530 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
531 new_jh->b_modified == 0);
532 new_jh->b_modified = 0;
533 new_jh = new_jh->b_tnext;
534 } while (new_jh != jh);
535 }
536 spin_unlock(&journal->j_list_lock);
537
538 /*
539 * Now start flushing things to disk, in the order they appear 523 * Now start flushing things to disk, in the order they appear
540 * on the transaction lists. Data blocks go first. 524 * on the transaction lists. Data blocks go first.
541 */ 525 */
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
584 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; 568 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
585 stats.u.run.rs_blocks_logged = 0; 569 stats.u.run.rs_blocks_logged = 0;
586 570
571 J_ASSERT(commit_transaction->t_nr_buffers <=
572 commit_transaction->t_outstanding_credits);
573
587 descriptor = NULL; 574 descriptor = NULL;
588 bufs = 0; 575 bufs = 0;
589 while (commit_transaction->t_buffers) { 576 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb7eb6c27bcb..53632e3e8457 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -997,13 +997,14 @@ fail:
997 */ 997 */
998 998
999/** 999/**
1000 * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure 1000 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1001 * @bdev: Block device on which to create the journal 1001 * @bdev: Block device on which to create the journal
1002 * @fs_dev: Device which hold journalled filesystem for this journal. 1002 * @fs_dev: Device which hold journalled filesystem for this journal.
1003 * @start: Block nr Start of journal. 1003 * @start: Block nr Start of journal.
1004 * @len: Length of the journal in blocks. 1004 * @len: Length of the journal in blocks.
1005 * @blocksize: blocksize of journalling device 1005 * @blocksize: blocksize of journalling device
1006 * @returns: a newly created journal_t * 1006 *
1007 * Returns: a newly created journal_t *
1007 * 1008 *
1008 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1009 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1009 * range of blocks on an arbitrary block device. 1010 * range of blocks on an arbitrary block device.
@@ -1027,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1027 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1028 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1028 if (!journal->j_wbuf) { 1029 if (!journal->j_wbuf) {
1029 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1030 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1030 __FUNCTION__); 1031 __func__);
1031 kfree(journal); 1032 kfree(journal);
1032 journal = NULL; 1033 journal = NULL;
1033 goto out; 1034 goto out;
@@ -1083,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1083 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1084 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1084 if (!journal->j_wbuf) { 1085 if (!journal->j_wbuf) {
1085 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1086 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1086 __FUNCTION__); 1087 __func__);
1087 kfree(journal); 1088 kfree(journal);
1088 return NULL; 1089 return NULL;
1089 } 1090 }
@@ -1092,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1092 /* If that failed, give up */ 1093 /* If that failed, give up */
1093 if (err) { 1094 if (err) {
1094 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1095 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1095 __FUNCTION__); 1096 __func__);
1096 kfree(journal); 1097 kfree(journal);
1097 return NULL; 1098 return NULL;
1098 } 1099 }
@@ -1178,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
1178 */ 1179 */
1179 printk(KERN_EMERG 1180 printk(KERN_EMERG
1180 "%s: creation of journal on external device!\n", 1181 "%s: creation of journal on external device!\n",
1181 __FUNCTION__); 1182 __func__);
1182 BUG(); 1183 BUG();
1183 } 1184 }
1184 1185
@@ -1976,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
1976 1977
1977static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1978static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1978{ 1979{
1979 J_ASSERT(jbd2_journal_head_cache != NULL); 1980 if (jbd2_journal_head_cache) {
1980 kmem_cache_destroy(jbd2_journal_head_cache); 1981 kmem_cache_destroy(jbd2_journal_head_cache);
1981 jbd2_journal_head_cache = NULL; 1982 jbd2_journal_head_cache = NULL;
1983 }
1982} 1984}
1983 1985
1984/* 1986/*
@@ -1997,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
1997 jbd_debug(1, "out of memory for journal_head\n"); 1999 jbd_debug(1, "out of memory for journal_head\n");
1998 if (time_after(jiffies, last_warning + 5*HZ)) { 2000 if (time_after(jiffies, last_warning + 5*HZ)) {
1999 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2001 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2000 __FUNCTION__); 2002 __func__);
2001 last_warning = jiffies; 2003 last_warning = jiffies;
2002 } 2004 }
2003 while (!ret) { 2005 while (!ret) {
@@ -2134,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2134 if (jh->b_frozen_data) { 2136 if (jh->b_frozen_data) {
2135 printk(KERN_WARNING "%s: freeing " 2137 printk(KERN_WARNING "%s: freeing "
2136 "b_frozen_data\n", 2138 "b_frozen_data\n",
2137 __FUNCTION__); 2139 __func__);
2138 jbd2_free(jh->b_frozen_data, bh->b_size); 2140 jbd2_free(jh->b_frozen_data, bh->b_size);
2139 } 2141 }
2140 if (jh->b_committed_data) { 2142 if (jh->b_committed_data) {
2141 printk(KERN_WARNING "%s: freeing " 2143 printk(KERN_WARNING "%s: freeing "
2142 "b_committed_data\n", 2144 "b_committed_data\n",
2143 __FUNCTION__); 2145 __func__);
2144 jbd2_free(jh->b_committed_data, bh->b_size); 2146 jbd2_free(jh->b_committed_data, bh->b_size);
2145 } 2147 }
2146 bh->b_private = NULL; 2148 bh->b_private = NULL;
@@ -2305,10 +2307,12 @@ static int __init journal_init(void)
2305 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2307 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2306 2308
2307 ret = journal_init_caches(); 2309 ret = journal_init_caches();
2308 if (ret != 0) 2310 if (ret == 0) {
2311 jbd2_create_debugfs_entry();
2312 jbd2_create_jbd_stats_proc_entry();
2313 } else {
2309 jbd2_journal_destroy_caches(); 2314 jbd2_journal_destroy_caches();
2310 jbd2_create_debugfs_entry(); 2315 }
2311 jbd2_create_jbd_stats_proc_entry();
2312 return ret; 2316 return ret;
2313} 2317}
2314 2318
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
139oom: 139oom:
140 if (!journal_oom_retry) 140 if (!journal_oom_retry)
141 return -ENOMEM; 141 return -ENOMEM;
142 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 142 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
143 yield(); 143 yield();
144 goto repeat; 144 goto repeat;
145} 145}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
167 return NULL; 167 return NULL;
168} 168}
169 169
170void jbd2_journal_destroy_revoke_caches(void)
171{
172 if (jbd2_revoke_record_cache) {
173 kmem_cache_destroy(jbd2_revoke_record_cache);
174 jbd2_revoke_record_cache = NULL;
175 }
176 if (jbd2_revoke_table_cache) {
177 kmem_cache_destroy(jbd2_revoke_table_cache);
178 jbd2_revoke_table_cache = NULL;
179 }
180}
181
170int __init jbd2_journal_init_revoke_caches(void) 182int __init jbd2_journal_init_revoke_caches(void)
171{ 183{
184 J_ASSERT(!jbd2_revoke_record_cache);
185 J_ASSERT(!jbd2_revoke_table_cache);
186
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 187 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 188 sizeof(struct jbd2_revoke_record_s),
174 0, 189 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 190 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 191 NULL);
177 if (!jbd2_revoke_record_cache) 192 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 193 goto record_cache_failure;
179 194
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 195 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 196 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 197 0, SLAB_TEMPORARY, NULL);
183 if (!jbd2_revoke_table_cache) { 198 if (!jbd2_revoke_table_cache)
184 kmem_cache_destroy(jbd2_revoke_record_cache); 199 goto table_cache_failure;
185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM;
187 }
188 return 0; 200 return 0;
201table_cache_failure:
202 jbd2_journal_destroy_revoke_caches();
203record_cache_failure:
204 return -ENOMEM;
189} 205}
190 206
191void jbd2_journal_destroy_revoke_caches(void) 207static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
192{ 208{
193 kmem_cache_destroy(jbd2_revoke_record_cache); 209 int shift = 0;
194 jbd2_revoke_record_cache = NULL; 210 int tmp = hash_size;
195 kmem_cache_destroy(jbd2_revoke_table_cache); 211 struct jbd2_revoke_table_s *table;
196 jbd2_revoke_table_cache = NULL;
197}
198
199/* Initialise the revoke table for a given journal to a given size. */
200
201int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
202{
203 int shift, tmp;
204 212
205 J_ASSERT (journal->j_revoke_table[0] == NULL); 213 table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
214 if (!table)
215 goto out;
206 216
207 shift = 0;
208 tmp = hash_size;
209 while((tmp >>= 1UL) != 0UL) 217 while((tmp >>= 1UL) != 0UL)
210 shift++; 218 shift++;
211 219
212 journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 220 table->hash_size = hash_size;
213 if (!journal->j_revoke_table[0]) 221 table->hash_shift = shift;
214 return -ENOMEM; 222 table->hash_table =
215 journal->j_revoke = journal->j_revoke_table[0];
216
217 /* Check that the hash_size is a power of two */
218 J_ASSERT(is_power_of_2(hash_size));
219
220 journal->j_revoke->hash_size = hash_size;
221
222 journal->j_revoke->hash_shift = shift;
223
224 journal->j_revoke->hash_table =
225 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 223 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
226 if (!journal->j_revoke->hash_table) { 224 if (!table->hash_table) {
227 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 225 kmem_cache_free(jbd2_revoke_table_cache, table);
228 journal->j_revoke = NULL; 226 table = NULL;
229 return -ENOMEM; 227 goto out;
230 } 228 }
231 229
232 for (tmp = 0; tmp < hash_size; tmp++) 230 for (tmp = 0; tmp < hash_size; tmp++)
233 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 231 INIT_LIST_HEAD(&table->hash_table[tmp]);
234 232
235 journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 233out:
236 if (!journal->j_revoke_table[1]) { 234 return table;
237 kfree(journal->j_revoke_table[0]->hash_table); 235}
238 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 236
239 return -ENOMEM; 237static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
238{
239 int i;
240 struct list_head *hash_list;
241
242 for (i = 0; i < table->hash_size; i++) {
243 hash_list = &table->hash_table[i];
244 J_ASSERT(list_empty(hash_list));
240 } 245 }
241 246
242 journal->j_revoke = journal->j_revoke_table[1]; 247 kfree(table->hash_table);
248 kmem_cache_free(jbd2_revoke_table_cache, table);
249}
243 250
244 /* Check that the hash_size is a power of two */ 251/* Initialise the revoke table for a given journal to a given size. */
252int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
253{
254 J_ASSERT(journal->j_revoke_table[0] == NULL);
245 J_ASSERT(is_power_of_2(hash_size)); 255 J_ASSERT(is_power_of_2(hash_size));
246 256
247 journal->j_revoke->hash_size = hash_size; 257 journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
248 258 if (!journal->j_revoke_table[0])
249 journal->j_revoke->hash_shift = shift; 259 goto fail0;
250 260
251 journal->j_revoke->hash_table = 261 journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
252 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 262 if (!journal->j_revoke_table[1])
253 if (!journal->j_revoke->hash_table) { 263 goto fail1;
254 kfree(journal->j_revoke_table[0]->hash_table);
255 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
256 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
257 journal->j_revoke = NULL;
258 return -ENOMEM;
259 }
260 264
261 for (tmp = 0; tmp < hash_size; tmp++) 265 journal->j_revoke = journal->j_revoke_table[1];
262 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
263 266
264 spin_lock_init(&journal->j_revoke_lock); 267 spin_lock_init(&journal->j_revoke_lock);
265 268
266 return 0; 269 return 0;
267}
268 270
269/* Destoy a journal's revoke table. The table must already be empty! */ 271fail1:
272 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
273fail0:
274 return -ENOMEM;
275}
270 276
277/* Destroy a journal's revoke table. The table must already be empty! */
271void jbd2_journal_destroy_revoke(journal_t *journal) 278void jbd2_journal_destroy_revoke(journal_t *journal)
272{ 279{
273 struct jbd2_revoke_table_s *table;
274 struct list_head *hash_list;
275 int i;
276
277 table = journal->j_revoke_table[0];
278 if (!table)
279 return;
280
281 for (i=0; i<table->hash_size; i++) {
282 hash_list = &table->hash_table[i];
283 J_ASSERT (list_empty(hash_list));
284 }
285
286 kfree(table->hash_table);
287 kmem_cache_free(jbd2_revoke_table_cache, table);
288 journal->j_revoke = NULL;
289
290 table = journal->j_revoke_table[1];
291 if (!table)
292 return;
293
294 for (i=0; i<table->hash_size; i++) {
295 hash_list = &table->hash_table[i];
296 J_ASSERT (list_empty(hash_list));
297 }
298
299 kfree(table->hash_table);
300 kmem_cache_free(jbd2_revoke_table_cache, table);
301 journal->j_revoke = NULL; 280 journal->j_revoke = NULL;
281 if (journal->j_revoke_table[0])
282 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
283 if (journal->j_revoke_table[1])
284 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
302} 285}
303 286
304 287
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
618 goto done; 618 goto done;
619 619
620 /* 620 /*
621 * this is the first time this transaction is touching this buffer,
622 * reset the modified flag
623 */
624 jh->b_modified = 0;
625
626 /*
621 * If there is already a copy-out version of this buffer, then we don't 627 * If there is already a copy-out version of this buffer, then we don't
622 * need to make another one 628 * need to make another one
623 */ 629 */
@@ -690,7 +696,7 @@ repeat:
690 if (!frozen_buffer) { 696 if (!frozen_buffer) {
691 printk(KERN_EMERG 697 printk(KERN_EMERG
692 "%s: OOM for frozen_buffer\n", 698 "%s: OOM for frozen_buffer\n",
693 __FUNCTION__); 699 __func__);
694 JBUFFER_TRACE(jh, "oom!"); 700 JBUFFER_TRACE(jh, "oom!");
695 error = -ENOMEM; 701 error = -ENOMEM;
696 jbd_lock_bh_state(bh); 702 jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
829 835
830 if (jh->b_transaction == NULL) { 836 if (jh->b_transaction == NULL) {
831 jh->b_transaction = transaction; 837 jh->b_transaction = transaction;
838
839 /* first access by this transaction */
840 jh->b_modified = 0;
841
832 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 842 JBUFFER_TRACE(jh, "file as BJ_Reserved");
833 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 843 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
834 } else if (jh->b_transaction == journal->j_committing_transaction) { 844 } else if (jh->b_transaction == journal->j_committing_transaction) {
845 /* first access by this transaction */
846 jh->b_modified = 0;
847
835 JBUFFER_TRACE(jh, "set next transaction"); 848 JBUFFER_TRACE(jh, "set next transaction");
836 jh->b_next_transaction = transaction; 849 jh->b_next_transaction = transaction;
837 } 850 }
@@ -901,7 +914,7 @@ repeat:
901 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 914 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
902 if (!committed_data) { 915 if (!committed_data) {
903 printk(KERN_EMERG "%s: No memory for committed data\n", 916 printk(KERN_EMERG "%s: No memory for committed data\n",
904 __FUNCTION__); 917 __func__);
905 err = -ENOMEM; 918 err = -ENOMEM;
906 goto out; 919 goto out;
907 } 920 }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 struct journal_head *jh; 1243 struct journal_head *jh;
1231 int drop_reserve = 0; 1244 int drop_reserve = 0;
1232 int err = 0; 1245 int err = 0;
1246 int was_modified = 0;
1233 1247
1234 BUFFER_TRACE(bh, "entry"); 1248 BUFFER_TRACE(bh, "entry");
1235 1249
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1248 goto not_jbd; 1262 goto not_jbd;
1249 } 1263 }
1250 1264
1265 /* keep track of wether or not this transaction modified us */
1266 was_modified = jh->b_modified;
1267
1251 /* 1268 /*
1252 * The buffer's going from the transaction, we must drop 1269 * The buffer's going from the transaction, we must drop
1253 * all references -bzzz 1270 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1265 1282
1266 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1283 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1267 1284
1268 drop_reserve = 1; 1285 /*
1286 * we only want to drop a reference if this transaction
1287 * modified the buffer
1288 */
1289 if (was_modified)
1290 drop_reserve = 1;
1269 1291
1270 /* 1292 /*
1271 * We are no longer going to journal this buffer. 1293 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1305 if (jh->b_next_transaction) { 1327 if (jh->b_next_transaction) {
1306 J_ASSERT(jh->b_next_transaction == transaction); 1328 J_ASSERT(jh->b_next_transaction == transaction);
1307 jh->b_next_transaction = NULL; 1329 jh->b_next_transaction = NULL;
1308 drop_reserve = 1; 1330
1331 /*
1332 * only drop a reference if this transaction modified
1333 * the buffer
1334 */
1335 if (was_modified)
1336 drop_reserve = 1;
1309 } 1337 }
1310 } 1338 }
1311 1339
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
1434 return err; 1462 return err;
1435} 1463}
1436 1464
1437/**int jbd2_journal_force_commit() - force any uncommitted transactions 1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1438 * @journal: journal to force 1467 * @journal: journal to force
1439 * 1468 *
1440 * For synchronous operations: force any uncommitted transactions 1469 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2077 jh->b_transaction = jh->b_next_transaction; 2106 jh->b_transaction = jh->b_next_transaction;
2078 jh->b_next_transaction = NULL; 2107 jh->b_next_transaction = NULL;
2079 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2108 __jbd2_journal_file_buffer(jh, jh->b_transaction,
2080 was_dirty ? BJ_Metadata : BJ_Reserved); 2109 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2081 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2110 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2082 2111
2083 if (was_dirty) 2112 if (was_dirty)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 524021ff5436..3f53dd101f99 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -64,3 +64,16 @@ config XFS_RT
64 See the xfs man page in section 5 for additional information. 64 See the xfs man page in section 5 for additional information.
65 65
66 If unsure, say N. 66 If unsure, say N.
67
68config XFS_DEBUG
69 bool "XFS Debugging support (EXPERIMENTAL)"
70 depends on XFS_FS && EXPERIMENTAL
71 help
72 Say Y here to get an XFS build with many debugging features,
73 including ASSERT checks, function wrappers around macros,
74 and extra sanity-checking functions in various code paths.
75
76 Note that the resulting code will be HUGE and SLOW, and probably
77 not useful unless you are debugging a particular problem.
78
79 Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index c110bb002665..ff6a19873e5c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -20,29 +20,24 @@
20 20
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22 22
23enum { MR_NONE, MR_ACCESS, MR_UPDATE };
24
25typedef struct { 23typedef struct {
26 struct rw_semaphore mr_lock; 24 struct rw_semaphore mr_lock;
25#ifdef DEBUG
27 int mr_writer; 26 int mr_writer;
27#endif
28} mrlock_t; 28} mrlock_t;
29 29
30#ifdef DEBUG
30#define mrinit(mrp, name) \ 31#define mrinit(mrp, name) \
31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) 32 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
33#else
34#define mrinit(mrp, name) \
35 do { init_rwsem(&(mrp)->mr_lock); } while (0)
36#endif
37
32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) 38#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
33#define mrfree(mrp) do { } while (0) 39#define mrfree(mrp) do { } while (0)
34 40
35static inline void mraccess(mrlock_t *mrp)
36{
37 down_read(&mrp->mr_lock);
38}
39
40static inline void mrupdate(mrlock_t *mrp)
41{
42 down_write(&mrp->mr_lock);
43 mrp->mr_writer = 1;
44}
45
46static inline void mraccess_nested(mrlock_t *mrp, int subclass) 41static inline void mraccess_nested(mrlock_t *mrp, int subclass)
47{ 42{
48 down_read_nested(&mrp->mr_lock, subclass); 43 down_read_nested(&mrp->mr_lock, subclass);
@@ -51,10 +46,11 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
51static inline void mrupdate_nested(mrlock_t *mrp, int subclass) 46static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
52{ 47{
53 down_write_nested(&mrp->mr_lock, subclass); 48 down_write_nested(&mrp->mr_lock, subclass);
49#ifdef DEBUG
54 mrp->mr_writer = 1; 50 mrp->mr_writer = 1;
51#endif
55} 52}
56 53
57
58static inline int mrtryaccess(mrlock_t *mrp) 54static inline int mrtryaccess(mrlock_t *mrp)
59{ 55{
60 return down_read_trylock(&mrp->mr_lock); 56 return down_read_trylock(&mrp->mr_lock);
@@ -64,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
64{ 60{
65 if (!down_write_trylock(&mrp->mr_lock)) 61 if (!down_write_trylock(&mrp->mr_lock))
66 return 0; 62 return 0;
63#ifdef DEBUG
67 mrp->mr_writer = 1; 64 mrp->mr_writer = 1;
65#endif
68 return 1; 66 return 1;
69} 67}
70 68
71static inline void mrunlock(mrlock_t *mrp) 69static inline void mrunlock_excl(mrlock_t *mrp)
72{ 70{
73 if (mrp->mr_writer) { 71#ifdef DEBUG
74 mrp->mr_writer = 0; 72 mrp->mr_writer = 0;
75 up_write(&mrp->mr_lock); 73#endif
76 } else { 74 up_write(&mrp->mr_lock);
77 up_read(&mrp->mr_lock);
78 }
79} 75}
80 76
81static inline void mrdemote(mrlock_t *mrp) 77static inline void mrunlock_shared(mrlock_t *mrp)
82{ 78{
83 mrp->mr_writer = 0; 79 up_read(&mrp->mr_lock);
84 downgrade_write(&mrp->mr_lock);
85} 80}
86 81
87#ifdef DEBUG 82static inline void mrdemote(mrlock_t *mrp)
88/*
89 * Debug-only routine, without some platform-specific asm code, we can
90 * now only answer requests regarding whether we hold the lock for write
91 * (reader state is outside our visibility, we only track writer state).
92 * Note: means !ismrlocked would give false positives, so don't do that.
93 */
94static inline int ismrlocked(mrlock_t *mrp, int type)
95{ 83{
96 if (mrp && type == MR_UPDATE) 84#ifdef DEBUG
97 return mrp->mr_writer; 85 mrp->mr_writer = 0;
98 return 1;
99}
100#endif 86#endif
87 downgrade_write(&mrp->mr_lock);
88}
101 89
102#endif /* __XFS_SUPPORT_MRLOCK_H__ */ 90#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52f6846101d5..5105015a75ad 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -886,7 +886,7 @@ int
886xfs_buf_lock_value( 886xfs_buf_lock_value(
887 xfs_buf_t *bp) 887 xfs_buf_t *bp)
888{ 888{
889 return atomic_read(&bp->b_sema.count); 889 return bp->b_sema.count;
890} 890}
891#endif 891#endif
892 892
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 265f0168ab76..c672b3238b14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -133,7 +133,7 @@ xfs_nfs_get_inode(
133 if (!ip) 133 if (!ip)
134 return ERR_PTR(-EIO); 134 return ERR_PTR(-EIO);
135 135
136 if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { 136 if (ip->i_d.di_gen != generation) {
137 xfs_iput_new(ip, XFS_ILOCK_SHARED); 137 xfs_iput_new(ip, XFS_ILOCK_SHARED);
138 return ERR_PTR(-ENOENT); 138 return ERR_PTR(-ENOENT);
139 } 139 }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 05905246434d..65e78c13d4ae 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,9 +43,6 @@
43#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
44 44
45static struct vm_operations_struct xfs_file_vm_ops; 45static struct vm_operations_struct xfs_file_vm_ops;
46#ifdef CONFIG_XFS_DMAPI
47static struct vm_operations_struct xfs_dmapi_file_vm_ops;
48#endif
49 46
50STATIC_INLINE ssize_t 47STATIC_INLINE ssize_t
51__xfs_file_read( 48__xfs_file_read(
@@ -202,22 +199,6 @@ xfs_file_fsync(
202 (xfs_off_t)0, (xfs_off_t)-1); 199 (xfs_off_t)0, (xfs_off_t)-1);
203} 200}
204 201
205#ifdef CONFIG_XFS_DMAPI
206STATIC int
207xfs_vm_fault(
208 struct vm_area_struct *vma,
209 struct vm_fault *vmf)
210{
211 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
212 bhv_vnode_t *vp = vn_from_inode(inode);
213
214 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
215 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
216 return VM_FAULT_SIGBUS;
217 return filemap_fault(vma, vmf);
218}
219#endif /* CONFIG_XFS_DMAPI */
220
221/* 202/*
222 * Unfortunately we can't just use the clean and simple readdir implementation 203 * Unfortunately we can't just use the clean and simple readdir implementation
223 * below, because nfs might call back into ->lookup from the filldir callback 204 * below, because nfs might call back into ->lookup from the filldir callback
@@ -386,11 +367,6 @@ xfs_file_mmap(
386 vma->vm_ops = &xfs_file_vm_ops; 367 vma->vm_ops = &xfs_file_vm_ops;
387 vma->vm_flags |= VM_CAN_NONLINEAR; 368 vma->vm_flags |= VM_CAN_NONLINEAR;
388 369
389#ifdef CONFIG_XFS_DMAPI
390 if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
391 vma->vm_ops = &xfs_dmapi_file_vm_ops;
392#endif /* CONFIG_XFS_DMAPI */
393
394 file_accessed(filp); 370 file_accessed(filp);
395 return 0; 371 return 0;
396} 372}
@@ -437,47 +413,6 @@ xfs_file_ioctl_invis(
437 return error; 413 return error;
438} 414}
439 415
440#ifdef CONFIG_XFS_DMAPI
441#ifdef HAVE_VMOP_MPROTECT
442STATIC int
443xfs_vm_mprotect(
444 struct vm_area_struct *vma,
445 unsigned int newflags)
446{
447 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
448 struct xfs_mount *mp = XFS_M(inode->i_sb);
449 int error = 0;
450
451 if (mp->m_flags & XFS_MOUNT_DMAPI) {
452 if ((vma->vm_flags & VM_MAYSHARE) &&
453 (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
454 error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
455 }
456 return error;
457}
458#endif /* HAVE_VMOP_MPROTECT */
459#endif /* CONFIG_XFS_DMAPI */
460
461#ifdef HAVE_FOP_OPEN_EXEC
462/* If the user is attempting to execute a file that is offline then
463 * we have to trigger a DMAPI READ event before the file is marked as busy
464 * otherwise the invisible I/O will not be able to write to the file to bring
465 * it back online.
466 */
467STATIC int
468xfs_file_open_exec(
469 struct inode *inode)
470{
471 struct xfs_mount *mp = XFS_M(inode->i_sb);
472 struct xfs_inode *ip = XFS_I(inode);
473
474 if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
475 DM_EVENT_ENABLED(ip, DM_EVENT_READ))
476 return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
477 return 0;
478}
479#endif /* HAVE_FOP_OPEN_EXEC */
480
481/* 416/*
482 * mmap()d file has taken write protection fault and is being made 417 * mmap()d file has taken write protection fault and is being made
483 * writable. We can set the page state up correctly for a writable 418 * writable. We can set the page state up correctly for a writable
@@ -546,13 +481,3 @@ static struct vm_operations_struct xfs_file_vm_ops = {
546 .fault = filemap_fault, 481 .fault = filemap_fault,
547 .page_mkwrite = xfs_vm_page_mkwrite, 482 .page_mkwrite = xfs_vm_page_mkwrite,
548}; 483};
549
550#ifdef CONFIG_XFS_DMAPI
551static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
552 .fault = xfs_vm_fault,
553 .page_mkwrite = xfs_vm_page_mkwrite,
554#ifdef HAVE_VMOP_MPROTECT
555 .mprotect = xfs_vm_mprotect,
556#endif
557};
558#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ddb86b73c6b..a42ba9d71156 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq(
238 return error; 238 return error;
239 if (ip == NULL) 239 if (ip == NULL)
240 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
241 if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { 241 if (ip->i_d.di_gen != igen) {
242 xfs_iput_new(ip, XFS_ILOCK_SHARED); 242 xfs_iput_new(ip, XFS_ILOCK_SHARED);
243 return XFS_ERROR(ENOENT); 243 return XFS_ERROR(ENOENT);
244 } 244 }
@@ -505,14 +505,14 @@ xfs_attrmulti_attr_get(
505{ 505{
506 char *kbuf; 506 char *kbuf;
507 int error = EFAULT; 507 int error = EFAULT;
508 508
509 if (*len > XATTR_SIZE_MAX) 509 if (*len > XATTR_SIZE_MAX)
510 return EINVAL; 510 return EINVAL;
511 kbuf = kmalloc(*len, GFP_KERNEL); 511 kbuf = kmalloc(*len, GFP_KERNEL);
512 if (!kbuf) 512 if (!kbuf)
513 return ENOMEM; 513 return ENOMEM;
514 514
515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); 515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
516 if (error) 516 if (error)
517 goto out_kfree; 517 goto out_kfree;
518 518
@@ -546,7 +546,7 @@ xfs_attrmulti_attr_set(
546 546
547 if (copy_from_user(kbuf, ubuf, len)) 547 if (copy_from_user(kbuf, ubuf, len))
548 goto out_kfree; 548 goto out_kfree;
549 549
550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
551 551
552 out_kfree: 552 out_kfree:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index a1237dad6430..2bf287ef5489 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -511,7 +511,8 @@ xfs_vn_rename(
511 xfs_dentry_to_name(&nname, ndentry); 511 xfs_dentry_to_name(&nname, ndentry);
512 512
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname); 514 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL);
515 if (likely(!error)) { 516 if (likely(!error)) {
516 if (new_inode) 517 if (new_inode)
517 xfs_validate_fields(new_inode); 518 xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e5143323e71f..1bc9f600365f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
99/* 99/*
100 * Feature macros (disable/enable) 100 * Feature macros (disable/enable)
101 */ 101 */
102#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
103#ifdef CONFIG_SMP 102#ifdef CONFIG_SMP
104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 103#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
105#else 104#else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1ebd8004469c..5e3b57516ec7 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -394,7 +394,7 @@ xfs_zero_last_block(
394 int error = 0; 394 int error = 0;
395 xfs_bmbt_irec_t imap; 395 xfs_bmbt_irec_t imap;
396 396
397 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 397 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
398 398
399 zero_offset = XFS_B_FSB_OFFSET(mp, isize); 399 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
400 if (zero_offset == 0) { 400 if (zero_offset == 0) {
@@ -425,14 +425,14 @@ xfs_zero_last_block(
425 * out sync. We need to drop the ilock while we do this so we 425 * out sync. We need to drop the ilock while we do this so we
426 * don't deadlock when the buffer cache calls back to us. 426 * don't deadlock when the buffer cache calls back to us.
427 */ 427 */
428 xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 428 xfs_iunlock(ip, XFS_ILOCK_EXCL);
429 429
430 zero_len = mp->m_sb.sb_blocksize - zero_offset; 430 zero_len = mp->m_sb.sb_blocksize - zero_offset;
431 if (isize + zero_len > offset) 431 if (isize + zero_len > offset)
432 zero_len = offset - isize; 432 zero_len = offset - isize;
433 error = xfs_iozero(ip, isize, zero_len); 433 error = xfs_iozero(ip, isize, zero_len);
434 434
435 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 435 xfs_ilock(ip, XFS_ILOCK_EXCL);
436 ASSERT(error >= 0); 436 ASSERT(error >= 0);
437 return error; 437 return error;
438} 438}
@@ -465,8 +465,7 @@ xfs_zero_eof(
465 int error = 0; 465 int error = 0;
466 xfs_bmbt_irec_t imap; 466 xfs_bmbt_irec_t imap;
467 467
468 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 468 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
469 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
470 ASSERT(offset > isize); 469 ASSERT(offset > isize);
471 470
472 /* 471 /*
@@ -475,8 +474,7 @@ xfs_zero_eof(
475 */ 474 */
476 error = xfs_zero_last_block(ip, offset, isize); 475 error = xfs_zero_last_block(ip, offset, isize);
477 if (error) { 476 if (error) {
478 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 477 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
479 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
480 return error; 478 return error;
481 } 479 }
482 480
@@ -507,8 +505,7 @@ xfs_zero_eof(
507 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 505 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
508 0, NULL, 0, &imap, &nimaps, NULL, NULL); 506 0, NULL, 0, &imap, &nimaps, NULL, NULL);
509 if (error) { 507 if (error) {
510 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 508 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
511 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
512 return error; 509 return error;
513 } 510 }
514 ASSERT(nimaps > 0); 511 ASSERT(nimaps > 0);
@@ -532,7 +529,7 @@ xfs_zero_eof(
532 * Drop the inode lock while we're doing the I/O. 529 * Drop the inode lock while we're doing the I/O.
533 * We'll still have the iolock to protect us. 530 * We'll still have the iolock to protect us.
534 */ 531 */
535 xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 532 xfs_iunlock(ip, XFS_ILOCK_EXCL);
536 533
537 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); 534 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
538 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); 535 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -548,13 +545,13 @@ xfs_zero_eof(
548 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 545 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
549 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 546 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
550 547
551 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 548 xfs_ilock(ip, XFS_ILOCK_EXCL);
552 } 549 }
553 550
554 return 0; 551 return 0;
555 552
556out_lock: 553out_lock:
557 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 554 xfs_ilock(ip, XFS_ILOCK_EXCL);
558 ASSERT(error >= 0); 555 ASSERT(error >= 0);
559 return error; 556 return error;
560} 557}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e1d498b4ba7a..e6be37dbd0e9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -50,7 +50,6 @@ struct xfs_iomap;
50#define XFS_INVAL_CACHED 18 50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19 51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20 52#define XFS_DIOWR_ENTER 20
53#define XFS_SENDFILE_ENTER 21
54#define XFS_WRITEPAGE_ENTER 22 53#define XFS_WRITEPAGE_ENTER 22
55#define XFS_RELEASEPAGE_ENTER 23 54#define XFS_RELEASEPAGE_ENTER 23
56#define XFS_INVALIDPAGE_ENTER 24 55#define XFS_INVALIDPAGE_ENTER 24
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 865eb708aa95..742b2c7852c1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1181,7 +1181,7 @@ xfs_fs_statfs(
1181 statp->f_fsid.val[0] = (u32)id; 1181 statp->f_fsid.val[0] = (u32)id;
1182 statp->f_fsid.val[1] = (u32)(id >> 32); 1182 statp->f_fsid.val[1] = (u32)(id >> 32);
1183 1183
1184 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 1184 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
1185 1185
1186 spin_lock(&mp->m_sb_lock); 1186 spin_lock(&mp->m_sb_lock);
1187 statp->f_bsize = sbp->sb_blocksize; 1187 statp->f_bsize = sbp->sb_blocksize;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 8b4d63ce8694..9d73cb5c0fc7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -25,12 +25,6 @@ struct attrlist_cursor_kern;
25 25
26typedef struct inode bhv_vnode_t; 26typedef struct inode bhv_vnode_t;
27 27
28#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode)
29#define VN_ISREG(vp) S_ISREG((vp)->i_mode)
30#define VN_ISDIR(vp) S_ISDIR((vp)->i_mode)
31#define VN_ISCHR(vp) S_ISCHR((vp)->i_mode)
32#define VN_ISBLK(vp) S_ISBLK((vp)->i_mode)
33
34/* 28/*
35 * Vnode to Linux inode mapping. 29 * Vnode to Linux inode mapping.
36 */ 30 */
@@ -151,24 +145,6 @@ typedef struct bhv_vattr {
151 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ 145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
152 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) 146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
153 147
154/*
155 * Modes.
156 */
157#define VSUID S_ISUID /* set user id on execution */
158#define VSGID S_ISGID /* set group id on execution */
159#define VSVTX S_ISVTX /* save swapped text even after use */
160#define VREAD S_IRUSR /* read, write, execute permissions */
161#define VWRITE S_IWUSR
162#define VEXEC S_IXUSR
163
164#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
165
166/*
167 * Check whether mandatory file locking is enabled.
168 */
169#define MANDLOCK(vp, mode) \
170 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
171
172extern void vn_init(void); 148extern void vn_init(void);
173extern int vn_revalidate(bhv_vnode_t *); 149extern int vn_revalidate(bhv_vnode_t *);
174 150
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 631ebb31b295..85df3288efd5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -933,7 +933,7 @@ xfs_qm_dqget(
933 type == XFS_DQ_PROJ || 933 type == XFS_DQ_PROJ ||
934 type == XFS_DQ_GROUP); 934 type == XFS_DQ_GROUP);
935 if (ip) { 935 if (ip) {
936 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 936 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
937 if (type == XFS_DQ_USER) 937 if (type == XFS_DQ_USER)
938 ASSERT(ip->i_udquot == NULL); 938 ASSERT(ip->i_udquot == NULL);
939 else 939 else
@@ -1088,7 +1088,7 @@ xfs_qm_dqget(
1088 xfs_qm_mplist_unlock(mp); 1088 xfs_qm_mplist_unlock(mp);
1089 XFS_DQ_HASH_UNLOCK(h); 1089 XFS_DQ_HASH_UNLOCK(h);
1090 dqret: 1090 dqret:
1091 ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip)); 1091 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1092 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1092 xfs_dqtrace_entry(dqp, "DQGET DONE");
1093 *O_dqpp = dqp; 1093 *O_dqpp = dqp;
1094 return (0); 1094 return (0);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 40ea56409561..d31cce1165c5 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -670,7 +670,7 @@ xfs_qm_dqattach_one(
670 xfs_dquot_t *dqp; 670 xfs_dquot_t *dqp;
671 int error; 671 int error;
672 672
673 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 673 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
674 error = 0; 674 error = 0;
675 /* 675 /*
676 * See if we already have it in the inode itself. IO_idqpp is 676 * See if we already have it in the inode itself. IO_idqpp is
@@ -874,7 +874,7 @@ xfs_qm_dqattach(
874 return 0; 874 return 0;
875 875
876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || 876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
877 XFS_ISLOCKED_INODE_EXCL(ip)); 877 xfs_isilocked(ip, XFS_ILOCK_EXCL));
878 878
879 if (! (flags & XFS_QMOPT_ILOCKED)) 879 if (! (flags & XFS_QMOPT_ILOCKED))
880 xfs_ilock(ip, XFS_ILOCK_EXCL); 880 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -888,7 +888,8 @@ xfs_qm_dqattach(
888 goto done; 888 goto done;
889 nquotas++; 889 nquotas++;
890 } 890 }
891 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 891
892 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
892 if (XFS_IS_OQUOTA_ON(mp)) { 893 if (XFS_IS_OQUOTA_ON(mp)) {
893 error = XFS_IS_GQUOTA_ON(mp) ? 894 error = XFS_IS_GQUOTA_ON(mp) ?
894 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 895 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
@@ -913,7 +914,7 @@ xfs_qm_dqattach(
913 * This WON'T, in general, result in a thrash. 914 * This WON'T, in general, result in a thrash.
914 */ 915 */
915 if (nquotas == 2) { 916 if (nquotas == 2) {
916 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 917 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
917 ASSERT(ip->i_udquot); 918 ASSERT(ip->i_udquot);
918 ASSERT(ip->i_gdquot); 919 ASSERT(ip->i_gdquot);
919 920
@@ -956,7 +957,7 @@ xfs_qm_dqattach(
956 957
957#ifdef QUOTADEBUG 958#ifdef QUOTADEBUG
958 else 959 else
959 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 960 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
960#endif 961#endif
961 return error; 962 return error;
962} 963}
@@ -1291,7 +1292,7 @@ xfs_qm_dqget_noattach(
1291 xfs_mount_t *mp; 1292 xfs_mount_t *mp;
1292 xfs_dquot_t *udqp, *gdqp; 1293 xfs_dquot_t *udqp, *gdqp;
1293 1294
1294 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 1295 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1295 mp = ip->i_mount; 1296 mp = ip->i_mount;
1296 udqp = NULL; 1297 udqp = NULL;
1297 gdqp = NULL; 1298 gdqp = NULL;
@@ -1392,7 +1393,7 @@ xfs_qm_qino_alloc(
1392 * Keep an extra reference to this quota inode. This inode is 1393 * Keep an extra reference to this quota inode. This inode is
1393 * locked exclusively and joined to the transaction already. 1394 * locked exclusively and joined to the transaction already.
1394 */ 1395 */
1395 ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip)); 1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1396 VN_HOLD(XFS_ITOV((*ip))); 1397 VN_HOLD(XFS_ITOV((*ip)));
1397 1398
1398 /* 1399 /*
@@ -1737,12 +1738,6 @@ xfs_qm_dqusage_adjust(
1737 return error; 1738 return error;
1738 } 1739 }
1739 1740
1740 if (ip->i_d.di_mode == 0) {
1741 xfs_iput_new(ip, XFS_ILOCK_EXCL);
1742 *res = BULKSTAT_RV_NOTHING;
1743 return XFS_ERROR(ENOENT);
1744 }
1745
1746 /* 1741 /*
1747 * Obtain the locked dquots. In case of an error (eg. allocation 1742 * Obtain the locked dquots. In case of an error (eg. allocation
1748 * fails for ENOSPC), we return the negative of the error number 1743 * fails for ENOSPC), we return the negative of the error number
@@ -2563,7 +2558,7 @@ xfs_qm_vop_chown(
2563 uint bfield = XFS_IS_REALTIME_INODE(ip) ? 2558 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2564 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 2559 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2565 2560
2566 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2561 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2567 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 2562 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2568 2563
2569 /* old dquot */ 2564 /* old dquot */
@@ -2607,7 +2602,7 @@ xfs_qm_vop_chown_reserve(
2607 uint delblks, blkflags, prjflags = 0; 2602 uint delblks, blkflags, prjflags = 0;
2608 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 2603 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2609 2604
2610 ASSERT(XFS_ISLOCKED_INODE(ip)); 2605 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2611 mp = ip->i_mount; 2606 mp = ip->i_mount;
2612 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2607 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2613 2608
@@ -2717,7 +2712,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2717 if (!XFS_IS_QUOTA_ON(tp->t_mountp)) 2712 if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2718 return; 2713 return;
2719 2714
2720 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2715 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2721 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); 2716 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2722 2717
2723 if (udqp) { 2718 if (udqp) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 8342823dbdc3..768a3b27d2b6 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1366,12 +1366,6 @@ xfs_qm_internalqcheck_adjust(
1366 return (error); 1366 return (error);
1367 } 1367 }
1368 1368
1369 if (ip->i_d.di_mode == 0) {
1370 xfs_iput_new(ip, lock_flags);
1371 *res = BULKSTAT_RV_NOTHING;
1372 return XFS_ERROR(ENOENT);
1373 }
1374
1375 /* 1369 /*
1376 * This inode can have blocks after eof which can get released 1370 * This inode can have blocks after eof which can get released
1377 * when we send it to inactive. Since we don't check the dquot 1371 * when we send it to inactive. Since we don't check the dquot
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index a8b85e2be9d5..5e4a40b1c565 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -27,11 +27,6 @@
27/* Number of dquots that fit in to a dquot block */ 27/* Number of dquots that fit in to a dquot block */
28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) 28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
29 29
30#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
31 MR_UPDATE | MR_ACCESS) != 0)
32#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
33 MR_UPDATE) != 0)
34
35#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) 30#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
36 31
37#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) 32#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index f441f836ca8b..99611381e740 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks(
834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); 834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); 835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
836 836
837 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 837 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == 839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
840 XFS_TRANS_DQ_RES_RTBLKS || 840 XFS_TRANS_DQ_RES_RTBLKS ||
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 765aaf65e2d3..540e4c989825 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25#define QUOTADEBUG 1 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE 28#ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8e130b9720ae..ebee3a4f703a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -72,7 +72,7 @@ xfs_acl_vhasacl_default(
72{ 72{
73 int error; 73 int error;
74 74
75 if (!VN_ISDIR(vp)) 75 if (!S_ISDIR(vp->i_mode))
76 return 0; 76 return 0;
77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); 77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
78 return (error == 0); 78 return (error == 0);
@@ -238,15 +238,8 @@ xfs_acl_vget(
238 error = EINVAL; 238 error = EINVAL;
239 goto out; 239 goto out;
240 } 240 }
241 if (kind == _ACL_TYPE_ACCESS) { 241 if (kind == _ACL_TYPE_ACCESS)
242 bhv_vattr_t va; 242 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
243
244 va.va_mask = XFS_AT_MODE;
245 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
246 if (error)
247 goto out;
248 xfs_acl_sync_mode(va.va_mode, xfs_acl);
249 }
250 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); 243 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
251 } 244 }
252out: 245out:
@@ -341,14 +334,15 @@ xfs_acl_iaccess(
341{ 334{
342 xfs_acl_t *acl; 335 xfs_acl_t *acl;
343 int rval; 336 int rval;
337 struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
344 338
345 if (!(_ACL_ALLOC(acl))) 339 if (!(_ACL_ALLOC(acl)))
346 return -1; 340 return -1;
347 341
348 /* If the file has no ACL return -1. */ 342 /* If the file has no ACL return -1. */
349 rval = sizeof(xfs_acl_t); 343 rval = sizeof(xfs_acl_t);
350 if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE, 344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
351 (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) { 345 ATTR_ROOT | ATTR_KERNACCESS)) {
352 _ACL_FREE(acl); 346 _ACL_FREE(acl);
353 return -1; 347 return -1;
354 } 348 }
@@ -373,23 +367,15 @@ xfs_acl_allow_set(
373 bhv_vnode_t *vp, 367 bhv_vnode_t *vp,
374 int kind) 368 int kind)
375{ 369{
376 xfs_inode_t *ip = xfs_vtoi(vp);
377 bhv_vattr_t va;
378 int error;
379
380 if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) 370 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
381 return EPERM; 371 return EPERM;
382 if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) 372 if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
383 return ENOTDIR; 373 return ENOTDIR;
384 if (vp->i_sb->s_flags & MS_RDONLY) 374 if (vp->i_sb->s_flags & MS_RDONLY)
385 return EROFS; 375 return EROFS;
386 va.va_mask = XFS_AT_UID; 376 if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
387 error = xfs_getattr(ip, &va, 0);
388 if (error)
389 return error;
390 if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
391 return EPERM; 377 return EPERM;
392 return error; 378 return 0;
393} 379}
394 380
395/* 381/*
@@ -594,7 +580,7 @@ xfs_acl_get_attr(
594 *error = xfs_attr_get(xfs_vtoi(vp), 580 *error = xfs_attr_get(xfs_vtoi(vp),
595 kind == _ACL_TYPE_ACCESS ? 581 kind == _ACL_TYPE_ACCESS ?
596 SGI_ACL_FILE : SGI_ACL_DEFAULT, 582 SGI_ACL_FILE : SGI_ACL_DEFAULT,
597 (char *)aclp, &len, flags, sys_cred); 583 (char *)aclp, &len, flags);
598 if (*error || (flags & ATTR_KERNOVAL)) 584 if (*error || (flags & ATTR_KERNOVAL))
599 return; 585 return;
600 xfs_acl_get_endian(aclp); 586 xfs_acl_get_endian(aclp);
@@ -643,7 +629,6 @@ xfs_acl_vtoacl(
643 xfs_acl_t *access_acl, 629 xfs_acl_t *access_acl,
644 xfs_acl_t *default_acl) 630 xfs_acl_t *default_acl)
645{ 631{
646 bhv_vattr_t va;
647 int error = 0; 632 int error = 0;
648 633
649 if (access_acl) { 634 if (access_acl) {
@@ -652,16 +637,10 @@ xfs_acl_vtoacl(
652 * be obtained for some reason, invalidate the access ACL. 637 * be obtained for some reason, invalidate the access ACL.
653 */ 638 */
654 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); 639 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
655 if (!error) {
656 /* Got the ACL, need the mode... */
657 va.va_mask = XFS_AT_MODE;
658 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
659 }
660
661 if (error) 640 if (error)
662 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; 641 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
663 else /* We have a good ACL and the file mode, synchronize. */ 642 else /* We have a good ACL and the file mode, synchronize. */
664 xfs_acl_sync_mode(va.va_mode, access_acl); 643 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
665 } 644 }
666 645
667 if (default_acl) { 646 if (default_acl) {
@@ -719,7 +698,7 @@ xfs_acl_inherit(
719 * If the new file is a directory, its default ACL is a copy of 698 * If the new file is a directory, its default ACL is a copy of
720 * the containing directory's default ACL. 699 * the containing directory's default ACL.
721 */ 700 */
722 if (VN_ISDIR(vp)) 701 if (S_ISDIR(vp->i_mode))
723 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 702 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
724 if (!error && !basicperms) 703 if (!error && !basicperms)
725 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 704 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
@@ -744,7 +723,7 @@ xfs_acl_setmode(
744 bhv_vattr_t va; 723 bhv_vattr_t va;
745 xfs_acl_entry_t *ap; 724 xfs_acl_entry_t *ap;
746 xfs_acl_entry_t *gap = NULL; 725 xfs_acl_entry_t *gap = NULL;
747 int i, error, nomask = 1; 726 int i, nomask = 1;
748 727
749 *basicperms = 1; 728 *basicperms = 1;
750 729
@@ -756,11 +735,7 @@ xfs_acl_setmode(
756 * mode. The m:: bits take precedence over the g:: bits. 735 * mode. The m:: bits take precedence over the g:: bits.
757 */ 736 */
758 va.va_mask = XFS_AT_MODE; 737 va.va_mask = XFS_AT_MODE;
759 error = xfs_getattr(xfs_vtoi(vp), &va, 0); 738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
760 if (error)
761 return error;
762
763 va.va_mask = XFS_AT_MODE;
764 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
765 ap = acl->acl_entry; 740 ap = acl->acl_entry;
766 for (i = 0; i < acl->acl_cnt; ++i) { 741 for (i = 0; i < acl->acl_cnt; ++i) {
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36d781ee5fcc..df151a859186 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -101,14 +101,28 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
101ktrace_t *xfs_attr_trace_buf; 101ktrace_t *xfs_attr_trace_buf;
102#endif 102#endif
103 103
104STATIC int
105xfs_attr_name_to_xname(
106 struct xfs_name *xname,
107 const char *aname)
108{
109 if (!aname)
110 return EINVAL;
111 xname->name = aname;
112 xname->len = strlen(aname);
113 if (xname->len >= MAXNAMELEN)
114 return EFAULT; /* match IRIX behaviour */
115
116 return 0;
117}
104 118
105/*======================================================================== 119/*========================================================================
106 * Overall external interface routines. 120 * Overall external interface routines.
107 *========================================================================*/ 121 *========================================================================*/
108 122
109int 123int
110xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen, 124xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
111 char *value, int *valuelenp, int flags, struct cred *cred) 125 char *value, int *valuelenp, int flags)
112{ 126{
113 xfs_da_args_t args; 127 xfs_da_args_t args;
114 int error; 128 int error;
@@ -122,8 +136,8 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
122 * Fill in the arg structure for this request. 136 * Fill in the arg structure for this request.
123 */ 137 */
124 memset((char *)&args, 0, sizeof(args)); 138 memset((char *)&args, 0, sizeof(args));
125 args.name = name; 139 args.name = name->name;
126 args.namelen = namelen; 140 args.namelen = name->len;
127 args.value = value; 141 args.value = value;
128 args.valuelen = *valuelenp; 142 args.valuelen = *valuelenp;
129 args.flags = flags; 143 args.flags = flags;
@@ -162,31 +176,29 @@ xfs_attr_get(
162 const char *name, 176 const char *name,
163 char *value, 177 char *value,
164 int *valuelenp, 178 int *valuelenp,
165 int flags, 179 int flags)
166 cred_t *cred)
167{ 180{
168 int error, namelen; 181 int error;
182 struct xfs_name xname;
169 183
170 XFS_STATS_INC(xs_attr_get); 184 XFS_STATS_INC(xs_attr_get);
171 185
172 if (!name)
173 return(EINVAL);
174 namelen = strlen(name);
175 if (namelen >= MAXNAMELEN)
176 return(EFAULT); /* match IRIX behaviour */
177
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 return(EIO); 187 return(EIO);
180 188
189 error = xfs_attr_name_to_xname(&xname, name);
190 if (error)
191 return error;
192
181 xfs_ilock(ip, XFS_ILOCK_SHARED); 193 xfs_ilock(ip, XFS_ILOCK_SHARED);
182 error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred); 194 error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
183 xfs_iunlock(ip, XFS_ILOCK_SHARED); 195 xfs_iunlock(ip, XFS_ILOCK_SHARED);
184 return(error); 196 return(error);
185} 197}
186 198
187int 199STATIC int
188xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, 200xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
189 char *value, int valuelen, int flags) 201 char *value, int valuelen, int flags)
190{ 202{
191 xfs_da_args_t args; 203 xfs_da_args_t args;
192 xfs_fsblock_t firstblock; 204 xfs_fsblock_t firstblock;
@@ -209,7 +221,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
209 */ 221 */
210 if (XFS_IFORK_Q(dp) == 0) { 222 if (XFS_IFORK_Q(dp) == 0) {
211 int sf_size = sizeof(xfs_attr_sf_hdr_t) + 223 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
212 XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen); 224 XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
213 225
214 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) 226 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
215 return(error); 227 return(error);
@@ -219,8 +231,8 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
219 * Fill in the arg structure for this request. 231 * Fill in the arg structure for this request.
220 */ 232 */
221 memset((char *)&args, 0, sizeof(args)); 233 memset((char *)&args, 0, sizeof(args));
222 args.name = name; 234 args.name = name->name;
223 args.namelen = namelen; 235 args.namelen = name->len;
224 args.value = value; 236 args.value = value;
225 args.valuelen = valuelen; 237 args.valuelen = valuelen;
226 args.flags = flags; 238 args.flags = flags;
@@ -236,7 +248,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
236 * Determine space new attribute will use, and if it would be 248 * Determine space new attribute will use, and if it would be
237 * "local" or "remote" (note: local != inline). 249 * "local" or "remote" (note: local != inline).
238 */ 250 */
239 size = xfs_attr_leaf_newentsize(namelen, valuelen, 251 size = xfs_attr_leaf_newentsize(name->len, valuelen,
240 mp->m_sb.sb_blocksize, &local); 252 mp->m_sb.sb_blocksize, &local);
241 253
242 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); 254 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
@@ -429,26 +441,27 @@ xfs_attr_set(
429 int valuelen, 441 int valuelen,
430 int flags) 442 int flags)
431{ 443{
432 int namelen; 444 int error;
433 445 struct xfs_name xname;
434 namelen = strlen(name);
435 if (namelen >= MAXNAMELEN)
436 return EFAULT; /* match IRIX behaviour */
437 446
438 XFS_STATS_INC(xs_attr_set); 447 XFS_STATS_INC(xs_attr_set);
439 448
440 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 449 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
441 return (EIO); 450 return (EIO);
442 451
443 return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); 452 error = xfs_attr_name_to_xname(&xname, name);
453 if (error)
454 return error;
455
456 return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
444} 457}
445 458
446/* 459/*
447 * Generic handler routine to remove a name from an attribute list. 460 * Generic handler routine to remove a name from an attribute list.
448 * Transitions attribute list from Btree to shortform as necessary. 461 * Transitions attribute list from Btree to shortform as necessary.
449 */ 462 */
450int 463STATIC int
451xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) 464xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
452{ 465{
453 xfs_da_args_t args; 466 xfs_da_args_t args;
454 xfs_fsblock_t firstblock; 467 xfs_fsblock_t firstblock;
@@ -460,8 +473,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
460 * Fill in the arg structure for this request. 473 * Fill in the arg structure for this request.
461 */ 474 */
462 memset((char *)&args, 0, sizeof(args)); 475 memset((char *)&args, 0, sizeof(args));
463 args.name = name; 476 args.name = name->name;
464 args.namelen = namelen; 477 args.namelen = name->len;
465 args.flags = flags; 478 args.flags = flags;
466 args.hashval = xfs_da_hashname(args.name, args.namelen); 479 args.hashval = xfs_da_hashname(args.name, args.namelen);
467 args.dp = dp; 480 args.dp = dp;
@@ -575,17 +588,18 @@ xfs_attr_remove(
575 const char *name, 588 const char *name,
576 int flags) 589 int flags)
577{ 590{
578 int namelen; 591 int error;
579 592 struct xfs_name xname;
580 namelen = strlen(name);
581 if (namelen >= MAXNAMELEN)
582 return EFAULT; /* match IRIX behaviour */
583 593
584 XFS_STATS_INC(xs_attr_remove); 594 XFS_STATS_INC(xs_attr_remove);
585 595
586 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 596 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
587 return (EIO); 597 return (EIO);
588 598
599 error = xfs_attr_name_to_xname(&xname, name);
600 if (error)
601 return error;
602
589 xfs_ilock(dp, XFS_ILOCK_SHARED); 603 xfs_ilock(dp, XFS_ILOCK_SHARED);
590 if (XFS_IFORK_Q(dp) == 0 || 604 if (XFS_IFORK_Q(dp) == 0 ||
591 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
@@ -595,10 +609,10 @@ xfs_attr_remove(
595 } 609 }
596 xfs_iunlock(dp, XFS_ILOCK_SHARED); 610 xfs_iunlock(dp, XFS_ILOCK_SHARED);
597 611
598 return xfs_attr_remove_int(dp, name, namelen, flags); 612 return xfs_attr_remove_int(dp, &xname, flags);
599} 613}
600 614
601int /* error */ 615STATIC int
602xfs_attr_list_int(xfs_attr_list_context_t *context) 616xfs_attr_list_int(xfs_attr_list_context_t *context)
603{ 617{
604 int error; 618 int error;
@@ -2522,8 +2536,7 @@ attr_generic_get(
2522{ 2536{
2523 int error, asize = size; 2537 int error, asize = size;
2524 2538
2525 error = xfs_attr_get(xfs_vtoi(vp), name, data, 2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2526 &asize, xflags, NULL);
2527 if (!error) 2540 if (!error)
2528 return asize; 2541 return asize;
2529 return -error; 2542 return -error;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 786eba3121c4..6cfc9384fe35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -158,14 +158,10 @@ struct xfs_da_args;
158/* 158/*
159 * Overall external interface routines. 159 * Overall external interface routines.
160 */ 160 */
161int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
162int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
163int xfs_attr_list_int(struct xfs_attr_list_context *);
164int xfs_attr_inactive(struct xfs_inode *dp); 161int xfs_attr_inactive(struct xfs_inode *dp);
165 162
166int xfs_attr_shortform_getvalue(struct xfs_da_args *); 163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
167int xfs_attr_fetch(struct xfs_inode *, const char *, int, 164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
168 char *, int *, int, struct cred *);
169int xfs_attr_rmtval_get(struct xfs_da_args *args); 165int xfs_attr_rmtval_get(struct xfs_da_args *args);
170 166
171#endif /* __XFS_ATTR_H__ */ 167#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index eb198c01c35d..53c259f5a5af 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4074,7 +4074,6 @@ xfs_bmap_add_attrfork(
4074error2: 4074error2:
4075 xfs_bmap_cancel(&flist); 4075 xfs_bmap_cancel(&flist);
4076error1: 4076error1:
4077 ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
4078 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4077 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4079error0: 4078error0:
4080 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 4079 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3f53fad356a3..5f3647cb9885 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -162,7 +162,7 @@ xfs_swap_extents(
162 ips[1] = ip; 162 ips[1] = ip;
163 } 163 }
164 164
165 xfs_lock_inodes(ips, 2, 0, lock_flags); 165 xfs_lock_inodes(ips, 2, lock_flags);
166 locked = 1; 166 locked = 1;
167 167
168 /* Verify that both files have the same format */ 168 /* Verify that both files have the same format */
@@ -265,7 +265,7 @@ xfs_swap_extents(
265 locked = 0; 265 locked = 0;
266 goto error0; 266 goto error0;
267 } 267 }
268 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 268 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
269 269
270 /* 270 /*
271 * Count the number of extended attribute blocks 271 * Count the number of extended attribute blocks
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d3a0f538d6a6..381ebda4f7bc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
462 xfs_mount_t *mp, 462 xfs_mount_t *mp,
463 xfs_fsop_counts_t *cnt) 463 xfs_fsop_counts_t *cnt)
464{ 464{
465 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 465 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
466 spin_lock(&mp->m_sb_lock); 466 spin_lock(&mp->m_sb_lock);
467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
468 cnt->freertx = mp->m_sb.sb_frextents; 468 cnt->freertx = mp->m_sb.sb_frextents;
@@ -524,7 +524,7 @@ xfs_reserve_blocks(
524 */ 524 */
525retry: 525retry:
526 spin_lock(&mp->m_sb_lock); 526 spin_lock(&mp->m_sb_lock);
527 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); 527 xfs_icsb_sync_counters_locked(mp, 0);
528 528
529 /* 529 /*
530 * If our previous reservation was larger than the current value, 530 * If our previous reservation was larger than the current value,
@@ -552,11 +552,8 @@ retry:
552 mp->m_resblks += free; 552 mp->m_resblks += free;
553 mp->m_resblks_avail += free; 553 mp->m_resblks_avail += free;
554 fdblks_delta = -free; 554 fdblks_delta = -free;
555 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
556 } else { 555 } else {
557 fdblks_delta = -delta; 556 fdblks_delta = -delta;
558 mp->m_sb.sb_fdblocks =
559 lcounter + XFS_ALLOC_SET_ASIDE(mp);
560 mp->m_resblks = request; 557 mp->m_resblks = request;
561 mp->m_resblks_avail += delta; 558 mp->m_resblks_avail += delta;
562 } 559 }
@@ -587,7 +584,6 @@ out:
587 if (error == ENOSPC) 584 if (error == ENOSPC)
588 goto retry; 585 goto retry;
589 } 586 }
590
591 return 0; 587 return 0;
592} 588}
593 589
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a64dfbd565a5..aad8c5da38af 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -147,6 +147,7 @@ xfs_ialloc_ag_alloc(
147 int version; /* inode version number to use */ 147 int version; /* inode version number to use */
148 int isaligned = 0; /* inode allocation at stripe unit */ 148 int isaligned = 0; /* inode allocation at stripe unit */
149 /* boundary */ 149 /* boundary */
150 unsigned int gen;
150 151
151 args.tp = tp; 152 args.tp = tp;
152 args.mp = tp->t_mountp; 153 args.mp = tp->t_mountp;
@@ -290,6 +291,14 @@ xfs_ialloc_ag_alloc(
290 else 291 else
291 version = XFS_DINODE_VERSION_1; 292 version = XFS_DINODE_VERSION_1;
292 293
294 /*
295 * Seed the new inode cluster with a random generation number. This
296 * prevents short-term reuse of generation numbers if a chunk is
297 * freed and then immediately reallocated. We use random numbers
298 * rather than a linear progression to prevent the next generation
299 * number from being easily guessable.
300 */
301 gen = random32();
293 for (j = 0; j < nbufs; j++) { 302 for (j = 0; j < nbufs; j++) {
294 /* 303 /*
295 * Get the block. 304 * Get the block.
@@ -309,6 +318,7 @@ xfs_ialloc_ag_alloc(
309 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 318 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
310 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 319 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
311 free->di_core.di_version = version; 320 free->di_core.di_version = version;
321 free->di_core.di_gen = cpu_to_be32(gen);
312 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 322 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
313 xfs_ialloc_log_di(tp, fbuf, i, 323 xfs_ialloc_log_di(tp, fbuf, i,
314 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); 324 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e657c5128460..b07604b94d9f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -593,8 +593,9 @@ xfs_iunlock_map_shared(
593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
594 */ 594 */
595void 595void
596xfs_ilock(xfs_inode_t *ip, 596xfs_ilock(
597 uint lock_flags) 597 xfs_inode_t *ip,
598 uint lock_flags)
598{ 599{
599 /* 600 /*
600 * You can't set both SHARED and EXCL for the same lock, 601 * You can't set both SHARED and EXCL for the same lock,
@@ -607,16 +608,16 @@ xfs_ilock(xfs_inode_t *ip,
607 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 608 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
608 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 609 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
609 610
610 if (lock_flags & XFS_IOLOCK_EXCL) { 611 if (lock_flags & XFS_IOLOCK_EXCL)
611 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 612 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
612 } else if (lock_flags & XFS_IOLOCK_SHARED) { 613 else if (lock_flags & XFS_IOLOCK_SHARED)
613 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 614 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
614 } 615
615 if (lock_flags & XFS_ILOCK_EXCL) { 616 if (lock_flags & XFS_ILOCK_EXCL)
616 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 617 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
617 } else if (lock_flags & XFS_ILOCK_SHARED) { 618 else if (lock_flags & XFS_ILOCK_SHARED)
618 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 619 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
619 } 620
620 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 621 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
621} 622}
622 623
@@ -631,15 +632,12 @@ xfs_ilock(xfs_inode_t *ip,
631 * lock_flags -- this parameter indicates the inode's locks to be 632 * lock_flags -- this parameter indicates the inode's locks to be
632 * to be locked. See the comment for xfs_ilock() for a list 633 * to be locked. See the comment for xfs_ilock() for a list
633 * of valid values. 634 * of valid values.
634 *
635 */ 635 */
636int 636int
637xfs_ilock_nowait(xfs_inode_t *ip, 637xfs_ilock_nowait(
638 uint lock_flags) 638 xfs_inode_t *ip,
639 uint lock_flags)
639{ 640{
640 int iolocked;
641 int ilocked;
642
643 /* 641 /*
644 * You can't set both SHARED and EXCL for the same lock, 642 * You can't set both SHARED and EXCL for the same lock,
645 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 643 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
@@ -651,37 +649,30 @@ xfs_ilock_nowait(xfs_inode_t *ip,
651 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 649 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
652 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 650 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
653 651
654 iolocked = 0;
655 if (lock_flags & XFS_IOLOCK_EXCL) { 652 if (lock_flags & XFS_IOLOCK_EXCL) {
656 iolocked = mrtryupdate(&ip->i_iolock); 653 if (!mrtryupdate(&ip->i_iolock))
657 if (!iolocked) { 654 goto out;
658 return 0;
659 }
660 } else if (lock_flags & XFS_IOLOCK_SHARED) { 655 } else if (lock_flags & XFS_IOLOCK_SHARED) {
661 iolocked = mrtryaccess(&ip->i_iolock); 656 if (!mrtryaccess(&ip->i_iolock))
662 if (!iolocked) { 657 goto out;
663 return 0;
664 }
665 } 658 }
666 if (lock_flags & XFS_ILOCK_EXCL) { 659 if (lock_flags & XFS_ILOCK_EXCL) {
667 ilocked = mrtryupdate(&ip->i_lock); 660 if (!mrtryupdate(&ip->i_lock))
668 if (!ilocked) { 661 goto out_undo_iolock;
669 if (iolocked) {
670 mrunlock(&ip->i_iolock);
671 }
672 return 0;
673 }
674 } else if (lock_flags & XFS_ILOCK_SHARED) { 662 } else if (lock_flags & XFS_ILOCK_SHARED) {
675 ilocked = mrtryaccess(&ip->i_lock); 663 if (!mrtryaccess(&ip->i_lock))
676 if (!ilocked) { 664 goto out_undo_iolock;
677 if (iolocked) {
678 mrunlock(&ip->i_iolock);
679 }
680 return 0;
681 }
682 } 665 }
683 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 666 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
684 return 1; 667 return 1;
668
669 out_undo_iolock:
670 if (lock_flags & XFS_IOLOCK_EXCL)
671 mrunlock_excl(&ip->i_iolock);
672 else if (lock_flags & XFS_IOLOCK_SHARED)
673 mrunlock_shared(&ip->i_iolock);
674 out:
675 return 0;
685} 676}
686 677
687/* 678/*
@@ -697,8 +688,9 @@ xfs_ilock_nowait(xfs_inode_t *ip,
697 * 688 *
698 */ 689 */
699void 690void
700xfs_iunlock(xfs_inode_t *ip, 691xfs_iunlock(
701 uint lock_flags) 692 xfs_inode_t *ip,
693 uint lock_flags)
702{ 694{
703 /* 695 /*
704 * You can't set both SHARED and EXCL for the same lock, 696 * You can't set both SHARED and EXCL for the same lock,
@@ -713,31 +705,25 @@ xfs_iunlock(xfs_inode_t *ip,
713 XFS_LOCK_DEP_MASK)) == 0); 705 XFS_LOCK_DEP_MASK)) == 0);
714 ASSERT(lock_flags != 0); 706 ASSERT(lock_flags != 0);
715 707
716 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { 708 if (lock_flags & XFS_IOLOCK_EXCL)
717 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || 709 mrunlock_excl(&ip->i_iolock);
718 (ismrlocked(&ip->i_iolock, MR_ACCESS))); 710 else if (lock_flags & XFS_IOLOCK_SHARED)
719 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || 711 mrunlock_shared(&ip->i_iolock);
720 (ismrlocked(&ip->i_iolock, MR_UPDATE)));
721 mrunlock(&ip->i_iolock);
722 }
723 712
724 if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { 713 if (lock_flags & XFS_ILOCK_EXCL)
725 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || 714 mrunlock_excl(&ip->i_lock);
726 (ismrlocked(&ip->i_lock, MR_ACCESS))); 715 else if (lock_flags & XFS_ILOCK_SHARED)
727 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || 716 mrunlock_shared(&ip->i_lock);
728 (ismrlocked(&ip->i_lock, MR_UPDATE)));
729 mrunlock(&ip->i_lock);
730 717
718 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
719 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
731 /* 720 /*
732 * Let the AIL know that this item has been unlocked in case 721 * Let the AIL know that this item has been unlocked in case
733 * it is in the AIL and anyone is waiting on it. Don't do 722 * it is in the AIL and anyone is waiting on it. Don't do
734 * this if the caller has asked us not to. 723 * this if the caller has asked us not to.
735 */ 724 */
736 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && 725 xfs_trans_unlocked_item(ip->i_mount,
737 ip->i_itemp != NULL) { 726 (xfs_log_item_t*)(ip->i_itemp));
738 xfs_trans_unlocked_item(ip->i_mount,
739 (xfs_log_item_t*)(ip->i_itemp));
740 }
741 } 727 }
742 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 728 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
743} 729}
@@ -747,21 +733,47 @@ xfs_iunlock(xfs_inode_t *ip,
747 * if it is being demoted. 733 * if it is being demoted.
748 */ 734 */
749void 735void
750xfs_ilock_demote(xfs_inode_t *ip, 736xfs_ilock_demote(
751 uint lock_flags) 737 xfs_inode_t *ip,
738 uint lock_flags)
752{ 739{
753 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 740 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
754 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 741 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
755 742
756 if (lock_flags & XFS_ILOCK_EXCL) { 743 if (lock_flags & XFS_ILOCK_EXCL)
757 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
758 mrdemote(&ip->i_lock); 744 mrdemote(&ip->i_lock);
759 } 745 if (lock_flags & XFS_IOLOCK_EXCL)
760 if (lock_flags & XFS_IOLOCK_EXCL) {
761 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
762 mrdemote(&ip->i_iolock); 746 mrdemote(&ip->i_iolock);
747}
748
749#ifdef DEBUG
750/*
751 * Debug-only routine, without additional rw_semaphore APIs, we can
752 * now only answer requests regarding whether we hold the lock for write
753 * (reader state is outside our visibility, we only track writer state).
754 *
755 * Note: this means !xfs_isilocked would give false positives, so don't do that.
756 */
757int
758xfs_isilocked(
759 xfs_inode_t *ip,
760 uint lock_flags)
761{
762 if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
763 XFS_ILOCK_EXCL) {
764 if (!ip->i_lock.mr_writer)
765 return 0;
763 } 766 }
767
768 if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
769 XFS_IOLOCK_EXCL) {
770 if (!ip->i_iolock.mr_writer)
771 return 0;
772 }
773
774 return 1;
764} 775}
776#endif
765 777
766/* 778/*
767 * The following three routines simply manage the i_flock 779 * The following three routines simply manage the i_flock
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ca12acb90394..cf0bb9c1d621 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1291,7 +1291,7 @@ xfs_file_last_byte(
1291 xfs_fileoff_t size_last_block; 1291 xfs_fileoff_t size_last_block;
1292 int error; 1292 int error;
1293 1293
1294 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 1294 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1295 1295
1296 mp = ip->i_mount; 1296 mp = ip->i_mount;
1297 /* 1297 /*
@@ -1402,7 +1402,7 @@ xfs_itruncate_start(
1402 bhv_vnode_t *vp; 1402 bhv_vnode_t *vp;
1403 int error = 0; 1403 int error = 0;
1404 1404
1405 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1405 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1406 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1406 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1408 (flags == XFS_ITRUNC_MAYBE)); 1408 (flags == XFS_ITRUNC_MAYBE));
@@ -1528,8 +1528,7 @@ xfs_itruncate_finish(
1528 xfs_bmap_free_t free_list; 1528 xfs_bmap_free_t free_list;
1529 int error; 1529 int error;
1530 1530
1531 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1531 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1532 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1533 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1532 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1534 ASSERT(*tp != NULL); 1533 ASSERT(*tp != NULL);
1535 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1534 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1780,8 +1779,7 @@ xfs_igrow_start(
1780 xfs_fsize_t new_size, 1779 xfs_fsize_t new_size,
1781 cred_t *credp) 1780 cred_t *credp)
1782{ 1781{
1783 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1784 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1785 ASSERT(new_size > ip->i_size); 1783 ASSERT(new_size > ip->i_size);
1786 1784
1787 /* 1785 /*
@@ -1809,8 +1807,7 @@ xfs_igrow_finish(
1809 xfs_fsize_t new_size, 1807 xfs_fsize_t new_size,
1810 int change_flag) 1808 int change_flag)
1811{ 1809{
1812 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1813 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1814 ASSERT(ip->i_transp == tp); 1811 ASSERT(ip->i_transp == tp);
1815 ASSERT(new_size > ip->i_size); 1812 ASSERT(new_size > ip->i_size);
1816 1813
@@ -2287,7 +2284,7 @@ xfs_ifree(
2287 xfs_dinode_t *dip; 2284 xfs_dinode_t *dip;
2288 xfs_buf_t *ibp; 2285 xfs_buf_t *ibp;
2289 2286
2290 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2287 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2291 ASSERT(ip->i_transp == tp); 2288 ASSERT(ip->i_transp == tp);
2292 ASSERT(ip->i_d.di_nlink == 0); 2289 ASSERT(ip->i_d.di_nlink == 0);
2293 ASSERT(ip->i_d.di_nextents == 0); 2290 ASSERT(ip->i_d.di_nextents == 0);
@@ -2746,7 +2743,7 @@ void
2746xfs_ipin( 2743xfs_ipin(
2747 xfs_inode_t *ip) 2744 xfs_inode_t *ip)
2748{ 2745{
2749 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2746 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2750 2747
2751 atomic_inc(&ip->i_pincount); 2748 atomic_inc(&ip->i_pincount);
2752} 2749}
@@ -2779,7 +2776,7 @@ __xfs_iunpin_wait(
2779{ 2776{
2780 xfs_inode_log_item_t *iip = ip->i_itemp; 2777 xfs_inode_log_item_t *iip = ip->i_itemp;
2781 2778
2782 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2783 if (atomic_read(&ip->i_pincount) == 0) 2780 if (atomic_read(&ip->i_pincount) == 0)
2784 return; 2781 return;
2785 2782
@@ -2829,7 +2826,7 @@ xfs_iextents_copy(
2829 xfs_fsblock_t start_block; 2826 xfs_fsblock_t start_block;
2830 2827
2831 ifp = XFS_IFORK_PTR(ip, whichfork); 2828 ifp = XFS_IFORK_PTR(ip, whichfork);
2832 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 2829 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2833 ASSERT(ifp->if_bytes > 0); 2830 ASSERT(ifp->if_bytes > 0);
2834 2831
2835 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2832 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3132,7 +3129,7 @@ xfs_iflush(
3132 3129
3133 XFS_STATS_INC(xs_iflush_count); 3130 XFS_STATS_INC(xs_iflush_count);
3134 3131
3135 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3132 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3136 ASSERT(issemalocked(&(ip->i_flock))); 3133 ASSERT(issemalocked(&(ip->i_flock)));
3137 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3134 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3138 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3135 ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3297,7 +3294,7 @@ xfs_iflush_int(
3297 int first; 3294 int first;
3298#endif 3295#endif
3299 3296
3300 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3297 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3301 ASSERT(issemalocked(&(ip->i_flock))); 3298 ASSERT(issemalocked(&(ip->i_flock)));
3302 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3299 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3303 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3300 ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 93c37697a72c..0a999fee4f03 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -386,20 +386,9 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
386#define XFS_ILOCK_EXCL (1<<2) 386#define XFS_ILOCK_EXCL (1<<2)
387#define XFS_ILOCK_SHARED (1<<3) 387#define XFS_ILOCK_SHARED (1<<3)
388#define XFS_IUNLOCK_NONOTIFY (1<<4) 388#define XFS_IUNLOCK_NONOTIFY (1<<4)
389/* #define XFS_IOLOCK_NESTED (1<<5) */
390#define XFS_EXTENT_TOKEN_RD (1<<6)
391#define XFS_SIZE_TOKEN_RD (1<<7)
392#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
393#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */
394#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
395#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
396#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
397/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */
398 389
399#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 390#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
400 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ 391 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
401 | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
402 | XFS_WILLLEND)
403 392
404/* 393/*
405 * Flags for lockdep annotations. 394 * Flags for lockdep annotations.
@@ -483,6 +472,7 @@ void xfs_ilock(xfs_inode_t *, uint);
483int xfs_ilock_nowait(xfs_inode_t *, uint); 472int xfs_ilock_nowait(xfs_inode_t *, uint);
484void xfs_iunlock(xfs_inode_t *, uint); 473void xfs_iunlock(xfs_inode_t *, uint);
485void xfs_ilock_demote(xfs_inode_t *, uint); 474void xfs_ilock_demote(xfs_inode_t *, uint);
475int xfs_isilocked(xfs_inode_t *, uint);
486void xfs_iflock(xfs_inode_t *); 476void xfs_iflock(xfs_inode_t *);
487int xfs_iflock_nowait(xfs_inode_t *); 477int xfs_iflock_nowait(xfs_inode_t *);
488uint xfs_ilock_map_shared(xfs_inode_t *); 478uint xfs_ilock_map_shared(xfs_inode_t *);
@@ -534,7 +524,7 @@ int xfs_iflush(xfs_inode_t *, uint);
534void xfs_iflush_all(struct xfs_mount *); 524void xfs_iflush_all(struct xfs_mount *);
535void xfs_ichgtime(xfs_inode_t *, int); 525void xfs_ichgtime(xfs_inode_t *, int);
536xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 526xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
537void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 527void xfs_lock_inodes(xfs_inode_t **, int, uint);
538 528
539void xfs_synchronize_atime(xfs_inode_t *); 529void xfs_synchronize_atime(xfs_inode_t *);
540void xfs_mark_inode_dirty_sync(xfs_inode_t *); 530void xfs_mark_inode_dirty_sync(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 93b5db453ea2..167b33f15772 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -547,7 +547,7 @@ STATIC void
547xfs_inode_item_pin( 547xfs_inode_item_pin(
548 xfs_inode_log_item_t *iip) 548 xfs_inode_log_item_t *iip)
549{ 549{
550 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 550 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
551 xfs_ipin(iip->ili_inode); 551 xfs_ipin(iip->ili_inode);
552} 552}
553 553
@@ -664,13 +664,13 @@ xfs_inode_item_unlock(
664 664
665 ASSERT(iip != NULL); 665 ASSERT(iip != NULL);
666 ASSERT(iip->ili_inode->i_itemp != NULL); 666 ASSERT(iip->ili_inode->i_itemp != NULL);
667 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 667 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
669 XFS_ILI_IOLOCKED_EXCL)) || 669 XFS_ILI_IOLOCKED_EXCL)) ||
670 ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE)); 670 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
672 XFS_ILI_IOLOCKED_SHARED)) || 672 XFS_ILI_IOLOCKED_SHARED)) ||
673 ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS)); 673 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
674 /* 674 /*
675 * Clear the transaction pointer in the inode. 675 * Clear the transaction pointer in the inode.
676 */ 676 */
@@ -769,7 +769,7 @@ xfs_inode_item_pushbuf(
769 769
770 ip = iip->ili_inode; 770 ip = iip->ili_inode;
771 771
772 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 772 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
773 773
774 /* 774 /*
775 * The ili_pushbuf_flag keeps others from 775 * The ili_pushbuf_flag keeps others from
@@ -857,7 +857,7 @@ xfs_inode_item_push(
857 857
858 ip = iip->ili_inode; 858 ip = iip->ili_inode;
859 859
860 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 860 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
861 ASSERT(issemalocked(&(ip->i_flock))); 861 ASSERT(issemalocked(&(ip->i_flock)));
862 /* 862 /*
863 * Since we were able to lock the inode's flush lock and 863 * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb3cf1191419..7edcde691d1a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -196,14 +196,14 @@ xfs_iomap(
196 break; 196 break;
197 case BMAPI_WRITE: 197 case BMAPI_WRITE:
198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); 198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
199 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; 199 lockmode = XFS_ILOCK_EXCL;
200 if (flags & BMAPI_IGNSTATE) 200 if (flags & BMAPI_IGNSTATE)
201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
202 xfs_ilock(ip, lockmode); 202 xfs_ilock(ip, lockmode);
203 break; 203 break;
204 case BMAPI_ALLOCATE: 204 case BMAPI_ALLOCATE:
205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); 205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
206 lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; 206 lockmode = XFS_ILOCK_SHARED;
207 bmapi_flags = XFS_BMAPI_ENTIRE; 207 bmapi_flags = XFS_BMAPI_ENTIRE;
208 208
209 /* Attempt non-blocking lock */ 209 /* Attempt non-blocking lock */
@@ -523,8 +523,7 @@ xfs_iomap_write_direct(
523 goto error_out; 523 goto error_out;
524 } 524 }
525 525
526 if (unlikely(!imap.br_startblock && 526 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
527 !(XFS_IS_REALTIME_INODE(ip)))) {
528 error = xfs_cmn_err_fsblock_zero(ip, &imap); 527 error = xfs_cmn_err_fsblock_zero(ip, &imap);
529 goto error_out; 528 goto error_out;
530 } 529 }
@@ -624,7 +623,7 @@ xfs_iomap_write_delay(
624 int prealloc, fsynced = 0; 623 int prealloc, fsynced = 0;
625 int error; 624 int error;
626 625
627 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 626 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
628 627
629 /* 628 /*
630 * Make sure that the dquots are there. This doesn't hold 629 * Make sure that the dquots are there. This doesn't hold
@@ -686,8 +685,7 @@ retry:
686 goto retry; 685 goto retry;
687 } 686 }
688 687
689 if (unlikely(!imap[0].br_startblock && 688 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
690 !(XFS_IS_REALTIME_INODE(ip))))
691 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 689 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
692 690
693 *ret_imap = imap[0]; 691 *ret_imap = imap[0];
@@ -838,9 +836,9 @@ xfs_iomap_write_allocate(
838 * See if we were able to allocate an extent that 836 * See if we were able to allocate an extent that
839 * covers at least part of the callers request 837 * covers at least part of the callers request
840 */ 838 */
841 if (unlikely(!imap.br_startblock && 839 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
842 XFS_IS_REALTIME_INODE(ip)))
843 return xfs_cmn_err_fsblock_zero(ip, &imap); 840 return xfs_cmn_err_fsblock_zero(ip, &imap);
841
844 if ((offset_fsb >= imap.br_startoff) && 842 if ((offset_fsb >= imap.br_startoff) &&
845 (offset_fsb < (imap.br_startoff + 843 (offset_fsb < (imap.br_startoff +
846 imap.br_blockcount))) { 844 imap.br_blockcount))) {
@@ -934,8 +932,7 @@ xfs_iomap_write_unwritten(
934 if (error) 932 if (error)
935 return XFS_ERROR(error); 933 return XFS_ERROR(error);
936 934
937 if (unlikely(!imap.br_startblock && 935 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
938 !(XFS_IS_REALTIME_INODE(ip))))
939 return xfs_cmn_err_fsblock_zero(ip, &imap); 936 return xfs_cmn_err_fsblock_zero(ip, &imap);
940 937
941 if ((numblks_fsb = imap.br_blockcount) == 0) { 938 if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eb85bdedad0c..419de15aeb43 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -71,11 +71,6 @@ xfs_bulkstat_one_iget(
71 71
72 ASSERT(ip != NULL); 72 ASSERT(ip != NULL);
73 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 73 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
74 if (ip->i_d.di_mode == 0) {
75 *stat = BULKSTAT_RV_NOTHING;
76 error = XFS_ERROR(ENOENT);
77 goto out_iput;
78 }
79 74
80 vp = XFS_ITOV(ip); 75 vp = XFS_ITOV(ip);
81 dic = &ip->i_d; 76 dic = &ip->i_d;
@@ -124,7 +119,6 @@ xfs_bulkstat_one_iget(
124 break; 119 break;
125 } 120 }
126 121
127 out_iput:
128 xfs_iput(ip, XFS_ILOCK_SHARED); 122 xfs_iput(ip, XFS_ILOCK_SHARED);
129 return error; 123 return error;
130} 124}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2fec452afbcc..da3988453b71 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -54,8 +54,9 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
54#ifdef HAVE_PERCPU_SB 54#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int, int); 57 int);
58STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
59 int);
59STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
60 int64_t, int); 61 int64_t, int);
61STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 62STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
@@ -63,8 +64,8 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63#else 64#else
64 65
65#define xfs_icsb_destroy_counters(mp) do { } while (0) 66#define xfs_icsb_destroy_counters(mp) do { } while (0)
66#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0) 67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
67#define xfs_icsb_sync_counters(mp) do { } while (0) 68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
68#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
69 70
70#endif 71#endif
@@ -1400,7 +1401,7 @@ xfs_log_sbcount(
1400 if (!xfs_fs_writable(mp)) 1401 if (!xfs_fs_writable(mp))
1401 return 0; 1402 return 0;
1402 1403
1403 xfs_icsb_sync_counters(mp); 1404 xfs_icsb_sync_counters(mp, 0);
1404 1405
1405 /* 1406 /*
1406 * we don't need to do this if we are updating the superblock 1407 * we don't need to do this if we are updating the superblock
@@ -2026,9 +2027,9 @@ xfs_icsb_cpu_notify(
2026 case CPU_ONLINE: 2027 case CPU_ONLINE:
2027 case CPU_ONLINE_FROZEN: 2028 case CPU_ONLINE_FROZEN:
2028 xfs_icsb_lock(mp); 2029 xfs_icsb_lock(mp);
2029 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2030 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2030 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2031 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2031 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2032 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2032 xfs_icsb_unlock(mp); 2033 xfs_icsb_unlock(mp);
2033 break; 2034 break;
2034 case CPU_DEAD: 2035 case CPU_DEAD:
@@ -2048,12 +2049,9 @@ xfs_icsb_cpu_notify(
2048 2049
2049 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 2050 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
2050 2051
2051 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 2052 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
2052 XFS_ICSB_SB_LOCKED, 0); 2053 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
2053 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 2054 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
2054 XFS_ICSB_SB_LOCKED, 0);
2055 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
2056 XFS_ICSB_SB_LOCKED, 0);
2057 spin_unlock(&mp->m_sb_lock); 2055 spin_unlock(&mp->m_sb_lock);
2058 xfs_icsb_unlock(mp); 2056 xfs_icsb_unlock(mp);
2059 break; 2057 break;
@@ -2105,9 +2103,9 @@ xfs_icsb_reinit_counters(
2105 * initial balance kicks us off correctly 2103 * initial balance kicks us off correctly
2106 */ 2104 */
2107 mp->m_icsb_counters = -1; 2105 mp->m_icsb_counters = -1;
2108 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2106 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2109 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2107 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2110 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2108 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2111 xfs_icsb_unlock(mp); 2109 xfs_icsb_unlock(mp);
2112} 2110}
2113 2111
@@ -2223,7 +2221,7 @@ xfs_icsb_disable_counter(
2223 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 2221 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
2224 /* drain back to superblock */ 2222 /* drain back to superblock */
2225 2223
2226 xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 2224 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
2227 switch(field) { 2225 switch(field) {
2228 case XFS_SBS_ICOUNT: 2226 case XFS_SBS_ICOUNT:
2229 mp->m_sb.sb_icount = cnt.icsb_icount; 2227 mp->m_sb.sb_icount = cnt.icsb_icount;
@@ -2278,38 +2276,33 @@ xfs_icsb_enable_counter(
2278} 2276}
2279 2277
2280void 2278void
2281xfs_icsb_sync_counters_flags( 2279xfs_icsb_sync_counters_locked(
2282 xfs_mount_t *mp, 2280 xfs_mount_t *mp,
2283 int flags) 2281 int flags)
2284{ 2282{
2285 xfs_icsb_cnts_t cnt; 2283 xfs_icsb_cnts_t cnt;
2286 2284
2287 /* Pass 1: lock all counters */
2288 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2289 spin_lock(&mp->m_sb_lock);
2290
2291 xfs_icsb_count(mp, &cnt, flags); 2285 xfs_icsb_count(mp, &cnt, flags);
2292 2286
2293 /* Step 3: update mp->m_sb fields */
2294 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 2287 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
2295 mp->m_sb.sb_icount = cnt.icsb_icount; 2288 mp->m_sb.sb_icount = cnt.icsb_icount;
2296 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2289 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
2297 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2290 mp->m_sb.sb_ifree = cnt.icsb_ifree;
2298 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2291 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
2299 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2292 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
2300
2301 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2302 spin_unlock(&mp->m_sb_lock);
2303} 2293}
2304 2294
2305/* 2295/*
2306 * Accurate update of per-cpu counters to incore superblock 2296 * Accurate update of per-cpu counters to incore superblock
2307 */ 2297 */
2308STATIC void 2298void
2309xfs_icsb_sync_counters( 2299xfs_icsb_sync_counters(
2310 xfs_mount_t *mp) 2300 xfs_mount_t *mp,
2301 int flags)
2311{ 2302{
2312 xfs_icsb_sync_counters_flags(mp, 0); 2303 spin_lock(&mp->m_sb_lock);
2304 xfs_icsb_sync_counters_locked(mp, flags);
2305 spin_unlock(&mp->m_sb_lock);
2313} 2306}
2314 2307
2315/* 2308/*
@@ -2332,19 +2325,15 @@ xfs_icsb_sync_counters(
2332#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ 2325#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2333 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) 2326 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
2334STATIC void 2327STATIC void
2335xfs_icsb_balance_counter( 2328xfs_icsb_balance_counter_locked(
2336 xfs_mount_t *mp, 2329 xfs_mount_t *mp,
2337 xfs_sb_field_t field, 2330 xfs_sb_field_t field,
2338 int flags,
2339 int min_per_cpu) 2331 int min_per_cpu)
2340{ 2332{
2341 uint64_t count, resid; 2333 uint64_t count, resid;
2342 int weight = num_online_cpus(); 2334 int weight = num_online_cpus();
2343 uint64_t min = (uint64_t)min_per_cpu; 2335 uint64_t min = (uint64_t)min_per_cpu;
2344 2336
2345 if (!(flags & XFS_ICSB_SB_LOCKED))
2346 spin_lock(&mp->m_sb_lock);
2347
2348 /* disable counter and sync counter */ 2337 /* disable counter and sync counter */
2349 xfs_icsb_disable_counter(mp, field); 2338 xfs_icsb_disable_counter(mp, field);
2350 2339
@@ -2354,19 +2343,19 @@ xfs_icsb_balance_counter(
2354 count = mp->m_sb.sb_icount; 2343 count = mp->m_sb.sb_icount;
2355 resid = do_div(count, weight); 2344 resid = do_div(count, weight);
2356 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2345 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2357 goto out; 2346 return;
2358 break; 2347 break;
2359 case XFS_SBS_IFREE: 2348 case XFS_SBS_IFREE:
2360 count = mp->m_sb.sb_ifree; 2349 count = mp->m_sb.sb_ifree;
2361 resid = do_div(count, weight); 2350 resid = do_div(count, weight);
2362 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2351 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2363 goto out; 2352 return;
2364 break; 2353 break;
2365 case XFS_SBS_FDBLOCKS: 2354 case XFS_SBS_FDBLOCKS:
2366 count = mp->m_sb.sb_fdblocks; 2355 count = mp->m_sb.sb_fdblocks;
2367 resid = do_div(count, weight); 2356 resid = do_div(count, weight);
2368 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) 2357 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
2369 goto out; 2358 return;
2370 break; 2359 break;
2371 default: 2360 default:
2372 BUG(); 2361 BUG();
@@ -2375,9 +2364,17 @@ xfs_icsb_balance_counter(
2375 } 2364 }
2376 2365
2377 xfs_icsb_enable_counter(mp, field, count, resid); 2366 xfs_icsb_enable_counter(mp, field, count, resid);
2378out: 2367}
2379 if (!(flags & XFS_ICSB_SB_LOCKED)) 2368
2380 spin_unlock(&mp->m_sb_lock); 2369STATIC void
2370xfs_icsb_balance_counter(
2371 xfs_mount_t *mp,
2372 xfs_sb_field_t fields,
2373 int min_per_cpu)
2374{
2375 spin_lock(&mp->m_sb_lock);
2376 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
2377 spin_unlock(&mp->m_sb_lock);
2381} 2378}
2382 2379
2383STATIC int 2380STATIC int
@@ -2484,7 +2481,7 @@ slow_path:
2484 * we are done. 2481 * we are done.
2485 */ 2482 */
2486 if (ret != ENOSPC) 2483 if (ret != ENOSPC)
2487 xfs_icsb_balance_counter(mp, field, 0, 0); 2484 xfs_icsb_balance_counter(mp, field, 0);
2488 xfs_icsb_unlock(mp); 2485 xfs_icsb_unlock(mp);
2489 return ret; 2486 return ret;
2490 2487
@@ -2508,7 +2505,7 @@ balance_counter:
2508 * will either succeed through the fast path or slow path without 2505 * will either succeed through the fast path or slow path without
2509 * another balance operation being required. 2506 * another balance operation being required.
2510 */ 2507 */
2511 xfs_icsb_balance_counter(mp, field, 0, delta); 2508 xfs_icsb_balance_counter(mp, field, delta);
2512 xfs_icsb_unlock(mp); 2509 xfs_icsb_unlock(mp);
2513 goto again; 2510 goto again;
2514} 2511}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ed575110ff0..63e0693a358a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -206,17 +206,18 @@ typedef struct xfs_icsb_cnts {
206 206
207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ 207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
208 208
209#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */
210#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ 209#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
211 210
212extern int xfs_icsb_init_counters(struct xfs_mount *); 211extern int xfs_icsb_init_counters(struct xfs_mount *);
213extern void xfs_icsb_reinit_counters(struct xfs_mount *); 212extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); 213extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 215
216#else 216#else
217#define xfs_icsb_init_counters(mp) (0) 217#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 218#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) 219#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
220#endif 221#endif
221 222
222typedef struct xfs_ail { 223typedef struct xfs_ail {
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index ee371890d85d..d8063e1ad298 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -55,85 +55,32 @@ xfs_rename_unlock4(
55 55
56 xfs_iunlock(i_tab[0], lock_mode); 56 xfs_iunlock(i_tab[0], lock_mode);
57 for (i = 1; i < 4; i++) { 57 for (i = 1; i < 4; i++) {
58 if (i_tab[i] == NULL) { 58 if (i_tab[i] == NULL)
59 break; 59 break;
60 } 60
61 /* 61 /*
62 * Watch out for duplicate entries in the table. 62 * Watch out for duplicate entries in the table.
63 */ 63 */
64 if (i_tab[i] != i_tab[i-1]) { 64 if (i_tab[i] != i_tab[i-1])
65 xfs_iunlock(i_tab[i], lock_mode); 65 xfs_iunlock(i_tab[i], lock_mode);
66 }
67 } 66 }
68} 67}
69 68
70#ifdef DEBUG
71int xfs_rename_skip, xfs_rename_nskip;
72#endif
73
74/* 69/*
75 * The following routine will acquire the locks required for a rename 70 * Enter all inodes for a rename transaction into a sorted array.
76 * operation. The code understands the semantics of renames and will
77 * validate that name1 exists under dp1 & that name2 may or may not
78 * exist under dp2.
79 *
80 * We are renaming dp1/name1 to dp2/name2.
81 *
82 * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success.
83 */ 71 */
84STATIC int 72STATIC void
85xfs_lock_for_rename( 73xfs_sort_for_rename(
86 xfs_inode_t *dp1, /* in: old (source) directory inode */ 74 xfs_inode_t *dp1, /* in: old (source) directory inode */
87 xfs_inode_t *dp2, /* in: new (target) directory inode */ 75 xfs_inode_t *dp2, /* in: new (target) directory inode */
88 xfs_inode_t *ip1, /* in: inode of old entry */ 76 xfs_inode_t *ip1, /* in: inode of old entry */
89 struct xfs_name *name2, /* in: new entry name */ 77 xfs_inode_t *ip2, /* in: inode of new entry, if it
90 xfs_inode_t **ipp2, /* out: inode of new entry, if it
91 already exists, NULL otherwise. */ 78 already exists, NULL otherwise. */
92 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 79 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
93 int *num_inodes) /* out: number of inodes in array */ 80 int *num_inodes) /* out: number of inodes in array */
94{ 81{
95 xfs_inode_t *ip2 = NULL;
96 xfs_inode_t *temp; 82 xfs_inode_t *temp;
97 xfs_ino_t inum1, inum2;
98 int error;
99 int i, j; 83 int i, j;
100 uint lock_mode;
101 int diff_dirs = (dp1 != dp2);
102
103 /*
104 * First, find out the current inums of the entries so that we
105 * can determine the initial locking order. We'll have to
106 * sanity check stuff after all the locks have been acquired
107 * to see if we still have the right inodes, directories, etc.
108 */
109 lock_mode = xfs_ilock_map_shared(dp1);
110 IHOLD(ip1);
111 xfs_itrace_ref(ip1);
112
113 inum1 = ip1->i_ino;
114
115 /*
116 * Unlock dp1 and lock dp2 if they are different.
117 */
118 if (diff_dirs) {
119 xfs_iunlock_map_shared(dp1, lock_mode);
120 lock_mode = xfs_ilock_map_shared(dp2);
121 }
122
123 error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
124 if (error == ENOENT) { /* target does not need to exist. */
125 inum2 = 0;
126 } else if (error) {
127 /*
128 * If dp2 and dp1 are the same, the next line unlocks dp1.
129 * Got it?
130 */
131 xfs_iunlock_map_shared(dp2, lock_mode);
132 IRELE (ip1);
133 return error;
134 } else {
135 xfs_itrace_ref(ip2);
136 }
137 84
138 /* 85 /*
139 * i_tab contains a list of pointers to inodes. We initialize 86 * i_tab contains a list of pointers to inodes. We initialize
@@ -145,21 +92,20 @@ xfs_lock_for_rename(
145 i_tab[0] = dp1; 92 i_tab[0] = dp1;
146 i_tab[1] = dp2; 93 i_tab[1] = dp2;
147 i_tab[2] = ip1; 94 i_tab[2] = ip1;
148 if (inum2 == 0) { 95 if (ip2) {
149 *num_inodes = 3;
150 i_tab[3] = NULL;
151 } else {
152 *num_inodes = 4; 96 *num_inodes = 4;
153 i_tab[3] = ip2; 97 i_tab[3] = ip2;
98 } else {
99 *num_inodes = 3;
100 i_tab[3] = NULL;
154 } 101 }
155 *ipp2 = i_tab[3];
156 102
157 /* 103 /*
158 * Sort the elements via bubble sort. (Remember, there are at 104 * Sort the elements via bubble sort. (Remember, there are at
159 * most 4 elements to sort, so this is adequate.) 105 * most 4 elements to sort, so this is adequate.)
160 */ 106 */
161 for (i=0; i < *num_inodes; i++) { 107 for (i = 0; i < *num_inodes; i++) {
162 for (j=1; j < *num_inodes; j++) { 108 for (j = 1; j < *num_inodes; j++) {
163 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 109 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
164 temp = i_tab[j]; 110 temp = i_tab[j];
165 i_tab[j] = i_tab[j-1]; 111 i_tab[j] = i_tab[j-1];
@@ -167,30 +113,6 @@ xfs_lock_for_rename(
167 } 113 }
168 } 114 }
169 } 115 }
170
171 /*
172 * We have dp2 locked. If it isn't first, unlock it.
173 * If it is first, tell xfs_lock_inodes so it can skip it
174 * when locking. if dp1 == dp2, xfs_lock_inodes will skip both
175 * since they are equal. xfs_lock_inodes needs all these inodes
176 * so that it can unlock and retry if there might be a dead-lock
177 * potential with the log.
178 */
179
180 if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) {
181#ifdef DEBUG
182 xfs_rename_skip++;
183#endif
184 xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED);
185 } else {
186#ifdef DEBUG
187 xfs_rename_nskip++;
188#endif
189 xfs_iunlock_map_shared(dp2, lock_mode);
190 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
191 }
192
193 return 0;
194} 116}
195 117
196/* 118/*
@@ -202,10 +124,10 @@ xfs_rename(
202 struct xfs_name *src_name, 124 struct xfs_name *src_name,
203 xfs_inode_t *src_ip, 125 xfs_inode_t *src_ip,
204 xfs_inode_t *target_dp, 126 xfs_inode_t *target_dp,
205 struct xfs_name *target_name) 127 struct xfs_name *target_name,
128 xfs_inode_t *target_ip)
206{ 129{
207 xfs_trans_t *tp; 130 xfs_trans_t *tp = NULL;
208 xfs_inode_t *target_ip;
209 xfs_mount_t *mp = src_dp->i_mount; 131 xfs_mount_t *mp = src_dp->i_mount;
210 int new_parent; /* moving to a new dir */ 132 int new_parent; /* moving to a new dir */
211 int src_is_directory; /* src_name is a directory */ 133 int src_is_directory; /* src_name is a directory */
@@ -215,9 +137,7 @@ xfs_rename(
215 int cancel_flags; 137 int cancel_flags;
216 int committed; 138 int committed;
217 xfs_inode_t *inodes[4]; 139 xfs_inode_t *inodes[4];
218 int target_ip_dropped = 0; /* dropped target_ip link? */
219 int spaceres; 140 int spaceres;
220 int target_link_zero = 0;
221 int num_inodes; 141 int num_inodes;
222 142
223 xfs_itrace_entry(src_dp); 143 xfs_itrace_entry(src_dp);
@@ -230,64 +150,27 @@ xfs_rename(
230 target_dp, DM_RIGHT_NULL, 150 target_dp, DM_RIGHT_NULL,
231 src_name->name, target_name->name, 151 src_name->name, target_name->name,
232 0, 0, 0); 152 0, 0, 0);
233 if (error) { 153 if (error)
234 return error; 154 return error;
235 }
236 } 155 }
237 /* Return through std_return after this point. */ 156 /* Return through std_return after this point. */
238 157
239 /* 158 new_parent = (src_dp != target_dp);
240 * Lock all the participating inodes. Depending upon whether 159 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
241 * the target_name exists in the target directory, and
242 * whether the target directory is the same as the source
243 * directory, we can lock from 2 to 4 inodes.
244 * xfs_lock_for_rename() will return ENOENT if src_name
245 * does not exist in the source directory.
246 */
247 tp = NULL;
248 error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
249 &target_ip, inodes, &num_inodes);
250 if (error) {
251 /*
252 * We have nothing locked, no inode references, and
253 * no transaction, so just get out.
254 */
255 goto std_return;
256 }
257
258 ASSERT(src_ip != NULL);
259 160
260 if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 161 if (src_is_directory) {
261 /* 162 /*
262 * Check for link count overflow on target_dp 163 * Check for link count overflow on target_dp
263 */ 164 */
264 if (target_ip == NULL && (src_dp != target_dp) && 165 if (target_ip == NULL && new_parent &&
265 target_dp->i_d.di_nlink >= XFS_MAXLINK) { 166 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
266 error = XFS_ERROR(EMLINK); 167 error = XFS_ERROR(EMLINK);
267 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); 168 goto std_return;
268 goto rele_return;
269 } 169 }
270 } 170 }
271 171
272 /* 172 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
273 * If we are using project inheritance, we only allow renames 173 inodes, &num_inodes);
274 * into our tree when the project IDs are the same; else the
275 * tree quota mechanism would be circumvented.
276 */
277 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
278 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
279 error = XFS_ERROR(EXDEV);
280 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
281 goto rele_return;
282 }
283
284 new_parent = (src_dp != target_dp);
285 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
286
287 /*
288 * Drop the locks on our inodes so that we can start the transaction.
289 */
290 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
291 174
292 XFS_BMAP_INIT(&free_list, &first_block); 175 XFS_BMAP_INIT(&free_list, &first_block);
293 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 176 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
@@ -302,7 +185,7 @@ xfs_rename(
302 } 185 }
303 if (error) { 186 if (error) {
304 xfs_trans_cancel(tp, 0); 187 xfs_trans_cancel(tp, 0);
305 goto rele_return; 188 goto std_return;
306 } 189 }
307 190
308 /* 191 /*
@@ -310,13 +193,29 @@ xfs_rename(
310 */ 193 */
311 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { 194 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
312 xfs_trans_cancel(tp, cancel_flags); 195 xfs_trans_cancel(tp, cancel_flags);
313 goto rele_return; 196 goto std_return;
314 } 197 }
315 198
316 /* 199 /*
317 * Reacquire the inode locks we dropped above. 200 * Lock all the participating inodes. Depending upon whether
201 * the target_name exists in the target directory, and
202 * whether the target directory is the same as the source
203 * directory, we can lock from 2 to 4 inodes.
204 */
205 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
206
207 /*
208 * If we are using project inheritance, we only allow renames
209 * into our tree when the project IDs are the same; else the
210 * tree quota mechanism would be circumvented.
318 */ 211 */
319 xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); 212 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
213 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
214 error = XFS_ERROR(EXDEV);
215 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
216 xfs_trans_cancel(tp, cancel_flags);
217 goto std_return;
218 }
320 219
321 /* 220 /*
322 * Join all the inodes to the transaction. From this point on, 221 * Join all the inodes to the transaction. From this point on,
@@ -328,17 +227,17 @@ xfs_rename(
328 */ 227 */
329 IHOLD(src_dp); 228 IHOLD(src_dp);
330 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 229 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
230
331 if (new_parent) { 231 if (new_parent) {
332 IHOLD(target_dp); 232 IHOLD(target_dp);
333 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 233 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
334 } 234 }
335 if ((src_ip != src_dp) && (src_ip != target_dp)) { 235
336 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 236 IHOLD(src_ip);
337 } 237 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
338 if ((target_ip != NULL) && 238
339 (target_ip != src_ip) && 239 if (target_ip) {
340 (target_ip != src_dp) && 240 IHOLD(target_ip);
341 (target_ip != target_dp)) {
342 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 241 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
343 } 242 }
344 243
@@ -412,7 +311,6 @@ xfs_rename(
412 error = xfs_droplink(tp, target_ip); 311 error = xfs_droplink(tp, target_ip);
413 if (error) 312 if (error)
414 goto abort_return; 313 goto abort_return;
415 target_ip_dropped = 1;
416 314
417 if (src_is_directory) { 315 if (src_is_directory) {
418 /* 316 /*
@@ -422,10 +320,6 @@ xfs_rename(
422 if (error) 320 if (error)
423 goto abort_return; 321 goto abort_return;
424 } 322 }
425
426 /* Do this test while we still hold the locks */
427 target_link_zero = (target_ip)->i_d.di_nlink==0;
428
429 } /* target_ip != NULL */ 323 } /* target_ip != NULL */
430 324
431 /* 325 /*
@@ -492,15 +386,6 @@ xfs_rename(
492 } 386 }
493 387
494 /* 388 /*
495 * If there was a target inode, take an extra reference on
496 * it here so that it doesn't go to xfs_inactive() from
497 * within the commit.
498 */
499 if (target_ip != NULL) {
500 IHOLD(target_ip);
501 }
502
503 /*
504 * If this is a synchronous mount, make sure that the 389 * If this is a synchronous mount, make sure that the
505 * rename transaction goes to disk before returning to 390 * rename transaction goes to disk before returning to
506 * the user. 391 * the user.
@@ -509,30 +394,11 @@ xfs_rename(
509 xfs_trans_set_sync(tp); 394 xfs_trans_set_sync(tp);
510 } 395 }
511 396
512 /*
513 * Take refs. for vop_link_removed calls below. No need to worry
514 * about directory refs. because the caller holds them.
515 *
516 * Do holds before the xfs_bmap_finish since it might rele them down
517 * to zero.
518 */
519
520 if (target_ip_dropped)
521 IHOLD(target_ip);
522 IHOLD(src_ip);
523
524 error = xfs_bmap_finish(&tp, &free_list, &committed); 397 error = xfs_bmap_finish(&tp, &free_list, &committed);
525 if (error) { 398 if (error) {
526 xfs_bmap_cancel(&free_list); 399 xfs_bmap_cancel(&free_list);
527 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 400 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
528 XFS_TRANS_ABORT)); 401 XFS_TRANS_ABORT));
529 if (target_ip != NULL) {
530 IRELE(target_ip);
531 }
532 if (target_ip_dropped) {
533 IRELE(target_ip);
534 }
535 IRELE(src_ip);
536 goto std_return; 402 goto std_return;
537 } 403 }
538 404
@@ -541,15 +407,6 @@ xfs_rename(
541 * the vnode references. 407 * the vnode references.
542 */ 408 */
543 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 409 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
544 if (target_ip != NULL)
545 IRELE(target_ip);
546 /*
547 * Let interposed file systems know about removed links.
548 */
549 if (target_ip_dropped)
550 IRELE(target_ip);
551
552 IRELE(src_ip);
553 410
554 /* Fall through to std_return with error = 0 or errno from 411 /* Fall through to std_return with error = 0 or errno from
555 * xfs_trans_commit */ 412 * xfs_trans_commit */
@@ -571,11 +428,4 @@ std_return:
571 xfs_bmap_cancel(&free_list); 428 xfs_bmap_cancel(&free_list);
572 xfs_trans_cancel(tp, cancel_flags); 429 xfs_trans_cancel(tp, cancel_flags);
573 goto std_return; 430 goto std_return;
574
575 rele_return:
576 IRELE(src_ip);
577 if (target_ip != NULL) {
578 IRELE(target_ip);
579 }
580 goto std_return;
581} 431}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b8db1d5cde5a..4c70bf5e9985 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,13 +111,13 @@ xfs_trans_iget(
111 */ 111 */
112 ASSERT(ip->i_itemp != NULL); 112 ASSERT(ip->i_itemp != NULL);
113 ASSERT(lock_flags & XFS_ILOCK_EXCL); 113 ASSERT(lock_flags & XFS_ILOCK_EXCL);
114 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 114 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
116 ismrlocked(&ip->i_iolock, MR_UPDATE)); 116 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); 118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
120 ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS))); 120 xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); 122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
123 123
@@ -185,7 +185,7 @@ xfs_trans_ijoin(
185 xfs_inode_log_item_t *iip; 185 xfs_inode_log_item_t *iip;
186 186
187 ASSERT(ip->i_transp == NULL); 187 ASSERT(ip->i_transp == NULL);
188 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 188 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
189 ASSERT(lock_flags & XFS_ILOCK_EXCL); 189 ASSERT(lock_flags & XFS_ILOCK_EXCL);
190 if (ip->i_itemp == NULL) 190 if (ip->i_itemp == NULL)
191 xfs_inode_item_init(ip, ip->i_mount); 191 xfs_inode_item_init(ip, ip->i_mount);
@@ -232,7 +232,7 @@ xfs_trans_ihold(
232{ 232{
233 ASSERT(ip->i_transp == tp); 233 ASSERT(ip->i_transp == tp);
234 ASSERT(ip->i_itemp != NULL); 234 ASSERT(ip->i_itemp != NULL);
235 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 235 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
236 236
237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD; 237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
238} 238}
@@ -257,7 +257,7 @@ xfs_trans_log_inode(
257 257
258 ASSERT(ip->i_transp == tp); 258 ASSERT(ip->i_transp == tp);
259 ASSERT(ip->i_itemp != NULL); 259 ASSERT(ip->i_itemp != NULL);
260 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 260 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
261 261
262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); 262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
263 ASSERT(lidp != NULL); 263 ASSERT(lidp != NULL);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 2b8dc7e40772..98e5f110ba5f 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -41,49 +41,6 @@
41#include "xfs_utils.h" 41#include "xfs_utils.h"
42 42
43 43
44int
45xfs_dir_lookup_int(
46 xfs_inode_t *dp,
47 uint lock_mode,
48 struct xfs_name *name,
49 xfs_ino_t *inum,
50 xfs_inode_t **ipp)
51{
52 int error;
53
54 xfs_itrace_entry(dp);
55
56 error = xfs_dir_lookup(NULL, dp, name, inum);
57 if (!error) {
58 /*
59 * Unlock the directory. We do this because we can't
60 * hold the directory lock while doing the vn_get()
61 * in xfs_iget(). Doing so could cause us to hold
62 * a lock while waiting for the inode to finish
63 * being inactive while it's waiting for a log
64 * reservation in the inactive routine.
65 */
66 xfs_iunlock(dp, lock_mode);
67 error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
68 xfs_ilock(dp, lock_mode);
69
70 if (error) {
71 *ipp = NULL;
72 } else if ((*ipp)->i_d.di_mode == 0) {
73 /*
74 * The inode has been freed. Something is
75 * wrong so just get out of here.
76 */
77 xfs_iunlock(dp, lock_mode);
78 xfs_iput_new(*ipp, 0);
79 *ipp = NULL;
80 xfs_ilock(dp, lock_mode);
81 error = XFS_ERROR(ENOENT);
82 }
83 }
84 return error;
85}
86
87/* 44/*
88 * Allocates a new inode from disk and return a pointer to the 45 * Allocates a new inode from disk and return a pointer to the
89 * incore copy. This routine will internally commit the current 46 * incore copy. This routine will internally commit the current
@@ -310,7 +267,7 @@ xfs_bump_ino_vers2(
310{ 267{
311 xfs_mount_t *mp; 268 xfs_mount_t *mp;
312 269
313 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 270 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
314 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 271 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
315 272
316 ip->i_d.di_version = XFS_DINODE_VERSION_2; 273 ip->i_d.di_version = XFS_DINODE_VERSION_2;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 175b126d2cab..f316cb85d8e2 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,8 +21,6 @@
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) 21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) 22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23 23
24extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
25 xfs_ino_t *, xfs_inode_t **);
26extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 24extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
27extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 25extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
28 xfs_dev_t, cred_t *, prid_t, int, 26 xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fc48158fe479..30bacd8bb0e5 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -186,6 +186,7 @@ xfs_cleanup(void)
186 kmem_zone_destroy(xfs_efi_zone); 186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone); 187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone); 188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
189} 190}
190 191
191/* 192/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6650601c64f7..70702a60b4bb 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -76,132 +76,6 @@ xfs_open(
76} 76}
77 77
78/* 78/*
79 * xfs_getattr
80 */
81int
82xfs_getattr(
83 xfs_inode_t *ip,
84 bhv_vattr_t *vap,
85 int flags)
86{
87 bhv_vnode_t *vp = XFS_ITOV(ip);
88 xfs_mount_t *mp = ip->i_mount;
89
90 xfs_itrace_entry(ip);
91
92 if (XFS_FORCED_SHUTDOWN(mp))
93 return XFS_ERROR(EIO);
94
95 if (!(flags & ATTR_LAZY))
96 xfs_ilock(ip, XFS_ILOCK_SHARED);
97
98 vap->va_size = XFS_ISIZE(ip);
99 if (vap->va_mask == XFS_AT_SIZE)
100 goto all_done;
101
102 vap->va_nblocks =
103 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
104 vap->va_nodeid = ip->i_ino;
105#if XFS_BIG_INUMS
106 vap->va_nodeid += mp->m_inoadd;
107#endif
108 vap->va_nlink = ip->i_d.di_nlink;
109
110 /*
111 * Quick exit for non-stat callers
112 */
113 if ((vap->va_mask &
114 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
115 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
116 goto all_done;
117
118 /*
119 * Copy from in-core inode.
120 */
121 vap->va_mode = ip->i_d.di_mode;
122 vap->va_uid = ip->i_d.di_uid;
123 vap->va_gid = ip->i_d.di_gid;
124 vap->va_projid = ip->i_d.di_projid;
125
126 /*
127 * Check vnode type block/char vs. everything else.
128 */
129 switch (ip->i_d.di_mode & S_IFMT) {
130 case S_IFBLK:
131 case S_IFCHR:
132 vap->va_rdev = ip->i_df.if_u2.if_rdev;
133 vap->va_blocksize = BLKDEV_IOSIZE;
134 break;
135 default:
136 vap->va_rdev = 0;
137
138 if (!(XFS_IS_REALTIME_INODE(ip))) {
139 vap->va_blocksize = xfs_preferred_iosize(mp);
140 } else {
141
142 /*
143 * If the file blocks are being allocated from a
144 * realtime partition, then return the inode's
145 * realtime extent size or the realtime volume's
146 * extent size.
147 */
148 vap->va_blocksize =
149 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
150 }
151 break;
152 }
153
154 vn_atime_to_timespec(vp, &vap->va_atime);
155 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
156 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
157 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
158 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
159
160 /*
161 * Exit for stat callers. See if any of the rest of the fields
162 * to be filled in are needed.
163 */
164 if ((vap->va_mask &
165 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
166 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
167 goto all_done;
168
169 /*
170 * Convert di_flags to xflags.
171 */
172 vap->va_xflags = xfs_ip2xflags(ip);
173
174 /*
175 * Exit for inode revalidate. See if any of the rest of
176 * the fields to be filled in are needed.
177 */
178 if ((vap->va_mask &
179 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
180 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
181 goto all_done;
182
183 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
184 vap->va_nextents =
185 (ip->i_df.if_flags & XFS_IFEXTENTS) ?
186 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
187 ip->i_d.di_nextents;
188 if (ip->i_afp)
189 vap->va_anextents =
190 (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
191 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
192 ip->i_d.di_anextents;
193 else
194 vap->va_anextents = 0;
195 vap->va_gen = ip->i_d.di_gen;
196
197 all_done:
198 if (!(flags & ATTR_LAZY))
199 xfs_iunlock(ip, XFS_ILOCK_SHARED);
200 return 0;
201}
202
203
204/*
205 * xfs_setattr 79 * xfs_setattr
206 */ 80 */
207int 81int
@@ -211,7 +85,6 @@ xfs_setattr(
211 int flags, 85 int flags,
212 cred_t *credp) 86 cred_t *credp)
213{ 87{
214 bhv_vnode_t *vp = XFS_ITOV(ip);
215 xfs_mount_t *mp = ip->i_mount; 88 xfs_mount_t *mp = ip->i_mount;
216 xfs_trans_t *tp; 89 xfs_trans_t *tp;
217 int mask; 90 int mask;
@@ -222,7 +95,6 @@ xfs_setattr(
222 gid_t gid=0, igid=0; 95 gid_t gid=0, igid=0;
223 int timeflags = 0; 96 int timeflags = 0;
224 xfs_prid_t projid=0, iprojid=0; 97 xfs_prid_t projid=0, iprojid=0;
225 int mandlock_before, mandlock_after;
226 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
227 int file_owner; 99 int file_owner;
228 int need_iolock = 1; 100 int need_iolock = 1;
@@ -383,7 +255,7 @@ xfs_setattr(
383 m |= S_ISGID; 255 m |= S_ISGID;
384#if 0 256#if 0
385 /* Linux allows this, Irix doesn't. */ 257 /* Linux allows this, Irix doesn't. */
386 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
387 m |= S_ISVTX; 259 m |= S_ISVTX;
388#endif 260#endif
389 if (m && !capable(CAP_FSETID)) 261 if (m && !capable(CAP_FSETID))
@@ -461,10 +333,10 @@ xfs_setattr(
461 goto error_return; 333 goto error_return;
462 } 334 }
463 335
464 if (VN_ISDIR(vp)) { 336 if (S_ISDIR(ip->i_d.di_mode)) {
465 code = XFS_ERROR(EISDIR); 337 code = XFS_ERROR(EISDIR);
466 goto error_return; 338 goto error_return;
467 } else if (!VN_ISREG(vp)) { 339 } else if (!S_ISREG(ip->i_d.di_mode)) {
468 code = XFS_ERROR(EINVAL); 340 code = XFS_ERROR(EINVAL);
469 goto error_return; 341 goto error_return;
470 } 342 }
@@ -626,9 +498,6 @@ xfs_setattr(
626 xfs_trans_ihold(tp, ip); 498 xfs_trans_ihold(tp, ip);
627 } 499 }
628 500
629 /* determine whether mandatory locking mode changes */
630 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
631
632 /* 501 /*
633 * Truncate file. Must have write permission and not be a directory. 502 * Truncate file. Must have write permission and not be a directory.
634 */ 503 */
@@ -858,13 +727,6 @@ xfs_setattr(
858 code = xfs_trans_commit(tp, commit_flags); 727 code = xfs_trans_commit(tp, commit_flags);
859 } 728 }
860 729
861 /*
862 * If the (regular) file's mandatory locking mode changed, then
863 * notify the vnode. We do this under the inode lock to prevent
864 * racing calls to vop_vnode_change.
865 */
866 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
867
868 xfs_iunlock(ip, lock_flags); 730 xfs_iunlock(ip, lock_flags);
869 731
870 /* 732 /*
@@ -1443,7 +1305,7 @@ xfs_inactive_attrs(
1443 int error; 1305 int error;
1444 xfs_mount_t *mp; 1306 xfs_mount_t *mp;
1445 1307
1446 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1308 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1447 tp = *tpp; 1309 tp = *tpp;
1448 mp = ip->i_mount; 1310 mp = ip->i_mount;
1449 ASSERT(ip->i_d.di_forkoff != 0); 1311 ASSERT(ip->i_d.di_forkoff != 0);
@@ -1491,7 +1353,7 @@ xfs_release(
1491 xfs_mount_t *mp = ip->i_mount; 1353 xfs_mount_t *mp = ip->i_mount;
1492 int error; 1354 int error;
1493 1355
1494 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) 1356 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
1495 return 0; 1357 return 0;
1496 1358
1497 /* If this is a read-only mount, don't do this (would generate I/O) */ 1359 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1774,8 +1636,7 @@ xfs_lookup(
1774 struct xfs_name *name, 1636 struct xfs_name *name,
1775 xfs_inode_t **ipp) 1637 xfs_inode_t **ipp)
1776{ 1638{
1777 xfs_inode_t *ip; 1639 xfs_ino_t inum;
1778 xfs_ino_t e_inum;
1779 int error; 1640 int error;
1780 uint lock_mode; 1641 uint lock_mode;
1781 1642
@@ -1785,12 +1646,21 @@ xfs_lookup(
1785 return XFS_ERROR(EIO); 1646 return XFS_ERROR(EIO);
1786 1647
1787 lock_mode = xfs_ilock_map_shared(dp); 1648 lock_mode = xfs_ilock_map_shared(dp);
1788 error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); 1649 error = xfs_dir_lookup(NULL, dp, name, &inum);
1789 if (!error) {
1790 *ipp = ip;
1791 xfs_itrace_ref(ip);
1792 }
1793 xfs_iunlock_map_shared(dp, lock_mode); 1650 xfs_iunlock_map_shared(dp, lock_mode);
1651
1652 if (error)
1653 goto out;
1654
1655 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1656 if (error)
1657 goto out;
1658
1659 xfs_itrace_ref(*ipp);
1660 return 0;
1661
1662 out:
1663 *ipp = NULL;
1794 return error; 1664 return error;
1795} 1665}
1796 1666
@@ -1906,7 +1776,7 @@ xfs_create(
1906 * It is locked (and joined to the transaction). 1776 * It is locked (and joined to the transaction).
1907 */ 1777 */
1908 1778
1909 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 1779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1910 1780
1911 /* 1781 /*
1912 * Now we join the directory inode to the transaction. We do not do it 1782 * Now we join the directory inode to the transaction. We do not do it
@@ -2112,7 +1982,7 @@ again:
2112 1982
2113 ips[0] = ip; 1983 ips[0] = ip;
2114 ips[1] = dp; 1984 ips[1] = dp;
2115 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 1985 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2116 } 1986 }
2117 /* else e_inum == dp->i_ino */ 1987 /* else e_inum == dp->i_ino */
2118 /* This can happen if we're asked to lock /x/.. 1988 /* This can happen if we're asked to lock /x/..
@@ -2160,7 +2030,6 @@ void
2160xfs_lock_inodes( 2030xfs_lock_inodes(
2161 xfs_inode_t **ips, 2031 xfs_inode_t **ips,
2162 int inodes, 2032 int inodes,
2163 int first_locked,
2164 uint lock_mode) 2033 uint lock_mode)
2165{ 2034{
2166 int attempts = 0, i, j, try_lock; 2035 int attempts = 0, i, j, try_lock;
@@ -2168,13 +2037,8 @@ xfs_lock_inodes(
2168 2037
2169 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2038 ASSERT(ips && (inodes >= 2)); /* we need at least two */
2170 2039
2171 if (first_locked) { 2040 try_lock = 0;
2172 try_lock = 1; 2041 i = 0;
2173 i = 1;
2174 } else {
2175 try_lock = 0;
2176 i = 0;
2177 }
2178 2042
2179again: 2043again:
2180 for (; i < inodes; i++) { 2044 for (; i < inodes; i++) {
@@ -2298,29 +2162,14 @@ xfs_remove(
2298 return error; 2162 return error;
2299 } 2163 }
2300 2164
2301 /*
2302 * We need to get a reference to ip before we get our log
2303 * reservation. The reason for this is that we cannot call
2304 * xfs_iget for an inode for which we do not have a reference
2305 * once we've acquired a log reservation. This is because the
2306 * inode we are trying to get might be in xfs_inactive going
2307 * for a log reservation. Since we'll have to wait for the
2308 * inactive code to complete before returning from xfs_iget,
2309 * we need to make sure that we don't have log space reserved
2310 * when we call xfs_iget. Instead we get an unlocked reference
2311 * to the inode before getting our log reservation.
2312 */
2313 IHOLD(ip);
2314
2315 xfs_itrace_entry(ip); 2165 xfs_itrace_entry(ip);
2316 xfs_itrace_ref(ip); 2166 xfs_itrace_ref(ip);
2317 2167
2318 error = XFS_QM_DQATTACH(mp, dp, 0); 2168 error = XFS_QM_DQATTACH(mp, dp, 0);
2319 if (!error && dp != ip) 2169 if (!error)
2320 error = XFS_QM_DQATTACH(mp, ip, 0); 2170 error = XFS_QM_DQATTACH(mp, ip, 0);
2321 if (error) { 2171 if (error) {
2322 REMOVE_DEBUG_TRACE(__LINE__); 2172 REMOVE_DEBUG_TRACE(__LINE__);
2323 IRELE(ip);
2324 goto std_return; 2173 goto std_return;
2325 } 2174 }
2326 2175
@@ -2347,7 +2196,6 @@ xfs_remove(
2347 ASSERT(error != ENOSPC); 2196 ASSERT(error != ENOSPC);
2348 REMOVE_DEBUG_TRACE(__LINE__); 2197 REMOVE_DEBUG_TRACE(__LINE__);
2349 xfs_trans_cancel(tp, 0); 2198 xfs_trans_cancel(tp, 0);
2350 IRELE(ip);
2351 return error; 2199 return error;
2352 } 2200 }
2353 2201
@@ -2355,7 +2203,6 @@ xfs_remove(
2355 if (error) { 2203 if (error) {
2356 REMOVE_DEBUG_TRACE(__LINE__); 2204 REMOVE_DEBUG_TRACE(__LINE__);
2357 xfs_trans_cancel(tp, cancel_flags); 2205 xfs_trans_cancel(tp, cancel_flags);
2358 IRELE(ip);
2359 goto std_return; 2206 goto std_return;
2360 } 2207 }
2361 2208
@@ -2363,23 +2210,18 @@ xfs_remove(
2363 * At this point, we've gotten both the directory and the entry 2210 * At this point, we've gotten both the directory and the entry
2364 * inodes locked. 2211 * inodes locked.
2365 */ 2212 */
2213 IHOLD(ip);
2366 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2214 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2367 if (dp != ip) { 2215
2368 /* 2216 IHOLD(dp);
2369 * Increment vnode ref count only in this case since 2217 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2370 * there's an extra vnode reference in the case where
2371 * dp == ip.
2372 */
2373 IHOLD(dp);
2374 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2375 }
2376 2218
2377 /* 2219 /*
2378 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2220 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2379 */ 2221 */
2380 XFS_BMAP_INIT(&free_list, &first_block); 2222 XFS_BMAP_INIT(&free_list, &first_block);
2381 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2223 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2382 &first_block, &free_list, 0); 2224 &first_block, &free_list, resblks);
2383 if (error) { 2225 if (error) {
2384 ASSERT(error != ENOENT); 2226 ASSERT(error != ENOENT);
2385 REMOVE_DEBUG_TRACE(__LINE__); 2227 REMOVE_DEBUG_TRACE(__LINE__);
@@ -2402,12 +2244,6 @@ xfs_remove(
2402 link_zero = (ip)->i_d.di_nlink==0; 2244 link_zero = (ip)->i_d.di_nlink==0;
2403 2245
2404 /* 2246 /*
2405 * Take an extra ref on the inode so that it doesn't
2406 * go to xfs_inactive() from within the commit.
2407 */
2408 IHOLD(ip);
2409
2410 /*
2411 * If this is a synchronous mount, make sure that the 2247 * If this is a synchronous mount, make sure that the
2412 * remove transaction goes to disk before returning to 2248 * remove transaction goes to disk before returning to
2413 * the user. 2249 * the user.
@@ -2423,10 +2259,8 @@ xfs_remove(
2423 } 2259 }
2424 2260
2425 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2261 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2426 if (error) { 2262 if (error)
2427 IRELE(ip);
2428 goto std_return; 2263 goto std_return;
2429 }
2430 2264
2431 /* 2265 /*
2432 * If we are using filestreams, kill the stream association. 2266 * If we are using filestreams, kill the stream association.
@@ -2438,7 +2272,6 @@ xfs_remove(
2438 xfs_filestream_deassociate(ip); 2272 xfs_filestream_deassociate(ip);
2439 2273
2440 xfs_itrace_exit(ip); 2274 xfs_itrace_exit(ip);
2441 IRELE(ip);
2442 2275
2443/* Fall through to std_return with error = 0 */ 2276/* Fall through to std_return with error = 0 */
2444 std_return: 2277 std_return:
@@ -2467,8 +2300,6 @@ xfs_remove(
2467 cancel_flags |= XFS_TRANS_ABORT; 2300 cancel_flags |= XFS_TRANS_ABORT;
2468 xfs_trans_cancel(tp, cancel_flags); 2301 xfs_trans_cancel(tp, cancel_flags);
2469 2302
2470 IRELE(ip);
2471
2472 goto std_return; 2303 goto std_return;
2473} 2304}
2474 2305
@@ -2536,7 +2367,7 @@ xfs_link(
2536 ips[1] = sip; 2367 ips[1] = sip;
2537 } 2368 }
2538 2369
2539 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2370 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2540 2371
2541 /* 2372 /*
2542 * Increment vnode ref counts since xfs_trans_commit & 2373 * Increment vnode ref counts since xfs_trans_commit &
@@ -2840,7 +2671,6 @@ xfs_rmdir(
2840 struct xfs_name *name, 2671 struct xfs_name *name,
2841 xfs_inode_t *cdp) 2672 xfs_inode_t *cdp)
2842{ 2673{
2843 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2844 xfs_mount_t *mp = dp->i_mount; 2674 xfs_mount_t *mp = dp->i_mount;
2845 xfs_trans_t *tp; 2675 xfs_trans_t *tp;
2846 int error; 2676 int error;
@@ -2866,27 +2696,12 @@ xfs_rmdir(
2866 } 2696 }
2867 2697
2868 /* 2698 /*
2869 * We need to get a reference to cdp before we get our log
2870 * reservation. The reason for this is that we cannot call
2871 * xfs_iget for an inode for which we do not have a reference
2872 * once we've acquired a log reservation. This is because the
2873 * inode we are trying to get might be in xfs_inactive going
2874 * for a log reservation. Since we'll have to wait for the
2875 * inactive code to complete before returning from xfs_iget,
2876 * we need to make sure that we don't have log space reserved
2877 * when we call xfs_iget. Instead we get an unlocked reference
2878 * to the inode before getting our log reservation.
2879 */
2880 IHOLD(cdp);
2881
2882 /*
2883 * Get the dquots for the inodes. 2699 * Get the dquots for the inodes.
2884 */ 2700 */
2885 error = XFS_QM_DQATTACH(mp, dp, 0); 2701 error = XFS_QM_DQATTACH(mp, dp, 0);
2886 if (!error && dp != cdp) 2702 if (!error)
2887 error = XFS_QM_DQATTACH(mp, cdp, 0); 2703 error = XFS_QM_DQATTACH(mp, cdp, 0);
2888 if (error) { 2704 if (error) {
2889 IRELE(cdp);
2890 REMOVE_DEBUG_TRACE(__LINE__); 2705 REMOVE_DEBUG_TRACE(__LINE__);
2891 goto std_return; 2706 goto std_return;
2892 } 2707 }
@@ -2913,7 +2728,6 @@ xfs_rmdir(
2913 if (error) { 2728 if (error) {
2914 ASSERT(error != ENOSPC); 2729 ASSERT(error != ENOSPC);
2915 cancel_flags = 0; 2730 cancel_flags = 0;
2916 IRELE(cdp);
2917 goto error_return; 2731 goto error_return;
2918 } 2732 }
2919 XFS_BMAP_INIT(&free_list, &first_block); 2733 XFS_BMAP_INIT(&free_list, &first_block);
@@ -2927,21 +2741,13 @@ xfs_rmdir(
2927 error = xfs_lock_dir_and_entry(dp, cdp); 2741 error = xfs_lock_dir_and_entry(dp, cdp);
2928 if (error) { 2742 if (error) {
2929 xfs_trans_cancel(tp, cancel_flags); 2743 xfs_trans_cancel(tp, cancel_flags);
2930 IRELE(cdp);
2931 goto std_return; 2744 goto std_return;
2932 } 2745 }
2933 2746
2747 IHOLD(dp);
2934 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2748 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2935 if (dp != cdp) {
2936 /*
2937 * Only increment the parent directory vnode count if
2938 * we didn't bump it in looking up cdp. The only time
2939 * we don't bump it is when we're looking up ".".
2940 */
2941 VN_HOLD(dir_vp);
2942 }
2943 2749
2944 xfs_itrace_ref(cdp); 2750 IHOLD(cdp);
2945 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 2751 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2946 2752
2947 ASSERT(cdp->i_d.di_nlink >= 2); 2753 ASSERT(cdp->i_d.di_nlink >= 2);
@@ -2995,12 +2801,6 @@ xfs_rmdir(
2995 last_cdp_link = (cdp)->i_d.di_nlink==0; 2801 last_cdp_link = (cdp)->i_d.di_nlink==0;
2996 2802
2997 /* 2803 /*
2998 * Take an extra ref on the child vnode so that it
2999 * does not go to xfs_inactive() from within the commit.
3000 */
3001 IHOLD(cdp);
3002
3003 /*
3004 * If this is a synchronous mount, make sure that the 2804 * If this is a synchronous mount, make sure that the
3005 * rmdir transaction goes to disk before returning to 2805 * rmdir transaction goes to disk before returning to
3006 * the user. 2806 * the user.
@@ -3014,19 +2814,15 @@ xfs_rmdir(
3014 xfs_bmap_cancel(&free_list); 2814 xfs_bmap_cancel(&free_list);
3015 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2815 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
3016 XFS_TRANS_ABORT)); 2816 XFS_TRANS_ABORT));
3017 IRELE(cdp);
3018 goto std_return; 2817 goto std_return;
3019 } 2818 }
3020 2819
3021 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2820 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3022 if (error) { 2821 if (error) {
3023 IRELE(cdp);
3024 goto std_return; 2822 goto std_return;
3025 } 2823 }
3026 2824
3027 2825
3028 IRELE(cdp);
3029
3030 /* Fall through to std_return with error = 0 or the errno 2826 /* Fall through to std_return with error = 0 or the errno
3031 * from xfs_trans_commit. */ 2827 * from xfs_trans_commit. */
3032 std_return: 2828 std_return:
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 24c53923dc2c..8abe8f186e20 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,7 +15,6 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
19int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
20 struct cred *credp); 19 struct cred *credp);
21int xfs_readlink(struct xfs_inode *ip, char *link); 20int xfs_readlink(struct xfs_inode *ip, char *link);
@@ -48,9 +47,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
48 struct cred *credp, int attr_flags); 47 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 48int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 49 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name); 50 struct xfs_name *target_name, struct xfs_inode *target_ip);
52int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 51int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
53 int *valuelenp, int flags, cred_t *cred); 52 int *valuelenp, int flags);
54int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 53int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
55 int valuelen, int flags); 54 int valuelen, int flags);
56int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 55int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
@@ -61,9 +60,6 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
61ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 60ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
62 const struct iovec *iovp, unsigned int segs, 61 const struct iovec *iovp, unsigned int segs,
63 loff_t *offset, int ioflags); 62 loff_t *offset, int ioflags);
64ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp,
65 loff_t *offset, int ioflags, size_t count,
66 read_actor_t actor, void *target);
67ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp, 63ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
68 loff_t *ppos, struct pipe_inode_info *pipe, size_t count, 64 loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
69 int flags, int ioflags); 65 int flags, int ioflags);