aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c4
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/inode.c8
-rw-r--r--fs/affs/namei.c6
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/dcache.c40
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/ecryptfs/inode.c12
-rw-r--r--fs/efs/inode.c2
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/balloc.c17
-rw-r--r--fs/ext3/fsync.c10
-rw-r--r--fs/ext3/ialloc.c47
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/ioctl.c24
-rw-r--r--fs/ext3/namei.c6
-rw-r--r--fs/ext3/super.c12
-rw-r--r--fs/ext4/balloc.c345
-rw-r--r--fs/ext4/ext4.h141
-rw-r--r--fs/ext4/ext4_extents.h2
-rw-r--r--fs/ext4/ext4_jbd2.c8
-rw-r--r--fs/ext4/extents.c1168
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/fsync.c10
-rw-r--r--fs/ext4/ialloc.c206
-rw-r--r--fs/ext4/indirect.c20
-rw-r--r--fs/ext4/inode.c514
-rw-r--r--fs/ext4/ioctl.c65
-rw-r--r--fs/ext4/mballoc.c331
-rw-r--r--fs/ext4/mballoc.h11
-rw-r--r--fs/ext4/migrate.c111
-rw-r--r--fs/ext4/mmp.c10
-rw-r--r--fs/ext4/move_extent.c1
-rw-r--r--fs/ext4/namei.c29
-rw-r--r--fs/ext4/page-io.c66
-rw-r--r--fs/ext4/resize.c10
-rw-r--r--fs/ext4/super.c263
-rw-r--r--fs/ext4/xattr.c12
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hostfs/hostfs_user.c1
-rw-r--r--fs/hpfs/dir.c2
-rw-r--r--fs/hpfs/inode.c10
-rw-r--r--fs/hpfs/namei.c8
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/isofs/inode.c4
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c44
-rw-r--r--fs/jbd2/recovery.c28
-rw-r--r--fs/jbd2/transaction.c68
-rw-r--r--fs/jffs2/dir.c6
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jfs/jfs_imap.c6
-rw-r--r--fs/jfs/jfs_inode.c2
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/logfs/inode.c3
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/namei.c18
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/ntfs/inode.c8
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/namei.c18
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/quota/quota.c7
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/namei.c16
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/squashfs/inode.c18
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c5
-rw-r--r--fs/super.c9
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/ubifs/xattr.c4
-rw-r--r--fs/udf/balloc.c14
-rw-r--r--fs/udf/directory.c8
-rw-r--r--fs/udf/inode.c56
-rw-r--r--fs/udf/lowlevel.c2
-rw-r--r--fs/udf/misc.c19
-rw-r--r--fs/udf/namei.c20
-rw-r--r--fs/udf/partition.c19
-rw-r--r--fs/udf/super.c280
-rw-r--r--fs/udf/truncate.c22
-rw-r--r--fs/udf/udf_sb.h5
-rw-r--r--fs/udf/udfdecl.h35
-rw-r--r--fs/udf/udftime.c3
-rw-r--r--fs/udf/unicode.c6
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/xfs/xfs_iops.c2
141 files changed, 2848 insertions, 1714 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b5a1076aaa6c..879ed8851737 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1138,7 +1138,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1138 struct v9fs_session_info *v9ses = sb->s_fs_info; 1138 struct v9fs_session_info *v9ses = sb->s_fs_info;
1139 struct v9fs_inode *v9inode = V9FS_I(inode); 1139 struct v9fs_inode *v9inode = V9FS_I(inode);
1140 1140
1141 inode->i_nlink = 1; 1141 set_nlink(inode, 1);
1142 1142
1143 inode->i_atime.tv_sec = stat->atime; 1143 inode->i_atime.tv_sec = stat->atime;
1144 inode->i_mtime.tv_sec = stat->mtime; 1144 inode->i_mtime.tv_sec = stat->mtime;
@@ -1164,7 +1164,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1164 /* HARDLINKCOUNT %u */ 1164 /* HARDLINKCOUNT %u */
1165 sscanf(ext, "%13s %u", tag_name, &i_nlink); 1165 sscanf(ext, "%13s %u", tag_name, &i_nlink);
1166 if (!strncmp(tag_name, "HARDLINKCOUNT", 13)) 1166 if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
1167 inode->i_nlink = i_nlink; 1167 set_nlink(inode, i_nlink);
1168 } 1168 }
1169 } 1169 }
1170 mode = stat->mode & S_IALLUGO; 1170 mode = stat->mode & S_IALLUGO;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index aded79fcd5cf..0b5745e21946 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -606,7 +606,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
606 inode->i_ctime.tv_nsec = stat->st_ctime_nsec; 606 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
607 inode->i_uid = stat->st_uid; 607 inode->i_uid = stat->st_uid;
608 inode->i_gid = stat->st_gid; 608 inode->i_gid = stat->st_gid;
609 inode->i_nlink = stat->st_nlink; 609 set_nlink(inode, stat->st_nlink);
610 610
611 mode = stat->st_mode & S_IALLUGO; 611 mode = stat->st_mode & S_IALLUGO;
612 mode |= inode->i_mode & ~S_IALLUGO; 612 mode |= inode->i_mode & ~S_IALLUGO;
@@ -632,7 +632,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
632 if (stat->st_result_mask & P9_STATS_GID) 632 if (stat->st_result_mask & P9_STATS_GID)
633 inode->i_gid = stat->st_gid; 633 inode->i_gid = stat->st_gid;
634 if (stat->st_result_mask & P9_STATS_NLINK) 634 if (stat->st_result_mask & P9_STATS_NLINK)
635 inode->i_nlink = stat->st_nlink; 635 set_nlink(inode, stat->st_nlink);
636 if (stat->st_result_mask & P9_STATS_MODE) { 636 if (stat->st_result_mask & P9_STATS_MODE) {
637 inode->i_mode = stat->st_mode; 637 inode->i_mode = stat->st_mode;
638 if ((S_ISBLK(inode->i_mode)) || 638 if ((S_ISBLK(inode->i_mode)) ||
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index d5250c5aae21..1dab6a174d6a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -247,7 +247,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
247 inode->i_gid = ADFS_SB(sb)->s_gid; 247 inode->i_gid = ADFS_SB(sb)->s_gid;
248 inode->i_ino = obj->file_id; 248 inode->i_ino = obj->file_id;
249 inode->i_size = obj->size; 249 inode->i_size = obj->size;
250 inode->i_nlink = 2; 250 set_nlink(inode, 2);
251 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >> 251 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >>
252 sb->s_blocksize_bits; 252 sb->s_blocksize_bits;
253 253
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 3a4557e8325c..de37ec842340 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -215,7 +215,7 @@ affs_remove_link(struct dentry *dentry)
215 break; 215 break;
216 default: 216 default:
217 if (!AFFS_TAIL(sb, bh)->link_chain) 217 if (!AFFS_TAIL(sb, bh)->link_chain)
218 inode->i_nlink = 1; 218 set_nlink(inode, 1);
219 } 219 }
220 affs_free_block(sb, link_ino); 220 affs_free_block(sb, link_ino);
221 goto done; 221 goto done;
@@ -316,7 +316,7 @@ affs_remove_header(struct dentry *dentry)
316 if (inode->i_nlink > 1) 316 if (inode->i_nlink > 1)
317 retval = affs_remove_link(dentry); 317 retval = affs_remove_link(dentry);
318 else 318 else
319 inode->i_nlink = 0; 319 clear_nlink(inode);
320 affs_unlock_link(inode); 320 affs_unlock_link(inode);
321 inode->i_ctime = CURRENT_TIME_SEC; 321 inode->i_ctime = CURRENT_TIME_SEC;
322 mark_inode_dirty(inode); 322 mark_inode_dirty(inode);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 5d828903ac69..88a4b0b50058 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -54,7 +54,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
54 prot = be32_to_cpu(tail->protect); 54 prot = be32_to_cpu(tail->protect);
55 55
56 inode->i_size = 0; 56 inode->i_size = 0;
57 inode->i_nlink = 1; 57 set_nlink(inode, 1);
58 inode->i_mode = 0; 58 inode->i_mode = 0;
59 AFFS_I(inode)->i_extcnt = 1; 59 AFFS_I(inode)->i_extcnt = 1;
60 AFFS_I(inode)->i_ext_last = ~1; 60 AFFS_I(inode)->i_ext_last = ~1;
@@ -137,7 +137,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
137 sbi->s_hashsize + 1; 137 sbi->s_hashsize + 1;
138 } 138 }
139 if (tail->link_chain) 139 if (tail->link_chain)
140 inode->i_nlink = 2; 140 set_nlink(inode, 2);
141 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 141 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
142 inode->i_op = &affs_file_inode_operations; 142 inode->i_op = &affs_file_inode_operations;
143 inode->i_fop = &affs_file_operations; 143 inode->i_fop = &affs_file_operations;
@@ -304,7 +304,7 @@ affs_new_inode(struct inode *dir)
304 inode->i_uid = current_fsuid(); 304 inode->i_uid = current_fsuid();
305 inode->i_gid = current_fsgid(); 305 inode->i_gid = current_fsgid();
306 inode->i_ino = block; 306 inode->i_ino = block;
307 inode->i_nlink = 1; 307 set_nlink(inode, 1);
308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
309 atomic_set(&AFFS_I(inode)->i_opencnt, 0); 309 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
310 AFFS_I(inode)->i_blkcnt = 0; 310 AFFS_I(inode)->i_blkcnt = 0;
@@ -387,7 +387,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
387 AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block); 387 AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block);
388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); 388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
389 mark_buffer_dirty_inode(inode_bh, inode); 389 mark_buffer_dirty_inode(inode_bh, inode);
390 inode->i_nlink = 2; 390 set_nlink(inode, 2);
391 ihold(inode); 391 ihold(inode);
392 } 392 }
393 affs_fix_checksum(sb, bh); 393 affs_fix_checksum(sb, bh);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index e3e9efc1fdd8..780a11dc6318 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -277,7 +277,7 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
277 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 277 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
278 error = affs_add_entry(dir, inode, dentry, ST_FILE); 278 error = affs_add_entry(dir, inode, dentry, ST_FILE);
279 if (error) { 279 if (error) {
280 inode->i_nlink = 0; 280 clear_nlink(inode);
281 iput(inode); 281 iput(inode);
282 return error; 282 return error;
283 } 283 }
@@ -305,7 +305,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
305 305
306 error = affs_add_entry(dir, inode, dentry, ST_USERDIR); 306 error = affs_add_entry(dir, inode, dentry, ST_USERDIR);
307 if (error) { 307 if (error) {
308 inode->i_nlink = 0; 308 clear_nlink(inode);
309 mark_inode_dirty(inode); 309 mark_inode_dirty(inode);
310 iput(inode); 310 iput(inode);
311 return error; 311 return error;
@@ -392,7 +392,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
392 return 0; 392 return 0;
393 393
394err: 394err:
395 inode->i_nlink = 0; 395 clear_nlink(inode);
396 mark_inode_dirty(inode); 396 mark_inode_dirty(inode);
397 iput(inode); 397 iput(inode);
398 return error; 398 return error;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 346e3289abd7..2f213d109c21 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -90,7 +90,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
90 vnode->vfs_inode.i_uid = status->owner; 90 vnode->vfs_inode.i_uid = status->owner;
91 vnode->vfs_inode.i_gid = status->group; 91 vnode->vfs_inode.i_gid = status->group;
92 vnode->vfs_inode.i_generation = vnode->fid.unique; 92 vnode->vfs_inode.i_generation = vnode->fid.unique;
93 vnode->vfs_inode.i_nlink = status->nlink; 93 set_nlink(&vnode->vfs_inode, status->nlink);
94 94
95 mode = vnode->vfs_inode.i_mode; 95 mode = vnode->vfs_inode.i_mode;
96 mode &= ~S_IALLUGO; 96 mode &= ~S_IALLUGO;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0fdab6e03d87..d890ae3b2ce6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -67,7 +67,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
67 fscache_attr_changed(vnode->cache); 67 fscache_attr_changed(vnode->cache);
68#endif 68#endif
69 69
70 inode->i_nlink = vnode->status.nlink; 70 set_nlink(inode, vnode->status.nlink);
71 inode->i_uid = vnode->status.owner; 71 inode->i_uid = vnode->status.owner;
72 inode->i_gid = 0; 72 inode->i_gid = 0;
73 inode->i_size = vnode->status.size; 73 inode->i_size = vnode->status.size;
@@ -174,7 +174,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
174 inode->i_size = 0; 174 inode->i_size = 0;
175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
176 inode->i_op = &afs_autocell_inode_operations; 176 inode->i_op = &afs_autocell_inode_operations;
177 inode->i_nlink = 2; 177 set_nlink(inode, 2);
178 inode->i_uid = 0; 178 inode->i_uid = 0;
179 inode->i_gid = 0; 179 inode->i_gid = 0;
180 inode->i_ctime.tv_sec = get_seconds(); 180 inode->i_ctime.tv_sec = get_seconds();
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 180fa2425e49..8179f1ab8175 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -342,7 +342,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
342 inode->i_ino = get_next_ino(); 342 inode->i_ino = get_next_ino();
343 343
344 if (S_ISDIR(mode)) { 344 if (S_ISDIR(mode)) {
345 inode->i_nlink = 2; 345 set_nlink(inode, 2);
346 inode->i_op = &autofs4_dir_inode_operations; 346 inode->i_op = &autofs4_dir_inode_operations;
347 inode->i_fop = &autofs4_dir_operations; 347 inode->i_fop = &autofs4_dir_operations;
348 } else if (S_ISLNK(mode)) { 348 } else if (S_ISLNK(mode)) {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 720d885e8dca..8342ca67abcd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -357,7 +357,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
357 inode->i_gid = befs_sb->mount_opts.use_gid ? 357 inode->i_gid = befs_sb->mount_opts.use_gid ?
358 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid); 358 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid);
359 359
360 inode->i_nlink = 1; 360 set_nlink(inode, 1);
361 361
362 /* 362 /*
363 * BEFS's time is 64 bits, but current VFS is 32 bits... 363 * BEFS's time is 64 bits, but current VFS is 32 bits...
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index b14cebfd9047..9cc074019479 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -199,7 +199,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
199 printf("unlinking non-existent file %s:%lu (nlink=%d)\n", 199 printf("unlinking non-existent file %s:%lu (nlink=%d)\n",
200 inode->i_sb->s_id, inode->i_ino, 200 inode->i_sb->s_id, inode->i_ino,
201 inode->i_nlink); 201 inode->i_nlink);
202 inode->i_nlink = 1; 202 set_nlink(inode, 1);
203 } 203 }
204 de->ino = 0; 204 de->ino = 0;
205 mark_buffer_dirty_inode(bh, dir); 205 mark_buffer_dirty_inode(bh, dir);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index a8e37f81d097..697af5bf70b3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -78,7 +78,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); 78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
79 inode->i_uid = le32_to_cpu(di->i_uid); 79 inode->i_uid = le32_to_cpu(di->i_uid);
80 inode->i_gid = le32_to_cpu(di->i_gid); 80 inode->i_gid = le32_to_cpu(di->i_gid);
81 inode->i_nlink = le32_to_cpu(di->i_nlink); 81 set_nlink(inode, le32_to_cpu(di->i_nlink));
82 inode->i_size = BFS_FILESIZE(di); 82 inode->i_size = BFS_FILESIZE(di);
83 inode->i_blocks = BFS_FILEBLOCKS(di); 83 inode->i_blocks = BFS_FILEBLOCKS(di);
84 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); 84 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ba1a1ae4a18a..1e9edbdeda7e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -521,7 +521,7 @@ static void kill_node(Node *e)
521 write_unlock(&entries_lock); 521 write_unlock(&entries_lock);
522 522
523 if (dentry) { 523 if (dentry) {
524 dentry->d_inode->i_nlink--; 524 drop_nlink(dentry->d_inode);
525 d_drop(dentry); 525 d_drop(dentry);
526 dput(dentry); 526 dput(dentry);
527 simple_release_fs(&bm_mnt, &entry_count); 527 simple_release_fs(&bm_mnt, &entry_count);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b52c672f4c18..ae4d9cd10961 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1641,7 +1641,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1641 inode->i_gid = btrfs_stack_inode_gid(inode_item); 1641 inode->i_gid = btrfs_stack_inode_gid(inode_item);
1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); 1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1643 inode->i_mode = btrfs_stack_inode_mode(inode_item); 1643 inode->i_mode = btrfs_stack_inode_mode(inode_item);
1644 inode->i_nlink = btrfs_stack_inode_nlink(inode_item); 1644 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); 1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07b3ac662e19..07ea91879a91 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1705,7 +1705,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1705 sb->s_bdi = &fs_info->bdi; 1705 sb->s_bdi = &fs_info->bdi;
1706 1706
1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
1708 fs_info->btree_inode->i_nlink = 1; 1708 set_nlink(fs_info->btree_inode, 1);
1709 /* 1709 /*
1710 * we set the i_size on the btree inode to the max possible int. 1710 * we set the i_size on the btree inode to the max possible int.
1711 * the real end of the address space is determined by all of 1711 * the real end of the address space is determined by all of
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b2d004ad66a0..75686a61bd45 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2534,7 +2534,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
2534 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2534 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2535 struct btrfs_inode_item); 2535 struct btrfs_inode_item);
2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2537 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2537 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item); 2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item);
2539 inode->i_gid = btrfs_inode_gid(leaf, inode_item); 2539 inode->i_gid = btrfs_inode_gid(leaf, inode_item);
2540 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); 2540 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
@@ -6728,7 +6728,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6728 inode->i_op = &btrfs_dir_inode_operations; 6728 inode->i_op = &btrfs_dir_inode_operations;
6729 inode->i_fop = &btrfs_dir_file_operations; 6729 inode->i_fop = &btrfs_dir_file_operations;
6730 6730
6731 inode->i_nlink = 1; 6731 set_nlink(inode, 1);
6732 btrfs_i_size_write(inode, 0); 6732 btrfs_i_size_write(inode, 0);
6733 6733
6734 err = btrfs_update_inode(trans, new_root, inode); 6734 err = btrfs_update_inode(trans, new_root, inode);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 786639fca067..0618aa39740b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1030,7 +1030,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1030 } 1030 }
1031 btrfs_release_path(path); 1031 btrfs_release_path(path);
1032 if (nlink != inode->i_nlink) { 1032 if (nlink != inode->i_nlink) {
1033 inode->i_nlink = nlink; 1033 set_nlink(inode, nlink);
1034 btrfs_update_inode(trans, root, inode); 1034 btrfs_update_inode(trans, root, inode);
1035 } 1035 }
1036 BTRFS_I(inode)->index_cnt = (u64)-1; 1036 BTRFS_I(inode)->index_cnt = (u64)-1;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b8731bf3ef1f..15b21e35078a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2363,7 +2363,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2363 } 2363 }
2364 2364
2365 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2365 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
2366 inode->i_nlink = le32_to_cpu(grant->nlink); 2366 set_nlink(inode, le32_to_cpu(grant->nlink));
2367 2367
2368 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { 2368 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
2369 int len = le32_to_cpu(grant->xattr_len); 2369 int len = le32_to_cpu(grant->xattr_len);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5dde7d51dc11..1616a0d37cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -618,7 +618,7 @@ static int fill_inode(struct inode *inode,
618 } 618 }
619 619
620 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 620 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
621 inode->i_nlink = le32_to_cpu(info->nlink); 621 set_nlink(inode, le32_to_cpu(info->nlink));
622 622
623 /* be careful with mtime, atime, size */ 623 /* be careful with mtime, atime, size */
624 ceph_decode_timespec(&atime, &info->atime); 624 ceph_decode_timespec(&atime, &info->atime);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2c50bd2f65d1..e851d5b8931e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -132,7 +132,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
132 inode->i_mtime = fattr->cf_mtime; 132 inode->i_mtime = fattr->cf_mtime;
133 inode->i_ctime = fattr->cf_ctime; 133 inode->i_ctime = fattr->cf_ctime;
134 inode->i_rdev = fattr->cf_rdev; 134 inode->i_rdev = fattr->cf_rdev;
135 inode->i_nlink = fattr->cf_nlink; 135 set_nlink(inode, fattr->cf_nlink);
136 inode->i_uid = fattr->cf_uid; 136 inode->i_uid = fattr->cf_uid;
137 inode->i_gid = fattr->cf_gid; 137 inode->i_gid = fattr->cf_gid;
138 138
@@ -905,7 +905,7 @@ struct inode *cifs_root_iget(struct super_block *sb)
905 if (rc && tcon->ipc) { 905 if (rc && tcon->ipc) {
906 cFYI(1, "ipc connection - fake read inode"); 906 cFYI(1, "ipc connection - fake read inode");
907 inode->i_mode |= S_IFDIR; 907 inode->i_mode |= S_IFDIR;
908 inode->i_nlink = 2; 908 set_nlink(inode, 2);
909 inode->i_op = &cifs_ipc_inode_ops; 909 inode->i_op = &cifs_ipc_inode_ops;
910 inode->i_fop = &simple_dir_operations; 910 inode->i_fop = &simple_dir_operations;
911 inode->i_uid = cifs_sb->mnt_uid; 911 inode->i_uid = cifs_sb->mnt_uid;
@@ -1367,7 +1367,7 @@ mkdir_get_info:
1367 /* setting nlink not necessary except in cases where we 1367 /* setting nlink not necessary except in cases where we
1368 * failed to get it from the server or was set bogus */ 1368 * failed to get it from the server or was set bogus */
1369 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 1369 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
1370 direntry->d_inode->i_nlink = 2; 1370 set_nlink(direntry->d_inode, 2);
1371 1371
1372 mode &= ~current_umask(); 1372 mode &= ~current_umask();
1373 /* must turn on setgid bit if parent dir has it */ 1373 /* must turn on setgid bit if parent dir has it */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 8693b5d0e180..6b0e06434391 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -433,7 +433,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
433 if (old_file->d_inode) { 433 if (old_file->d_inode) {
434 cifsInode = CIFS_I(old_file->d_inode); 434 cifsInode = CIFS_I(old_file->d_inode);
435 if (rc == 0) { 435 if (rc == 0) {
436 old_file->d_inode->i_nlink++; 436 inc_nlink(old_file->d_inode);
437/* BB should we make this contingent on superblock flag NOATIME? */ 437/* BB should we make this contingent on superblock flag NOATIME? */
438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ 438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/
439 /* parent dir timestamps will update from srv 439 /* parent dir timestamps will update from srv
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2bdbcc11b373..854ace712685 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -104,7 +104,7 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
104 if (attr->va_gid != -1) 104 if (attr->va_gid != -1)
105 inode->i_gid = (gid_t) attr->va_gid; 105 inode->i_gid = (gid_t) attr->va_gid;
106 if (attr->va_nlink != -1) 106 if (attr->va_nlink != -1)
107 inode->i_nlink = attr->va_nlink; 107 set_nlink(inode, attr->va_nlink);
108 if (attr->va_size != -1) 108 if (attr->va_size != -1)
109 inode->i_size = attr->va_size; 109 inode->i_size = attr->va_size;
110 if (attr->va_size != -1) 110 if (attr->va_size != -1)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 0239433f50cb..28e7e135cfab 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -340,7 +340,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
340 if (!error) { 340 if (!error) {
341 /* VFS may delete the child */ 341 /* VFS may delete the child */
342 if (de->d_inode) 342 if (de->d_inode)
343 de->d_inode->i_nlink = 0; 343 clear_nlink(de->d_inode);
344 344
345 /* fix the link count of the parent */ 345 /* fix the link count of the parent */
346 coda_dir_drop_nlink(dir); 346 coda_dir_drop_nlink(dir);
diff --git a/fs/dcache.c b/fs/dcache.c
index a88948b8bd17..274f13e2f094 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -225,7 +225,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
225} 225}
226 226
227/* 227/*
228 * dentry_lru_(add|del|move_tail) must be called with d_lock held. 228 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
229 */ 229 */
230static void dentry_lru_add(struct dentry *dentry) 230static void dentry_lru_add(struct dentry *dentry)
231{ 231{
@@ -245,6 +245,9 @@ static void __dentry_lru_del(struct dentry *dentry)
245 dentry_stat.nr_unused--; 245 dentry_stat.nr_unused--;
246} 246}
247 247
248/*
249 * Remove a dentry with references from the LRU.
250 */
248static void dentry_lru_del(struct dentry *dentry) 251static void dentry_lru_del(struct dentry *dentry)
249{ 252{
250 if (!list_empty(&dentry->d_lru)) { 253 if (!list_empty(&dentry->d_lru)) {
@@ -254,6 +257,23 @@ static void dentry_lru_del(struct dentry *dentry)
254 } 257 }
255} 258}
256 259
260/*
261 * Remove a dentry that is unreferenced and about to be pruned
262 * (unhashed and destroyed) from the LRU, and inform the file system.
263 * This wrapper should be called _prior_ to unhashing a victim dentry.
264 */
265static void dentry_lru_prune(struct dentry *dentry)
266{
267 if (!list_empty(&dentry->d_lru)) {
268 if (dentry->d_flags & DCACHE_OP_PRUNE)
269 dentry->d_op->d_prune(dentry);
270
271 spin_lock(&dcache_lru_lock);
272 __dentry_lru_del(dentry);
273 spin_unlock(&dcache_lru_lock);
274 }
275}
276
257static void dentry_lru_move_tail(struct dentry *dentry) 277static void dentry_lru_move_tail(struct dentry *dentry)
258{ 278{
259 spin_lock(&dcache_lru_lock); 279 spin_lock(&dcache_lru_lock);
@@ -403,8 +423,12 @@ relock:
403 423
404 if (ref) 424 if (ref)
405 dentry->d_count--; 425 dentry->d_count--;
406 /* if dentry was on the d_lru list delete it from there */ 426 /*
407 dentry_lru_del(dentry); 427 * if dentry was on the d_lru list delete it from there.
428 * inform the fs via d_prune that this dentry is about to be
429 * unhashed and destroyed.
430 */
431 dentry_lru_prune(dentry);
408 /* if it was on the hash then remove it */ 432 /* if it was on the hash then remove it */
409 __d_drop(dentry); 433 __d_drop(dentry);
410 return d_kill(dentry, parent); 434 return d_kill(dentry, parent);
@@ -854,8 +878,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
854 do { 878 do {
855 struct inode *inode; 879 struct inode *inode;
856 880
857 /* detach from the system */ 881 /*
858 dentry_lru_del(dentry); 882 * remove the dentry from the lru, and inform
883 * the fs that this dentry is about to be
884 * unhashed and destroyed.
885 */
886 dentry_lru_prune(dentry);
859 __d_shrink(dentry); 887 __d_shrink(dentry);
860 888
861 if (dentry->d_count != 0) { 889 if (dentry->d_count != 0) {
@@ -1283,6 +1311,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1283 dentry->d_flags |= DCACHE_OP_REVALIDATE; 1311 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1284 if (op->d_delete) 1312 if (op->d_delete)
1285 dentry->d_flags |= DCACHE_OP_DELETE; 1313 dentry->d_flags |= DCACHE_OP_DELETE;
1314 if (op->d_prune)
1315 dentry->d_flags |= DCACHE_OP_PRUNE;
1286 1316
1287} 1317}
1288EXPORT_SYMBOL(d_set_d_op); 1318EXPORT_SYMBOL(d_set_d_op);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2f27e578d466..d5d5297efe97 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -307,7 +307,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
308 inode->i_op = &simple_dir_inode_operations; 308 inode->i_op = &simple_dir_inode_operations;
309 inode->i_fop = &simple_dir_operations; 309 inode->i_fop = &simple_dir_operations;
310 inode->i_nlink = 2; 310 set_nlink(inode, 2);
311 311
312 s->s_root = d_alloc_root(inode); 312 s->s_root = d_alloc_root(inode);
313 if (s->s_root) 313 if (s->s_root)
@@ -549,7 +549,7 @@ void devpts_pty_kill(struct tty_struct *tty)
549 549
550 dentry = d_find_alias(inode); 550 dentry = d_find_alias(inode);
551 551
552 inode->i_nlink--; 552 drop_nlink(inode);
553 d_delete(dentry); 553 d_delete(dentry);
554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */ 554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
555 dput(dentry); /* d_find_alias above */ 555 dput(dentry); /* d_find_alias above */
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f8582d7218..a36d327f1521 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -474,8 +474,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
474 goto out_lock; 474 goto out_lock;
475 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 475 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
476 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); 476 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
477 old_dentry->d_inode->i_nlink = 477 set_nlink(old_dentry->d_inode,
478 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; 478 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink);
479 i_size_write(new_dentry->d_inode, file_size_save); 479 i_size_write(new_dentry->d_inode, file_size_save);
480out_lock: 480out_lock:
481 unlock_dir(lower_dir_dentry); 481 unlock_dir(lower_dir_dentry);
@@ -499,8 +499,8 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
499 goto out_unlock; 499 goto out_unlock;
500 } 500 }
501 fsstack_copy_attr_times(dir, lower_dir_inode); 501 fsstack_copy_attr_times(dir, lower_dir_inode);
502 dentry->d_inode->i_nlink = 502 set_nlink(dentry->d_inode,
503 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; 503 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink);
504 dentry->d_inode->i_ctime = dir->i_ctime; 504 dentry->d_inode->i_ctime = dir->i_ctime;
505 d_drop(dentry); 505 d_drop(dentry);
506out_unlock: 506out_unlock:
@@ -565,7 +565,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
565 goto out; 565 goto out;
566 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 566 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
567 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); 567 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
568 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 568 set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
569out: 569out:
570 unlock_dir(lower_dir_dentry); 570 unlock_dir(lower_dir_dentry);
571 if (!dentry->d_inode) 571 if (!dentry->d_inode)
@@ -588,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
588 if (!rc && dentry->d_inode) 588 if (!rc && dentry->d_inode)
589 clear_nlink(dentry->d_inode); 589 clear_nlink(dentry->d_inode);
590 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 590 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
591 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 591 set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
592 unlock_dir(lower_dir_dentry); 592 unlock_dir(lower_dir_dentry);
593 if (!rc) 593 if (!rc)
594 d_drop(dentry); 594 d_drop(dentry);
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 9c13412e6c99..bc84f365d75c 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -96,7 +96,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
96 efs_inode = (struct efs_dinode *) (bh->b_data + offset); 96 efs_inode = (struct efs_dinode *) (bh->b_data + offset);
97 97
98 inode->i_mode = be16_to_cpu(efs_inode->di_mode); 98 inode->i_mode = be16_to_cpu(efs_inode->di_mode);
99 inode->i_nlink = be16_to_cpu(efs_inode->di_nlink); 99 set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid); 100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid);
101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid); 101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid);
102 inode->i_size = be32_to_cpu(efs_inode->di_size); 102 inode->i_size = be32_to_cpu(efs_inode->di_size);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3e5f3a6be90a..f6dbf7768ce6 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1165,7 +1165,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1165 inode->i_mode = le16_to_cpu(fcb.i_mode); 1165 inode->i_mode = le16_to_cpu(fcb.i_mode);
1166 inode->i_uid = le32_to_cpu(fcb.i_uid); 1166 inode->i_uid = le32_to_cpu(fcb.i_uid);
1167 inode->i_gid = le32_to_cpu(fcb.i_gid); 1167 inode->i_gid = le32_to_cpu(fcb.i_gid);
1168 inode->i_nlink = le16_to_cpu(fcb.i_links_count); 1168 set_nlink(inode, le16_to_cpu(fcb.i_links_count));
1169 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1169 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
1170 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1170 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
1171 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 1171 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 8f44cef1b3ef..a8cbe1bc6ad4 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -421,7 +421,7 @@ static inline int rsv_is_empty(struct ext2_reserve_window *rsv)
421void ext2_init_block_alloc_info(struct inode *inode) 421void ext2_init_block_alloc_info(struct inode *inode)
422{ 422{
423 struct ext2_inode_info *ei = EXT2_I(inode); 423 struct ext2_inode_info *ei = EXT2_I(inode);
424 struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info; 424 struct ext2_block_alloc_info *block_i;
425 struct super_block *sb = inode->i_sb; 425 struct super_block *sb = inode->i_sb;
426 426
427 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 427 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ee9ed31948e1..c4e81dfb74ba 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -601,7 +601,7 @@ fail_free_drop:
601fail_drop: 601fail_drop:
602 dquot_drop(inode); 602 dquot_drop(inode);
603 inode->i_flags |= S_NOQUOTA; 603 inode->i_flags |= S_NOQUOTA;
604 inode->i_nlink = 0; 604 clear_nlink(inode);
605 unlock_new_inode(inode); 605 unlock_new_inode(inode);
606 iput(inode); 606 iput(inode);
607 return ERR_PTR(err); 607 return ERR_PTR(err);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a8a58f63f07c..91a6945af6d8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1321,7 +1321,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1321 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 1321 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
1322 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 1322 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
1323 } 1323 }
1324 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 1324 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
1325 inode->i_size = le32_to_cpu(raw_inode->i_size); 1325 inode->i_size = le32_to_cpu(raw_inode->i_size);
1326 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 1326 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
1327 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 1327 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1dd62ed35b85..bd8ac164a3bf 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -327,10 +327,10 @@ static struct inode *ext2_nfs_get_inode(struct super_block *sb,
327 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) 327 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
328 return ERR_PTR(-ESTALE); 328 return ERR_PTR(-ESTALE);
329 329
330 /* iget isn't really right if the inode is currently unallocated!! 330 /*
331 * ext2_read_inode currently does appropriate checks, but 331 * ext2_iget isn't quite right if the inode is currently unallocated!
332 * it might be "neater" to call ext2_get_inode first and check 332 * However ext2_iget currently does appropriate checks to handle stale
333 * if the inode is valid..... 333 * inodes so everything is OK.
334 */ 334 */
335 inode = ext2_iget(sb, ino); 335 inode = ext2_iget(sb, ino);
336 if (IS_ERR(inode)) 336 if (IS_ERR(inode))
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 6386d76f44a7..a2038928f9a3 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -427,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
427void ext3_init_block_alloc_info(struct inode *inode) 427void ext3_init_block_alloc_info(struct inode *inode)
428{ 428{
429 struct ext3_inode_info *ei = EXT3_I(inode); 429 struct ext3_inode_info *ei = EXT3_I(inode);
430 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; 430 struct ext3_block_alloc_info *block_i;
431 struct super_block *sb = inode->i_sb; 431 struct super_block *sb = inode->i_sb;
432 432
433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -1440,14 +1440,14 @@ out:
1440 * 1440 *
1441 * Check if filesystem has at least 1 free block available for allocation. 1441 * Check if filesystem has at least 1 free block available for allocation.
1442 */ 1442 */
1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi) 1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
1444{ 1444{
1445 ext3_fsblk_t free_blocks, root_blocks; 1445 ext3_fsblk_t free_blocks, root_blocks;
1446 1446
1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1450 sbi->s_resuid != current_fsuid() && 1450 !use_reservation && sbi->s_resuid != current_fsuid() &&
1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1452 return 0; 1452 return 0;
1453 } 1453 }
@@ -1468,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1468 */ 1468 */
1469int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1469int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1470{ 1470{
1471 if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) 1471 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
1472 return 0; 1472 return 0;
1473 1473
1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1546,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1547 my_rsv = &block_i->rsv_window_node; 1547 my_rsv = &block_i->rsv_window_node;
1548 1548
1549 if (!ext3_has_free_blocks(sbi)) { 1549 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
1550 *errp = -ENOSPC; 1550 *errp = -ENOSPC;
1551 goto out; 1551 goto out;
1552 } 1552 }
@@ -1924,9 +1924,10 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1924 * reaches any used block. Then issue a TRIM command on this extent and free 1924 * reaches any used block. Then issue a TRIM command on this extent and free
1925 * the extent in the block bitmap. This is done until whole group is scanned. 1925 * the extent in the block bitmap. This is done until whole group is scanned.
1926 */ 1926 */
1927ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, 1927static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1928 ext3_grpblk_t start, ext3_grpblk_t max, 1928 unsigned int group,
1929 ext3_grpblk_t minblocks) 1929 ext3_grpblk_t start, ext3_grpblk_t max,
1930 ext3_grpblk_t minblocks)
1930{ 1931{
1931 handle_t *handle; 1932 handle_t *handle;
1932 ext3_grpblk_t next, free_blocks, bit, freed, count = 0; 1933 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d494c554c6e6..1860ed356323 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -61,13 +61,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
61 if (ret) 61 if (ret)
62 goto out; 62 goto out;
63 63
64 /*
65 * Taking the mutex here just to keep consistent with how fsync was
66 * called previously, however it looks like we don't need to take
67 * i_mutex at all.
68 */
69 mutex_lock(&inode->i_mutex);
70
71 J_ASSERT(ext3_journal_current_handle() == NULL); 64 J_ASSERT(ext3_journal_current_handle() == NULL);
72 65
73 /* 66 /*
@@ -85,7 +78,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
85 * safe in-journal, which is all fsync() needs to ensure. 78 * safe in-journal, which is all fsync() needs to ensure.
86 */ 79 */
87 if (ext3_should_journal_data(inode)) { 80 if (ext3_should_journal_data(inode)) {
88 mutex_unlock(&inode->i_mutex);
89 ret = ext3_force_commit(inode->i_sb); 81 ret = ext3_force_commit(inode->i_sb);
90 goto out; 82 goto out;
91 } 83 }
@@ -108,8 +100,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
108 */ 100 */
109 if (needs_barrier) 101 if (needs_barrier)
110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 102 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
111
112 mutex_unlock(&inode->i_mutex);
113out: 103out:
114 trace_ext3_sync_file_exit(inode, ret); 104 trace_ext3_sync_file_exit(inode, ret);
115 return ret; 105 return ret;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bf09cbf938cc..5c866e06e7ab 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -178,42 +178,6 @@ error_return:
178} 178}
179 179
180/* 180/*
181 * There are two policies for allocating an inode. If the new inode is
182 * a directory, then a forward search is made for a block group with both
183 * free space and a low directory-to-inode ratio; if that fails, then of
184 * the groups with above-average free space, that group with the fewest
185 * directories already is chosen.
186 *
187 * For other inodes, search forward from the parent directory\'s block
188 * group to find a free inode.
189 */
190static int find_group_dir(struct super_block *sb, struct inode *parent)
191{
192 int ngroups = EXT3_SB(sb)->s_groups_count;
193 unsigned int freei, avefreei;
194 struct ext3_group_desc *desc, *best_desc = NULL;
195 int group, best_group = -1;
196
197 freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
198 avefreei = freei / ngroups;
199
200 for (group = 0; group < ngroups; group++) {
201 desc = ext3_get_group_desc (sb, group, NULL);
202 if (!desc || !desc->bg_free_inodes_count)
203 continue;
204 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
205 continue;
206 if (!best_desc ||
207 (le16_to_cpu(desc->bg_free_blocks_count) >
208 le16_to_cpu(best_desc->bg_free_blocks_count))) {
209 best_group = group;
210 best_desc = desc;
211 }
212 }
213 return best_group;
214}
215
216/*
217 * Orlov's allocator for directories. 181 * Orlov's allocator for directories.
218 * 182 *
219 * We always try to spread first-level directories. 183 * We always try to spread first-level directories.
@@ -436,12 +400,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
436 400
437 sbi = EXT3_SB(sb); 401 sbi = EXT3_SB(sb);
438 es = sbi->s_es; 402 es = sbi->s_es;
439 if (S_ISDIR(mode)) { 403 if (S_ISDIR(mode))
440 if (test_opt (sb, OLDALLOC)) 404 group = find_group_orlov(sb, dir);
441 group = find_group_dir(sb, dir); 405 else
442 else
443 group = find_group_orlov(sb, dir);
444 } else
445 group = find_group_other(sb, dir); 406 group = find_group_other(sb, dir);
446 407
447 err = -ENOSPC; 408 err = -ENOSPC;
@@ -621,7 +582,7 @@ fail_free_drop:
621fail_drop: 582fail_drop:
622 dquot_drop(inode); 583 dquot_drop(inode);
623 inode->i_flags |= S_NOQUOTA; 584 inode->i_flags |= S_NOQUOTA;
624 inode->i_nlink = 0; 585 clear_nlink(inode);
625 unlock_new_inode(inode); 586 unlock_new_inode(inode);
626 iput(inode); 587 iput(inode);
627 brelse(bitmap_bh); 588 brelse(bitmap_bh);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 12661e1deedd..85fe655fe3e0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2899,7 +2899,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2899 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 2899 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2900 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 2900 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2901 } 2901 }
2902 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 2902 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2903 inode->i_size = le32_to_cpu(raw_inode->i_size); 2903 inode->i_size = le32_to_cpu(raw_inode->i_size);
2904 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 2904 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2905 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 2905 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index c7f43944f160..ba1b54e23cae 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -150,30 +150,6 @@ setversion_out:
150 mnt_drop_write(filp->f_path.mnt); 150 mnt_drop_write(filp->f_path.mnt);
151 return err; 151 return err;
152 } 152 }
153#ifdef CONFIG_JBD_DEBUG
154 case EXT3_IOC_WAIT_FOR_READONLY:
155 /*
156 * This is racy - by the time we're woken up and running,
157 * the superblock could be released. And the module could
158 * have been unloaded. So sue me.
159 *
160 * Returns 1 if it slept, else zero.
161 */
162 {
163 struct super_block *sb = inode->i_sb;
164 DECLARE_WAITQUEUE(wait, current);
165 int ret = 0;
166
167 set_current_state(TASK_INTERRUPTIBLE);
168 add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
169 if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
170 schedule();
171 ret = 1;
172 }
173 remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
174 return ret;
175 }
176#endif
177 case EXT3_IOC_GETRSVSZ: 153 case EXT3_IOC_GETRSVSZ:
178 if (test_opt(inode->i_sb, RESERVATION) 154 if (test_opt(inode->i_sb, RESERVATION)
179 && S_ISREG(inode->i_mode) 155 && S_ISREG(inode->i_mode)
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0629e09f6511..642dc6d66dfd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1821,7 +1821,7 @@ retry:
1821 de->name_len = 2; 1821 de->name_len = 2;
1822 strcpy (de->name, ".."); 1822 strcpy (de->name, "..");
1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR); 1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
1824 inode->i_nlink = 2; 1824 set_nlink(inode, 2);
1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); 1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
1826 err = ext3_journal_dirty_metadata(handle, dir_block); 1826 err = ext3_journal_dirty_metadata(handle, dir_block);
1827 if (err) 1827 if (err)
@@ -1833,7 +1833,7 @@ retry:
1833 1833
1834 if (err) { 1834 if (err) {
1835out_clear_inode: 1835out_clear_inode:
1836 inode->i_nlink = 0; 1836 clear_nlink(inode);
1837 unlock_new_inode(inode); 1837 unlock_new_inode(inode);
1838 ext3_mark_inode_dirty(handle, inode); 1838 ext3_mark_inode_dirty(handle, inode);
1839 iput (inode); 1839 iput (inode);
@@ -2170,7 +2170,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2170 ext3_warning (inode->i_sb, "ext3_unlink", 2170 ext3_warning (inode->i_sb, "ext3_unlink",
2171 "Deleting nonexistent file (%lu), %d", 2171 "Deleting nonexistent file (%lu), %d",
2172 inode->i_ino, inode->i_nlink); 2172 inode->i_ino, inode->i_nlink);
2173 inode->i_nlink = 1; 2173 set_nlink(inode, 1);
2174 } 2174 }
2175 retval = ext3_delete_entry(handle, dir, de, bh); 2175 retval = ext3_delete_entry(handle, dir, de, bh);
2176 if (retval) 2176 if (retval)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7beb69ae0015..922d289aeeb3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -652,8 +652,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
652 seq_puts(seq, ",nouid32"); 652 seq_puts(seq, ",nouid32");
653 if (test_opt(sb, DEBUG)) 653 if (test_opt(sb, DEBUG))
654 seq_puts(seq, ",debug"); 654 seq_puts(seq, ",debug");
655 if (test_opt(sb, OLDALLOC))
656 seq_puts(seq, ",oldalloc");
657#ifdef CONFIG_EXT3_FS_XATTR 655#ifdef CONFIG_EXT3_FS_XATTR
658 if (test_opt(sb, XATTR_USER)) 656 if (test_opt(sb, XATTR_USER))
659 seq_puts(seq, ",user_xattr"); 657 seq_puts(seq, ",user_xattr");
@@ -1049,10 +1047,12 @@ static int parse_options (char *options, struct super_block *sb,
1049 set_opt (sbi->s_mount_opt, DEBUG); 1047 set_opt (sbi->s_mount_opt, DEBUG);
1050 break; 1048 break;
1051 case Opt_oldalloc: 1049 case Opt_oldalloc:
1052 set_opt (sbi->s_mount_opt, OLDALLOC); 1050 ext3_msg(sb, KERN_WARNING,
1051 "Ignoring deprecated oldalloc option");
1053 break; 1052 break;
1054 case Opt_orlov: 1053 case Opt_orlov:
1055 clear_opt (sbi->s_mount_opt, OLDALLOC); 1054 ext3_msg(sb, KERN_WARNING,
1055 "Ignoring deprecated orlov option");
1056 break; 1056 break;
1057#ifdef CONFIG_EXT3_FS_XATTR 1057#ifdef CONFIG_EXT3_FS_XATTR
1058 case Opt_user_xattr: 1058 case Opt_user_xattr:
@@ -2669,13 +2669,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2669 /* 2669 /*
2670 * If we have an unprocessed orphan list hanging 2670 * If we have an unprocessed orphan list hanging
2671 * around from a previously readonly bdev mount, 2671 * around from a previously readonly bdev mount,
2672 * require a full umount/remount for now. 2672 * require a full umount & mount for now.
2673 */ 2673 */
2674 if (es->s_last_orphan) { 2674 if (es->s_last_orphan) {
2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't " 2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2676 "remount RDWR because of unprocessed " 2676 "remount RDWR because of unprocessed "
2677 "orphan inode list. Please " 2677 "orphan inode list. Please "
2678 "umount/remount instead."); 2678 "umount & mount instead.");
2679 err = -EINVAL; 2679 err = -EINVAL;
2680 goto restore_opts; 2680 goto restore_opts;
2681 } 2681 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f8224adf496e..f6dba4505f1c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -28,7 +28,8 @@
28 */ 28 */
29 29
30/* 30/*
31 * Calculate the block group number and offset, given a block number 31 * Calculate the block group number and offset into the block/cluster
32 * allocation bitmap, given a block number
32 */ 33 */
33void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 34void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
34 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) 35 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
37 ext4_grpblk_t offset; 38 ext4_grpblk_t offset;
38 39
39 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 40 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
40 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); 41 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
42 EXT4_SB(sb)->s_cluster_bits;
41 if (offsetp) 43 if (offsetp)
42 *offsetp = offset; 44 *offsetp = offset;
43 if (blockgrpp) 45 if (blockgrpp)
@@ -55,130 +57,169 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
55 return 0; 57 return 0;
56} 58}
57 59
58static int ext4_group_used_meta_blocks(struct super_block *sb, 60/* Return the number of clusters used for file system metadata; this
59 ext4_group_t block_group, 61 * represents the overhead needed by the file system.
60 struct ext4_group_desc *gdp) 62 */
63unsigned ext4_num_overhead_clusters(struct super_block *sb,
64 ext4_group_t block_group,
65 struct ext4_group_desc *gdp)
61{ 66{
62 ext4_fsblk_t tmp; 67 unsigned num_clusters;
68 int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
69 ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
70 ext4_fsblk_t itbl_blk;
63 struct ext4_sb_info *sbi = EXT4_SB(sb); 71 struct ext4_sb_info *sbi = EXT4_SB(sb);
64 /* block bitmap, inode bitmap, and inode table blocks */
65 int used_blocks = sbi->s_itb_per_group + 2;
66 72
67 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 73 /* This is the number of clusters used by the superblock,
68 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 74 * block group descriptors, and reserved block group
69 block_group)) 75 * descriptor blocks */
70 used_blocks--; 76 num_clusters = ext4_num_base_meta_clusters(sb, block_group);
71 77
72 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), 78 /*
73 block_group)) 79 * For the allocation bitmaps and inode table, we first need
74 used_blocks--; 80 * to check to see if the block is in the block group. If it
75 81 * is, then check to see if the cluster is already accounted
76 tmp = ext4_inode_table(sb, gdp); 82 * for in the clusters used for the base metadata cluster, or
77 for (; tmp < ext4_inode_table(sb, gdp) + 83 * if we can increment the base metadata cluster to include
78 sbi->s_itb_per_group; tmp++) { 84 * that block. Otherwise, we will have to track the cluster
79 if (!ext4_block_in_group(sb, tmp, block_group)) 85 * used for the allocation bitmap or inode table explicitly.
80 used_blocks -= 1; 86 * Normally all of these blocks are contiguous, so the special
87 * case handling shouldn't be necessary except for *very*
88 * unusual file system layouts.
89 */
90 if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
91 block_cluster = EXT4_B2C(sbi, (start -
92 ext4_block_bitmap(sb, gdp)));
93 if (block_cluster < num_clusters)
94 block_cluster = -1;
95 else if (block_cluster == num_clusters) {
96 num_clusters++;
97 block_cluster = -1;
81 } 98 }
82 } 99 }
83 return used_blocks;
84}
85 100
86/* Initializes an uninitialized block bitmap if given, and returns the 101 if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
87 * number of blocks free in the group. */ 102 inode_cluster = EXT4_B2C(sbi,
88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 103 start - ext4_inode_bitmap(sb, gdp));
89 ext4_group_t block_group, struct ext4_group_desc *gdp) 104 if (inode_cluster < num_clusters)
90{ 105 inode_cluster = -1;
91 int bit, bit_max; 106 else if (inode_cluster == num_clusters) {
92 ext4_group_t ngroups = ext4_get_groups_count(sb); 107 num_clusters++;
93 unsigned free_blocks, group_blocks; 108 inode_cluster = -1;
94 struct ext4_sb_info *sbi = EXT4_SB(sb);
95
96 if (bh) {
97 J_ASSERT_BH(bh, buffer_locked(bh));
98
99 /* If checksum is bad mark all blocks used to prevent allocation
100 * essentially implementing a per-group read-only flag. */
101 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
102 ext4_error(sb, "Checksum bad for group %u",
103 block_group);
104 ext4_free_blks_set(sb, gdp, 0);
105 ext4_free_inodes_set(sb, gdp, 0);
106 ext4_itable_unused_set(sb, gdp, 0);
107 memset(bh->b_data, 0xff, sb->s_blocksize);
108 return 0;
109 } 109 }
110 memset(bh->b_data, 0, sb->s_blocksize);
111 } 110 }
112 111
113 /* Check for superblock and gdt backups in this group */ 112 itbl_blk = ext4_inode_table(sb, gdp);
114 bit_max = ext4_bg_has_super(sb, block_group); 113 for (i = 0; i < sbi->s_itb_per_group; i++) {
115 114 if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
116 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 115 c = EXT4_B2C(sbi, start - itbl_blk + i);
117 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * 116 if ((c < num_clusters) || (c == inode_cluster) ||
118 sbi->s_desc_per_block) { 117 (c == block_cluster) || (c == itbl_cluster))
119 if (bit_max) { 118 continue;
120 bit_max += ext4_bg_num_gdb(sb, block_group); 119 if (c == num_clusters) {
121 bit_max += 120 num_clusters++;
122 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 121 continue;
122 }
123 num_clusters++;
124 itbl_cluster = c;
123 } 125 }
124 } else { /* For META_BG_BLOCK_GROUPS */
125 bit_max += ext4_bg_num_gdb(sb, block_group);
126 } 126 }
127 127
128 if (block_group == ngroups - 1) { 128 if (block_cluster != -1)
129 num_clusters++;
130 if (inode_cluster != -1)
131 num_clusters++;
132
133 return num_clusters;
134}
135
136static unsigned int num_clusters_in_group(struct super_block *sb,
137 ext4_group_t block_group)
138{
139 unsigned int blocks;
140
141 if (block_group == ext4_get_groups_count(sb) - 1) {
129 /* 142 /*
130 * Even though mke2fs always initialize first and last group 143 * Even though mke2fs always initializes the first and
131 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 144 * last group, just in case some other tool was used,
132 * to make sure we calculate the right free blocks 145 * we need to make sure we calculate the right free
146 * blocks.
133 */ 147 */
134 group_blocks = ext4_blocks_count(sbi->s_es) - 148 blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
135 ext4_group_first_block_no(sb, ngroups - 1); 149 ext4_group_first_block_no(sb, block_group);
136 } else { 150 } else
137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 151 blocks = EXT4_BLOCKS_PER_GROUP(sb);
138 } 152 return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
153}
139 154
140 free_blocks = group_blocks - bit_max; 155/* Initializes an uninitialized block bitmap */
156void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
157 ext4_group_t block_group,
158 struct ext4_group_desc *gdp)
159{
160 unsigned int bit, bit_max;
161 struct ext4_sb_info *sbi = EXT4_SB(sb);
162 ext4_fsblk_t start, tmp;
163 int flex_bg = 0;
164
165 J_ASSERT_BH(bh, buffer_locked(bh));
166
167 /* If checksum is bad mark all blocks used to prevent allocation
168 * essentially implementing a per-group read-only flag. */
169 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
170 ext4_error(sb, "Checksum bad for group %u", block_group);
171 ext4_free_group_clusters_set(sb, gdp, 0);
172 ext4_free_inodes_set(sb, gdp, 0);
173 ext4_itable_unused_set(sb, gdp, 0);
174 memset(bh->b_data, 0xff, sb->s_blocksize);
175 return;
176 }
177 memset(bh->b_data, 0, sb->s_blocksize);
141 178
142 if (bh) { 179 bit_max = ext4_num_base_meta_clusters(sb, block_group);
143 ext4_fsblk_t start, tmp; 180 for (bit = 0; bit < bit_max; bit++)
144 int flex_bg = 0; 181 ext4_set_bit(bit, bh->b_data);
145 182
146 for (bit = 0; bit < bit_max; bit++) 183 start = ext4_group_first_block_no(sb, block_group);
147 ext4_set_bit(bit, bh->b_data);
148 184
149 start = ext4_group_first_block_no(sb, block_group); 185 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
186 flex_bg = 1;
150 187
151 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 188 /* Set bits for block and inode bitmaps, and inode table */
152 EXT4_FEATURE_INCOMPAT_FLEX_BG)) 189 tmp = ext4_block_bitmap(sb, gdp);
153 flex_bg = 1; 190 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
191 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
154 192
155 /* Set bits for block and inode bitmaps, and inode table */ 193 tmp = ext4_inode_bitmap(sb, gdp);
156 tmp = ext4_block_bitmap(sb, gdp); 194 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
157 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 195 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
158 ext4_set_bit(tmp - start, bh->b_data);
159 196
160 tmp = ext4_inode_bitmap(sb, gdp); 197 tmp = ext4_inode_table(sb, gdp);
198 for (; tmp < ext4_inode_table(sb, gdp) +
199 sbi->s_itb_per_group; tmp++) {
161 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 200 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
162 ext4_set_bit(tmp - start, bh->b_data); 201 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
163
164 tmp = ext4_inode_table(sb, gdp);
165 for (; tmp < ext4_inode_table(sb, gdp) +
166 sbi->s_itb_per_group; tmp++) {
167 if (!flex_bg ||
168 ext4_block_in_group(sb, tmp, block_group))
169 ext4_set_bit(tmp - start, bh->b_data);
170 }
171 /*
172 * Also if the number of blocks within the group is
173 * less than the blocksize * 8 ( which is the size
174 * of bitmap ), set rest of the block bitmap to 1
175 */
176 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
177 bh->b_data);
178 } 202 }
179 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 203
204 /*
205 * Also if the number of blocks within the group is less than
206 * the blocksize * 8 ( which is the size of bitmap ), set rest
207 * of the block bitmap to 1
208 */
209 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
210 sb->s_blocksize * 8, bh->b_data);
180} 211}
181 212
213/* Return the number of free blocks in a block group. It is used when
214 * the block bitmap is uninitialized, so we can't just count the bits
215 * in the bitmap. */
216unsigned ext4_free_clusters_after_init(struct super_block *sb,
217 ext4_group_t block_group,
218 struct ext4_group_desc *gdp)
219{
220 return num_clusters_in_group(sb, block_group) -
221 ext4_num_overhead_clusters(sb, block_group, gdp);
222}
182 223
183/* 224/*
184 * The free blocks are managed by bitmaps. A file system contains several 225 * The free blocks are managed by bitmaps. A file system contains several
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
362} 403}
363 404
364/** 405/**
365 * ext4_has_free_blocks() 406 * ext4_has_free_clusters()
366 * @sbi: in-core super block structure. 407 * @sbi: in-core super block structure.
367 * @nblocks: number of needed blocks 408 * @nclusters: number of needed blocks
409 * @flags: flags from ext4_mb_new_blocks()
368 * 410 *
369 * Check if filesystem has nblocks free & available for allocation. 411 * Check if filesystem has nclusters free & available for allocation.
370 * On success return 1, return 0 on failure. 412 * On success return 1, return 0 on failure.
371 */ 413 */
372static int ext4_has_free_blocks(struct ext4_sb_info *sbi, 414static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
373 s64 nblocks, unsigned int flags) 415 s64 nclusters, unsigned int flags)
374{ 416{
375 s64 free_blocks, dirty_blocks, root_blocks; 417 s64 free_clusters, dirty_clusters, root_clusters;
376 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 418 struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
377 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; 419 struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
378 420
379 free_blocks = percpu_counter_read_positive(fbc); 421 free_clusters = percpu_counter_read_positive(fcc);
380 dirty_blocks = percpu_counter_read_positive(dbc); 422 dirty_clusters = percpu_counter_read_positive(dcc);
381 root_blocks = ext4_r_blocks_count(sbi->s_es); 423 root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
382 424
383 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 425 if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
384 EXT4_FREEBLOCKS_WATERMARK) { 426 EXT4_FREECLUSTERS_WATERMARK) {
385 free_blocks = percpu_counter_sum_positive(fbc); 427 free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
386 dirty_blocks = percpu_counter_sum_positive(dbc); 428 dirty_clusters = percpu_counter_sum_positive(dcc);
387 } 429 }
388 /* Check whether we have space after 430 /* Check whether we have space after accounting for current
389 * accounting for current dirty blocks & root reserved blocks. 431 * dirty clusters & root reserved clusters.
390 */ 432 */
391 if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks)) 433 if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
392 return 1; 434 return 1;
393 435
394 /* Hm, nope. Are (enough) root reserved blocks available? */ 436 /* Hm, nope. Are (enough) root reserved clusters available? */
395 if (sbi->s_resuid == current_fsuid() || 437 if (sbi->s_resuid == current_fsuid() ||
396 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || 438 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
397 capable(CAP_SYS_RESOURCE) || 439 capable(CAP_SYS_RESOURCE) ||
398 (flags & EXT4_MB_USE_ROOT_BLOCKS)) { 440 (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
399 441
400 if (free_blocks >= (nblocks + dirty_blocks)) 442 if (free_clusters >= (nclusters + dirty_clusters))
401 return 1; 443 return 1;
402 } 444 }
403 445
404 return 0; 446 return 0;
405} 447}
406 448
407int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 449int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
408 s64 nblocks, unsigned int flags) 450 s64 nclusters, unsigned int flags)
409{ 451{
410 if (ext4_has_free_blocks(sbi, nblocks, flags)) { 452 if (ext4_has_free_clusters(sbi, nclusters, flags)) {
411 percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); 453 percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
412 return 0; 454 return 0;
413 } else 455 } else
414 return -ENOSPC; 456 return -ENOSPC;
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
428 */ 470 */
429int ext4_should_retry_alloc(struct super_block *sb, int *retries) 471int ext4_should_retry_alloc(struct super_block *sb, int *retries)
430{ 472{
431 if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) || 473 if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
432 (*retries)++ > 3 || 474 (*retries)++ > 3 ||
433 !EXT4_SB(sb)->s_journal) 475 !EXT4_SB(sb)->s_journal)
434 return 0; 476 return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
444 * @handle: handle to this transaction 486 * @handle: handle to this transaction
445 * @inode: file inode 487 * @inode: file inode
446 * @goal: given target block(filesystem wide) 488 * @goal: given target block(filesystem wide)
447 * @count: pointer to total number of blocks needed 489 * @count: pointer to total number of clusters needed
448 * @errp: error code 490 * @errp: error code
449 * 491 *
450 * Return 1st allocated block number on success, *count stores total account 492 * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
476 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 518 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
477 EXT4_I(inode)->i_allocated_meta_blocks += ar.len; 519 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
478 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 520 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
479 dquot_alloc_block_nofail(inode, ar.len); 521 dquot_alloc_block_nofail(inode,
522 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
480 } 523 }
481 return ret; 524 return ret;
482} 525}
483 526
484/** 527/**
485 * ext4_count_free_blocks() -- count filesystem free blocks 528 * ext4_count_free_clusters() -- count filesystem free clusters
486 * @sb: superblock 529 * @sb: superblock
487 * 530 *
488 * Adds up the number of free blocks from each block group. 531 * Adds up the number of free clusters from each block group.
489 */ 532 */
490ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) 533ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
491{ 534{
492 ext4_fsblk_t desc_count; 535 ext4_fsblk_t desc_count;
493 struct ext4_group_desc *gdp; 536 struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
508 gdp = ext4_get_group_desc(sb, i, NULL); 551 gdp = ext4_get_group_desc(sb, i, NULL);
509 if (!gdp) 552 if (!gdp)
510 continue; 553 continue;
511 desc_count += ext4_free_blks_count(sb, gdp); 554 desc_count += ext4_free_group_clusters(sb, gdp);
512 brelse(bitmap_bh); 555 brelse(bitmap_bh);
513 bitmap_bh = ext4_read_block_bitmap(sb, i); 556 bitmap_bh = ext4_read_block_bitmap(sb, i);
514 if (bitmap_bh == NULL) 557 if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
516 559
517 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 560 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
518 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 561 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
519 i, ext4_free_blks_count(sb, gdp), x); 562 i, ext4_free_group_clusters(sb, gdp), x);
520 bitmap_count += x; 563 bitmap_count += x;
521 } 564 }
522 brelse(bitmap_bh); 565 brelse(bitmap_bh);
523 printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" 566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
524 ", computed = %llu, %llu\n", ext4_free_blocks_count(es), 567 ", computed = %llu, %llu\n",
568 EXT4_B2C(sbi, ext4_free_blocks_count(es)),
525 desc_count, bitmap_count); 569 desc_count, bitmap_count);
526 return bitmap_count; 570 return bitmap_count;
527#else 571#else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
530 gdp = ext4_get_group_desc(sb, i, NULL); 574 gdp = ext4_get_group_desc(sb, i, NULL);
531 if (!gdp) 575 if (!gdp)
532 continue; 576 continue;
533 desc_count += ext4_free_blks_count(sb, gdp); 577 desc_count += ext4_free_group_clusters(sb, gdp);
534 } 578 }
535 579
536 return desc_count; 580 return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
620 664
621} 665}
622 666
667/*
668 * This function returns the number of file system metadata clusters at
669 * the beginning of a block group, including the reserved gdt blocks.
670 */
671unsigned ext4_num_base_meta_clusters(struct super_block *sb,
672 ext4_group_t block_group)
673{
674 struct ext4_sb_info *sbi = EXT4_SB(sb);
675 unsigned num;
676
677 /* Check for superblock and gdt backups in this group */
678 num = ext4_bg_has_super(sb, block_group);
679
680 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
681 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
682 sbi->s_desc_per_block) {
683 if (num) {
684 num += ext4_bg_num_gdb(sb, block_group);
685 num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
686 }
687 } else { /* For META_BG_BLOCK_GROUPS */
688 num += ext4_bg_num_gdb(sb, block_group);
689 }
690 return EXT4_NUM_B2C(sbi, num);
691}
623/** 692/**
624 * ext4_inode_to_goal_block - return a hint for block allocation 693 * ext4_inode_to_goal_block - return a hint for block allocation
625 * @inode: inode for block allocation 694 * @inode: inode for block allocation
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cec3145e532c..5b0e26a1272d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
144#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) 144#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
145#define EXT4_MAP_BOUNDARY (1 << BH_Boundary) 145#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
146#define EXT4_MAP_UNINIT (1 << BH_Uninit) 146#define EXT4_MAP_UNINIT (1 << BH_Uninit)
147/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
148 * ext4_map_blocks wants to know whether or not the underlying cluster has
149 * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
150 * the requested mapping was from previously mapped (or delayed allocated)
151 * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
152 * should never appear on buffer_head's state flags.
153 */
154#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
147#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ 155#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
148 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ 156 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
149 EXT4_MAP_UNINIT) 157 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
150 158
151struct ext4_map_blocks { 159struct ext4_map_blocks {
152 ext4_fsblk_t m_pblk; 160 ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
239# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) 247# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
240#endif 248#endif
241#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) 249#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
250#define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \
251 EXT4_SB(s)->s_cluster_bits)
242#ifdef __KERNEL__ 252#ifdef __KERNEL__
243# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) 253# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
254# define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits)
244#else 255#else
245# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) 256# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
246#endif 257#endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
258#endif 269#endif
259#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) 270#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
260 271
272/* Translate a block number to a cluster number */
273#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
274/* Translate a cluster number to a block number */
275#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
276/* Translate # of blks to # of clusters */
277#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
278 (sbi)->s_cluster_bits)
279
261/* 280/*
262 * Structure of a blocks group descriptor 281 * Structure of a blocks group descriptor
263 */ 282 */
@@ -289,7 +308,7 @@ struct ext4_group_desc
289 308
290struct flex_groups { 309struct flex_groups {
291 atomic_t free_inodes; 310 atomic_t free_inodes;
292 atomic_t free_blocks; 311 atomic_t free_clusters;
293 atomic_t used_dirs; 312 atomic_t used_dirs;
294}; 313};
295 314
@@ -306,6 +325,7 @@ struct flex_groups {
306#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) 325#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
307#ifdef __KERNEL__ 326#ifdef __KERNEL__
308# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) 327# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
328# define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group)
309# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) 329# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
310# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) 330# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
311# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) 331# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
358 378
359/* Flags that should be inherited by new inodes from their parent. */ 379/* Flags that should be inherited by new inodes from their parent. */
360#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 380#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
361 EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\ 381 EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
362 EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
363 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ 382 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
364 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) 383 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
365 384
@@ -520,6 +539,8 @@ struct ext4_new_group_data {
520#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 539#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
521 /* Don't normalize allocation size (used for fallocate) */ 540 /* Don't normalize allocation size (used for fallocate) */
522#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 541#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
542 /* Request will not result in inode size update (user for fallocate) */
543#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
523 544
524/* 545/*
525 * Flags used by ext4_free_blocks 546 * Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
528#define EXT4_FREE_BLOCKS_FORGET 0x0002 549#define EXT4_FREE_BLOCKS_FORGET 0x0002
529#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 550#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
530#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 551#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
552#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
553#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
554
555/*
556 * Flags used by ext4_discard_partial_page_buffers
557 */
558#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
531 559
532/* 560/*
533 * ioctl commands 561 * ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
538#define EXT4_IOC_SETVERSION _IOW('f', 4, long) 566#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
539#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION 567#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
540#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION 568#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
541#ifdef CONFIG_JBD2_DEBUG
542#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
543#endif
544#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 569#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
545#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) 570#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
546#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) 571#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
563#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) 588#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
564#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) 589#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
565#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) 590#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
566#ifdef CONFIG_JBD2_DEBUG
567#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
568#endif
569#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION 591#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
570#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION 592#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
571#endif 593#endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
837 ext4_group_t i_last_alloc_group; 859 ext4_group_t i_last_alloc_group;
838 860
839 /* allocation reservation info for delalloc */ 861 /* allocation reservation info for delalloc */
862 /* In case of bigalloc, these refer to clusters rather than blocks */
840 unsigned int i_reserved_data_blocks; 863 unsigned int i_reserved_data_blocks;
841 unsigned int i_reserved_meta_blocks; 864 unsigned int i_reserved_meta_blocks;
842 unsigned int i_allocated_meta_blocks; 865 unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
886/* 909/*
887 * Mount flags 910 * Mount flags
888 */ 911 */
889#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
890#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 912#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
891#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 913#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
892#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 914#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
918#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 940#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
919#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ 941#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
920 942
943#define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
944 specified delalloc */
945
921#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 946#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
922 ~EXT4_MOUNT_##opt 947 ~EXT4_MOUNT_##opt
923#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ 948#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
968/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ 993/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
969 __le32 s_first_data_block; /* First Data Block */ 994 __le32 s_first_data_block; /* First Data Block */
970 __le32 s_log_block_size; /* Block size */ 995 __le32 s_log_block_size; /* Block size */
971 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ 996 __le32 s_log_cluster_size; /* Allocation cluster size */
972/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ 997/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
973 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ 998 __le32 s_clusters_per_group; /* # Clusters per group */
974 __le32 s_inodes_per_group; /* # Inodes per group */ 999 __le32 s_inodes_per_group; /* # Inodes per group */
975 __le32 s_mtime; /* Mount time */ 1000 __le32 s_mtime; /* Mount time */
976/*30*/ __le32 s_wtime; /* Write time */ 1001/*30*/ __le32 s_wtime; /* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
1066 __u8 s_last_error_func[32]; /* function where the error happened */ 1091 __u8 s_last_error_func[32]; /* function where the error happened */
1067#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) 1092#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
1068 __u8 s_mount_opts[64]; 1093 __u8 s_mount_opts[64];
1069 __le32 s_reserved[112]; /* Padding to the end of the block */ 1094 __le32 s_usr_quota_inum; /* inode for tracking user quota */
1095 __le32 s_grp_quota_inum; /* inode for tracking group quota */
1096 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1097 __le32 s_reserved[109]; /* Padding to the end of the block */
1070}; 1098};
1071 1099
1072#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) 1100#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
1086 unsigned long s_desc_size; /* Size of a group descriptor in bytes */ 1114 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
1087 unsigned long s_inodes_per_block;/* Number of inodes per block */ 1115 unsigned long s_inodes_per_block;/* Number of inodes per block */
1088 unsigned long s_blocks_per_group;/* Number of blocks in a group */ 1116 unsigned long s_blocks_per_group;/* Number of blocks in a group */
1117 unsigned long s_clusters_per_group; /* Number of clusters in a group */
1089 unsigned long s_inodes_per_group;/* Number of inodes in a group */ 1118 unsigned long s_inodes_per_group;/* Number of inodes in a group */
1090 unsigned long s_itb_per_group; /* Number of inode table blocks per group */ 1119 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
1091 unsigned long s_gdb_count; /* Number of group descriptor blocks */ 1120 unsigned long s_gdb_count; /* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
1094 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ 1123 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
1095 unsigned long s_overhead_last; /* Last calculated overhead */ 1124 unsigned long s_overhead_last; /* Last calculated overhead */
1096 unsigned long s_blocks_last; /* Last seen block count */ 1125 unsigned long s_blocks_last; /* Last seen block count */
1126 unsigned int s_cluster_ratio; /* Number of blocks per cluster */
1127 unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
1097 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 1128 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
1098 struct buffer_head * s_sbh; /* Buffer containing the super block */ 1129 struct buffer_head * s_sbh; /* Buffer containing the super block */
1099 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1130 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
1117 u32 s_hash_seed[4]; 1148 u32 s_hash_seed[4];
1118 int s_def_hash_version; 1149 int s_def_hash_version;
1119 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ 1150 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
1120 struct percpu_counter s_freeblocks_counter; 1151 struct percpu_counter s_freeclusters_counter;
1121 struct percpu_counter s_freeinodes_counter; 1152 struct percpu_counter s_freeinodes_counter;
1122 struct percpu_counter s_dirs_counter; 1153 struct percpu_counter s_dirs_counter;
1123 struct percpu_counter s_dirtyblocks_counter; 1154 struct percpu_counter s_dirtyclusters_counter;
1124 struct blockgroup_lock *s_blockgroup_lock; 1155 struct blockgroup_lock *s_blockgroup_lock;
1125 struct proc_dir_entry *s_proc; 1156 struct proc_dir_entry *s_proc;
1126 struct kobject s_kobj; 1157 struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
1136 u32 s_max_batch_time; 1167 u32 s_max_batch_time;
1137 u32 s_min_batch_time; 1168 u32 s_min_batch_time;
1138 struct block_device *journal_bdev; 1169 struct block_device *journal_bdev;
1139#ifdef CONFIG_JBD2_DEBUG
1140 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
1141 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
1142#endif
1143#ifdef CONFIG_QUOTA 1170#ifdef CONFIG_QUOTA
1144 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 1171 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
1145 int s_jquota_fmt; /* Format of quota to use */ 1172 int s_jquota_fmt; /* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1248 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1275 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
1249} 1276}
1250 1277
1278static inline void ext4_set_io_unwritten_flag(struct inode *inode,
1279 struct ext4_io_end *io_end)
1280{
1281 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1282 io_end->flag |= EXT4_IO_END_UNWRITTEN;
1283 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
1284 }
1285}
1286
1251/* 1287/*
1252 * Inode dynamic state flags 1288 * Inode dynamic state flags
1253 */ 1289 */
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1360#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 1396#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
1361#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1397#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1362#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1398#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1399#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1363 1400
1364#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1401#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1365#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1402#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1402 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ 1439 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
1403 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ 1440 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
1404 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ 1441 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
1405 EXT4_FEATURE_RO_COMPAT_HUGE_FILE) 1442 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
1443 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
1406 1444
1407/* 1445/*
1408 * Default values for user and/or group using reserved blocks 1446 * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1735 unsigned int flags, 1773 unsigned int flags,
1736 unsigned long *count, 1774 unsigned long *count,
1737 int *errp); 1775 int *errp);
1738extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 1776extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
1739 s64 nblocks, unsigned int flags); 1777 s64 nclusters, unsigned int flags);
1740extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1778extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
1741extern void ext4_check_blocks_bitmap(struct super_block *); 1779extern void ext4_check_blocks_bitmap(struct super_block *);
1742extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1780extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1743 ext4_group_t block_group, 1781 ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1745extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1783extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1746struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1784struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1747 ext4_group_t block_group); 1785 ext4_group_t block_group);
1748extern unsigned ext4_init_block_bitmap(struct super_block *sb, 1786extern void ext4_init_block_bitmap(struct super_block *sb,
1749 struct buffer_head *bh, 1787 struct buffer_head *bh,
1750 ext4_group_t group, 1788 ext4_group_t group,
1751 struct ext4_group_desc *desc); 1789 struct ext4_group_desc *desc);
1752#define ext4_free_blocks_after_init(sb, group, desc) \ 1790extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1753 ext4_init_block_bitmap(sb, NULL, group, desc) 1791 ext4_group_t block_group,
1792 struct ext4_group_desc *gdp);
1793extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
1794 ext4_group_t block_group);
1795extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
1796 ext4_group_t block_group,
1797 struct ext4_group_desc *gdp);
1754ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 1798ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
1755 1799
1756/* dir.c */ 1800/* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1776 1820
1777/* ialloc.c */ 1821/* ialloc.c */
1778extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, 1822extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
1779 const struct qstr *qstr, __u32 goal); 1823 const struct qstr *qstr, __u32 goal,
1824 uid_t *owner);
1780extern void ext4_free_inode(handle_t *, struct inode *); 1825extern void ext4_free_inode(handle_t *, struct inode *);
1781extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 1826extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1782extern unsigned long ext4_count_free_inodes(struct super_block *); 1827extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
1839 struct address_space *mapping, loff_t from); 1884 struct address_space *mapping, loff_t from);
1840extern int ext4_block_zero_page_range(handle_t *handle, 1885extern int ext4_block_zero_page_range(handle_t *handle,
1841 struct address_space *mapping, loff_t from, loff_t length); 1886 struct address_space *mapping, loff_t from, loff_t length);
1887extern int ext4_discard_partial_page_buffers(handle_t *handle,
1888 struct address_space *mapping, loff_t from,
1889 loff_t length, int flags);
1890extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
1891 struct inode *inode, struct page *page, loff_t from,
1892 loff_t length, int flags);
1842extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1893extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1843extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1894extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1844extern void ext4_da_update_reserve_space(struct inode *inode, 1895extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1927 struct ext4_group_desc *bg); 1978 struct ext4_group_desc *bg);
1928extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, 1979extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1929 struct ext4_group_desc *bg); 1980 struct ext4_group_desc *bg);
1930extern __u32 ext4_free_blks_count(struct super_block *sb, 1981extern __u32 ext4_free_group_clusters(struct super_block *sb,
1931 struct ext4_group_desc *bg); 1982 struct ext4_group_desc *bg);
1932extern __u32 ext4_free_inodes_count(struct super_block *sb, 1983extern __u32 ext4_free_inodes_count(struct super_block *sb,
1933 struct ext4_group_desc *bg); 1984 struct ext4_group_desc *bg);
1934extern __u32 ext4_used_dirs_count(struct super_block *sb, 1985extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
1941 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1992 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1942extern void ext4_inode_table_set(struct super_block *sb, 1993extern void ext4_inode_table_set(struct super_block *sb,
1943 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1994 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1944extern void ext4_free_blks_set(struct super_block *sb, 1995extern void ext4_free_group_clusters_set(struct super_block *sb,
1945 struct ext4_group_desc *bg, __u32 count); 1996 struct ext4_group_desc *bg,
1997 __u32 count);
1946extern void ext4_free_inodes_set(struct super_block *sb, 1998extern void ext4_free_inodes_set(struct super_block *sb,
1947 struct ext4_group_desc *bg, __u32 count); 1999 struct ext4_group_desc *bg, __u32 count);
1948extern void ext4_used_dirs_set(struct super_block *sb, 2000extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@ do { \
2051} while (0) 2103} while (0)
2052 2104
2053#ifdef CONFIG_SMP 2105#ifdef CONFIG_SMP
2054/* Each CPU can accumulate percpu_counter_batch blocks in their local 2106/* Each CPU can accumulate percpu_counter_batch clusters in their local
2055 * counters. So we need to make sure we have free blocks more 2107 * counters. So we need to make sure we have free clusters more
2056 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times. 2108 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
2057 */ 2109 */
2058#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) 2110#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
2059#else 2111#else
2060#define EXT4_FREEBLOCKS_WATERMARK 0 2112#define EXT4_FREECLUSTERS_WATERMARK 0
2061#endif 2113#endif
2062 2114
2063static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) 2115static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
2243enum ext4_state_bits { 2295enum ext4_state_bits {
2244 BH_Uninit /* blocks are allocated but uninitialized on disk */ 2296 BH_Uninit /* blocks are allocated but uninitialized on disk */
2245 = BH_JBDPrivateStart, 2297 = BH_JBDPrivateStart,
2298 BH_AllocFromCluster, /* allocated blocks were part of already
2299 * allocated cluster. Note that this flag will
2300 * never, ever appear in a buffer_head's state
2301 * flag. See EXT4_MAP_FROM_CLUSTER to see where
2302 * this is used. */
2303 BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
2304 * flag is set when ext4_map_blocks is called on a
2305 * delayed allocated block to get its real mapping. */
2246}; 2306};
2247 2307
2248BUFFER_FNS(Uninit, uninit) 2308BUFFER_FNS(Uninit, uninit)
2249TAS_BUFFER_FNS(Uninit, uninit) 2309TAS_BUFFER_FNS(Uninit, uninit)
2310BUFFER_FNS(Da_Mapped, da_mapped)
2250 2311
2251/* 2312/*
2252 * Add new method to test wether block and inode bitmaps are properly 2313 * Add new method to test wether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb);
2282 2343
2283#endif /* __KERNEL__ */ 2344#endif /* __KERNEL__ */
2284 2345
2346#include "ext4_extents.h"
2347
2285#endif /* _EXT4_H */ 2348#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f3b612..a52db3a69a30 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
290 struct ext4_ext_path *); 290 struct ext4_ext_path *);
291extern void ext4_ext_drop_refs(struct ext4_ext_path *); 291extern void ext4_ext_drop_refs(struct ext4_ext_path *);
292extern int ext4_ext_check_inode(struct inode *inode); 292extern int ext4_ext_check_inode(struct inode *inode);
293extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
294 int search_hint_reverse);
293#endif /* _EXT4_EXTENTS */ 295#endif /* _EXT4_EXTENTS */
294 296
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index f5240aa15601..aca179017582 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
109 109
110 if (ext4_handle_valid(handle)) { 110 if (ext4_handle_valid(handle)) {
111 err = jbd2_journal_dirty_metadata(handle, bh); 111 err = jbd2_journal_dirty_metadata(handle, bh);
112 if (err) 112 if (err) {
113 ext4_journal_abort_handle(where, line, __func__, 113 /* Errors can only happen if there is a bug */
114 bh, handle, err); 114 handle->h_err = err;
115 __ext4_journal_stop(where, line, handle);
116 }
115 } else { 117 } else {
116 if (inode) 118 if (inode)
117 mark_buffer_dirty_inode(bh, inode); 119 mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57cf568a98ab..61fa9e1614af 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -42,7 +42,6 @@
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <linux/fiemap.h> 43#include <linux/fiemap.h>
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h"
46 45
47#include <trace/events/ext4.h> 46#include <trace/events/ext4.h>
48 47
@@ -96,13 +95,17 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
96 * - ENOMEM 95 * - ENOMEM
97 * - EIO 96 * - EIO
98 */ 97 */
99static int ext4_ext_dirty(handle_t *handle, struct inode *inode, 98#define ext4_ext_dirty(handle, inode, path) \
100 struct ext4_ext_path *path) 99 __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
100static int __ext4_ext_dirty(const char *where, unsigned int line,
101 handle_t *handle, struct inode *inode,
102 struct ext4_ext_path *path)
101{ 103{
102 int err; 104 int err;
103 if (path->p_bh) { 105 if (path->p_bh) {
104 /* path points to block */ 106 /* path points to block */
105 err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); 107 err = __ext4_handle_dirty_metadata(where, line, handle,
108 inode, path->p_bh);
106 } else { 109 } else {
107 /* path points to leaf/index in inode body */ 110 /* path points to leaf/index in inode body */
108 err = ext4_mark_inode_dirty(handle, inode); 111 err = ext4_mark_inode_dirty(handle, inode);
@@ -114,11 +117,9 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
114 struct ext4_ext_path *path, 117 struct ext4_ext_path *path,
115 ext4_lblk_t block) 118 ext4_lblk_t block)
116{ 119{
117 int depth;
118
119 if (path) { 120 if (path) {
121 int depth = path->p_depth;
120 struct ext4_extent *ex; 122 struct ext4_extent *ex;
121 depth = path->p_depth;
122 123
123 /* 124 /*
124 * Try to predict block placement assuming that we are 125 * Try to predict block placement assuming that we are
@@ -180,12 +181,10 @@ static inline int ext4_ext_space_block(struct inode *inode, int check)
180 181
181 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 182 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
182 / sizeof(struct ext4_extent); 183 / sizeof(struct ext4_extent);
183 if (!check) {
184#ifdef AGGRESSIVE_TEST 184#ifdef AGGRESSIVE_TEST
185 if (size > 6) 185 if (!check && size > 6)
186 size = 6; 186 size = 6;
187#endif 187#endif
188 }
189 return size; 188 return size;
190} 189}
191 190
@@ -195,12 +194,10 @@ static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
195 194
196 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 195 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
197 / sizeof(struct ext4_extent_idx); 196 / sizeof(struct ext4_extent_idx);
198 if (!check) {
199#ifdef AGGRESSIVE_TEST 197#ifdef AGGRESSIVE_TEST
200 if (size > 5) 198 if (!check && size > 5)
201 size = 5; 199 size = 5;
202#endif 200#endif
203 }
204 return size; 201 return size;
205} 202}
206 203
@@ -211,12 +208,10 @@ static inline int ext4_ext_space_root(struct inode *inode, int check)
211 size = sizeof(EXT4_I(inode)->i_data); 208 size = sizeof(EXT4_I(inode)->i_data);
212 size -= sizeof(struct ext4_extent_header); 209 size -= sizeof(struct ext4_extent_header);
213 size /= sizeof(struct ext4_extent); 210 size /= sizeof(struct ext4_extent);
214 if (!check) {
215#ifdef AGGRESSIVE_TEST 211#ifdef AGGRESSIVE_TEST
216 if (size > 3) 212 if (!check && size > 3)
217 size = 3; 213 size = 3;
218#endif 214#endif
219 }
220 return size; 215 return size;
221} 216}
222 217
@@ -227,12 +222,10 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
227 size = sizeof(EXT4_I(inode)->i_data); 222 size = sizeof(EXT4_I(inode)->i_data);
228 size -= sizeof(struct ext4_extent_header); 223 size -= sizeof(struct ext4_extent_header);
229 size /= sizeof(struct ext4_extent_idx); 224 size /= sizeof(struct ext4_extent_idx);
230 if (!check) {
231#ifdef AGGRESSIVE_TEST 225#ifdef AGGRESSIVE_TEST
232 if (size > 4) 226 if (!check && size > 4)
233 size = 4; 227 size = 4;
234#endif 228#endif
235 }
236 return size; 229 return size;
237} 230}
238 231
@@ -244,7 +237,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
244int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) 237int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
245{ 238{
246 struct ext4_inode_info *ei = EXT4_I(inode); 239 struct ext4_inode_info *ei = EXT4_I(inode);
247 int idxs, num = 0; 240 int idxs;
248 241
249 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 242 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
250 / sizeof(struct ext4_extent_idx)); 243 / sizeof(struct ext4_extent_idx));
@@ -259,6 +252,8 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
259 */ 252 */
260 if (ei->i_da_metadata_calc_len && 253 if (ei->i_da_metadata_calc_len &&
261 ei->i_da_metadata_calc_last_lblock+1 == lblock) { 254 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
255 int num = 0;
256
262 if ((ei->i_da_metadata_calc_len % idxs) == 0) 257 if ((ei->i_da_metadata_calc_len % idxs) == 0)
263 num++; 258 num++;
264 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) 259 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
@@ -321,8 +316,6 @@ static int ext4_valid_extent_entries(struct inode *inode,
321 struct ext4_extent_header *eh, 316 struct ext4_extent_header *eh,
322 int depth) 317 int depth)
323{ 318{
324 struct ext4_extent *ext;
325 struct ext4_extent_idx *ext_idx;
326 unsigned short entries; 319 unsigned short entries;
327 if (eh->eh_entries == 0) 320 if (eh->eh_entries == 0)
328 return 1; 321 return 1;
@@ -331,7 +324,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
331 324
332 if (depth == 0) { 325 if (depth == 0) {
333 /* leaf entries */ 326 /* leaf entries */
334 ext = EXT_FIRST_EXTENT(eh); 327 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
335 while (entries) { 328 while (entries) {
336 if (!ext4_valid_extent(inode, ext)) 329 if (!ext4_valid_extent(inode, ext))
337 return 0; 330 return 0;
@@ -339,7 +332,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
339 entries--; 332 entries--;
340 } 333 }
341 } else { 334 } else {
342 ext_idx = EXT_FIRST_INDEX(eh); 335 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
343 while (entries) { 336 while (entries) {
344 if (!ext4_valid_extent_idx(inode, ext_idx)) 337 if (!ext4_valid_extent_idx(inode, ext_idx))
345 return 0; 338 return 0;
@@ -751,31 +744,30 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
751 return -EIO; 744 return -EIO;
752 } 745 }
753 746
754 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
755 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 747 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
756 /* insert after */ 748 /* insert after */
757 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 749 ext_debug("insert new index %d after: %llu\n", logical, ptr);
758 len = (len - 1) * sizeof(struct ext4_extent_idx);
759 len = len < 0 ? 0 : len;
760 ext_debug("insert new index %d after: %llu. "
761 "move %d from 0x%p to 0x%p\n",
762 logical, ptr, len,
763 (curp->p_idx + 1), (curp->p_idx + 2));
764 memmove(curp->p_idx + 2, curp->p_idx + 1, len);
765 }
766 ix = curp->p_idx + 1; 750 ix = curp->p_idx + 1;
767 } else { 751 } else {
768 /* insert before */ 752 /* insert before */
769 len = len * sizeof(struct ext4_extent_idx); 753 ext_debug("insert new index %d before: %llu\n", logical, ptr);
770 len = len < 0 ? 0 : len;
771 ext_debug("insert new index %d before: %llu. "
772 "move %d from 0x%p to 0x%p\n",
773 logical, ptr, len,
774 curp->p_idx, (curp->p_idx + 1));
775 memmove(curp->p_idx + 1, curp->p_idx, len);
776 ix = curp->p_idx; 754 ix = curp->p_idx;
777 } 755 }
778 756
757 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
758 BUG_ON(len < 0);
759 if (len > 0) {
760 ext_debug("insert new index %d: "
761 "move %d indices from 0x%p to 0x%p\n",
762 logical, len, ix, ix + 1);
763 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
764 }
765
766 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
767 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
768 return -EIO;
769 }
770
779 ix->ei_block = cpu_to_le32(logical); 771 ix->ei_block = cpu_to_le32(logical);
780 ext4_idx_store_pblock(ix, ptr); 772 ext4_idx_store_pblock(ix, ptr);
781 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 773 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
@@ -1042,16 +1034,14 @@ cleanup:
1042 */ 1034 */
1043static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1035static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1044 unsigned int flags, 1036 unsigned int flags,
1045 struct ext4_ext_path *path,
1046 struct ext4_extent *newext) 1037 struct ext4_extent *newext)
1047{ 1038{
1048 struct ext4_ext_path *curp = path;
1049 struct ext4_extent_header *neh; 1039 struct ext4_extent_header *neh;
1050 struct buffer_head *bh; 1040 struct buffer_head *bh;
1051 ext4_fsblk_t newblock; 1041 ext4_fsblk_t newblock;
1052 int err = 0; 1042 int err = 0;
1053 1043
1054 newblock = ext4_ext_new_meta_block(handle, inode, path, 1044 newblock = ext4_ext_new_meta_block(handle, inode, NULL,
1055 newext, &err, flags); 1045 newext, &err, flags);
1056 if (newblock == 0) 1046 if (newblock == 0)
1057 return err; 1047 return err;
@@ -1071,7 +1061,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1071 } 1061 }
1072 1062
1073 /* move top-level index/leaf into new block */ 1063 /* move top-level index/leaf into new block */
1074 memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); 1064 memmove(bh->b_data, EXT4_I(inode)->i_data,
1065 sizeof(EXT4_I(inode)->i_data));
1075 1066
1076 /* set size of new block */ 1067 /* set size of new block */
1077 neh = ext_block_hdr(bh); 1068 neh = ext_block_hdr(bh);
@@ -1089,32 +1080,23 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1089 if (err) 1080 if (err)
1090 goto out; 1081 goto out;
1091 1082
1092 /* create index in new top-level index: num,max,pointer */ 1083 /* Update top-level index: num,max,pointer */
1093 err = ext4_ext_get_access(handle, inode, curp);
1094 if (err)
1095 goto out;
1096
1097 curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
1098 curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1099 curp->p_hdr->eh_entries = cpu_to_le16(1);
1100 curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
1101
1102 if (path[0].p_hdr->eh_depth)
1103 curp->p_idx->ei_block =
1104 EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
1105 else
1106 curp->p_idx->ei_block =
1107 EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
1108 ext4_idx_store_pblock(curp->p_idx, newblock);
1109
1110 neh = ext_inode_hdr(inode); 1084 neh = ext_inode_hdr(inode);
1085 neh->eh_entries = cpu_to_le16(1);
1086 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1087 if (neh->eh_depth == 0) {
1088 /* Root extent block becomes index block */
1089 neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1090 EXT_FIRST_INDEX(neh)->ei_block =
1091 EXT_FIRST_EXTENT(neh)->ee_block;
1092 }
1111 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1093 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1112 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1094 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1113 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1114 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1115 1097
1116 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1098 neh->eh_depth = cpu_to_le16(neh->eh_depth + 1);
1117 err = ext4_ext_dirty(handle, inode, curp); 1099 ext4_mark_inode_dirty(handle, inode);
1118out: 1100out:
1119 brelse(bh); 1101 brelse(bh);
1120 1102
@@ -1162,8 +1144,7 @@ repeat:
1162 err = PTR_ERR(path); 1144 err = PTR_ERR(path);
1163 } else { 1145 } else {
1164 /* tree is full, time to grow in depth */ 1146 /* tree is full, time to grow in depth */
1165 err = ext4_ext_grow_indepth(handle, inode, flags, 1147 err = ext4_ext_grow_indepth(handle, inode, flags, newext);
1166 path, newext);
1167 if (err) 1148 if (err)
1168 goto out; 1149 goto out;
1169 1150
@@ -1235,9 +1216,9 @@ static int ext4_ext_search_left(struct inode *inode,
1235 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1216 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1236 EXT4_ERROR_INODE(inode, 1217 EXT4_ERROR_INODE(inode,
1237 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", 1218 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1238 ix != NULL ? ix->ei_block : 0, 1219 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
1239 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 1220 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
1240 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, 1221 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
1241 depth); 1222 depth);
1242 return -EIO; 1223 return -EIO;
1243 } 1224 }
@@ -1260,13 +1241,14 @@ static int ext4_ext_search_left(struct inode *inode,
1260/* 1241/*
1261 * search the closest allocated block to the right for *logical 1242 * search the closest allocated block to the right for *logical
1262 * and returns it at @logical + it's physical address at @phys 1243 * and returns it at @logical + it's physical address at @phys
1263 * if *logical is the smallest allocated block, the function 1244 * if *logical is the largest allocated block, the function
1264 * returns 0 at @phys 1245 * returns 0 at @phys
1265 * return value contains 0 (success) or error code 1246 * return value contains 0 (success) or error code
1266 */ 1247 */
1267static int ext4_ext_search_right(struct inode *inode, 1248static int ext4_ext_search_right(struct inode *inode,
1268 struct ext4_ext_path *path, 1249 struct ext4_ext_path *path,
1269 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1250 ext4_lblk_t *logical, ext4_fsblk_t *phys,
1251 struct ext4_extent **ret_ex)
1270{ 1252{
1271 struct buffer_head *bh = NULL; 1253 struct buffer_head *bh = NULL;
1272 struct ext4_extent_header *eh; 1254 struct ext4_extent_header *eh;
@@ -1308,9 +1290,7 @@ static int ext4_ext_search_right(struct inode *inode,
1308 return -EIO; 1290 return -EIO;
1309 } 1291 }
1310 } 1292 }
1311 *logical = le32_to_cpu(ex->ee_block); 1293 goto found_extent;
1312 *phys = ext4_ext_pblock(ex);
1313 return 0;
1314 } 1294 }
1315 1295
1316 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1296 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
@@ -1323,9 +1303,7 @@ static int ext4_ext_search_right(struct inode *inode,
1323 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1303 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1324 /* next allocated block in this leaf */ 1304 /* next allocated block in this leaf */
1325 ex++; 1305 ex++;
1326 *logical = le32_to_cpu(ex->ee_block); 1306 goto found_extent;
1327 *phys = ext4_ext_pblock(ex);
1328 return 0;
1329 } 1307 }
1330 1308
1331 /* go up and search for index to the right */ 1309 /* go up and search for index to the right */
@@ -1368,9 +1346,12 @@ got_index:
1368 return -EIO; 1346 return -EIO;
1369 } 1347 }
1370 ex = EXT_FIRST_EXTENT(eh); 1348 ex = EXT_FIRST_EXTENT(eh);
1349found_extent:
1371 *logical = le32_to_cpu(ex->ee_block); 1350 *logical = le32_to_cpu(ex->ee_block);
1372 *phys = ext4_ext_pblock(ex); 1351 *phys = ext4_ext_pblock(ex);
1373 put_bh(bh); 1352 *ret_ex = ex;
1353 if (bh)
1354 put_bh(bh);
1374 return 0; 1355 return 0;
1375} 1356}
1376 1357
@@ -1395,7 +1376,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1395 while (depth >= 0) { 1376 while (depth >= 0) {
1396 if (depth == path->p_depth) { 1377 if (depth == path->p_depth) {
1397 /* leaf */ 1378 /* leaf */
1398 if (path[depth].p_ext != 1379 if (path[depth].p_ext &&
1380 path[depth].p_ext !=
1399 EXT_LAST_EXTENT(path[depth].p_hdr)) 1381 EXT_LAST_EXTENT(path[depth].p_hdr))
1400 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1382 return le32_to_cpu(path[depth].p_ext[1].ee_block);
1401 } else { 1383 } else {
@@ -1623,7 +1605,8 @@ static int ext4_ext_try_to_merge(struct inode *inode,
1623 * such that there will be no overlap, and then returns 1. 1605 * such that there will be no overlap, and then returns 1.
1624 * If there is no overlap found, it returns 0. 1606 * If there is no overlap found, it returns 0.
1625 */ 1607 */
1626static unsigned int ext4_ext_check_overlap(struct inode *inode, 1608static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1609 struct inode *inode,
1627 struct ext4_extent *newext, 1610 struct ext4_extent *newext,
1628 struct ext4_ext_path *path) 1611 struct ext4_ext_path *path)
1629{ 1612{
@@ -1637,6 +1620,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1637 if (!path[depth].p_ext) 1620 if (!path[depth].p_ext)
1638 goto out; 1621 goto out;
1639 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1622 b2 = le32_to_cpu(path[depth].p_ext->ee_block);
1623 b2 &= ~(sbi->s_cluster_ratio - 1);
1640 1624
1641 /* 1625 /*
1642 * get the next allocated block if the extent in the path 1626 * get the next allocated block if the extent in the path
@@ -1646,6 +1630,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1646 b2 = ext4_ext_next_allocated_block(path); 1630 b2 = ext4_ext_next_allocated_block(path);
1647 if (b2 == EXT_MAX_BLOCKS) 1631 if (b2 == EXT_MAX_BLOCKS)
1648 goto out; 1632 goto out;
1633 b2 &= ~(sbi->s_cluster_ratio - 1);
1649 } 1634 }
1650 1635
1651 /* check for wrap through zero on extent logical start block*/ 1636 /* check for wrap through zero on extent logical start block*/
@@ -1697,7 +1682,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1697 /* try to insert block into found extent and return */ 1682 /* try to insert block into found extent and return */
1698 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1683 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1699 && ext4_can_extents_be_merged(inode, ex, newext)) { 1684 && ext4_can_extents_be_merged(inode, ex, newext)) {
1700 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1685 ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
1701 ext4_ext_is_uninitialized(newext), 1686 ext4_ext_is_uninitialized(newext),
1702 ext4_ext_get_actual_len(newext), 1687 ext4_ext_get_actual_len(newext),
1703 le32_to_cpu(ex->ee_block), 1688 le32_to_cpu(ex->ee_block),
@@ -1735,7 +1720,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1735 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) 1720 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
1736 next = ext4_ext_next_leaf_block(path); 1721 next = ext4_ext_next_leaf_block(path);
1737 if (next != EXT_MAX_BLOCKS) { 1722 if (next != EXT_MAX_BLOCKS) {
1738 ext_debug("next leaf block - %d\n", next); 1723 ext_debug("next leaf block - %u\n", next);
1739 BUG_ON(npath != NULL); 1724 BUG_ON(npath != NULL);
1740 npath = ext4_ext_find_extent(inode, next, NULL); 1725 npath = ext4_ext_find_extent(inode, next, NULL);
1741 if (IS_ERR(npath)) 1726 if (IS_ERR(npath))
@@ -1773,46 +1758,51 @@ has_space:
1773 1758
1774 if (!nearex) { 1759 if (!nearex) {
1775 /* there is no extent in this leaf, create first one */ 1760 /* there is no extent in this leaf, create first one */
1776 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1761 ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
1777 le32_to_cpu(newext->ee_block), 1762 le32_to_cpu(newext->ee_block),
1778 ext4_ext_pblock(newext), 1763 ext4_ext_pblock(newext),
1779 ext4_ext_is_uninitialized(newext), 1764 ext4_ext_is_uninitialized(newext),
1780 ext4_ext_get_actual_len(newext)); 1765 ext4_ext_get_actual_len(newext));
1781 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1766 nearex = EXT_FIRST_EXTENT(eh);
1782 } else if (le32_to_cpu(newext->ee_block) 1767 } else {
1768 if (le32_to_cpu(newext->ee_block)
1783 > le32_to_cpu(nearex->ee_block)) { 1769 > le32_to_cpu(nearex->ee_block)) {
1784/* BUG_ON(newext->ee_block == nearex->ee_block); */ 1770 /* Insert after */
1785 if (nearex != EXT_LAST_EXTENT(eh)) { 1771 ext_debug("insert %u:%llu:[%d]%d before: "
1786 len = EXT_MAX_EXTENT(eh) - nearex; 1772 "nearest %p\n",
1787 len = (len - 1) * sizeof(struct ext4_extent);
1788 len = len < 0 ? 0 : len;
1789 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1790 "move %d from 0x%p to 0x%p\n",
1791 le32_to_cpu(newext->ee_block), 1773 le32_to_cpu(newext->ee_block),
1792 ext4_ext_pblock(newext), 1774 ext4_ext_pblock(newext),
1793 ext4_ext_is_uninitialized(newext), 1775 ext4_ext_is_uninitialized(newext),
1794 ext4_ext_get_actual_len(newext), 1776 ext4_ext_get_actual_len(newext),
1795 nearex, len, nearex + 1, nearex + 2); 1777 nearex);
1796 memmove(nearex + 2, nearex + 1, len); 1778 nearex++;
1779 } else {
1780 /* Insert before */
1781 BUG_ON(newext->ee_block == nearex->ee_block);
1782 ext_debug("insert %u:%llu:[%d]%d after: "
1783 "nearest %p\n",
1784 le32_to_cpu(newext->ee_block),
1785 ext4_ext_pblock(newext),
1786 ext4_ext_is_uninitialized(newext),
1787 ext4_ext_get_actual_len(newext),
1788 nearex);
1789 }
1790 len = EXT_LAST_EXTENT(eh) - nearex + 1;
1791 if (len > 0) {
1792 ext_debug("insert %u:%llu:[%d]%d: "
1793 "move %d extents from 0x%p to 0x%p\n",
1794 le32_to_cpu(newext->ee_block),
1795 ext4_ext_pblock(newext),
1796 ext4_ext_is_uninitialized(newext),
1797 ext4_ext_get_actual_len(newext),
1798 len, nearex, nearex + 1);
1799 memmove(nearex + 1, nearex,
1800 len * sizeof(struct ext4_extent));
1797 } 1801 }
1798 path[depth].p_ext = nearex + 1;
1799 } else {
1800 BUG_ON(newext->ee_block == nearex->ee_block);
1801 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1802 len = len < 0 ? 0 : len;
1803 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1804 "move %d from 0x%p to 0x%p\n",
1805 le32_to_cpu(newext->ee_block),
1806 ext4_ext_pblock(newext),
1807 ext4_ext_is_uninitialized(newext),
1808 ext4_ext_get_actual_len(newext),
1809 nearex, len, nearex, nearex + 1);
1810 memmove(nearex + 1, nearex, len);
1811 path[depth].p_ext = nearex;
1812 } 1802 }
1813 1803
1814 le16_add_cpu(&eh->eh_entries, 1); 1804 le16_add_cpu(&eh->eh_entries, 1);
1815 nearex = path[depth].p_ext; 1805 path[depth].p_ext = nearex;
1816 nearex->ee_block = newext->ee_block; 1806 nearex->ee_block = newext->ee_block;
1817 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1807 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1818 nearex->ee_len = newext->ee_len; 1808 nearex->ee_len = newext->ee_len;
@@ -1962,6 +1952,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1962 struct ext4_ext_cache *cex; 1952 struct ext4_ext_cache *cex;
1963 BUG_ON(len == 0); 1953 BUG_ON(len == 0);
1964 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1954 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1955 trace_ext4_ext_put_in_cache(inode, block, len, start);
1965 cex = &EXT4_I(inode)->i_cached_extent; 1956 cex = &EXT4_I(inode)->i_cached_extent;
1966 cex->ec_block = block; 1957 cex->ec_block = block;
1967 cex->ec_len = len; 1958 cex->ec_len = len;
@@ -2063,6 +2054,7 @@ errout:
2063 sbi->extent_cache_misses++; 2054 sbi->extent_cache_misses++;
2064 else 2055 else
2065 sbi->extent_cache_hits++; 2056 sbi->extent_cache_hits++;
2057 trace_ext4_ext_in_cache(inode, block, ret);
2066 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2058 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2067 return ret; 2059 return ret;
2068} 2060}
@@ -2130,6 +2122,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2130 if (err) 2122 if (err)
2131 return err; 2123 return err;
2132 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2124 ext_debug("index is empty, remove it, free block %llu\n", leaf);
2125 trace_ext4_ext_rm_idx(inode, leaf);
2126
2133 ext4_free_blocks(handle, inode, NULL, leaf, 1, 2127 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2134 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2128 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2135 return err; 2129 return err;
@@ -2158,7 +2152,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2158 * need to account for leaf block credit 2152 * need to account for leaf block credit
2159 * 2153 *
2160 * bitmaps and block group descriptor blocks 2154 * bitmaps and block group descriptor blocks
2161 * and other metadat blocks still need to be 2155 * and other metadata blocks still need to be
2162 * accounted. 2156 * accounted.
2163 */ 2157 */
2164 /* 1 bitmap, 1 block group descriptor */ 2158 /* 1 bitmap, 1 block group descriptor */
@@ -2195,14 +2189,40 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
2195} 2189}
2196 2190
2197static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2191static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2198 struct ext4_extent *ex, 2192 struct ext4_extent *ex,
2199 ext4_lblk_t from, ext4_lblk_t to) 2193 ext4_fsblk_t *partial_cluster,
2194 ext4_lblk_t from, ext4_lblk_t to)
2200{ 2195{
2196 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2201 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2197 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2198 ext4_fsblk_t pblk;
2202 int flags = EXT4_FREE_BLOCKS_FORGET; 2199 int flags = EXT4_FREE_BLOCKS_FORGET;
2203 2200
2204 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2201 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2205 flags |= EXT4_FREE_BLOCKS_METADATA; 2202 flags |= EXT4_FREE_BLOCKS_METADATA;
2203 /*
2204 * For bigalloc file systems, we never free a partial cluster
2205 * at the beginning of the extent. Instead, we make a note
2206 * that we tried freeing the cluster, and check to see if we
2207 * need to free it on a subsequent call to ext4_remove_blocks,
2208 * or at the end of the ext4_truncate() operation.
2209 */
2210 flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2211
2212 trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
2213 /*
2214 * If we have a partial cluster, and it's different from the
2215 * cluster of the last block, we need to explicitly free the
2216 * partial cluster here.
2217 */
2218 pblk = ext4_ext_pblock(ex) + ee_len - 1;
2219 if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
2220 ext4_free_blocks(handle, inode, NULL,
2221 EXT4_C2B(sbi, *partial_cluster),
2222 sbi->s_cluster_ratio, flags);
2223 *partial_cluster = 0;
2224 }
2225
2206#ifdef EXTENTS_STATS 2226#ifdef EXTENTS_STATS
2207 { 2227 {
2208 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2228 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2222,12 +2242,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2222 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2242 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2223 /* tail removal */ 2243 /* tail removal */
2224 ext4_lblk_t num; 2244 ext4_lblk_t num;
2225 ext4_fsblk_t start;
2226 2245
2227 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2246 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2228 start = ext4_ext_pblock(ex) + ee_len - num; 2247 pblk = ext4_ext_pblock(ex) + ee_len - num;
2229 ext_debug("free last %u blocks starting %llu\n", num, start); 2248 ext_debug("free last %u blocks starting %llu\n", num, pblk);
2230 ext4_free_blocks(handle, inode, NULL, start, num, flags); 2249 ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2250 /*
2251 * If the block range to be freed didn't start at the
2252 * beginning of a cluster, and we removed the entire
2253 * extent, save the partial cluster here, since we
2254 * might need to delete if we determine that the
2255 * truncate operation has removed all of the blocks in
2256 * the cluster.
2257 */
2258 if (pblk & (sbi->s_cluster_ratio - 1) &&
2259 (ee_len == num))
2260 *partial_cluster = EXT4_B2C(sbi, pblk);
2261 else
2262 *partial_cluster = 0;
2231 } else if (from == le32_to_cpu(ex->ee_block) 2263 } else if (from == le32_to_cpu(ex->ee_block)
2232 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2264 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2233 /* head removal */ 2265 /* head removal */
@@ -2238,7 +2270,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2238 start = ext4_ext_pblock(ex); 2270 start = ext4_ext_pblock(ex);
2239 2271
2240 ext_debug("free first %u blocks starting %llu\n", num, start); 2272 ext_debug("free first %u blocks starting %llu\n", num, start);
2241 ext4_free_blocks(handle, inode, 0, start, num, flags); 2273 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2242 2274
2243 } else { 2275 } else {
2244 printk(KERN_INFO "strange request: removal(2) " 2276 printk(KERN_INFO "strange request: removal(2) "
@@ -2262,19 +2294,19 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2262 */ 2294 */
2263static int 2295static int
2264ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2296ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2265 struct ext4_ext_path *path, ext4_lblk_t start, 2297 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
2266 ext4_lblk_t end) 2298 ext4_lblk_t start, ext4_lblk_t end)
2267{ 2299{
2300 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2268 int err = 0, correct_index = 0; 2301 int err = 0, correct_index = 0;
2269 int depth = ext_depth(inode), credits; 2302 int depth = ext_depth(inode), credits;
2270 struct ext4_extent_header *eh; 2303 struct ext4_extent_header *eh;
2271 ext4_lblk_t a, b, block; 2304 ext4_lblk_t a, b;
2272 unsigned num; 2305 unsigned num;
2273 ext4_lblk_t ex_ee_block; 2306 ext4_lblk_t ex_ee_block;
2274 unsigned short ex_ee_len; 2307 unsigned short ex_ee_len;
2275 unsigned uninitialized = 0; 2308 unsigned uninitialized = 0;
2276 struct ext4_extent *ex; 2309 struct ext4_extent *ex;
2277 struct ext4_map_blocks map;
2278 2310
2279 /* the header must be checked already in ext4_ext_remove_space() */ 2311 /* the header must be checked already in ext4_ext_remove_space() */
2280 ext_debug("truncate since %u in leaf\n", start); 2312 ext_debug("truncate since %u in leaf\n", start);
@@ -2291,6 +2323,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2291 ex_ee_block = le32_to_cpu(ex->ee_block); 2323 ex_ee_block = le32_to_cpu(ex->ee_block);
2292 ex_ee_len = ext4_ext_get_actual_len(ex); 2324 ex_ee_len = ext4_ext_get_actual_len(ex);
2293 2325
2326 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
2327
2294 while (ex >= EXT_FIRST_EXTENT(eh) && 2328 while (ex >= EXT_FIRST_EXTENT(eh) &&
2295 ex_ee_block + ex_ee_len > start) { 2329 ex_ee_block + ex_ee_len > start) {
2296 2330
@@ -2315,86 +2349,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2315 ex_ee_block = le32_to_cpu(ex->ee_block); 2349 ex_ee_block = le32_to_cpu(ex->ee_block);
2316 ex_ee_len = ext4_ext_get_actual_len(ex); 2350 ex_ee_len = ext4_ext_get_actual_len(ex);
2317 continue; 2351 continue;
2318 } else if (a != ex_ee_block && 2352 } else if (b != ex_ee_block + ex_ee_len - 1) {
2319 b != ex_ee_block + ex_ee_len - 1) { 2353 EXT4_ERROR_INODE(inode," bad truncate %u:%u\n",
2320 /* 2354 start, end);
2321 * If this is a truncate, then this condition should 2355 err = -EIO;
2322 * never happen because at least one of the end points 2356 goto out;
2323 * needs to be on the edge of the extent.
2324 */
2325 if (end == EXT_MAX_BLOCKS - 1) {
2326 ext_debug(" bad truncate %u:%u\n",
2327 start, end);
2328 block = 0;
2329 num = 0;
2330 err = -EIO;
2331 goto out;
2332 }
2333 /*
2334 * else this is a hole punch, so the extent needs to
2335 * be split since neither edge of the hole is on the
2336 * extent edge
2337 */
2338 else{
2339 map.m_pblk = ext4_ext_pblock(ex);
2340 map.m_lblk = ex_ee_block;
2341 map.m_len = b - ex_ee_block;
2342
2343 err = ext4_split_extent(handle,
2344 inode, path, &map, 0,
2345 EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
2346 EXT4_GET_BLOCKS_PRE_IO);
2347
2348 if (err < 0)
2349 goto out;
2350
2351 ex_ee_len = ext4_ext_get_actual_len(ex);
2352
2353 b = ex_ee_block+ex_ee_len - 1 < end ?
2354 ex_ee_block+ex_ee_len - 1 : end;
2355
2356 /* Then remove tail of this extent */
2357 block = ex_ee_block;
2358 num = a - block;
2359 }
2360 } else if (a != ex_ee_block) { 2357 } else if (a != ex_ee_block) {
2361 /* remove tail of the extent */ 2358 /* remove tail of the extent */
2362 block = ex_ee_block; 2359 num = a - ex_ee_block;
2363 num = a - block;
2364 } else if (b != ex_ee_block + ex_ee_len - 1) {
2365 /* remove head of the extent */
2366 block = b;
2367 num = ex_ee_block + ex_ee_len - b;
2368
2369 /*
2370 * If this is a truncate, this condition
2371 * should never happen
2372 */
2373 if (end == EXT_MAX_BLOCKS - 1) {
2374 ext_debug(" bad truncate %u:%u\n",
2375 start, end);
2376 err = -EIO;
2377 goto out;
2378 }
2379 } else { 2360 } else {
2380 /* remove whole extent: excellent! */ 2361 /* remove whole extent: excellent! */
2381 block = ex_ee_block;
2382 num = 0; 2362 num = 0;
2383 if (a != ex_ee_block) {
2384 ext_debug(" bad truncate %u:%u\n",
2385 start, end);
2386 err = -EIO;
2387 goto out;
2388 }
2389
2390 if (b != ex_ee_block + ex_ee_len - 1) {
2391 ext_debug(" bad truncate %u:%u\n",
2392 start, end);
2393 err = -EIO;
2394 goto out;
2395 }
2396 } 2363 }
2397
2398 /* 2364 /*
2399 * 3 for leaf, sb, and inode plus 2 (bmap and group 2365 * 3 for leaf, sb, and inode plus 2 (bmap and group
2400 * descriptor) for each block group; assume two block 2366 * descriptor) for each block group; assume two block
@@ -2416,23 +2382,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2416 if (err) 2382 if (err)
2417 goto out; 2383 goto out;
2418 2384
2419 err = ext4_remove_blocks(handle, inode, ex, a, b); 2385 err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
2386 a, b);
2420 if (err) 2387 if (err)
2421 goto out; 2388 goto out;
2422 2389
2423 if (num == 0) { 2390 if (num == 0)
2424 /* this extent is removed; mark slot entirely unused */ 2391 /* this extent is removed; mark slot entirely unused */
2425 ext4_ext_store_pblock(ex, 0); 2392 ext4_ext_store_pblock(ex, 0);
2426 } else if (block != ex_ee_block) {
2427 /*
2428 * If this was a head removal, then we need to update
2429 * the physical block since it is now at a different
2430 * location
2431 */
2432 ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
2433 }
2434 2393
2435 ex->ee_block = cpu_to_le32(block);
2436 ex->ee_len = cpu_to_le16(num); 2394 ex->ee_len = cpu_to_le16(num);
2437 /* 2395 /*
2438 * Do not mark uninitialized if all the blocks in the 2396 * Do not mark uninitialized if all the blocks in the
@@ -2440,11 +2398,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2440 */ 2398 */
2441 if (uninitialized && num) 2399 if (uninitialized && num)
2442 ext4_ext_mark_uninitialized(ex); 2400 ext4_ext_mark_uninitialized(ex);
2443
2444 err = ext4_ext_dirty(handle, inode, path + depth);
2445 if (err)
2446 goto out;
2447
2448 /* 2401 /*
2449 * If the extent was completely released, 2402 * If the extent was completely released,
2450 * we need to remove it from the leaf 2403 * we need to remove it from the leaf
@@ -2464,9 +2417,14 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2464 sizeof(struct ext4_extent)); 2417 sizeof(struct ext4_extent));
2465 } 2418 }
2466 le16_add_cpu(&eh->eh_entries, -1); 2419 le16_add_cpu(&eh->eh_entries, -1);
2467 } 2420 } else
2421 *partial_cluster = 0;
2468 2422
2469 ext_debug("new extent: %u:%u:%llu\n", block, num, 2423 err = ext4_ext_dirty(handle, inode, path + depth);
2424 if (err)
2425 goto out;
2426
2427 ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
2470 ext4_ext_pblock(ex)); 2428 ext4_ext_pblock(ex));
2471 ex--; 2429 ex--;
2472 ex_ee_block = le32_to_cpu(ex->ee_block); 2430 ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2476,6 +2434,25 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2476 if (correct_index && eh->eh_entries) 2434 if (correct_index && eh->eh_entries)
2477 err = ext4_ext_correct_indexes(handle, inode, path); 2435 err = ext4_ext_correct_indexes(handle, inode, path);
2478 2436
2437 /*
2438 * If there is still a entry in the leaf node, check to see if
2439 * it references the partial cluster. This is the only place
2440 * where it could; if it doesn't, we can free the cluster.
2441 */
2442 if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
2443 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
2444 *partial_cluster)) {
2445 int flags = EXT4_FREE_BLOCKS_FORGET;
2446
2447 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2448 flags |= EXT4_FREE_BLOCKS_METADATA;
2449
2450 ext4_free_blocks(handle, inode, NULL,
2451 EXT4_C2B(sbi, *partial_cluster),
2452 sbi->s_cluster_ratio, flags);
2453 *partial_cluster = 0;
2454 }
2455
2479 /* if this leaf is free, then we should 2456 /* if this leaf is free, then we should
2480 * remove it from index block above */ 2457 * remove it from index block above */
2481 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2458 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
@@ -2511,6 +2488,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2511 struct super_block *sb = inode->i_sb; 2488 struct super_block *sb = inode->i_sb;
2512 int depth = ext_depth(inode); 2489 int depth = ext_depth(inode);
2513 struct ext4_ext_path *path; 2490 struct ext4_ext_path *path;
2491 ext4_fsblk_t partial_cluster = 0;
2514 handle_t *handle; 2492 handle_t *handle;
2515 int i, err; 2493 int i, err;
2516 2494
@@ -2524,6 +2502,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2524again: 2502again:
2525 ext4_ext_invalidate_cache(inode); 2503 ext4_ext_invalidate_cache(inode);
2526 2504
2505 trace_ext4_ext_remove_space(inode, start, depth);
2506
2527 /* 2507 /*
2528 * We start scanning from right side, freeing all the blocks 2508 * We start scanning from right side, freeing all the blocks
2529 * after i_size and walking into the tree depth-wise. 2509 * after i_size and walking into the tree depth-wise.
@@ -2546,7 +2526,8 @@ again:
2546 if (i == depth) { 2526 if (i == depth) {
2547 /* this is leaf block */ 2527 /* this is leaf block */
2548 err = ext4_ext_rm_leaf(handle, inode, path, 2528 err = ext4_ext_rm_leaf(handle, inode, path,
2549 start, EXT_MAX_BLOCKS - 1); 2529 &partial_cluster, start,
2530 EXT_MAX_BLOCKS - 1);
2550 /* root level has p_bh == NULL, brelse() eats this */ 2531 /* root level has p_bh == NULL, brelse() eats this */
2551 brelse(path[i].p_bh); 2532 brelse(path[i].p_bh);
2552 path[i].p_bh = NULL; 2533 path[i].p_bh = NULL;
@@ -2618,6 +2599,24 @@ again:
2618 } 2599 }
2619 } 2600 }
2620 2601
2602 trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
2603 path->p_hdr->eh_entries);
2604
2605 /* If we still have something in the partial cluster and we have removed
2606 * even the first extent, then we should free the blocks in the partial
2607 * cluster as well. */
2608 if (partial_cluster && path->p_hdr->eh_entries == 0) {
2609 int flags = EXT4_FREE_BLOCKS_FORGET;
2610
2611 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2612 flags |= EXT4_FREE_BLOCKS_METADATA;
2613
2614 ext4_free_blocks(handle, inode, NULL,
2615 EXT4_C2B(EXT4_SB(sb), partial_cluster),
2616 EXT4_SB(sb)->s_cluster_ratio, flags);
2617 partial_cluster = 0;
2618 }
2619
2621 /* TODO: flexible tree reduction should be here */ 2620 /* TODO: flexible tree reduction should be here */
2622 if (path->p_hdr->eh_entries == 0) { 2621 if (path->p_hdr->eh_entries == 0) {
2623 /* 2622 /*
@@ -2909,17 +2908,29 @@ out:
2909 * a> There is no split required: Entire extent should be initialized 2908 * a> There is no split required: Entire extent should be initialized
2910 * b> Splits in two extents: Write is happening at either end of the extent 2909 * b> Splits in two extents: Write is happening at either end of the extent
2911 * c> Splits in three extents: Somone is writing in middle of the extent 2910 * c> Splits in three extents: Somone is writing in middle of the extent
2911 *
2912 * Pre-conditions:
2913 * - The extent pointed to by 'path' is uninitialized.
2914 * - The extent pointed to by 'path' contains a superset
2915 * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
2916 *
2917 * Post-conditions on success:
2918 * - the returned value is the number of blocks beyond map->l_lblk
2919 * that are allocated and initialized.
2920 * It is guaranteed to be >= map->m_len.
2912 */ 2921 */
2913static int ext4_ext_convert_to_initialized(handle_t *handle, 2922static int ext4_ext_convert_to_initialized(handle_t *handle,
2914 struct inode *inode, 2923 struct inode *inode,
2915 struct ext4_map_blocks *map, 2924 struct ext4_map_blocks *map,
2916 struct ext4_ext_path *path) 2925 struct ext4_ext_path *path)
2917{ 2926{
2927 struct ext4_extent_header *eh;
2918 struct ext4_map_blocks split_map; 2928 struct ext4_map_blocks split_map;
2919 struct ext4_extent zero_ex; 2929 struct ext4_extent zero_ex;
2920 struct ext4_extent *ex; 2930 struct ext4_extent *ex;
2921 ext4_lblk_t ee_block, eof_block; 2931 ext4_lblk_t ee_block, eof_block;
2922 unsigned int allocated, ee_len, depth; 2932 unsigned int ee_len, depth;
2933 int allocated;
2923 int err = 0; 2934 int err = 0;
2924 int split_flag = 0; 2935 int split_flag = 0;
2925 2936
@@ -2933,11 +2944,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2933 eof_block = map->m_lblk + map->m_len; 2944 eof_block = map->m_lblk + map->m_len;
2934 2945
2935 depth = ext_depth(inode); 2946 depth = ext_depth(inode);
2947 eh = path[depth].p_hdr;
2936 ex = path[depth].p_ext; 2948 ex = path[depth].p_ext;
2937 ee_block = le32_to_cpu(ex->ee_block); 2949 ee_block = le32_to_cpu(ex->ee_block);
2938 ee_len = ext4_ext_get_actual_len(ex); 2950 ee_len = ext4_ext_get_actual_len(ex);
2939 allocated = ee_len - (map->m_lblk - ee_block); 2951 allocated = ee_len - (map->m_lblk - ee_block);
2940 2952
2953 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
2954
2955 /* Pre-conditions */
2956 BUG_ON(!ext4_ext_is_uninitialized(ex));
2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
2958 BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
2959
2960 /*
2961 * Attempt to transfer newly initialized blocks from the currently
2962 * uninitialized extent to its left neighbor. This is much cheaper
2963 * than an insertion followed by a merge as those involve costly
2964 * memmove() calls. This is the common case in steady state for
2965 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
2966 * writes.
2967 *
2968 * Limitations of the current logic:
2969 * - L1: we only deal with writes at the start of the extent.
2970 * The approach could be extended to writes at the end
2971 * of the extent but this scenario was deemed less common.
2972 * - L2: we do not deal with writes covering the whole extent.
2973 * This would require removing the extent if the transfer
2974 * is possible.
2975 * - L3: we only attempt to merge with an extent stored in the
2976 * same extent tree node.
2977 */
2978 if ((map->m_lblk == ee_block) && /*L1*/
2979 (map->m_len < ee_len) && /*L2*/
2980 (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/
2981 struct ext4_extent *prev_ex;
2982 ext4_lblk_t prev_lblk;
2983 ext4_fsblk_t prev_pblk, ee_pblk;
2984 unsigned int prev_len, write_len;
2985
2986 prev_ex = ex - 1;
2987 prev_lblk = le32_to_cpu(prev_ex->ee_block);
2988 prev_len = ext4_ext_get_actual_len(prev_ex);
2989 prev_pblk = ext4_ext_pblock(prev_ex);
2990 ee_pblk = ext4_ext_pblock(ex);
2991 write_len = map->m_len;
2992
2993 /*
2994 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
2995 * upon those conditions:
2996 * - C1: prev_ex is initialized,
2997 * - C2: prev_ex is logically abutting ex,
2998 * - C3: prev_ex is physically abutting ex,
2999 * - C4: prev_ex can receive the additional blocks without
3000 * overflowing the (initialized) length limit.
3001 */
3002 if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/
3003 ((prev_lblk + prev_len) == ee_block) && /*C2*/
3004 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3005 (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/
3006 err = ext4_ext_get_access(handle, inode, path + depth);
3007 if (err)
3008 goto out;
3009
3010 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3011 map, ex, prev_ex);
3012
3013 /* Shift the start of ex by 'write_len' blocks */
3014 ex->ee_block = cpu_to_le32(ee_block + write_len);
3015 ext4_ext_store_pblock(ex, ee_pblk + write_len);
3016 ex->ee_len = cpu_to_le16(ee_len - write_len);
3017 ext4_ext_mark_uninitialized(ex); /* Restore the flag */
3018
3019 /* Extend prev_ex by 'write_len' blocks */
3020 prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
3021
3022 /* Mark the block containing both extents as dirty */
3023 ext4_ext_dirty(handle, inode, path + depth);
3024
3025 /* Update path to point to the right extent */
3026 path[depth].p_ext = prev_ex;
3027
3028 /* Result: number of initialized blocks past m_lblk */
3029 allocated = write_len;
3030 goto out;
3031 }
3032 }
3033
2941 WARN_ON(map->m_lblk < ee_block); 3034 WARN_ON(map->m_lblk < ee_block);
2942 /* 3035 /*
2943 * It is safe to convert extent to initialized via explicit 3036 * It is safe to convert extent to initialized via explicit
@@ -3165,6 +3258,192 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3165 return ext4_mark_inode_dirty(handle, inode); 3258 return ext4_mark_inode_dirty(handle, inode);
3166} 3259}
3167 3260
3261/**
3262 * ext4_find_delalloc_range: find delayed allocated block in the given range.
3263 *
3264 * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
3265 * whether there are any buffers marked for delayed allocation. It returns '1'
3266 * on the first delalloc'ed buffer head found. If no buffer head in the given
3267 * range is marked for delalloc, it returns 0.
3268 * lblk_start should always be <= lblk_end.
3269 * search_hint_reverse is to indicate that searching in reverse from lblk_end to
3270 * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
3271 * block sooner). This is useful when blocks are truncated sequentially from
3272 * lblk_start towards lblk_end.
3273 */
3274static int ext4_find_delalloc_range(struct inode *inode,
3275 ext4_lblk_t lblk_start,
3276 ext4_lblk_t lblk_end,
3277 int search_hint_reverse)
3278{
3279 struct address_space *mapping = inode->i_mapping;
3280 struct buffer_head *head, *bh = NULL;
3281 struct page *page;
3282 ext4_lblk_t i, pg_lblk;
3283 pgoff_t index;
3284
3285 /* reverse search wont work if fs block size is less than page size */
3286 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3287 search_hint_reverse = 0;
3288
3289 if (search_hint_reverse)
3290 i = lblk_end;
3291 else
3292 i = lblk_start;
3293
3294 index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
3295
3296 while ((i >= lblk_start) && (i <= lblk_end)) {
3297 page = find_get_page(mapping, index);
3298 if (!page)
3299 goto nextpage;
3300
3301 if (!page_has_buffers(page))
3302 goto nextpage;
3303
3304 head = page_buffers(page);
3305 if (!head)
3306 goto nextpage;
3307
3308 bh = head;
3309 pg_lblk = index << (PAGE_CACHE_SHIFT -
3310 inode->i_blkbits);
3311 do {
3312 if (unlikely(pg_lblk < lblk_start)) {
3313 /*
3314 * This is possible when fs block size is less
3315 * than page size and our cluster starts/ends in
3316 * middle of the page. So we need to skip the
3317 * initial few blocks till we reach the 'lblk'
3318 */
3319 pg_lblk++;
3320 continue;
3321 }
3322
3323 /* Check if the buffer is delayed allocated and that it
3324 * is not yet mapped. (when da-buffers are mapped during
3325 * their writeout, their da_mapped bit is set.)
3326 */
3327 if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
3328 page_cache_release(page);
3329 trace_ext4_find_delalloc_range(inode,
3330 lblk_start, lblk_end,
3331 search_hint_reverse,
3332 1, i);
3333 return 1;
3334 }
3335 if (search_hint_reverse)
3336 i--;
3337 else
3338 i++;
3339 } while ((i >= lblk_start) && (i <= lblk_end) &&
3340 ((bh = bh->b_this_page) != head));
3341nextpage:
3342 if (page)
3343 page_cache_release(page);
3344 /*
3345 * Move to next page. 'i' will be the first lblk in the next
3346 * page.
3347 */
3348 if (search_hint_reverse)
3349 index--;
3350 else
3351 index++;
3352 i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
3353 }
3354
3355 trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3356 search_hint_reverse, 0, 0);
3357 return 0;
3358}
3359
3360int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
3361 int search_hint_reverse)
3362{
3363 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3364 ext4_lblk_t lblk_start, lblk_end;
3365 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
3366 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3367
3368 return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3369 search_hint_reverse);
3370}
3371
3372/**
3373 * Determines how many complete clusters (out of those specified by the 'map')
3374 * are under delalloc and were reserved quota for.
3375 * This function is called when we are writing out the blocks that were
3376 * originally written with their allocation delayed, but then the space was
3377 * allocated using fallocate() before the delayed allocation could be resolved.
3378 * The cases to look for are:
3379 * ('=' indicated delayed allocated blocks
3380 * '-' indicates non-delayed allocated blocks)
3381 * (a) partial clusters towards beginning and/or end outside of allocated range
3382 * are not delalloc'ed.
3383 * Ex:
3384 * |----c---=|====c====|====c====|===-c----|
3385 * |++++++ allocated ++++++|
3386 * ==> 4 complete clusters in above example
3387 *
3388 * (b) partial cluster (outside of allocated range) towards either end is
3389 * marked for delayed allocation. In this case, we will exclude that
3390 * cluster.
3391 * Ex:
3392 * |----====c========|========c========|
3393 * |++++++ allocated ++++++|
3394 * ==> 1 complete clusters in above example
3395 *
3396 * Ex:
3397 * |================c================|
3398 * |++++++ allocated ++++++|
3399 * ==> 0 complete clusters in above example
3400 *
3401 * The ext4_da_update_reserve_space will be called only if we
3402 * determine here that there were some "entire" clusters that span
3403 * this 'allocated' range.
3404 * In the non-bigalloc case, this function will just end up returning num_blks
3405 * without ever calling ext4_find_delalloc_range.
3406 */
3407static unsigned int
3408get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3409 unsigned int num_blks)
3410{
3411 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3412 ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
3413 ext4_lblk_t lblk_from, lblk_to, c_offset;
3414 unsigned int allocated_clusters = 0;
3415
3416 alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
3417 alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
3418
3419 /* max possible clusters for this allocation */
3420 allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
3421
3422 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
3423
3424 /* Check towards left side */
3425 c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
3426 if (c_offset) {
3427 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
3428 lblk_to = lblk_from + c_offset - 1;
3429
3430 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
3431 allocated_clusters--;
3432 }
3433
3434 /* Now check towards right. */
3435 c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
3436 if (allocated_clusters && c_offset) {
3437 lblk_from = lblk_start + num_blks;
3438 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
3439
3440 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
3441 allocated_clusters--;
3442 }
3443
3444 return allocated_clusters;
3445}
3446
3168static int 3447static int
3169ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3448ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3170 struct ext4_map_blocks *map, 3449 struct ext4_map_blocks *map,
@@ -3181,6 +3460,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3181 flags, allocated); 3460 flags, allocated);
3182 ext4_ext_show_leaf(inode, path); 3461 ext4_ext_show_leaf(inode, path);
3183 3462
3463 trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
3464 newblock);
3465
3184 /* get_block() before submit the IO, split the extent */ 3466 /* get_block() before submit the IO, split the extent */
3185 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3467 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3186 ret = ext4_split_unwritten_extents(handle, inode, map, 3468 ret = ext4_split_unwritten_extents(handle, inode, map,
@@ -3190,10 +3472,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3190 * that this IO needs to conversion to written when IO is 3472 * that this IO needs to conversion to written when IO is
3191 * completed 3473 * completed
3192 */ 3474 */
3193 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3475 if (io)
3194 io->flag = EXT4_IO_END_UNWRITTEN; 3476 ext4_set_io_unwritten_flag(inode, io);
3195 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 3477 else
3196 } else
3197 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3478 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3198 if (ext4_should_dioread_nolock(inode)) 3479 if (ext4_should_dioread_nolock(inode))
3199 map->m_flags |= EXT4_MAP_UNINIT; 3480 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3234,14 +3515,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3234 3515
3235 /* buffered write, writepage time, convert*/ 3516 /* buffered write, writepage time, convert*/
3236 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3517 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3237 if (ret >= 0) { 3518 if (ret >= 0)
3238 ext4_update_inode_fsync_trans(handle, inode, 1); 3519 ext4_update_inode_fsync_trans(handle, inode, 1);
3239 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3240 map->m_len);
3241 if (err < 0)
3242 goto out2;
3243 }
3244
3245out: 3520out:
3246 if (ret <= 0) { 3521 if (ret <= 0) {
3247 err = ret; 3522 err = ret;
@@ -3270,11 +3545,24 @@ out:
3270 * But fallocate would have already updated quota and block 3545 * But fallocate would have already updated quota and block
3271 * count for this offset. So cancel these reservation 3546 * count for this offset. So cancel these reservation
3272 */ 3547 */
3273 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3548 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
3274 ext4_da_update_reserve_space(inode, allocated, 0); 3549 unsigned int reserved_clusters;
3550 reserved_clusters = get_reserved_cluster_alloc(inode,
3551 map->m_lblk, map->m_len);
3552 if (reserved_clusters)
3553 ext4_da_update_reserve_space(inode,
3554 reserved_clusters,
3555 0);
3556 }
3275 3557
3276map_out: 3558map_out:
3277 map->m_flags |= EXT4_MAP_MAPPED; 3559 map->m_flags |= EXT4_MAP_MAPPED;
3560 if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
3561 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3562 map->m_len);
3563 if (err < 0)
3564 goto out2;
3565 }
3278out1: 3566out1:
3279 if (allocated > map->m_len) 3567 if (allocated > map->m_len)
3280 allocated = map->m_len; 3568 allocated = map->m_len;
@@ -3290,6 +3578,111 @@ out2:
3290} 3578}
3291 3579
3292/* 3580/*
3581 * get_implied_cluster_alloc - check to see if the requested
3582 * allocation (in the map structure) overlaps with a cluster already
3583 * allocated in an extent.
3584 * @sb The filesystem superblock structure
3585 * @map The requested lblk->pblk mapping
3586 * @ex The extent structure which might contain an implied
3587 * cluster allocation
3588 *
3589 * This function is called by ext4_ext_map_blocks() after we failed to
3590 * find blocks that were already in the inode's extent tree. Hence,
3591 * we know that the beginning of the requested region cannot overlap
3592 * the extent from the inode's extent tree. There are three cases we
3593 * want to catch. The first is this case:
3594 *
3595 * |--- cluster # N--|
3596 * |--- extent ---| |---- requested region ---|
3597 * |==========|
3598 *
3599 * The second case that we need to test for is this one:
3600 *
3601 * |--------- cluster # N ----------------|
3602 * |--- requested region --| |------- extent ----|
3603 * |=======================|
3604 *
3605 * The third case is when the requested region lies between two extents
3606 * within the same cluster:
3607 * |------------- cluster # N-------------|
3608 * |----- ex -----| |---- ex_right ----|
3609 * |------ requested region ------|
3610 * |================|
3611 *
3612 * In each of the above cases, we need to set the map->m_pblk and
3613 * map->m_len so it corresponds to the return the extent labelled as
3614 * "|====|" from cluster #N, since it is already in use for data in
3615 * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3616 * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3617 * as a new "allocated" block region. Otherwise, we will return 0 and
3618 * ext4_ext_map_blocks() will then allocate one or more new clusters
3619 * by calling ext4_mb_new_blocks().
3620 */
3621static int get_implied_cluster_alloc(struct super_block *sb,
3622 struct ext4_map_blocks *map,
3623 struct ext4_extent *ex,
3624 struct ext4_ext_path *path)
3625{
3626 struct ext4_sb_info *sbi = EXT4_SB(sb);
3627 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3628 ext4_lblk_t ex_cluster_start, ex_cluster_end;
3629 ext4_lblk_t rr_cluster_start, rr_cluster_end;
3630 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3631 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3632 unsigned short ee_len = ext4_ext_get_actual_len(ex);
3633
3634 /* The extent passed in that we are trying to match */
3635 ex_cluster_start = EXT4_B2C(sbi, ee_block);
3636 ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
3637
3638 /* The requested region passed into ext4_map_blocks() */
3639 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3640 rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
3641
3642 if ((rr_cluster_start == ex_cluster_end) ||
3643 (rr_cluster_start == ex_cluster_start)) {
3644 if (rr_cluster_start == ex_cluster_end)
3645 ee_start += ee_len - 1;
3646 map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
3647 c_offset;
3648 map->m_len = min(map->m_len,
3649 (unsigned) sbi->s_cluster_ratio - c_offset);
3650 /*
3651 * Check for and handle this case:
3652 *
3653 * |--------- cluster # N-------------|
3654 * |------- extent ----|
3655 * |--- requested region ---|
3656 * |===========|
3657 */
3658
3659 if (map->m_lblk < ee_block)
3660 map->m_len = min(map->m_len, ee_block - map->m_lblk);
3661
3662 /*
3663 * Check for the case where there is already another allocated
3664 * block to the right of 'ex' but before the end of the cluster.
3665 *
3666 * |------------- cluster # N-------------|
3667 * |----- ex -----| |---- ex_right ----|
3668 * |------ requested region ------|
3669 * |================|
3670 */
3671 if (map->m_lblk > ee_block) {
3672 ext4_lblk_t next = ext4_ext_next_allocated_block(path);
3673 map->m_len = min(map->m_len, next - map->m_lblk);
3674 }
3675
3676 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
3677 return 1;
3678 }
3679
3680 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
3681 return 0;
3682}
3683
3684
3685/*
3293 * Block allocation/map/preallocation routine for extents based files 3686 * Block allocation/map/preallocation routine for extents based files
3294 * 3687 *
3295 * 3688 *
@@ -3311,15 +3704,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3311 struct ext4_map_blocks *map, int flags) 3704 struct ext4_map_blocks *map, int flags)
3312{ 3705{
3313 struct ext4_ext_path *path = NULL; 3706 struct ext4_ext_path *path = NULL;
3314 struct ext4_extent newex, *ex; 3707 struct ext4_extent newex, *ex, *ex2;
3708 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3315 ext4_fsblk_t newblock = 0; 3709 ext4_fsblk_t newblock = 0;
3316 int err = 0, depth, ret; 3710 int free_on_err = 0, err = 0, depth, ret;
3317 unsigned int allocated = 0; 3711 unsigned int allocated = 0, offset = 0;
3712 unsigned int allocated_clusters = 0;
3318 unsigned int punched_out = 0; 3713 unsigned int punched_out = 0;
3319 unsigned int result = 0; 3714 unsigned int result = 0;
3320 struct ext4_allocation_request ar; 3715 struct ext4_allocation_request ar;
3321 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3716 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3322 struct ext4_map_blocks punch_map; 3717 ext4_lblk_t cluster_offset;
3323 3718
3324 ext_debug("blocks %u/%u requested for inode %lu\n", 3719 ext_debug("blocks %u/%u requested for inode %lu\n",
3325 map->m_lblk, map->m_len, inode->i_ino); 3720 map->m_lblk, map->m_len, inode->i_ino);
@@ -3329,6 +3724,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3329 if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && 3724 if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
3330 ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3725 ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3331 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3726 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3727 if ((sbi->s_cluster_ratio > 1) &&
3728 ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
3729 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3730
3332 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3731 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3333 /* 3732 /*
3334 * block isn't allocated yet and 3733 * block isn't allocated yet and
@@ -3339,6 +3738,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3339 /* we should allocate requested block */ 3738 /* we should allocate requested block */
3340 } else { 3739 } else {
3341 /* block is already allocated */ 3740 /* block is already allocated */
3741 if (sbi->s_cluster_ratio > 1)
3742 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3342 newblock = map->m_lblk 3743 newblock = map->m_lblk
3343 - le32_to_cpu(newex.ee_block) 3744 - le32_to_cpu(newex.ee_block)
3344 + ext4_ext_pblock(&newex); 3745 + ext4_ext_pblock(&newex);
@@ -3384,8 +3785,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3384 * we split out initialized portions during a write. 3785 * we split out initialized portions during a write.
3385 */ 3786 */
3386 ee_len = ext4_ext_get_actual_len(ex); 3787 ee_len = ext4_ext_get_actual_len(ex);
3788
3789 trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
3790
3387 /* if found extent covers block, simply return it */ 3791 /* if found extent covers block, simply return it */
3388 if (in_range(map->m_lblk, ee_block, ee_len)) { 3792 if (in_range(map->m_lblk, ee_block, ee_len)) {
3793 struct ext4_map_blocks punch_map;
3794 ext4_fsblk_t partial_cluster = 0;
3795
3389 newblock = map->m_lblk - ee_block + ee_start; 3796 newblock = map->m_lblk - ee_block + ee_start;
3390 /* number of remaining blocks in the extent */ 3797 /* number of remaining blocks in the extent */
3391 allocated = ee_len - (map->m_lblk - ee_block); 3798 allocated = ee_len - (map->m_lblk - ee_block);
@@ -3469,7 +3876,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3469 ext4_ext_invalidate_cache(inode); 3876 ext4_ext_invalidate_cache(inode);
3470 3877
3471 err = ext4_ext_rm_leaf(handle, inode, path, 3878 err = ext4_ext_rm_leaf(handle, inode, path,
3472 map->m_lblk, map->m_lblk + punched_out); 3879 &partial_cluster, map->m_lblk,
3880 map->m_lblk + punched_out);
3473 3881
3474 if (!err && path->p_hdr->eh_entries == 0) { 3882 if (!err && path->p_hdr->eh_entries == 0) {
3475 /* 3883 /*
@@ -3492,6 +3900,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3492 } 3900 }
3493 } 3901 }
3494 3902
3903 if ((sbi->s_cluster_ratio > 1) &&
3904 ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
3905 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3906
3495 /* 3907 /*
3496 * requested block isn't allocated yet; 3908 * requested block isn't allocated yet;
3497 * we couldn't try to create block if create flag is zero 3909 * we couldn't try to create block if create flag is zero
@@ -3504,9 +3916,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3504 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3916 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
3505 goto out2; 3917 goto out2;
3506 } 3918 }
3919
3507 /* 3920 /*
3508 * Okay, we need to do block allocation. 3921 * Okay, we need to do block allocation.
3509 */ 3922 */
3923 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
3924 newex.ee_block = cpu_to_le32(map->m_lblk);
3925 cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3926
3927 /*
3928 * If we are doing bigalloc, check to see if the extent returned
3929 * by ext4_ext_find_extent() implies a cluster we can use.
3930 */
3931 if (cluster_offset && ex &&
3932 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
3933 ar.len = allocated = map->m_len;
3934 newblock = map->m_pblk;
3935 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3936 goto got_allocated_blocks;
3937 }
3510 3938
3511 /* find neighbour allocated blocks */ 3939 /* find neighbour allocated blocks */
3512 ar.lleft = map->m_lblk; 3940 ar.lleft = map->m_lblk;
@@ -3514,10 +3942,21 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3514 if (err) 3942 if (err)
3515 goto out2; 3943 goto out2;
3516 ar.lright = map->m_lblk; 3944 ar.lright = map->m_lblk;
3517 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3945 ex2 = NULL;
3946 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
3518 if (err) 3947 if (err)
3519 goto out2; 3948 goto out2;
3520 3949
3950 /* Check if the extent after searching to the right implies a
3951 * cluster we can use. */
3952 if ((sbi->s_cluster_ratio > 1) && ex2 &&
3953 get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
3954 ar.len = allocated = map->m_len;
3955 newblock = map->m_pblk;
3956 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3957 goto got_allocated_blocks;
3958 }
3959
3521 /* 3960 /*
3522 * See if request is beyond maximum number of blocks we can have in 3961 * See if request is beyond maximum number of blocks we can have in
3523 * a single extent. For an initialized extent this limit is 3962 * a single extent. For an initialized extent this limit is
@@ -3532,9 +3971,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3532 map->m_len = EXT_UNINIT_MAX_LEN; 3971 map->m_len = EXT_UNINIT_MAX_LEN;
3533 3972
3534 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3973 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
3535 newex.ee_block = cpu_to_le32(map->m_lblk);
3536 newex.ee_len = cpu_to_le16(map->m_len); 3974 newex.ee_len = cpu_to_le16(map->m_len);
3537 err = ext4_ext_check_overlap(inode, &newex, path); 3975 err = ext4_ext_check_overlap(sbi, inode, &newex, path);
3538 if (err) 3976 if (err)
3539 allocated = ext4_ext_get_actual_len(&newex); 3977 allocated = ext4_ext_get_actual_len(&newex);
3540 else 3978 else
@@ -3544,7 +3982,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3544 ar.inode = inode; 3982 ar.inode = inode;
3545 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3983 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
3546 ar.logical = map->m_lblk; 3984 ar.logical = map->m_lblk;
3547 ar.len = allocated; 3985 /*
3986 * We calculate the offset from the beginning of the cluster
3987 * for the logical block number, since when we allocate a
3988 * physical cluster, the physical block should start at the
3989 * same offset from the beginning of the cluster. This is
3990 * needed so that future calls to get_implied_cluster_alloc()
3991 * work correctly.
3992 */
3993 offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
3994 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
3995 ar.goal -= offset;
3996 ar.logical -= offset;
3548 if (S_ISREG(inode->i_mode)) 3997 if (S_ISREG(inode->i_mode))
3549 ar.flags = EXT4_MB_HINT_DATA; 3998 ar.flags = EXT4_MB_HINT_DATA;
3550 else 3999 else
@@ -3557,9 +4006,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3557 goto out2; 4006 goto out2;
3558 ext_debug("allocate new block: goal %llu, found %llu/%u\n", 4007 ext_debug("allocate new block: goal %llu, found %llu/%u\n",
3559 ar.goal, newblock, allocated); 4008 ar.goal, newblock, allocated);
4009 free_on_err = 1;
4010 allocated_clusters = ar.len;
4011 ar.len = EXT4_C2B(sbi, ar.len) - offset;
4012 if (ar.len > allocated)
4013 ar.len = allocated;
3560 4014
4015got_allocated_blocks:
3561 /* try to insert new extent into found leaf and return */ 4016 /* try to insert new extent into found leaf and return */
3562 ext4_ext_store_pblock(&newex, newblock); 4017 ext4_ext_store_pblock(&newex, newblock + offset);
3563 newex.ee_len = cpu_to_le16(ar.len); 4018 newex.ee_len = cpu_to_le16(ar.len);
3564 /* Mark uninitialized */ 4019 /* Mark uninitialized */
3565 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4020 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
@@ -3572,10 +4027,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3572 * that we need to perform conversion when IO is done. 4027 * that we need to perform conversion when IO is done.
3573 */ 4028 */
3574 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4029 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3575 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 4030 if (io)
3576 io->flag = EXT4_IO_END_UNWRITTEN; 4031 ext4_set_io_unwritten_flag(inode, io);
3577 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 4032 else
3578 } else
3579 ext4_set_inode_state(inode, 4033 ext4_set_inode_state(inode,
3580 EXT4_STATE_DIO_UNWRITTEN); 4034 EXT4_STATE_DIO_UNWRITTEN);
3581 } 4035 }
@@ -3583,11 +4037,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3583 map->m_flags |= EXT4_MAP_UNINIT; 4037 map->m_flags |= EXT4_MAP_UNINIT;
3584 } 4038 }
3585 4039
3586 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); 4040 err = 0;
4041 if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
4042 err = check_eofblocks_fl(handle, inode, map->m_lblk,
4043 path, ar.len);
3587 if (!err) 4044 if (!err)
3588 err = ext4_ext_insert_extent(handle, inode, path, 4045 err = ext4_ext_insert_extent(handle, inode, path,
3589 &newex, flags); 4046 &newex, flags);
3590 if (err) { 4047 if (err && free_on_err) {
3591 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? 4048 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
3592 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; 4049 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
3593 /* free data blocks we just allocated */ 4050 /* free data blocks we just allocated */
@@ -3610,8 +4067,82 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3610 * Update reserved blocks/metadata blocks after successful 4067 * Update reserved blocks/metadata blocks after successful
3611 * block allocation which had been deferred till now. 4068 * block allocation which had been deferred till now.
3612 */ 4069 */
3613 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 4070 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
3614 ext4_da_update_reserve_space(inode, allocated, 1); 4071 unsigned int reserved_clusters;
4072 /*
4073 * Check how many clusters we had reserved this allocated range
4074 */
4075 reserved_clusters = get_reserved_cluster_alloc(inode,
4076 map->m_lblk, allocated);
4077 if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
4078 if (reserved_clusters) {
4079 /*
4080 * We have clusters reserved for this range.
4081 * But since we are not doing actual allocation
4082 * and are simply using blocks from previously
4083 * allocated cluster, we should release the
4084 * reservation and not claim quota.
4085 */
4086 ext4_da_update_reserve_space(inode,
4087 reserved_clusters, 0);
4088 }
4089 } else {
4090 BUG_ON(allocated_clusters < reserved_clusters);
4091 /* We will claim quota for all newly allocated blocks.*/
4092 ext4_da_update_reserve_space(inode, allocated_clusters,
4093 1);
4094 if (reserved_clusters < allocated_clusters) {
4095 struct ext4_inode_info *ei = EXT4_I(inode);
4096 int reservation = allocated_clusters -
4097 reserved_clusters;
4098 /*
4099 * It seems we claimed few clusters outside of
4100 * the range of this allocation. We should give
4101 * it back to the reservation pool. This can
4102 * happen in the following case:
4103 *
4104 * * Suppose s_cluster_ratio is 4 (i.e., each
4105 * cluster has 4 blocks. Thus, the clusters
4106 * are [0-3],[4-7],[8-11]...
4107 * * First comes delayed allocation write for
4108 * logical blocks 10 & 11. Since there were no
4109 * previous delayed allocated blocks in the
4110 * range [8-11], we would reserve 1 cluster
4111 * for this write.
4112 * * Next comes write for logical blocks 3 to 8.
4113 * In this case, we will reserve 2 clusters
4114 * (for [0-3] and [4-7]; and not for [8-11] as
4115 * that range has a delayed allocated blocks.
4116 * Thus total reserved clusters now becomes 3.
4117 * * Now, during the delayed allocation writeout
4118 * time, we will first write blocks [3-8] and
4119 * allocate 3 clusters for writing these
4120 * blocks. Also, we would claim all these
4121 * three clusters above.
4122 * * Now when we come here to writeout the
4123 * blocks [10-11], we would expect to claim
4124 * the reservation of 1 cluster we had made
4125 * (and we would claim it since there are no
4126 * more delayed allocated blocks in the range
4127 * [8-11]. But our reserved cluster count had
4128 * already gone to 0.
4129 *
4130 * Thus, at the step 4 above when we determine
4131 * that there are still some unwritten delayed
4132 * allocated blocks outside of our current
4133 * block range, we should increment the
4134 * reserved clusters count so that when the
4135 * remaining blocks finally gets written, we
4136 * could claim them.
4137 */
4138 dquot_reserve_block(inode,
4139 EXT4_C2B(sbi, reservation));
4140 spin_lock(&ei->i_block_reservation_lock);
4141 ei->i_reserved_data_blocks += reservation;
4142 spin_unlock(&ei->i_block_reservation_lock);
4143 }
4144 }
4145 }
3615 4146
3616 /* 4147 /*
3617 * Cache the extent and update transaction to commit on fdatasync only 4148 * Cache the extent and update transaction to commit on fdatasync only
@@ -3634,12 +4165,12 @@ out2:
3634 ext4_ext_drop_refs(path); 4165 ext4_ext_drop_refs(path);
3635 kfree(path); 4166 kfree(path);
3636 } 4167 }
3637 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
3638 newblock, map->m_len, err ? err : allocated);
3639
3640 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? 4168 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
3641 punched_out : allocated; 4169 punched_out : allocated;
3642 4170
4171 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
4172 newblock, map->m_len, err ? err : result);
4173
3643 return err ? err : result; 4174 return err ? err : result;
3644} 4175}
3645 4176
@@ -3649,6 +4180,7 @@ void ext4_ext_truncate(struct inode *inode)
3649 struct super_block *sb = inode->i_sb; 4180 struct super_block *sb = inode->i_sb;
3650 ext4_lblk_t last_block; 4181 ext4_lblk_t last_block;
3651 handle_t *handle; 4182 handle_t *handle;
4183 loff_t page_len;
3652 int err = 0; 4184 int err = 0;
3653 4185
3654 /* 4186 /*
@@ -3665,8 +4197,16 @@ void ext4_ext_truncate(struct inode *inode)
3665 if (IS_ERR(handle)) 4197 if (IS_ERR(handle))
3666 return; 4198 return;
3667 4199
3668 if (inode->i_size & (sb->s_blocksize - 1)) 4200 if (inode->i_size % PAGE_CACHE_SIZE != 0) {
3669 ext4_block_truncate_page(handle, mapping, inode->i_size); 4201 page_len = PAGE_CACHE_SIZE -
4202 (inode->i_size & (PAGE_CACHE_SIZE - 1));
4203
4204 err = ext4_discard_partial_page_buffers(handle,
4205 mapping, inode->i_size, page_len, 0);
4206
4207 if (err)
4208 goto out_stop;
4209 }
3670 4210
3671 if (ext4_orphan_add(handle, inode)) 4211 if (ext4_orphan_add(handle, inode))
3672 goto out_stop; 4212 goto out_stop;
@@ -3760,6 +4300,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3760 int ret = 0; 4300 int ret = 0;
3761 int ret2 = 0; 4301 int ret2 = 0;
3762 int retries = 0; 4302 int retries = 0;
4303 int flags;
3763 struct ext4_map_blocks map; 4304 struct ext4_map_blocks map;
3764 unsigned int credits, blkbits = inode->i_blkbits; 4305 unsigned int credits, blkbits = inode->i_blkbits;
3765 4306
@@ -3796,6 +4337,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3796 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4337 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
3797 return ret; 4338 return ret;
3798 } 4339 }
4340 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
4341 if (mode & FALLOC_FL_KEEP_SIZE)
4342 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4343 /*
4344 * Don't normalize the request if it can fit in one extent so
4345 * that it doesn't get unnecessarily split into multiple
4346 * extents.
4347 */
4348 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4349 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
3799retry: 4350retry:
3800 while (ret >= 0 && ret < max_blocks) { 4351 while (ret >= 0 && ret < max_blocks) {
3801 map.m_lblk = map.m_lblk + ret; 4352 map.m_lblk = map.m_lblk + ret;
@@ -3805,9 +4356,7 @@ retry:
3805 ret = PTR_ERR(handle); 4356 ret = PTR_ERR(handle);
3806 break; 4357 break;
3807 } 4358 }
3808 ret = ext4_map_blocks(handle, inode, &map, 4359 ret = ext4_map_blocks(handle, inode, &map, flags);
3809 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
3810 EXT4_GET_BLOCKS_NO_NORMALIZE);
3811 if (ret <= 0) { 4360 if (ret <= 0) {
3812#ifdef EXT4FS_DEBUG 4361#ifdef EXT4FS_DEBUG
3813 WARN_ON(ret <= 0); 4362 WARN_ON(ret <= 0);
@@ -4102,7 +4651,6 @@ found_delayed_extent:
4102 return EXT_BREAK; 4651 return EXT_BREAK;
4103 return EXT_CONTINUE; 4652 return EXT_CONTINUE;
4104} 4653}
4105
4106/* fiemap flags we can handle specified here */ 4654/* fiemap flags we can handle specified here */
4107#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 4655#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
4108 4656
@@ -4162,17 +4710,28 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4162 struct address_space *mapping = inode->i_mapping; 4710 struct address_space *mapping = inode->i_mapping;
4163 struct ext4_map_blocks map; 4711 struct ext4_map_blocks map;
4164 handle_t *handle; 4712 handle_t *handle;
4165 loff_t first_block_offset, last_block_offset, block_len; 4713 loff_t first_page, last_page, page_len;
4166 loff_t first_page, last_page, first_page_offset, last_page_offset; 4714 loff_t first_page_offset, last_page_offset;
4167 int ret, credits, blocks_released, err = 0; 4715 int ret, credits, blocks_released, err = 0;
4168 4716
4717 /* No need to punch hole beyond i_size */
4718 if (offset >= inode->i_size)
4719 return 0;
4720
4721 /*
4722 * If the hole extends beyond i_size, set the hole
4723 * to end after the page that contains i_size
4724 */
4725 if (offset + length > inode->i_size) {
4726 length = inode->i_size +
4727 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
4728 offset;
4729 }
4730
4169 first_block = (offset + sb->s_blocksize - 1) >> 4731 first_block = (offset + sb->s_blocksize - 1) >>
4170 EXT4_BLOCK_SIZE_BITS(sb); 4732 EXT4_BLOCK_SIZE_BITS(sb);
4171 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4733 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4172 4734
4173 first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
4174 last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
4175
4176 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 4735 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
4177 last_page = (offset + length) >> PAGE_CACHE_SHIFT; 4736 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
4178 4737
@@ -4185,11 +4744,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4185 */ 4744 */
4186 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 4745 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4187 err = filemap_write_and_wait_range(mapping, 4746 err = filemap_write_and_wait_range(mapping,
4188 first_page_offset == 0 ? 0 : first_page_offset-1, 4747 offset, offset + length - 1);
4189 last_page_offset);
4190 4748
4191 if (err) 4749 if (err)
4192 return err; 4750 return err;
4193 } 4751 }
4194 4752
4195 /* Now release the pages */ 4753 /* Now release the pages */
@@ -4211,24 +4769,64 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4211 goto out; 4769 goto out;
4212 4770
4213 /* 4771 /*
4214 * Now we need to zero out the un block aligned data. 4772 * Now we need to zero out the non-page-aligned data in the
4215 * If the file is smaller than a block, just 4773 * pages at the start and tail of the hole, and unmap the buffer
4216 * zero out the middle 4774 * heads for the block aligned regions of the page that were
4775 * completely zeroed.
4217 */ 4776 */
4218 if (first_block > last_block) 4777 if (first_page > last_page) {
4219 ext4_block_zero_page_range(handle, mapping, offset, length); 4778 /*
4220 else { 4779 * If the file space being truncated is contained within a page
4221 /* zero out the head of the hole before the first block */ 4780 * just zero out and unmap the middle of that page
4222 block_len = first_block_offset - offset; 4781 */
4223 if (block_len > 0) 4782 err = ext4_discard_partial_page_buffers(handle,
4224 ext4_block_zero_page_range(handle, mapping, 4783 mapping, offset, length, 0);
4225 offset, block_len); 4784
4226 4785 if (err)
4227 /* zero out the tail of the hole after the last block */ 4786 goto out;
4228 block_len = offset + length - last_block_offset; 4787 } else {
4229 if (block_len > 0) { 4788 /*
4230 ext4_block_zero_page_range(handle, mapping, 4789 * zero out and unmap the partial page that contains
4231 last_block_offset, block_len); 4790 * the start of the hole
4791 */
4792 page_len = first_page_offset - offset;
4793 if (page_len > 0) {
4794 err = ext4_discard_partial_page_buffers(handle, mapping,
4795 offset, page_len, 0);
4796 if (err)
4797 goto out;
4798 }
4799
4800 /*
4801 * zero out and unmap the partial page that contains
4802 * the end of the hole
4803 */
4804 page_len = offset + length - last_page_offset;
4805 if (page_len > 0) {
4806 err = ext4_discard_partial_page_buffers(handle, mapping,
4807 last_page_offset, page_len, 0);
4808 if (err)
4809 goto out;
4810 }
4811 }
4812
4813
4814 /*
4815 * If i_size is contained in the last page, we need to
4816 * unmap and zero the partial page after i_size
4817 */
4818 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
4819 inode->i_size % PAGE_CACHE_SIZE != 0) {
4820
4821 page_len = PAGE_CACHE_SIZE -
4822 (inode->i_size & (PAGE_CACHE_SIZE - 1));
4823
4824 if (page_len > 0) {
4825 err = ext4_discard_partial_page_buffers(handle,
4826 mapping, inode->i_size, page_len, 0);
4827
4828 if (err)
4829 goto out;
4232 } 4830 }
4233 } 4831 }
4234 4832
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b9548f477bb8..cb70f1812a70 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
181 path.dentry = mnt->mnt_root; 181 path.dentry = mnt->mnt_root;
182 cp = d_path(&path, buf, sizeof(buf)); 182 cp = d_path(&path, buf, sizeof(buf));
183 if (!IS_ERR(cp)) { 183 if (!IS_ERR(cp)) {
184 memcpy(sbi->s_es->s_last_mounted, cp, 184 strlcpy(sbi->s_es->s_last_mounted, cp,
185 sizeof(sbi->s_es->s_last_mounted)); 185 sizeof(sbi->s_es->s_last_mounted));
186 ext4_mark_super_dirty(sb); 186 ext4_mark_super_dirty(sb);
187 } 187 }
188 } 188 }
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 036f78f7a1ef..00a2cb753efd 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
75 * to written. 75 * to written.
76 * The function return the number of pending IOs on success. 76 * The function return the number of pending IOs on success.
77 */ 77 */
78extern int ext4_flush_completed_IO(struct inode *inode) 78int ext4_flush_completed_IO(struct inode *inode)
79{ 79{
80 ext4_io_end_t *io; 80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode); 81 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode)
83 int ret = 0; 83 int ret = 0;
84 int ret2 = 0; 84 int ret2 = 0;
85 85
86 if (list_empty(&ei->i_completed_io_list))
87 return ret;
88
89 dump_completed_IO(inode); 86 dump_completed_IO(inode);
90 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 87 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
91 while (!list_empty(&ei->i_completed_io_list)){ 88 while (!list_empty(&ei->i_completed_io_list)){
92 io = list_entry(ei->i_completed_io_list.next, 89 io = list_entry(ei->i_completed_io_list.next,
93 ext4_io_end_t, list); 90 ext4_io_end_t, list);
91 list_del_init(&io->list);
94 /* 92 /*
95 * Calling ext4_end_io_nolock() to convert completed 93 * Calling ext4_end_io_nolock() to convert completed
96 * IO to written. 94 * IO to written.
@@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode)
107 */ 105 */
108 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 106 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
109 ret = ext4_end_io_nolock(io); 107 ret = ext4_end_io_nolock(io);
110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 if (ret < 0) 108 if (ret < 0)
112 ret2 = ret; 109 ret2 = ret;
113 else 110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
114 list_del_init(&io->list);
115 } 111 }
116 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 112 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
117 return (ret2 < 0) ? ret2 : 0; 113 return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9c63f273b550..00beb4f9cc4f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
78 * allocation, essentially implementing a per-group read-only flag. */ 78 * allocation, essentially implementing a per-group read-only flag. */
79 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 79 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
80 ext4_error(sb, "Checksum bad for group %u", block_group); 80 ext4_error(sb, "Checksum bad for group %u", block_group);
81 ext4_free_blks_set(sb, gdp, 0); 81 ext4_free_group_clusters_set(sb, gdp, 0);
82 ext4_free_inodes_set(sb, gdp, 0); 82 ext4_free_inodes_set(sb, gdp, 0);
83 ext4_itable_unused_set(sb, gdp, 0); 83 ext4_itable_unused_set(sb, gdp, 0);
84 memset(bh->b_data, 0xff, sb->s_blocksize); 84 memset(bh->b_data, 0xff, sb->s_blocksize);
@@ -293,121 +293,9 @@ error_return:
293 ext4_std_error(sb, fatal); 293 ext4_std_error(sb, fatal);
294} 294}
295 295
296/*
297 * There are two policies for allocating an inode. If the new inode is
298 * a directory, then a forward search is made for a block group with both
299 * free space and a low directory-to-inode ratio; if that fails, then of
300 * the groups with above-average free space, that group with the fewest
301 * directories already is chosen.
302 *
303 * For other inodes, search forward from the parent directory\'s block
304 * group to find a free inode.
305 */
306static int find_group_dir(struct super_block *sb, struct inode *parent,
307 ext4_group_t *best_group)
308{
309 ext4_group_t ngroups = ext4_get_groups_count(sb);
310 unsigned int freei, avefreei;
311 struct ext4_group_desc *desc, *best_desc = NULL;
312 ext4_group_t group;
313 int ret = -1;
314
315 freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
316 avefreei = freei / ngroups;
317
318 for (group = 0; group < ngroups; group++) {
319 desc = ext4_get_group_desc(sb, group, NULL);
320 if (!desc || !ext4_free_inodes_count(sb, desc))
321 continue;
322 if (ext4_free_inodes_count(sb, desc) < avefreei)
323 continue;
324 if (!best_desc ||
325 (ext4_free_blks_count(sb, desc) >
326 ext4_free_blks_count(sb, best_desc))) {
327 *best_group = group;
328 best_desc = desc;
329 ret = 0;
330 }
331 }
332 return ret;
333}
334
335#define free_block_ratio 10
336
337static int find_group_flex(struct super_block *sb, struct inode *parent,
338 ext4_group_t *best_group)
339{
340 struct ext4_sb_info *sbi = EXT4_SB(sb);
341 struct ext4_group_desc *desc;
342 struct flex_groups *flex_group = sbi->s_flex_groups;
343 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
344 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
345 ext4_group_t ngroups = ext4_get_groups_count(sb);
346 int flex_size = ext4_flex_bg_size(sbi);
347 ext4_group_t best_flex = parent_fbg_group;
348 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
349 int flexbg_free_blocks;
350 int flex_freeb_ratio;
351 ext4_group_t n_fbg_groups;
352 ext4_group_t i;
353
354 n_fbg_groups = (ngroups + flex_size - 1) >>
355 sbi->s_log_groups_per_flex;
356
357find_close_to_parent:
358 flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
359 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
360 if (atomic_read(&flex_group[best_flex].free_inodes) &&
361 flex_freeb_ratio > free_block_ratio)
362 goto found_flexbg;
363
364 if (best_flex && best_flex == parent_fbg_group) {
365 best_flex--;
366 goto find_close_to_parent;
367 }
368
369 for (i = 0; i < n_fbg_groups; i++) {
370 if (i == parent_fbg_group || i == parent_fbg_group - 1)
371 continue;
372
373 flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
374 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
375
376 if (flex_freeb_ratio > free_block_ratio &&
377 (atomic_read(&flex_group[i].free_inodes))) {
378 best_flex = i;
379 goto found_flexbg;
380 }
381
382 if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
383 ((atomic_read(&flex_group[i].free_blocks) >
384 atomic_read(&flex_group[best_flex].free_blocks)) &&
385 atomic_read(&flex_group[i].free_inodes)))
386 best_flex = i;
387 }
388
389 if (!atomic_read(&flex_group[best_flex].free_inodes) ||
390 !atomic_read(&flex_group[best_flex].free_blocks))
391 return -1;
392
393found_flexbg:
394 for (i = best_flex * flex_size; i < ngroups &&
395 i < (best_flex + 1) * flex_size; i++) {
396 desc = ext4_get_group_desc(sb, i, NULL);
397 if (ext4_free_inodes_count(sb, desc)) {
398 *best_group = i;
399 goto out;
400 }
401 }
402
403 return -1;
404out:
405 return 0;
406}
407
408struct orlov_stats { 296struct orlov_stats {
409 __u32 free_inodes; 297 __u32 free_inodes;
410 __u32 free_blocks; 298 __u32 free_clusters;
411 __u32 used_dirs; 299 __u32 used_dirs;
412}; 300};
413 301
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
424 312
425 if (flex_size > 1) { 313 if (flex_size > 1) {
426 stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 314 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
427 stats->free_blocks = atomic_read(&flex_group[g].free_blocks); 315 stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
428 stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 316 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
429 return; 317 return;
430 } 318 }
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
432 desc = ext4_get_group_desc(sb, g, NULL); 320 desc = ext4_get_group_desc(sb, g, NULL);
433 if (desc) { 321 if (desc) {
434 stats->free_inodes = ext4_free_inodes_count(sb, desc); 322 stats->free_inodes = ext4_free_inodes_count(sb, desc);
435 stats->free_blocks = ext4_free_blks_count(sb, desc); 323 stats->free_clusters = ext4_free_group_clusters(sb, desc);
436 stats->used_dirs = ext4_used_dirs_count(sb, desc); 324 stats->used_dirs = ext4_used_dirs_count(sb, desc);
437 } else { 325 } else {
438 stats->free_inodes = 0; 326 stats->free_inodes = 0;
439 stats->free_blocks = 0; 327 stats->free_clusters = 0;
440 stats->used_dirs = 0; 328 stats->used_dirs = 0;
441 } 329 }
442} 330}
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
471 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
472 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
473 unsigned int freei, avefreei; 361 unsigned int freei, avefreei;
474 ext4_fsblk_t freeb, avefreeb; 362 ext4_fsblk_t freeb, avefreec;
475 unsigned int ndirs; 363 unsigned int ndirs;
476 int max_dirs, min_inodes; 364 int max_dirs, min_inodes;
477 ext4_grpblk_t min_blocks; 365 ext4_grpblk_t min_clusters;
478 ext4_group_t i, grp, g, ngroups; 366 ext4_group_t i, grp, g, ngroups;
479 struct ext4_group_desc *desc; 367 struct ext4_group_desc *desc;
480 struct orlov_stats stats; 368 struct orlov_stats stats;
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
490 378
491 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); 379 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
492 avefreei = freei / ngroups; 380 avefreei = freei / ngroups;
493 freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 381 freeb = EXT4_C2B(sbi,
494 avefreeb = freeb; 382 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
495 do_div(avefreeb, ngroups); 383 avefreec = freeb;
384 do_div(avefreec, ngroups);
496 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); 385 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
497 386
498 if (S_ISDIR(mode) && 387 if (S_ISDIR(mode) &&
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
518 continue; 407 continue;
519 if (stats.free_inodes < avefreei) 408 if (stats.free_inodes < avefreei)
520 continue; 409 continue;
521 if (stats.free_blocks < avefreeb) 410 if (stats.free_clusters < avefreec)
522 continue; 411 continue;
523 grp = g; 412 grp = g;
524 ret = 0; 413 ret = 0;
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
556 min_inodes = avefreei - inodes_per_group*flex_size / 4; 445 min_inodes = avefreei - inodes_per_group*flex_size / 4;
557 if (min_inodes < 1) 446 if (min_inodes < 1)
558 min_inodes = 1; 447 min_inodes = 1;
559 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; 448 min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
560 449
561 /* 450 /*
562 * Start looking in the flex group where we last allocated an 451 * Start looking in the flex group where we last allocated an
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
575 continue; 464 continue;
576 if (stats.free_inodes < min_inodes) 465 if (stats.free_inodes < min_inodes)
577 continue; 466 continue;
578 if (stats.free_blocks < min_blocks) 467 if (stats.free_clusters < min_clusters)
579 continue; 468 continue;
580 goto found_flex_bg; 469 goto found_flex_bg;
581 } 470 }
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
659 *group = parent_group; 548 *group = parent_group;
660 desc = ext4_get_group_desc(sb, *group, NULL); 549 desc = ext4_get_group_desc(sb, *group, NULL);
661 if (desc && ext4_free_inodes_count(sb, desc) && 550 if (desc && ext4_free_inodes_count(sb, desc) &&
662 ext4_free_blks_count(sb, desc)) 551 ext4_free_group_clusters(sb, desc))
663 return 0; 552 return 0;
664 553
665 /* 554 /*
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
683 *group -= ngroups; 572 *group -= ngroups;
684 desc = ext4_get_group_desc(sb, *group, NULL); 573 desc = ext4_get_group_desc(sb, *group, NULL);
685 if (desc && ext4_free_inodes_count(sb, desc) && 574 if (desc && ext4_free_inodes_count(sb, desc) &&
686 ext4_free_blks_count(sb, desc)) 575 ext4_free_group_clusters(sb, desc))
687 return 0; 576 return 0;
688 } 577 }
689 578
@@ -802,7 +691,7 @@ err_ret:
802 * group to find a free inode. 691 * group to find a free inode.
803 */ 692 */
804struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, 693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
805 const struct qstr *qstr, __u32 goal) 694 const struct qstr *qstr, __u32 goal, uid_t *owner)
806{ 695{
807 struct super_block *sb; 696 struct super_block *sb;
808 struct buffer_head *inode_bitmap_bh = NULL; 697 struct buffer_head *inode_bitmap_bh = NULL;
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
816 int ret2, err = 0; 705 int ret2, err = 0;
817 struct inode *ret; 706 struct inode *ret;
818 ext4_group_t i; 707 ext4_group_t i;
819 int free = 0;
820 static int once = 1;
821 ext4_group_t flex_group; 708 ext4_group_t flex_group;
822 709
823 /* Cannot create files in a deleted directory */ 710 /* Cannot create files in a deleted directory */
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
843 goto got_group; 730 goto got_group;
844 } 731 }
845 732
846 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 733 if (S_ISDIR(mode))
847 ret2 = find_group_flex(sb, dir, &group); 734 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
848 if (ret2 == -1) { 735 else
849 ret2 = find_group_other(sb, dir, &group, mode);
850 if (ret2 == 0 && once) {
851 once = 0;
852 printk(KERN_NOTICE "ext4: find_group_flex "
853 "failed, fallback succeeded dir %lu\n",
854 dir->i_ino);
855 }
856 }
857 goto got_group;
858 }
859
860 if (S_ISDIR(mode)) {
861 if (test_opt(sb, OLDALLOC))
862 ret2 = find_group_dir(sb, dir, &group);
863 else
864 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
865 } else
866 ret2 = find_group_other(sb, dir, &group, mode); 736 ret2 = find_group_other(sb, dir, &group, mode);
867 737
868got_group: 738got_group:
@@ -950,26 +820,21 @@ got:
950 goto fail; 820 goto fail;
951 } 821 }
952 822
953 free = 0; 823 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
954 ext4_lock_group(sb, group); 824 err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
825 brelse(block_bitmap_bh);
826
955 /* recheck and clear flag under lock if we still need to */ 827 /* recheck and clear flag under lock if we still need to */
828 ext4_lock_group(sb, group);
956 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 829 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
957 free = ext4_free_blocks_after_init(sb, group, gdp);
958 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 830 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
959 ext4_free_blks_set(sb, gdp, free); 831 ext4_free_group_clusters_set(sb, gdp,
832 ext4_free_clusters_after_init(sb, group, gdp));
960 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 833 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
961 gdp); 834 gdp);
962 } 835 }
963 ext4_unlock_group(sb, group); 836 ext4_unlock_group(sb, group);
964 837
965 /* Don't need to dirty bitmap block if we didn't change it */
966 if (free) {
967 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
968 err = ext4_handle_dirty_metadata(handle,
969 NULL, block_bitmap_bh);
970 }
971
972 brelse(block_bitmap_bh);
973 if (err) 838 if (err)
974 goto fail; 839 goto fail;
975 } 840 }
@@ -987,8 +852,11 @@ got:
987 flex_group = ext4_flex_group(sbi, group); 852 flex_group = ext4_flex_group(sbi, group);
988 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); 853 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
989 } 854 }
990 855 if (owner) {
991 if (test_opt(sb, GRPID)) { 856 inode->i_mode = mode;
857 inode->i_uid = owner[0];
858 inode->i_gid = owner[1];
859 } else if (test_opt(sb, GRPID)) {
992 inode->i_mode = mode; 860 inode->i_mode = mode;
993 inode->i_uid = current_fsuid(); 861 inode->i_uid = current_fsuid();
994 inode->i_gid = dir->i_gid; 862 inode->i_gid = dir->i_gid;
@@ -1005,11 +873,7 @@ got:
1005 ei->i_dir_start_lookup = 0; 873 ei->i_dir_start_lookup = 0;
1006 ei->i_disksize = 0; 874 ei->i_disksize = 0;
1007 875
1008 /* 876 /* Don't inherit extent flag from directory, amongst others. */
1009 * Don't inherit extent flag from directory, amongst others. We set
1010 * extent flag on newly created directory and file only if -o extent
1011 * mount option is specified
1012 */
1013 ei->i_flags = 877 ei->i_flags =
1014 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); 878 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
1015 ei->i_file_acl = 0; 879 ei->i_file_acl = 0;
@@ -1084,7 +948,7 @@ fail_free_drop:
1084fail_drop: 948fail_drop:
1085 dquot_drop(inode); 949 dquot_drop(inode);
1086 inode->i_flags |= S_NOQUOTA; 950 inode->i_flags |= S_NOQUOTA;
1087 inode->i_nlink = 0; 951 clear_nlink(inode);
1088 unlock_new_inode(inode); 952 unlock_new_inode(inode);
1089 iput(inode); 953 iput(inode);
1090 brelse(inode_bitmap_bh); 954 brelse(inode_bitmap_bh);
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1235 * inode allocation from the current group, so we take alloc_sem lock, to 1099 * inode allocation from the current group, so we take alloc_sem lock, to
1236 * block ext4_claim_inode until we are finished. 1100 * block ext4_claim_inode until we are finished.
1237 */ 1101 */
1238extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1102int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1239 int barrier) 1103 int barrier)
1240{ 1104{
1241 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 1105 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 0962642119c0..3cfc73fbca8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
699 /* 699 /*
700 * Okay, we need to do block allocation. 700 * Okay, we need to do block allocation.
701 */ 701 */
702 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
703 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
704 EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
705 "non-extent mapped inodes with bigalloc");
706 return -ENOSPC;
707 }
708
702 goal = ext4_find_goal(inode, map->m_lblk, partial); 709 goal = ext4_find_goal(inode, map->m_lblk, partial);
703 710
704 /* the number of blocks need to allocate for [d,t]indirect blocks */ 711 /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode)
1343 __le32 nr = 0; 1350 __le32 nr = 0;
1344 int n = 0; 1351 int n = 0;
1345 ext4_lblk_t last_block, max_block; 1352 ext4_lblk_t last_block, max_block;
1353 loff_t page_len;
1346 unsigned blocksize = inode->i_sb->s_blocksize; 1354 unsigned blocksize = inode->i_sb->s_blocksize;
1355 int err;
1347 1356
1348 handle = start_transaction(inode); 1357 handle = start_transaction(inode);
1349 if (IS_ERR(handle)) 1358 if (IS_ERR(handle))
@@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode)
1354 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) 1363 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1355 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 1364 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1356 1365
1357 if (inode->i_size & (blocksize - 1)) 1366 if (inode->i_size % PAGE_CACHE_SIZE != 0) {
1358 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 1367 page_len = PAGE_CACHE_SIZE -
1368 (inode->i_size & (PAGE_CACHE_SIZE - 1));
1369
1370 err = ext4_discard_partial_page_buffers(handle,
1371 mapping, inode->i_size, page_len, 0);
1372
1373 if (err)
1359 goto out_stop; 1374 goto out_stop;
1375 }
1360 1376
1361 if (last_block != max_block) { 1377 if (last_block != max_block) {
1362 n = ext4_block_to_path(inode, last_block, offsets, NULL); 1378 n = ext4_block_to_path(inode, last_block, offsets, NULL);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0defe0bfe019..cc5a6da030a1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@
42#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
43#include "xattr.h" 43#include "xattr.h"
44#include "acl.h" 44#include "acl.h"
45#include "ext4_extents.h"
46#include "truncate.h" 45#include "truncate.h"
47 46
48#include <trace/events/ext4.h> 47#include <trace/events/ext4.h>
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
268 struct ext4_inode_info *ei = EXT4_I(inode); 267 struct ext4_inode_info *ei = EXT4_I(inode);
269 268
270 spin_lock(&ei->i_block_reservation_lock); 269 spin_lock(&ei->i_block_reservation_lock);
271 trace_ext4_da_update_reserve_space(inode, used); 270 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
272 if (unlikely(used > ei->i_reserved_data_blocks)) { 271 if (unlikely(used > ei->i_reserved_data_blocks)) {
273 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 272 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
274 "with only %d reserved data blocks\n", 273 "with only %d reserved data blocks\n",
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
281 /* Update per-inode reservations */ 280 /* Update per-inode reservations */
282 ei->i_reserved_data_blocks -= used; 281 ei->i_reserved_data_blocks -= used;
283 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 282 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
284 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 283 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
285 used + ei->i_allocated_meta_blocks); 284 used + ei->i_allocated_meta_blocks);
286 ei->i_allocated_meta_blocks = 0; 285 ei->i_allocated_meta_blocks = 0;
287 286
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
291 * only when we have written all of the delayed 290 * only when we have written all of the delayed
292 * allocation blocks. 291 * allocation blocks.
293 */ 292 */
294 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 293 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
295 ei->i_reserved_meta_blocks); 294 ei->i_reserved_meta_blocks);
296 ei->i_reserved_meta_blocks = 0; 295 ei->i_reserved_meta_blocks = 0;
297 ei->i_da_metadata_calc_len = 0; 296 ei->i_da_metadata_calc_len = 0;
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
300 299
301 /* Update quota subsystem for data blocks */ 300 /* Update quota subsystem for data blocks */
302 if (quota_claim) 301 if (quota_claim)
303 dquot_claim_block(inode, used); 302 dquot_claim_block(inode, EXT4_C2B(sbi, used));
304 else { 303 else {
305 /* 304 /*
306 * We did fallocate with an offset that is already delayed 305 * We did fallocate with an offset that is already delayed
307 * allocated. So on delayed allocated writeback we should 306 * allocated. So on delayed allocated writeback we should
308 * not re-claim the quota for fallocated blocks. 307 * not re-claim the quota for fallocated blocks.
309 */ 308 */
310 dquot_release_reservation_block(inode, used); 309 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
311 } 310 }
312 311
313 /* 312 /*
@@ -399,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
399} 398}
400 399
401/* 400/*
401 * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
402 */
403static void set_buffers_da_mapped(struct inode *inode,
404 struct ext4_map_blocks *map)
405{
406 struct address_space *mapping = inode->i_mapping;
407 struct pagevec pvec;
408 int i, nr_pages;
409 pgoff_t index, end;
410
411 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
412 end = (map->m_lblk + map->m_len - 1) >>
413 (PAGE_CACHE_SHIFT - inode->i_blkbits);
414
415 pagevec_init(&pvec, 0);
416 while (index <= end) {
417 nr_pages = pagevec_lookup(&pvec, mapping, index,
418 min(end - index + 1,
419 (pgoff_t)PAGEVEC_SIZE));
420 if (nr_pages == 0)
421 break;
422 for (i = 0; i < nr_pages; i++) {
423 struct page *page = pvec.pages[i];
424 struct buffer_head *bh, *head;
425
426 if (unlikely(page->mapping != mapping) ||
427 !PageDirty(page))
428 break;
429
430 if (page_has_buffers(page)) {
431 bh = head = page_buffers(page);
432 do {
433 set_buffer_da_mapped(bh);
434 bh = bh->b_this_page;
435 } while (bh != head);
436 }
437 index++;
438 }
439 pagevec_release(&pvec);
440 }
441}
442
443/*
402 * The ext4_map_blocks() function tries to look up the requested blocks, 444 * The ext4_map_blocks() function tries to look up the requested blocks,
403 * and returns if the blocks are already mapped. 445 * and returns if the blocks are already mapped.
404 * 446 *
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
416 * the buffer head is mapped. 458 * the buffer head is mapped.
417 * 459 *
418 * It returns 0 if plain look up failed (blocks have not been allocated), in 460 * It returns 0 if plain look up failed (blocks have not been allocated), in
419 * that casem, buffer head is unmapped 461 * that case, buffer head is unmapped
420 * 462 *
421 * It returns the error in case of allocation failure. 463 * It returns the error in case of allocation failure.
422 */ 464 */
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
435 */ 477 */
436 down_read((&EXT4_I(inode)->i_data_sem)); 478 down_read((&EXT4_I(inode)->i_data_sem));
437 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 479 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
438 retval = ext4_ext_map_blocks(handle, inode, map, 0); 480 retval = ext4_ext_map_blocks(handle, inode, map, flags &
481 EXT4_GET_BLOCKS_KEEP_SIZE);
439 } else { 482 } else {
440 retval = ext4_ind_map_blocks(handle, inode, map, 0); 483 retval = ext4_ind_map_blocks(handle, inode, map, flags &
484 EXT4_GET_BLOCKS_KEEP_SIZE);
441 } 485 }
442 up_read((&EXT4_I(inode)->i_data_sem)); 486 up_read((&EXT4_I(inode)->i_data_sem));
443 487
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
455 * Returns if the blocks have already allocated 499 * Returns if the blocks have already allocated
456 * 500 *
457 * Note that if blocks have been preallocated 501 * Note that if blocks have been preallocated
458 * ext4_ext_get_block() returns th create = 0 502 * ext4_ext_get_block() returns the create = 0
459 * with buffer head unmapped. 503 * with buffer head unmapped.
460 */ 504 */
461 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 505 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
517 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 561 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
518 ext4_da_update_reserve_space(inode, retval, 1); 562 ext4_da_update_reserve_space(inode, retval, 1);
519 } 563 }
520 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 564 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
521 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 565 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
522 566
567 /* If we have successfully mapped the delayed allocated blocks,
568 * set the BH_Da_Mapped bit on them. Its important to do this
569 * under the protection of i_data_sem.
570 */
571 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
572 set_buffers_da_mapped(inode, map);
573 }
574
523 up_write((&EXT4_I(inode)->i_data_sem)); 575 up_write((&EXT4_I(inode)->i_data_sem));
524 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 576 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
525 int ret = check_block_validity(inode, map); 577 int ret = check_block_validity(inode, map);
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file,
909 ext4_orphan_add(handle, inode); 961 ext4_orphan_add(handle, inode);
910 if (ret2 < 0) 962 if (ret2 < 0)
911 ret = ret2; 963 ret = ret2;
964 } else {
965 unlock_page(page);
966 page_cache_release(page);
912 } 967 }
968
913 ret2 = ext4_journal_stop(handle); 969 ret2 = ext4_journal_stop(handle);
914 if (!ret) 970 if (!ret)
915 ret = ret2; 971 ret = ret2;
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file,
1037} 1093}
1038 1094
1039/* 1095/*
1040 * Reserve a single block located at lblock 1096 * Reserve a single cluster located at lblock
1041 */ 1097 */
1042static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1098static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1043{ 1099{
1044 int retries = 0; 1100 int retries = 0;
1045 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1101 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1046 struct ext4_inode_info *ei = EXT4_I(inode); 1102 struct ext4_inode_info *ei = EXT4_I(inode);
1047 unsigned long md_needed; 1103 unsigned int md_needed;
1048 int ret; 1104 int ret;
1049 1105
1050 /* 1106 /*
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1054 */ 1110 */
1055repeat: 1111repeat:
1056 spin_lock(&ei->i_block_reservation_lock); 1112 spin_lock(&ei->i_block_reservation_lock);
1057 md_needed = ext4_calc_metadata_amount(inode, lblock); 1113 md_needed = EXT4_NUM_B2C(sbi,
1114 ext4_calc_metadata_amount(inode, lblock));
1058 trace_ext4_da_reserve_space(inode, md_needed); 1115 trace_ext4_da_reserve_space(inode, md_needed);
1059 spin_unlock(&ei->i_block_reservation_lock); 1116 spin_unlock(&ei->i_block_reservation_lock);
1060 1117
@@ -1063,15 +1120,15 @@ repeat:
1063 * us from metadata over-estimation, though we may go over by 1120 * us from metadata over-estimation, though we may go over by
1064 * a small amount in the end. Here we just reserve for data. 1121 * a small amount in the end. Here we just reserve for data.
1065 */ 1122 */
1066 ret = dquot_reserve_block(inode, 1); 1123 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1067 if (ret) 1124 if (ret)
1068 return ret; 1125 return ret;
1069 /* 1126 /*
1070 * We do still charge estimated metadata to the sb though; 1127 * We do still charge estimated metadata to the sb though;
1071 * we cannot afford to run out of free blocks. 1128 * we cannot afford to run out of free blocks.
1072 */ 1129 */
1073 if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { 1130 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1074 dquot_release_reservation_block(inode, 1); 1131 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1075 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1132 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1076 yield(); 1133 yield();
1077 goto repeat; 1134 goto repeat;
@@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1118 * We can release all of the reserved metadata blocks 1175 * We can release all of the reserved metadata blocks
1119 * only when we have written all of the delayed 1176 * only when we have written all of the delayed
1120 * allocation blocks. 1177 * allocation blocks.
1178 * Note that in case of bigalloc, i_reserved_meta_blocks,
1179 * i_reserved_data_blocks, etc. refer to number of clusters.
1121 */ 1180 */
1122 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 1181 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1123 ei->i_reserved_meta_blocks); 1182 ei->i_reserved_meta_blocks);
1124 ei->i_reserved_meta_blocks = 0; 1183 ei->i_reserved_meta_blocks = 0;
1125 ei->i_da_metadata_calc_len = 0; 1184 ei->i_da_metadata_calc_len = 0;
1126 } 1185 }
1127 1186
1128 /* update fs dirty data blocks counter */ 1187 /* update fs dirty data blocks counter */
1129 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); 1188 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1130 1189
1131 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1190 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1132 1191
1133 dquot_release_reservation_block(inode, to_free); 1192 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1134} 1193}
1135 1194
1136static void ext4_da_page_release_reservation(struct page *page, 1195static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page,
1139 int to_release = 0; 1198 int to_release = 0;
1140 struct buffer_head *head, *bh; 1199 struct buffer_head *head, *bh;
1141 unsigned int curr_off = 0; 1200 unsigned int curr_off = 0;
1201 struct inode *inode = page->mapping->host;
1202 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1203 int num_clusters;
1142 1204
1143 head = page_buffers(page); 1205 head = page_buffers(page);
1144 bh = head; 1206 bh = head;
@@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page,
1148 if ((offset <= curr_off) && (buffer_delay(bh))) { 1210 if ((offset <= curr_off) && (buffer_delay(bh))) {
1149 to_release++; 1211 to_release++;
1150 clear_buffer_delay(bh); 1212 clear_buffer_delay(bh);
1213 clear_buffer_da_mapped(bh);
1151 } 1214 }
1152 curr_off = next_off; 1215 curr_off = next_off;
1153 } while ((bh = bh->b_this_page) != head); 1216 } while ((bh = bh->b_this_page) != head);
1154 ext4_da_release_space(page->mapping->host, to_release); 1217
1218 /* If we have released all the blocks belonging to a cluster, then we
1219 * need to release the reserved space for that cluster. */
1220 num_clusters = EXT4_NUM_B2C(sbi, to_release);
1221 while (num_clusters > 0) {
1222 ext4_fsblk_t lblk;
1223 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1224 ((num_clusters - 1) << sbi->s_cluster_bits);
1225 if (sbi->s_cluster_ratio == 1 ||
1226 !ext4_find_delalloc_cluster(inode, lblk, 1))
1227 ext4_da_release_space(inode, 1);
1228
1229 num_clusters--;
1230 }
1155} 1231}
1156 1232
1157/* 1233/*
@@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1253 clear_buffer_delay(bh); 1329 clear_buffer_delay(bh);
1254 bh->b_blocknr = pblock; 1330 bh->b_blocknr = pblock;
1255 } 1331 }
1332 if (buffer_da_mapped(bh))
1333 clear_buffer_da_mapped(bh);
1256 if (buffer_unwritten(bh) || 1334 if (buffer_unwritten(bh) ||
1257 buffer_mapped(bh)) 1335 buffer_mapped(bh))
1258 BUG_ON(bh->b_blocknr != pblock); 1336 BUG_ON(bh->b_blocknr != pblock);
@@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode)
1346{ 1424{
1347 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1425 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1348 printk(KERN_CRIT "Total free blocks count %lld\n", 1426 printk(KERN_CRIT "Total free blocks count %lld\n",
1349 ext4_count_free_blocks(inode->i_sb)); 1427 EXT4_C2B(EXT4_SB(inode->i_sb),
1428 ext4_count_free_clusters(inode->i_sb)));
1350 printk(KERN_CRIT "Free/Dirty block details\n"); 1429 printk(KERN_CRIT "Free/Dirty block details\n");
1351 printk(KERN_CRIT "free_blocks=%lld\n", 1430 printk(KERN_CRIT "free_blocks=%lld\n",
1352 (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); 1431 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1432 percpu_counter_sum(&sbi->s_freeclusters_counter)));
1353 printk(KERN_CRIT "dirty_blocks=%lld\n", 1433 printk(KERN_CRIT "dirty_blocks=%lld\n",
1354 (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 1434 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1435 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
1355 printk(KERN_CRIT "Block reservation details\n"); 1436 printk(KERN_CRIT "Block reservation details\n");
1356 printk(KERN_CRIT "i_reserved_data_blocks=%u\n", 1437 printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
1357 EXT4_I(inode)->i_reserved_data_blocks); 1438 EXT4_I(inode)->i_reserved_data_blocks);
@@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1430 if (err == -EAGAIN) 1511 if (err == -EAGAIN)
1431 goto submit_io; 1512 goto submit_io;
1432 1513
1433 if (err == -ENOSPC && 1514 if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
1434 ext4_count_free_blocks(sb)) {
1435 mpd->retval = err; 1515 mpd->retval = err;
1436 goto submit_io; 1516 goto submit_io;
1437 } 1517 }
@@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1471 1551
1472 for (i = 0; i < map.m_len; i++) 1552 for (i = 0; i < map.m_len; i++)
1473 unmap_underlying_metadata(bdev, map.m_pblk + i); 1553 unmap_underlying_metadata(bdev, map.m_pblk + i);
1474 }
1475 1554
1476 if (ext4_should_order_data(mpd->inode)) { 1555 if (ext4_should_order_data(mpd->inode)) {
1477 err = ext4_jbd2_file_inode(handle, mpd->inode); 1556 err = ext4_jbd2_file_inode(handle, mpd->inode);
1478 if (err) 1557 if (err) {
1479 /* This only happens if the journal is aborted */ 1558 /* Only if the journal is aborted */
1480 return; 1559 mpd->retval = err;
1560 goto submit_io;
1561 }
1562 }
1481 } 1563 }
1482 1564
1483 /* 1565 /*
@@ -1584,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1584} 1666}
1585 1667
1586/* 1668/*
1669 * This function is grabs code from the very beginning of
1670 * ext4_map_blocks, but assumes that the caller is from delayed write
1671 * time. This function looks up the requested blocks and sets the
1672 * buffer delay bit under the protection of i_data_sem.
1673 */
1674static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1675 struct ext4_map_blocks *map,
1676 struct buffer_head *bh)
1677{
1678 int retval;
1679 sector_t invalid_block = ~((sector_t) 0xffff);
1680
1681 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1682 invalid_block = ~0;
1683
1684 map->m_flags = 0;
1685 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1686 "logical block %lu\n", inode->i_ino, map->m_len,
1687 (unsigned long) map->m_lblk);
1688 /*
1689 * Try to see if we can get the block without requesting a new
1690 * file system block.
1691 */
1692 down_read((&EXT4_I(inode)->i_data_sem));
1693 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1694 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
1695 else
1696 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
1697
1698 if (retval == 0) {
1699 /*
1700 * XXX: __block_prepare_write() unmaps passed block,
1701 * is it OK?
1702 */
1703 /* If the block was allocated from previously allocated cluster,
1704 * then we dont need to reserve it again. */
1705 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1706 retval = ext4_da_reserve_space(inode, iblock);
1707 if (retval)
1708 /* not enough space to reserve */
1709 goto out_unlock;
1710 }
1711
1712 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1713 * and it should not appear on the bh->b_state.
1714 */
1715 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
1716
1717 map_bh(bh, inode->i_sb, invalid_block);
1718 set_buffer_new(bh);
1719 set_buffer_delay(bh);
1720 }
1721
1722out_unlock:
1723 up_read((&EXT4_I(inode)->i_data_sem));
1724
1725 return retval;
1726}
1727
1728/*
1587 * This is a special get_blocks_t callback which is used by 1729 * This is a special get_blocks_t callback which is used by
1588 * ext4_da_write_begin(). It will either return mapped block or 1730 * ext4_da_write_begin(). It will either return mapped block or
1589 * reserve space for a single block. 1731 * reserve space for a single block.
@@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1600{ 1742{
1601 struct ext4_map_blocks map; 1743 struct ext4_map_blocks map;
1602 int ret = 0; 1744 int ret = 0;
1603 sector_t invalid_block = ~((sector_t) 0xffff);
1604
1605 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1606 invalid_block = ~0;
1607 1745
1608 BUG_ON(create == 0); 1746 BUG_ON(create == 0);
1609 BUG_ON(bh->b_size != inode->i_sb->s_blocksize); 1747 BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1616 * preallocated blocks are unmapped but should treated 1754 * preallocated blocks are unmapped but should treated
1617 * the same as allocated blocks. 1755 * the same as allocated blocks.
1618 */ 1756 */
1619 ret = ext4_map_blocks(NULL, inode, &map, 0); 1757 ret = ext4_da_map_blocks(inode, iblock, &map, bh);
1620 if (ret < 0) 1758 if (ret <= 0)
1621 return ret; 1759 return ret;
1622 if (ret == 0) {
1623 if (buffer_delay(bh))
1624 return 0; /* Not sure this could or should happen */
1625 /*
1626 * XXX: __block_write_begin() unmaps passed block, is it OK?
1627 */
1628 ret = ext4_da_reserve_space(inode, iblock);
1629 if (ret)
1630 /* not enough space to reserve */
1631 return ret;
1632
1633 map_bh(bh, inode->i_sb, invalid_block);
1634 set_buffer_new(bh);
1635 set_buffer_delay(bh);
1636 return 0;
1637 }
1638 1760
1639 map_bh(bh, inode->i_sb, map.m_pblk); 1761 map_bh(bh, inode->i_sb, map.m_pblk);
1640 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 1762 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@ -2050,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2050 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2172 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2051 pgoff_t done_index = 0; 2173 pgoff_t done_index = 0;
2052 pgoff_t end; 2174 pgoff_t end;
2175 struct blk_plug plug;
2053 2176
2054 trace_ext4_da_writepages(inode, wbc); 2177 trace_ext4_da_writepages(inode, wbc);
2055 2178
@@ -2128,6 +2251,7 @@ retry:
2128 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2251 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2129 tag_pages_for_writeback(mapping, index, end); 2252 tag_pages_for_writeback(mapping, index, end);
2130 2253
2254 blk_start_plug(&plug);
2131 while (!ret && wbc->nr_to_write > 0) { 2255 while (!ret && wbc->nr_to_write > 0) {
2132 2256
2133 /* 2257 /*
@@ -2178,11 +2302,12 @@ retry:
2178 ret = 0; 2302 ret = 0;
2179 } else if (ret == MPAGE_DA_EXTENT_TAIL) { 2303 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2180 /* 2304 /*
2181 * got one extent now try with 2305 * Got one extent now try with rest of the pages.
2182 * rest of the pages 2306 * If mpd.retval is set -EIO, journal is aborted.
2307 * So we don't need to write any more.
2183 */ 2308 */
2184 pages_written += mpd.pages_written; 2309 pages_written += mpd.pages_written;
2185 ret = 0; 2310 ret = mpd.retval;
2186 io_done = 1; 2311 io_done = 1;
2187 } else if (wbc->nr_to_write) 2312 } else if (wbc->nr_to_write)
2188 /* 2313 /*
@@ -2192,6 +2317,7 @@ retry:
2192 */ 2317 */
2193 break; 2318 break;
2194 } 2319 }
2320 blk_finish_plug(&plug);
2195 if (!io_done && !cycled) { 2321 if (!io_done && !cycled) {
2196 cycled = 1; 2322 cycled = 1;
2197 index = 0; 2323 index = 0;
@@ -2230,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb)
2230 * Delalloc need an accurate free block accounting. So switch 2356 * Delalloc need an accurate free block accounting. So switch
2231 * to non delalloc when we are near to error range. 2357 * to non delalloc when we are near to error range.
2232 */ 2358 */
2233 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 2359 free_blocks = EXT4_C2B(sbi,
2234 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); 2360 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
2361 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2235 if (2 * free_blocks < 3 * dirty_blocks || 2362 if (2 * free_blocks < 3 * dirty_blocks ||
2236 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 2363 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
2237 /* 2364 /*
2238 * free block count is less than 150% of dirty blocks 2365 * free block count is less than 150% of dirty blocks
2239 * or free blocks is less than watermark 2366 * or free blocks is less than watermark
@@ -2259,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2259 pgoff_t index; 2386 pgoff_t index;
2260 struct inode *inode = mapping->host; 2387 struct inode *inode = mapping->host;
2261 handle_t *handle; 2388 handle_t *handle;
2389 loff_t page_len;
2262 2390
2263 index = pos >> PAGE_CACHE_SHIFT; 2391 index = pos >> PAGE_CACHE_SHIFT;
2264 2392
@@ -2305,6 +2433,13 @@ retry:
2305 */ 2433 */
2306 if (pos + len > inode->i_size) 2434 if (pos + len > inode->i_size)
2307 ext4_truncate_failed_write(inode); 2435 ext4_truncate_failed_write(inode);
2436 } else {
2437 page_len = pos & (PAGE_CACHE_SIZE - 1);
2438 if (page_len > 0) {
2439 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2440 inode, page, pos - page_len, page_len,
2441 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2442 }
2308 } 2443 }
2309 2444
2310 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2445 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2347,6 +2482,7 @@ static int ext4_da_write_end(struct file *file,
2347 loff_t new_i_size; 2482 loff_t new_i_size;
2348 unsigned long start, end; 2483 unsigned long start, end;
2349 int write_mode = (int)(unsigned long)fsdata; 2484 int write_mode = (int)(unsigned long)fsdata;
2485 loff_t page_len;
2350 2486
2351 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2487 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2352 if (ext4_should_order_data(inode)) { 2488 if (ext4_should_order_data(inode)) {
@@ -2395,6 +2531,16 @@ static int ext4_da_write_end(struct file *file,
2395 } 2531 }
2396 ret2 = generic_write_end(file, mapping, pos, len, copied, 2532 ret2 = generic_write_end(file, mapping, pos, len, copied,
2397 page, fsdata); 2533 page, fsdata);
2534
2535 page_len = PAGE_CACHE_SIZE -
2536 ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
2537
2538 if (page_len > 0) {
2539 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2540 inode, page, pos + copied - 1, page_len,
2541 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2542 }
2543
2398 copied = ret2; 2544 copied = ret2;
2399 if (ret2 < 0) 2545 if (ret2 < 0)
2400 ret = ret2; 2546 ret = ret2;
@@ -2689,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2689 * but being more careful is always safe for the future change. 2835 * but being more careful is always safe for the future change.
2690 */ 2836 */
2691 inode = io_end->inode; 2837 inode = io_end->inode;
2692 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2838 ext4_set_io_unwritten_flag(inode, io_end);
2693 io_end->flag |= EXT4_IO_END_UNWRITTEN;
2694 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
2695 }
2696 2839
2697 /* Add the io_end to per-inode completed io list*/ 2840 /* Add the io_end to per-inode completed io list*/
2698 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 2841 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -2858,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
2858 struct inode *inode = file->f_mapping->host; 3001 struct inode *inode = file->f_mapping->host;
2859 ssize_t ret; 3002 ssize_t ret;
2860 3003
3004 /*
3005 * If we are doing data journalling we don't support O_DIRECT
3006 */
3007 if (ext4_should_journal_data(inode))
3008 return 0;
3009
2861 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); 3010 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
2862 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3011 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
2863 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3012 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -2927,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = {
2927 .bmap = ext4_bmap, 3076 .bmap = ext4_bmap,
2928 .invalidatepage = ext4_invalidatepage, 3077 .invalidatepage = ext4_invalidatepage,
2929 .releasepage = ext4_releasepage, 3078 .releasepage = ext4_releasepage,
3079 .direct_IO = ext4_direct_IO,
2930 .is_partially_uptodate = block_is_partially_uptodate, 3080 .is_partially_uptodate = block_is_partially_uptodate,
2931 .error_remove_page = generic_error_remove_page, 3081 .error_remove_page = generic_error_remove_page,
2932}; 3082};
@@ -2963,6 +3113,227 @@ void ext4_set_aops(struct inode *inode)
2963 inode->i_mapping->a_ops = &ext4_journalled_aops; 3113 inode->i_mapping->a_ops = &ext4_journalled_aops;
2964} 3114}
2965 3115
3116
3117/*
3118 * ext4_discard_partial_page_buffers()
3119 * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
3120 * This function finds and locks the page containing the offset
3121 * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
3122 * Calling functions that already have the page locked should call
3123 * ext4_discard_partial_page_buffers_no_lock directly.
3124 */
3125int ext4_discard_partial_page_buffers(handle_t *handle,
3126 struct address_space *mapping, loff_t from,
3127 loff_t length, int flags)
3128{
3129 struct inode *inode = mapping->host;
3130 struct page *page;
3131 int err = 0;
3132
3133 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3134 mapping_gfp_mask(mapping) & ~__GFP_FS);
3135 if (!page)
3136 return -ENOMEM;
3137
3138 err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
3139 from, length, flags);
3140
3141 unlock_page(page);
3142 page_cache_release(page);
3143 return err;
3144}
3145
3146/*
3147 * ext4_discard_partial_page_buffers_no_lock()
3148 * Zeros a page range of length 'length' starting from offset 'from'.
3149 * Buffer heads that correspond to the block aligned regions of the
3150 * zeroed range will be unmapped. Unblock aligned regions
3151 * will have the corresponding buffer head mapped if needed so that
3152 * that region of the page can be updated with the partial zero out.
3153 *
3154 * This function assumes that the page has already been locked. The
3155 * The range to be discarded must be contained with in the given page.
3156 * If the specified range exceeds the end of the page it will be shortened
3157 * to the end of the page that corresponds to 'from'. This function is
3158 * appropriate for updating a page and it buffer heads to be unmapped and
3159 * zeroed for blocks that have been either released, or are going to be
3160 * released.
3161 *
3162 * handle: The journal handle
3163 * inode: The files inode
3164 * page: A locked page that contains the offset "from"
3165 * from: The starting byte offset (from the begining of the file)
3166 * to begin discarding
3167 * len: The length of bytes to discard
3168 * flags: Optional flags that may be used:
3169 *
3170 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
3171 * Only zero the regions of the page whose buffer heads
3172 * have already been unmapped. This flag is appropriate
3173 * for updateing the contents of a page whose blocks may
3174 * have already been released, and we only want to zero
3175 * out the regions that correspond to those released blocks.
3176 *
3177 * Returns zero on sucess or negative on failure.
3178 */
3179int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3180 struct inode *inode, struct page *page, loff_t from,
3181 loff_t length, int flags)
3182{
3183 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3184 unsigned int offset = from & (PAGE_CACHE_SIZE-1);
3185 unsigned int blocksize, max, pos;
3186 ext4_lblk_t iblock;
3187 struct buffer_head *bh;
3188 int err = 0;
3189
3190 blocksize = inode->i_sb->s_blocksize;
3191 max = PAGE_CACHE_SIZE - offset;
3192
3193 if (index != page->index)
3194 return -EINVAL;
3195
3196 /*
3197 * correct length if it does not fall between
3198 * 'from' and the end of the page
3199 */
3200 if (length > max || length < 0)
3201 length = max;
3202
3203 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3204
3205 if (!page_has_buffers(page)) {
3206 /*
3207 * If the range to be discarded covers a partial block
3208 * we need to get the page buffers. This is because
3209 * partial blocks cannot be released and the page needs
3210 * to be updated with the contents of the block before
3211 * we write the zeros on top of it.
3212 */
3213 if ((from & (blocksize - 1)) ||
3214 ((from + length) & (blocksize - 1))) {
3215 create_empty_buffers(page, blocksize, 0);
3216 } else {
3217 /*
3218 * If there are no partial blocks,
3219 * there is nothing to update,
3220 * so we can return now
3221 */
3222 return 0;
3223 }
3224 }
3225
3226 /* Find the buffer that contains "offset" */
3227 bh = page_buffers(page);
3228 pos = blocksize;
3229 while (offset >= pos) {
3230 bh = bh->b_this_page;
3231 iblock++;
3232 pos += blocksize;
3233 }
3234
3235 pos = offset;
3236 while (pos < offset + length) {
3237 unsigned int end_of_block, range_to_discard;
3238
3239 err = 0;
3240
3241 /* The length of space left to zero and unmap */
3242 range_to_discard = offset + length - pos;
3243
3244 /* The length of space until the end of the block */
3245 end_of_block = blocksize - (pos & (blocksize-1));
3246
3247 /*
3248 * Do not unmap or zero past end of block
3249 * for this buffer head
3250 */
3251 if (range_to_discard > end_of_block)
3252 range_to_discard = end_of_block;
3253
3254
3255 /*
3256 * Skip this buffer head if we are only zeroing unampped
3257 * regions of the page
3258 */
3259 if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
3260 buffer_mapped(bh))
3261 goto next;
3262
3263 /* If the range is block aligned, unmap */
3264 if (range_to_discard == blocksize) {
3265 clear_buffer_dirty(bh);
3266 bh->b_bdev = NULL;
3267 clear_buffer_mapped(bh);
3268 clear_buffer_req(bh);
3269 clear_buffer_new(bh);
3270 clear_buffer_delay(bh);
3271 clear_buffer_unwritten(bh);
3272 clear_buffer_uptodate(bh);
3273 zero_user(page, pos, range_to_discard);
3274 BUFFER_TRACE(bh, "Buffer discarded");
3275 goto next;
3276 }
3277
3278 /*
3279 * If this block is not completely contained in the range
3280 * to be discarded, then it is not going to be released. Because
3281 * we need to keep this block, we need to make sure this part
3282 * of the page is uptodate before we modify it by writeing
3283 * partial zeros on it.
3284 */
3285 if (!buffer_mapped(bh)) {
3286 /*
3287 * Buffer head must be mapped before we can read
3288 * from the block
3289 */
3290 BUFFER_TRACE(bh, "unmapped");
3291 ext4_get_block(inode, iblock, bh, 0);
3292 /* unmapped? It's a hole - nothing to do */
3293 if (!buffer_mapped(bh)) {
3294 BUFFER_TRACE(bh, "still unmapped");
3295 goto next;
3296 }
3297 }
3298
3299 /* Ok, it's mapped. Make sure it's up-to-date */
3300 if (PageUptodate(page))
3301 set_buffer_uptodate(bh);
3302
3303 if (!buffer_uptodate(bh)) {
3304 err = -EIO;
3305 ll_rw_block(READ, 1, &bh);
3306 wait_on_buffer(bh);
3307 /* Uhhuh. Read error. Complain and punt.*/
3308 if (!buffer_uptodate(bh))
3309 goto next;
3310 }
3311
3312 if (ext4_should_journal_data(inode)) {
3313 BUFFER_TRACE(bh, "get write access");
3314 err = ext4_journal_get_write_access(handle, bh);
3315 if (err)
3316 goto next;
3317 }
3318
3319 zero_user(page, pos, range_to_discard);
3320
3321 err = 0;
3322 if (ext4_should_journal_data(inode)) {
3323 err = ext4_handle_dirty_metadata(handle, inode, bh);
3324 } else
3325 mark_buffer_dirty(bh);
3326
3327 BUFFER_TRACE(bh, "Partial buffer zeroed");
3328next:
3329 bh = bh->b_this_page;
3330 iblock++;
3331 pos += range_to_discard;
3332 }
3333
3334 return err;
3335}
3336
2966/* 3337/*
2967 * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3338 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
2968 * up to the end of the block which corresponds to `from'. 3339 * up to the end of the block which corresponds to `from'.
@@ -3005,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle,
3005 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3376 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3006 mapping_gfp_mask(mapping) & ~__GFP_FS); 3377 mapping_gfp_mask(mapping) & ~__GFP_FS);
3007 if (!page) 3378 if (!page)
3008 return -EINVAL; 3379 return -ENOMEM;
3009 3380
3010 blocksize = inode->i_sb->s_blocksize; 3381 blocksize = inode->i_sb->s_blocksize;
3011 max = blocksize - (offset & (blocksize - 1)); 3382 max = blocksize - (offset & (blocksize - 1));
@@ -3074,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle,
3074 err = 0; 3445 err = 0;
3075 if (ext4_should_journal_data(inode)) { 3446 if (ext4_should_journal_data(inode)) {
3076 err = ext4_handle_dirty_metadata(handle, inode, bh); 3447 err = ext4_handle_dirty_metadata(handle, inode, bh);
3077 } else { 3448 } else
3078 if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
3079 err = ext4_jbd2_file_inode(handle, inode);
3080 mark_buffer_dirty(bh); 3449 mark_buffer_dirty(bh);
3081 }
3082 3450
3083unlock: 3451unlock:
3084 unlock_page(page); 3452 unlock_page(page);
@@ -3119,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3119 return -ENOTSUPP; 3487 return -ENOTSUPP;
3120 } 3488 }
3121 3489
3490 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3491 /* TODO: Add support for bigalloc file systems */
3492 return -ENOTSUPP;
3493 }
3494
3122 return ext4_ext_punch_hole(file, offset, length); 3495 return ext4_ext_punch_hole(file, offset, length);
3123} 3496}
3124 3497
@@ -3418,7 +3791,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3418 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 3791 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3419 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 3792 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3420 } 3793 }
3421 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 3794 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
3422 3795
3423 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 3796 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
3424 ei->i_dir_start_lookup = 0; 3797 ei->i_dir_start_lookup = 0;
@@ -4420,6 +4793,7 @@ retry_alloc:
4420 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { 4793 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
4421 unlock_page(page); 4794 unlock_page(page);
4422 ret = VM_FAULT_SIGBUS; 4795 ret = VM_FAULT_SIGBUS;
4796 ext4_journal_stop(handle);
4423 goto out; 4797 goto out;
4424 } 4798 }
4425 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 4799 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f18bfe37aff8..a56796814d6a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -21,6 +21,7 @@
21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22{ 22{
23 struct inode *inode = filp->f_dentry->d_inode; 23 struct inode *inode = filp->f_dentry->d_inode;
24 struct super_block *sb = inode->i_sb;
24 struct ext4_inode_info *ei = EXT4_I(inode); 25 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 26 unsigned int flags;
26 27
@@ -173,33 +174,8 @@ setversion_out:
173 mnt_drop_write(filp->f_path.mnt); 174 mnt_drop_write(filp->f_path.mnt);
174 return err; 175 return err;
175 } 176 }
176#ifdef CONFIG_JBD2_DEBUG
177 case EXT4_IOC_WAIT_FOR_READONLY:
178 /*
179 * This is racy - by the time we're woken up and running,
180 * the superblock could be released. And the module could
181 * have been unloaded. So sue me.
182 *
183 * Returns 1 if it slept, else zero.
184 */
185 {
186 struct super_block *sb = inode->i_sb;
187 DECLARE_WAITQUEUE(wait, current);
188 int ret = 0;
189
190 set_current_state(TASK_INTERRUPTIBLE);
191 add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
192 if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
193 schedule();
194 ret = 1;
195 }
196 remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
197 return ret;
198 }
199#endif
200 case EXT4_IOC_GROUP_EXTEND: { 177 case EXT4_IOC_GROUP_EXTEND: {
201 ext4_fsblk_t n_blocks_count; 178 ext4_fsblk_t n_blocks_count;
202 struct super_block *sb = inode->i_sb;
203 int err, err2=0; 179 int err, err2=0;
204 180
205 err = ext4_resize_begin(sb); 181 err = ext4_resize_begin(sb);
@@ -209,6 +185,13 @@ setversion_out:
209 if (get_user(n_blocks_count, (__u32 __user *)arg)) 185 if (get_user(n_blocks_count, (__u32 __user *)arg))
210 return -EFAULT; 186 return -EFAULT;
211 187
188 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
189 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
190 ext4_msg(sb, KERN_ERR,
191 "Online resizing not supported with bigalloc");
192 return -EOPNOTSUPP;
193 }
194
212 err = mnt_want_write(filp->f_path.mnt); 195 err = mnt_want_write(filp->f_path.mnt);
213 if (err) 196 if (err)
214 return err; 197 return err;
@@ -250,6 +233,13 @@ setversion_out:
250 goto mext_out; 233 goto mext_out;
251 } 234 }
252 235
236 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
237 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
238 ext4_msg(sb, KERN_ERR,
239 "Online defrag not supported with bigalloc");
240 return -EOPNOTSUPP;
241 }
242
253 err = mnt_want_write(filp->f_path.mnt); 243 err = mnt_want_write(filp->f_path.mnt);
254 if (err) 244 if (err)
255 goto mext_out; 245 goto mext_out;
@@ -270,7 +260,6 @@ mext_out:
270 260
271 case EXT4_IOC_GROUP_ADD: { 261 case EXT4_IOC_GROUP_ADD: {
272 struct ext4_new_group_data input; 262 struct ext4_new_group_data input;
273 struct super_block *sb = inode->i_sb;
274 int err, err2=0; 263 int err, err2=0;
275 264
276 err = ext4_resize_begin(sb); 265 err = ext4_resize_begin(sb);
@@ -281,6 +270,13 @@ mext_out:
281 sizeof(input))) 270 sizeof(input)))
282 return -EFAULT; 271 return -EFAULT;
283 272
273 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
274 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
275 ext4_msg(sb, KERN_ERR,
276 "Online resizing not supported with bigalloc");
277 return -EOPNOTSUPP;
278 }
279
284 err = mnt_want_write(filp->f_path.mnt); 280 err = mnt_want_write(filp->f_path.mnt);
285 if (err) 281 if (err)
286 return err; 282 return err;
@@ -337,7 +333,6 @@ mext_out:
337 333
338 case FITRIM: 334 case FITRIM:
339 { 335 {
340 struct super_block *sb = inode->i_sb;
341 struct request_queue *q = bdev_get_queue(sb->s_bdev); 336 struct request_queue *q = bdev_get_queue(sb->s_bdev);
342 struct fstrim_range range; 337 struct fstrim_range range;
343 int ret = 0; 338 int ret = 0;
@@ -348,7 +343,14 @@ mext_out:
348 if (!blk_queue_discard(q)) 343 if (!blk_queue_discard(q))
349 return -EOPNOTSUPP; 344 return -EOPNOTSUPP;
350 345
351 if (copy_from_user(&range, (struct fstrim_range *)arg, 346 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
347 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
348 ext4_msg(sb, KERN_ERR,
349 "FITRIM not supported with bigalloc");
350 return -EOPNOTSUPP;
351 }
352
353 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
352 sizeof(range))) 354 sizeof(range)))
353 return -EFAULT; 355 return -EFAULT;
354 356
@@ -358,7 +360,7 @@ mext_out:
358 if (ret < 0) 360 if (ret < 0)
359 return ret; 361 return ret;
360 362
361 if (copy_to_user((struct fstrim_range *)arg, &range, 363 if (copy_to_user((struct fstrim_range __user *)arg, &range,
362 sizeof(range))) 364 sizeof(range)))
363 return -EFAULT; 365 return -EFAULT;
364 366
@@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
396 case EXT4_IOC32_SETVERSION_OLD: 398 case EXT4_IOC32_SETVERSION_OLD:
397 cmd = EXT4_IOC_SETVERSION_OLD; 399 cmd = EXT4_IOC_SETVERSION_OLD;
398 break; 400 break;
399#ifdef CONFIG_JBD2_DEBUG
400 case EXT4_IOC32_WAIT_FOR_READONLY:
401 cmd = EXT4_IOC_WAIT_FOR_READONLY;
402 break;
403#endif
404 case EXT4_IOC32_GETRSVSZ: 401 case EXT4_IOC32_GETRSVSZ:
405 cmd = EXT4_IOC_GETRSVSZ; 402 cmd = EXT4_IOC_GETRSVSZ;
406 break; 403 break;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 17a5a57c415a..e2d8be8f28bf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -70,8 +70,8 @@
70 * 70 *
71 * pa_lstart -> the logical start block for this prealloc space 71 * pa_lstart -> the logical start block for this prealloc space
72 * pa_pstart -> the physical start block for this prealloc space 72 * pa_pstart -> the physical start block for this prealloc space
73 * pa_len -> length for this prealloc space 73 * pa_len -> length for this prealloc space (in clusters)
74 * pa_free -> free space available in this prealloc space 74 * pa_free -> free space available in this prealloc space (in clusters)
75 * 75 *
76 * The inode preallocation space is used looking at the _logical_ start 76 * The inode preallocation space is used looking at the _logical_ start
77 * block. If only the logical file block falls within the range of prealloc 77 * block. If only the logical file block falls within the range of prealloc
@@ -126,7 +126,8 @@
126 * list. In case of inode preallocation we follow a list of heuristics 126 * list. In case of inode preallocation we follow a list of heuristics
127 * based on file size. This can be found in ext4_mb_normalize_request. If 127 * based on file size. This can be found in ext4_mb_normalize_request. If
128 * we are doing a group prealloc we try to normalize the request to 128 * we are doing a group prealloc we try to normalize the request to
129 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is 129 * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
130 * dependent on the cluster size; for non-bigalloc file systems, it is
130 * 512 blocks. This can be tuned via 131 * 512 blocks. This can be tuned via
131 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in 132 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
132 * terms of number of blocks. If we have mounted the file system with -O 133 * terms of number of blocks. If we have mounted the file system with -O
@@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
459 ext4_fsblk_t blocknr; 460 ext4_fsblk_t blocknr;
460 461
461 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 462 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
462 blocknr += first + i; 463 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
463 ext4_grp_locked_error(sb, e4b->bd_group, 464 ext4_grp_locked_error(sb, e4b->bd_group,
464 inode ? inode->i_ino : 0, 465 inode ? inode->i_ino : 0,
465 blocknr, 466 blocknr,
@@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
580 continue; 581 continue;
581 } 582 }
582 583
583 /* both bits in buddy2 must be 0 */ 584 /* both bits in buddy2 must be 1 */
584 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); 585 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
585 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); 586 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
586 587
@@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
653 ext4_grpblk_t chunk; 654 ext4_grpblk_t chunk;
654 unsigned short border; 655 unsigned short border;
655 656
656 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); 657 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
657 658
658 border = 2 << sb->s_blocksize_bits; 659 border = 2 << sb->s_blocksize_bits;
659 660
@@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
705 void *buddy, void *bitmap, ext4_group_t group) 706 void *buddy, void *bitmap, ext4_group_t group)
706{ 707{
707 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 708 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
708 ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); 709 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
709 ext4_grpblk_t i = 0; 710 ext4_grpblk_t i = 0;
710 ext4_grpblk_t first; 711 ext4_grpblk_t first;
711 ext4_grpblk_t len; 712 ext4_grpblk_t len;
@@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
734 735
735 if (free != grp->bb_free) { 736 if (free != grp->bb_free) {
736 ext4_grp_locked_error(sb, group, 0, 0, 737 ext4_grp_locked_error(sb, group, 0, 0,
737 "%u blocks in bitmap, %u in gd", 738 "%u clusters in bitmap, %u in gd",
738 free, grp->bb_free); 739 free, grp->bb_free);
739 /* 740 /*
740 * If we intent to continue, we consider group descritor 741 * If we intent to continue, we consider group descritor
@@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1339 ext4_fsblk_t blocknr; 1340 ext4_fsblk_t blocknr;
1340 1341
1341 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1342 blocknr += block; 1343 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1343 ext4_grp_locked_error(sb, e4b->bd_group, 1344 ext4_grp_locked_error(sb, e4b->bd_group,
1344 inode ? inode->i_ino : 0, 1345 inode ? inode->i_ino : 0,
1345 blocknr, 1346 blocknr,
@@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1390{ 1391{
1391 int next = block; 1392 int next = block;
1392 int max; 1393 int max;
1393 int ord;
1394 void *buddy; 1394 void *buddy;
1395 1395
1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); 1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
1433 break; 1433 break;
1434 1434
1435 ord = mb_find_order_for_block(e4b, next); 1435 order = mb_find_order_for_block(e4b, next);
1436 1436
1437 order = ord;
1438 block = next >> order; 1437 block = next >> order;
1439 ex->fe_len += 1 << order; 1438 ex->fe_len += 1 << order;
1440 } 1439 }
@@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1624 struct ext4_free_extent *gex = &ac->ac_g_ex; 1623 struct ext4_free_extent *gex = &ac->ac_g_ex;
1625 1624
1626 BUG_ON(ex->fe_len <= 0); 1625 BUG_ON(ex->fe_len <= 0);
1627 BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1626 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1628 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1627 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1629 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); 1628 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1630 1629
1631 ac->ac_found++; 1630 ac->ac_found++;
@@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1823 1822
1824 while (free && ac->ac_status == AC_STATUS_CONTINUE) { 1823 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1825 i = mb_find_next_zero_bit(bitmap, 1824 i = mb_find_next_zero_bit(bitmap,
1826 EXT4_BLOCKS_PER_GROUP(sb), i); 1825 EXT4_CLUSTERS_PER_GROUP(sb), i);
1827 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { 1826 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1828 /* 1827 /*
1829 * IF we have corrupt bitmap, we won't find any 1828 * IF we have corrupt bitmap, we won't find any
1830 * free blocks even though group info says we 1829 * free blocks even though group info says we
1831 * we have free blocks 1830 * we have free blocks
1832 */ 1831 */
1833 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1832 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1834 "%d free blocks as per " 1833 "%d free clusters as per "
1835 "group info. But bitmap says 0", 1834 "group info. But bitmap says 0",
1836 free); 1835 free);
1837 break; 1836 break;
@@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1841 BUG_ON(ex.fe_len <= 0); 1840 BUG_ON(ex.fe_len <= 0);
1842 if (free < ex.fe_len) { 1841 if (free < ex.fe_len) {
1843 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1842 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1844 "%d free blocks as per " 1843 "%d free clusters as per "
1845 "group info. But got %d blocks", 1844 "group info. But got %d blocks",
1846 free, ex.fe_len); 1845 free, ex.fe_len);
1847 /* 1846 /*
@@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1887 do_div(a, sbi->s_stripe); 1886 do_div(a, sbi->s_stripe);
1888 i = (a * sbi->s_stripe) - first_group_block; 1887 i = (a * sbi->s_stripe) - first_group_block;
1889 1888
1890 while (i < EXT4_BLOCKS_PER_GROUP(sb)) { 1889 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1891 if (!mb_test_bit(i, bitmap)) { 1890 if (!mb_test_bit(i, bitmap)) {
1892 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); 1891 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1893 if (max >= sbi->s_stripe) { 1892 if (max >= sbi->s_stripe) {
@@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2252 */ 2251 */
2253 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2252 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2254 meta_group_info[i]->bb_free = 2253 meta_group_info[i]->bb_free =
2255 ext4_free_blocks_after_init(sb, group, desc); 2254 ext4_free_clusters_after_init(sb, group, desc);
2256 } else { 2255 } else {
2257 meta_group_info[i]->bb_free = 2256 meta_group_info[i]->bb_free =
2258 ext4_free_blks_count(sb, desc); 2257 ext4_free_group_clusters(sb, desc);
2259 } 2258 }
2260 2259
2261 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2260 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2473 sbi->s_mb_stats = MB_DEFAULT_STATS; 2472 sbi->s_mb_stats = MB_DEFAULT_STATS;
2474 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 2473 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2475 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 2474 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2476 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2475 /*
2476 * The default group preallocation is 512, which for 4k block
2477 * sizes translates to 2 megabytes. However for bigalloc file
2478 * systems, this is probably too big (i.e, if the cluster size
2479 * is 1 megabyte, then group preallocation size becomes half a
2480 * gigabyte!). As a default, we will keep a two megabyte
2481 * group pralloc size for cluster sizes up to 64k, and after
2482 * that, we will force a minimum group preallocation size of
2483 * 32 clusters. This translates to 8 megs when the cluster
2484 * size is 256k, and 32 megs when the cluster size is 1 meg,
2485 * which seems reasonable as a default.
2486 */
2487 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2488 sbi->s_cluster_bits, 32);
2477 /* 2489 /*
2478 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc 2490 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
2479 * to the lowest multiple of s_stripe which is bigger than 2491 * to the lowest multiple of s_stripe which is bigger than
@@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2490 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2502 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2491 if (sbi->s_locality_groups == NULL) { 2503 if (sbi->s_locality_groups == NULL) {
2492 ret = -ENOMEM; 2504 ret = -ENOMEM;
2493 goto out; 2505 goto out_free_groupinfo_slab;
2494 } 2506 }
2495 for_each_possible_cpu(i) { 2507 for_each_possible_cpu(i) {
2496 struct ext4_locality_group *lg; 2508 struct ext4_locality_group *lg;
@@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2503 2515
2504 /* init file for buddy data */ 2516 /* init file for buddy data */
2505 ret = ext4_mb_init_backend(sb); 2517 ret = ext4_mb_init_backend(sb);
2506 if (ret != 0) { 2518 if (ret != 0)
2507 goto out; 2519 goto out_free_locality_groups;
2508 }
2509 2520
2510 if (sbi->s_proc) 2521 if (sbi->s_proc)
2511 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2513 2524
2514 if (sbi->s_journal) 2525 if (sbi->s_journal)
2515 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2526 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2527
2528 return 0;
2529
2530out_free_locality_groups:
2531 free_percpu(sbi->s_locality_groups);
2532 sbi->s_locality_groups = NULL;
2533out_free_groupinfo_slab:
2534 ext4_groupinfo_destroy_slabs();
2516out: 2535out:
2517 if (ret) { 2536 kfree(sbi->s_mb_offsets);
2518 kfree(sbi->s_mb_offsets); 2537 sbi->s_mb_offsets = NULL;
2519 kfree(sbi->s_mb_maxs); 2538 kfree(sbi->s_mb_maxs);
2520 } 2539 sbi->s_mb_maxs = NULL;
2521 return ret; 2540 return ret;
2522} 2541}
2523 2542
@@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb)
2602} 2621}
2603 2622
2604static inline int ext4_issue_discard(struct super_block *sb, 2623static inline int ext4_issue_discard(struct super_block *sb,
2605 ext4_group_t block_group, ext4_grpblk_t block, int count) 2624 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2606{ 2625{
2607 ext4_fsblk_t discard_block; 2626 ext4_fsblk_t discard_block;
2608 2627
2609 discard_block = block + ext4_group_first_block_no(sb, block_group); 2628 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2629 ext4_group_first_block_no(sb, block_group));
2630 count = EXT4_C2B(EXT4_SB(sb), count);
2610 trace_ext4_discard_blocks(sb, 2631 trace_ext4_discard_blocks(sb,
2611 (unsigned long long) discard_block, count); 2632 (unsigned long long) discard_block, count);
2612 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2633 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2633 2654
2634 if (test_opt(sb, DISCARD)) 2655 if (test_opt(sb, DISCARD))
2635 ext4_issue_discard(sb, entry->group, 2656 ext4_issue_discard(sb, entry->group,
2636 entry->start_blk, entry->count); 2657 entry->start_cluster, entry->count);
2637 2658
2638 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2659 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2639 /* we expect to find existing buddy because it's pinned */ 2660 /* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2646 ext4_lock_group(sb, entry->group); 2667 ext4_lock_group(sb, entry->group);
2647 /* Take it out of per group rb tree */ 2668 /* Take it out of per group rb tree */
2648 rb_erase(&entry->node, &(db->bb_free_root)); 2669 rb_erase(&entry->node, &(db->bb_free_root));
2649 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); 2670 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
2650 2671
2651 /* 2672 /*
2652 * Clear the trimmed flag for the group so that the next 2673 * Clear the trimmed flag for the group so that the next
@@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void)
2752 */ 2773 */
2753static noinline_for_stack int 2774static noinline_for_stack int
2754ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2775ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2755 handle_t *handle, unsigned int reserv_blks) 2776 handle_t *handle, unsigned int reserv_clstrs)
2756{ 2777{
2757 struct buffer_head *bitmap_bh = NULL; 2778 struct buffer_head *bitmap_bh = NULL;
2758 struct ext4_group_desc *gdp; 2779 struct ext4_group_desc *gdp;
@@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2783 goto out_err; 2804 goto out_err;
2784 2805
2785 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, 2806 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2786 ext4_free_blks_count(sb, gdp)); 2807 ext4_free_group_clusters(sb, gdp));
2787 2808
2788 err = ext4_journal_get_write_access(handle, gdp_bh); 2809 err = ext4_journal_get_write_access(handle, gdp_bh);
2789 if (err) 2810 if (err)
@@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2791 2812
2792 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 2813 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2793 2814
2794 len = ac->ac_b_ex.fe_len; 2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2795 if (!ext4_data_block_valid(sbi, block, len)) { 2816 if (!ext4_data_block_valid(sbi, block, len)) {
2796 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2797 "fs metadata\n", block, block+len); 2818 "fs metadata\n", block, block+len);
@@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2823 ac->ac_b_ex.fe_len); 2844 ac->ac_b_ex.fe_len);
2824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2845 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 2846 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2826 ext4_free_blks_set(sb, gdp, 2847 ext4_free_group_clusters_set(sb, gdp,
2827 ext4_free_blocks_after_init(sb, 2848 ext4_free_clusters_after_init(sb,
2828 ac->ac_b_ex.fe_group, gdp)); 2849 ac->ac_b_ex.fe_group, gdp));
2829 } 2850 }
2830 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; 2851 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2831 ext4_free_blks_set(sb, gdp, len); 2852 ext4_free_group_clusters_set(sb, gdp, len);
2832 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2853 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2833 2854
2834 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 2855 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2835 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2856 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
2836 /* 2857 /*
2837 * Now reduce the dirty block count also. Should not go negative 2858 * Now reduce the dirty block count also. Should not go negative
2838 */ 2859 */
2839 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2860 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2840 /* release all the reserved blocks if non delalloc */ 2861 /* release all the reserved blocks if non delalloc */
2841 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2862 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2863 reserv_clstrs);
2842 2864
2843 if (sbi->s_log_groups_per_flex) { 2865 if (sbi->s_log_groups_per_flex) {
2844 ext4_group_t flex_group = ext4_flex_group(sbi, 2866 ext4_group_t flex_group = ext4_flex_group(sbi,
2845 ac->ac_b_ex.fe_group); 2867 ac->ac_b_ex.fe_group);
2846 atomic_sub(ac->ac_b_ex.fe_len, 2868 atomic_sub(ac->ac_b_ex.fe_len,
2847 &sbi->s_flex_groups[flex_group].free_blocks); 2869 &sbi->s_flex_groups[flex_group].free_clusters);
2848 } 2870 }
2849 2871
2850 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2872 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -2886,6 +2908,7 @@ static noinline_for_stack void
2886ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2908ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2887 struct ext4_allocation_request *ar) 2909 struct ext4_allocation_request *ar)
2888{ 2910{
2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2889 int bsbits, max; 2912 int bsbits, max;
2890 ext4_lblk_t end; 2913 ext4_lblk_t end;
2891 loff_t size, orig_size, start_off; 2914 loff_t size, orig_size, start_off;
@@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2916 2939
2917 /* first, let's learn actual file size 2940 /* first, let's learn actual file size
2918 * given current request is allocated */ 2941 * given current request is allocated */
2919 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 2942 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
2920 size = size << bsbits; 2943 size = size << bsbits;
2921 if (size < i_size_read(ac->ac_inode)) 2944 if (size < i_size_read(ac->ac_inode))
2922 size = i_size_read(ac->ac_inode); 2945 size = i_size_read(ac->ac_inode);
@@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2988 continue; 3011 continue;
2989 } 3012 }
2990 3013
2991 pa_end = pa->pa_lstart + pa->pa_len; 3014 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3015 pa->pa_len);
2992 3016
2993 /* PA must not overlap original request */ 3017 /* PA must not overlap original request */
2994 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || 3018 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3018 rcu_read_lock(); 3042 rcu_read_lock();
3019 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { 3043 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3020 ext4_lblk_t pa_end; 3044 ext4_lblk_t pa_end;
3045
3021 spin_lock(&pa->pa_lock); 3046 spin_lock(&pa->pa_lock);
3022 if (pa->pa_deleted == 0) { 3047 if (pa->pa_deleted == 0) {
3023 pa_end = pa->pa_lstart + pa->pa_len; 3048 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3049 pa->pa_len);
3024 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); 3050 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3025 } 3051 }
3026 spin_unlock(&pa->pa_lock); 3052 spin_unlock(&pa->pa_lock);
@@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3036 } 3062 }
3037 BUG_ON(start + size <= ac->ac_o_ex.fe_logical && 3063 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3038 start > ac->ac_o_ex.fe_logical); 3064 start > ac->ac_o_ex.fe_logical);
3039 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 3065 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
3040 3066
3041 /* now prepare goal request */ 3067 /* now prepare goal request */
3042 3068
3043 /* XXX: is it better to align blocks WRT to logical 3069 /* XXX: is it better to align blocks WRT to logical
3044 * placement or satisfy big request as is */ 3070 * placement or satisfy big request as is */
3045 ac->ac_g_ex.fe_logical = start; 3071 ac->ac_g_ex.fe_logical = start;
3046 ac->ac_g_ex.fe_len = size; 3072 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3047 3073
3048 /* define goal start in order to merge */ 3074 /* define goal start in order to merge */
3049 if (ar->pright && (ar->lright == (start + size))) { 3075 if (ar->pright && (ar->lright == (start + size))) {
@@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3112static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3138static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3113 struct ext4_prealloc_space *pa) 3139 struct ext4_prealloc_space *pa)
3114{ 3140{
3141 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3115 ext4_fsblk_t start; 3142 ext4_fsblk_t start;
3116 ext4_fsblk_t end; 3143 ext4_fsblk_t end;
3117 int len; 3144 int len;
3118 3145
3119 /* found preallocated blocks, use them */ 3146 /* found preallocated blocks, use them */
3120 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); 3147 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3121 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); 3148 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3122 len = end - start; 3149 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3150 len = EXT4_NUM_B2C(sbi, end - start);
3123 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, 3151 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3124 &ac->ac_b_ex.fe_start); 3152 &ac->ac_b_ex.fe_start);
3125 ac->ac_b_ex.fe_len = len; 3153 ac->ac_b_ex.fe_len = len;
@@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3127 ac->ac_pa = pa; 3155 ac->ac_pa = pa;
3128 3156
3129 BUG_ON(start < pa->pa_pstart); 3157 BUG_ON(start < pa->pa_pstart);
3130 BUG_ON(start + len > pa->pa_pstart + pa->pa_len); 3158 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3131 BUG_ON(pa->pa_free < len); 3159 BUG_ON(pa->pa_free < len);
3132 pa->pa_free -= len; 3160 pa->pa_free -= len;
3133 3161
@@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3193static noinline_for_stack int 3221static noinline_for_stack int
3194ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3222ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3195{ 3223{
3224 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3196 int order, i; 3225 int order, i;
3197 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3226 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3198 struct ext4_locality_group *lg; 3227 struct ext4_locality_group *lg;
@@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3210 /* all fields in this condition don't change, 3239 /* all fields in this condition don't change,
3211 * so we can skip locking for them */ 3240 * so we can skip locking for them */
3212 if (ac->ac_o_ex.fe_logical < pa->pa_lstart || 3241 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3213 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) 3242 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3243 EXT4_C2B(sbi, pa->pa_len)))
3214 continue; 3244 continue;
3215 3245
3216 /* non-extent files can't have physical blocks past 2^32 */ 3246 /* non-extent files can't have physical blocks past 2^32 */
3217 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && 3247 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3218 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) 3248 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3249 EXT4_MAX_BLOCK_FILE_PHYS))
3219 continue; 3250 continue;
3220 3251
3221 /* found preallocated blocks, use them */ 3252 /* found preallocated blocks, use them */
@@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3291 3322
3292 while (n) { 3323 while (n) {
3293 entry = rb_entry(n, struct ext4_free_data, node); 3324 entry = rb_entry(n, struct ext4_free_data, node);
3294 ext4_set_bits(bitmap, entry->start_blk, entry->count); 3325 ext4_set_bits(bitmap, entry->start_cluster, entry->count);
3295 n = rb_next(n); 3326 n = rb_next(n);
3296 } 3327 }
3297 return; 3328 return;
@@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3312 ext4_group_t groupnr; 3343 ext4_group_t groupnr;
3313 ext4_grpblk_t start; 3344 ext4_grpblk_t start;
3314 int preallocated = 0; 3345 int preallocated = 0;
3315 int count = 0;
3316 int len; 3346 int len;
3317 3347
3318 /* all form of preallocation discards first load group, 3348 /* all form of preallocation discards first load group,
@@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3335 BUG_ON(groupnr != group); 3365 BUG_ON(groupnr != group);
3336 ext4_set_bits(bitmap, start, len); 3366 ext4_set_bits(bitmap, start, len);
3337 preallocated += len; 3367 preallocated += len;
3338 count++;
3339 } 3368 }
3340 mb_debug(1, "prellocated %u for group %u\n", preallocated, group); 3369 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3341} 3370}
@@ -3412,6 +3441,7 @@ static noinline_for_stack int
3412ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3441ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3413{ 3442{
3414 struct super_block *sb = ac->ac_sb; 3443 struct super_block *sb = ac->ac_sb;
3444 struct ext4_sb_info *sbi = EXT4_SB(sb);
3415 struct ext4_prealloc_space *pa; 3445 struct ext4_prealloc_space *pa;
3416 struct ext4_group_info *grp; 3446 struct ext4_group_info *grp;
3417 struct ext4_inode_info *ei; 3447 struct ext4_inode_info *ei;
@@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3443 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; 3473 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3444 3474
3445 /* also, we should cover whole original request */ 3475 /* also, we should cover whole original request */
3446 wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; 3476 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3447 3477
3448 /* the smallest one defines real window */ 3478 /* the smallest one defines real window */
3449 win = min(winl, wins); 3479 win = min(winl, wins);
3450 3480
3451 offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; 3481 offs = ac->ac_o_ex.fe_logical %
3482 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3452 if (offs && offs < win) 3483 if (offs && offs < win)
3453 win = offs; 3484 win = offs;
3454 3485
3455 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; 3486 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3487 EXT4_B2C(sbi, win);
3456 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); 3488 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3457 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); 3489 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3458 } 3490 }
@@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3477 trace_ext4_mb_new_inode_pa(ac, pa); 3509 trace_ext4_mb_new_inode_pa(ac, pa);
3478 3510
3479 ext4_mb_use_inode_pa(ac, pa); 3511 ext4_mb_use_inode_pa(ac, pa);
3480 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3512 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3481 3513
3482 ei = EXT4_I(ac->ac_inode); 3514 ei = EXT4_I(ac->ac_inode);
3483 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); 3515 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3592 3624
3593 BUG_ON(pa->pa_deleted == 0); 3625 BUG_ON(pa->pa_deleted == 0);
3594 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3626 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3595 grp_blk_start = pa->pa_pstart - bit; 3627 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3596 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3628 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3597 end = bit + pa->pa_len; 3629 end = bit + pa->pa_len;
3598 3630
@@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3607 free += next - bit; 3639 free += next - bit;
3608 3640
3609 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); 3641 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3610 trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, 3642 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3643 EXT4_C2B(sbi, bit)),
3611 next - bit); 3644 next - bit);
3612 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3645 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3613 bit = next + 1; 3646 bit = next + 1;
@@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3690 } 3723 }
3691 3724
3692 if (needed == 0) 3725 if (needed == 0)
3693 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3726 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3694 3727
3695 INIT_LIST_HEAD(&list); 3728 INIT_LIST_HEAD(&list);
3696repeat: 3729repeat:
@@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3958 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) 3991 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3959 return; 3992 return;
3960 3993
3961 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 3994 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3962 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) 3995 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3963 >> bsbits; 3996 >> bsbits;
3964 3997
@@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3969 return; 4002 return;
3970 } 4003 }
3971 4004
4005 if (sbi->s_mb_group_prealloc <= 0) {
4006 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4007 return;
4008 }
4009
3972 /* don't use group allocation for large files */ 4010 /* don't use group allocation for large files */
3973 size = max(size, isize); 4011 size = max(size, isize);
3974 if (size > sbi->s_mb_stream_request) { 4012 if (size > sbi->s_mb_stream_request) {
@@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4007 len = ar->len; 4045 len = ar->len;
4008 4046
4009 /* just a dirty hack to filter too big requests */ 4047 /* just a dirty hack to filter too big requests */
4010 if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10) 4048 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
4011 len = EXT4_BLOCKS_PER_GROUP(sb) - 10; 4049 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
4012 4050
4013 /* start searching from the goal */ 4051 /* start searching from the goal */
4014 goal = ar->goal; 4052 goal = ar->goal;
@@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4019 4057
4020 /* set up allocation goals */ 4058 /* set up allocation goals */
4021 memset(ac, 0, sizeof(struct ext4_allocation_context)); 4059 memset(ac, 0, sizeof(struct ext4_allocation_context));
4022 ac->ac_b_ex.fe_logical = ar->logical; 4060 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4023 ac->ac_status = AC_STATUS_CONTINUE; 4061 ac->ac_status = AC_STATUS_CONTINUE;
4024 ac->ac_sb = sb; 4062 ac->ac_sb = sb;
4025 ac->ac_inode = ar->inode; 4063 ac->ac_inode = ar->inode;
4026 ac->ac_o_ex.fe_logical = ar->logical; 4064 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4027 ac->ac_o_ex.fe_group = group; 4065 ac->ac_o_ex.fe_group = group;
4028 ac->ac_o_ex.fe_start = block; 4066 ac->ac_o_ex.fe_start = block;
4029 ac->ac_o_ex.fe_len = len; 4067 ac->ac_o_ex.fe_len = len;
4030 ac->ac_g_ex.fe_logical = ar->logical; 4068 ac->ac_g_ex = ac->ac_o_ex;
4031 ac->ac_g_ex.fe_group = group;
4032 ac->ac_g_ex.fe_start = block;
4033 ac->ac_g_ex.fe_len = len;
4034 ac->ac_flags = ar->flags; 4069 ac->ac_flags = ar->flags;
4035 4070
4036 /* we have to define context: we'll we work with a file or 4071 /* we have to define context: we'll we work with a file or
@@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4182 */ 4217 */
4183static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4218static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4184{ 4219{
4220 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4185 struct ext4_prealloc_space *pa = ac->ac_pa; 4221 struct ext4_prealloc_space *pa = ac->ac_pa;
4186 if (pa) { 4222 if (pa) {
4187 if (pa->pa_type == MB_GROUP_PA) { 4223 if (pa->pa_type == MB_GROUP_PA) {
4188 /* see comment in ext4_mb_use_group_pa() */ 4224 /* see comment in ext4_mb_use_group_pa() */
4189 spin_lock(&pa->pa_lock); 4225 spin_lock(&pa->pa_lock);
4190 pa->pa_pstart += ac->ac_b_ex.fe_len; 4226 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4191 pa->pa_lstart += ac->ac_b_ex.fe_len; 4227 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4192 pa->pa_free -= ac->ac_b_ex.fe_len; 4228 pa->pa_free -= ac->ac_b_ex.fe_len;
4193 pa->pa_len -= ac->ac_b_ex.fe_len; 4229 pa->pa_len -= ac->ac_b_ex.fe_len;
4194 spin_unlock(&pa->pa_lock); 4230 spin_unlock(&pa->pa_lock);
@@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4249 struct super_block *sb; 4285 struct super_block *sb;
4250 ext4_fsblk_t block = 0; 4286 ext4_fsblk_t block = 0;
4251 unsigned int inquota = 0; 4287 unsigned int inquota = 0;
4252 unsigned int reserv_blks = 0; 4288 unsigned int reserv_clstrs = 0;
4253 4289
4254 sb = ar->inode->i_sb; 4290 sb = ar->inode->i_sb;
4255 sbi = EXT4_SB(sb); 4291 sbi = EXT4_SB(sb);
4256 4292
4257 trace_ext4_request_blocks(ar); 4293 trace_ext4_request_blocks(ar);
4258 4294
4295 /* Allow to use superuser reservation for quota file */
4296 if (IS_NOQUOTA(ar->inode))
4297 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4298
4259 /* 4299 /*
4260 * For delayed allocation, we could skip the ENOSPC and 4300 * For delayed allocation, we could skip the ENOSPC and
4261 * EDQUOT check, as blocks and quotas have been already 4301 * EDQUOT check, as blocks and quotas have been already
@@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4269 * and verify allocation doesn't exceed the quota limits. 4309 * and verify allocation doesn't exceed the quota limits.
4270 */ 4310 */
4271 while (ar->len && 4311 while (ar->len &&
4272 ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { 4312 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4273 4313
4274 /* let others to free the space */ 4314 /* let others to free the space */
4275 yield(); 4315 yield();
@@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4279 *errp = -ENOSPC; 4319 *errp = -ENOSPC;
4280 return 0; 4320 return 0;
4281 } 4321 }
4282 reserv_blks = ar->len; 4322 reserv_clstrs = ar->len;
4283 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { 4323 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4284 dquot_alloc_block_nofail(ar->inode, ar->len); 4324 dquot_alloc_block_nofail(ar->inode,
4325 EXT4_C2B(sbi, ar->len));
4285 } else { 4326 } else {
4286 while (ar->len && 4327 while (ar->len &&
4287 dquot_alloc_block(ar->inode, ar->len)) { 4328 dquot_alloc_block(ar->inode,
4329 EXT4_C2B(sbi, ar->len))) {
4288 4330
4289 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4331 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4290 ar->len--; 4332 ar->len--;
@@ -4328,7 +4370,7 @@ repeat:
4328 ext4_mb_new_preallocation(ac); 4370 ext4_mb_new_preallocation(ac);
4329 } 4371 }
4330 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4372 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4331 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); 4373 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4332 if (*errp == -EAGAIN) { 4374 if (*errp == -EAGAIN) {
4333 /* 4375 /*
4334 * drop the reference that we took 4376 * drop the reference that we took
@@ -4364,13 +4406,13 @@ out:
4364 if (ac) 4406 if (ac)
4365 kmem_cache_free(ext4_ac_cachep, ac); 4407 kmem_cache_free(ext4_ac_cachep, ac);
4366 if (inquota && ar->len < inquota) 4408 if (inquota && ar->len < inquota)
4367 dquot_free_block(ar->inode, inquota - ar->len); 4409 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4368 if (!ar->len) { 4410 if (!ar->len) {
4369 if (!ext4_test_inode_state(ar->inode, 4411 if (!ext4_test_inode_state(ar->inode,
4370 EXT4_STATE_DELALLOC_RESERVED)) 4412 EXT4_STATE_DELALLOC_RESERVED))
4371 /* release all the reserved blocks if non delalloc */ 4413 /* release all the reserved blocks if non delalloc */
4372 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 4414 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4373 reserv_blks); 4415 reserv_clstrs);
4374 } 4416 }
4375 4417
4376 trace_ext4_allocate_blocks(ar, (unsigned long long)block); 4418 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1,
4388{ 4430{
4389 if ((entry1->t_tid == entry2->t_tid) && 4431 if ((entry1->t_tid == entry2->t_tid) &&
4390 (entry1->group == entry2->group) && 4432 (entry1->group == entry2->group) &&
4391 ((entry1->start_blk + entry1->count) == entry2->start_blk)) 4433 ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
4392 return 1; 4434 return 1;
4393 return 0; 4435 return 0;
4394} 4436}
@@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4398 struct ext4_free_data *new_entry) 4440 struct ext4_free_data *new_entry)
4399{ 4441{
4400 ext4_group_t group = e4b->bd_group; 4442 ext4_group_t group = e4b->bd_group;
4401 ext4_grpblk_t block; 4443 ext4_grpblk_t cluster;
4402 struct ext4_free_data *entry; 4444 struct ext4_free_data *entry;
4403 struct ext4_group_info *db = e4b->bd_info; 4445 struct ext4_group_info *db = e4b->bd_info;
4404 struct super_block *sb = e4b->bd_sb; 4446 struct super_block *sb = e4b->bd_sb;
@@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4411 BUG_ON(e4b->bd_buddy_page == NULL); 4453 BUG_ON(e4b->bd_buddy_page == NULL);
4412 4454
4413 new_node = &new_entry->node; 4455 new_node = &new_entry->node;
4414 block = new_entry->start_blk; 4456 cluster = new_entry->start_cluster;
4415 4457
4416 if (!*n) { 4458 if (!*n) {
4417 /* first free block exent. We need to 4459 /* first free block exent. We need to
@@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4425 while (*n) { 4467 while (*n) {
4426 parent = *n; 4468 parent = *n;
4427 entry = rb_entry(parent, struct ext4_free_data, node); 4469 entry = rb_entry(parent, struct ext4_free_data, node);
4428 if (block < entry->start_blk) 4470 if (cluster < entry->start_cluster)
4429 n = &(*n)->rb_left; 4471 n = &(*n)->rb_left;
4430 else if (block >= (entry->start_blk + entry->count)) 4472 else if (cluster >= (entry->start_cluster + entry->count))
4431 n = &(*n)->rb_right; 4473 n = &(*n)->rb_right;
4432 else { 4474 else {
4433 ext4_grp_locked_error(sb, group, 0, 4475 ext4_grp_locked_error(sb, group, 0,
4434 ext4_group_first_block_no(sb, group) + block, 4476 ext4_group_first_block_no(sb, group) +
4477 EXT4_C2B(sbi, cluster),
4435 "Block already on to-be-freed list"); 4478 "Block already on to-be-freed list");
4436 return 0; 4479 return 0;
4437 } 4480 }
@@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4445 if (node) { 4488 if (node) {
4446 entry = rb_entry(node, struct ext4_free_data, node); 4489 entry = rb_entry(node, struct ext4_free_data, node);
4447 if (can_merge(entry, new_entry)) { 4490 if (can_merge(entry, new_entry)) {
4448 new_entry->start_blk = entry->start_blk; 4491 new_entry->start_cluster = entry->start_cluster;
4449 new_entry->count += entry->count; 4492 new_entry->count += entry->count;
4450 rb_erase(node, &(db->bb_free_root)); 4493 rb_erase(node, &(db->bb_free_root));
4451 spin_lock(&sbi->s_md_lock); 4494 spin_lock(&sbi->s_md_lock);
@@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4496 ext4_group_t block_group; 4539 ext4_group_t block_group;
4497 struct ext4_sb_info *sbi; 4540 struct ext4_sb_info *sbi;
4498 struct ext4_buddy e4b; 4541 struct ext4_buddy e4b;
4542 unsigned int count_clusters;
4499 int err = 0; 4543 int err = 0;
4500 int ret; 4544 int ret;
4501 4545
@@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4544 if (!ext4_should_writeback_data(inode)) 4588 if (!ext4_should_writeback_data(inode))
4545 flags |= EXT4_FREE_BLOCKS_METADATA; 4589 flags |= EXT4_FREE_BLOCKS_METADATA;
4546 4590
4591 /*
4592 * If the extent to be freed does not begin on a cluster
4593 * boundary, we need to deal with partial clusters at the
4594 * beginning and end of the extent. Normally we will free
4595 * blocks at the beginning or the end unless we are explicitly
4596 * requested to avoid doing so.
4597 */
4598 overflow = block & (sbi->s_cluster_ratio - 1);
4599 if (overflow) {
4600 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4601 overflow = sbi->s_cluster_ratio - overflow;
4602 block += overflow;
4603 if (count > overflow)
4604 count -= overflow;
4605 else
4606 return;
4607 } else {
4608 block -= overflow;
4609 count += overflow;
4610 }
4611 }
4612 overflow = count & (sbi->s_cluster_ratio - 1);
4613 if (overflow) {
4614 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4615 if (count > overflow)
4616 count -= overflow;
4617 else
4618 return;
4619 } else
4620 count += sbi->s_cluster_ratio - overflow;
4621 }
4622
4547do_more: 4623do_more:
4548 overflow = 0; 4624 overflow = 0;
4549 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4625 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4552,10 +4628,12 @@ do_more:
4552 * Check to see if we are freeing blocks across a group 4628 * Check to see if we are freeing blocks across a group
4553 * boundary. 4629 * boundary.
4554 */ 4630 */
4555 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { 4631 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4556 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 4632 overflow = EXT4_C2B(sbi, bit) + count -
4633 EXT4_BLOCKS_PER_GROUP(sb);
4557 count -= overflow; 4634 count -= overflow;
4558 } 4635 }
4636 count_clusters = EXT4_B2C(sbi, count);
4559 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4637 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4560 if (!bitmap_bh) { 4638 if (!bitmap_bh) {
4561 err = -EIO; 4639 err = -EIO;
@@ -4570,9 +4648,9 @@ do_more:
4570 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4648 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4571 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4649 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4572 in_range(block, ext4_inode_table(sb, gdp), 4650 in_range(block, ext4_inode_table(sb, gdp),
4573 EXT4_SB(sb)->s_itb_per_group) || 4651 EXT4_SB(sb)->s_itb_per_group) ||
4574 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4652 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4575 EXT4_SB(sb)->s_itb_per_group)) { 4653 EXT4_SB(sb)->s_itb_per_group)) {
4576 4654
4577 ext4_error(sb, "Freeing blocks in system zone - " 4655 ext4_error(sb, "Freeing blocks in system zone - "
4578 "Block = %llu, count = %lu", block, count); 4656 "Block = %llu, count = %lu", block, count);
@@ -4597,11 +4675,11 @@ do_more:
4597#ifdef AGGRESSIVE_CHECK 4675#ifdef AGGRESSIVE_CHECK
4598 { 4676 {
4599 int i; 4677 int i;
4600 for (i = 0; i < count; i++) 4678 for (i = 0; i < count_clusters; i++)
4601 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4679 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4602 } 4680 }
4603#endif 4681#endif
4604 trace_ext4_mballoc_free(sb, inode, block_group, bit, count); 4682 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4605 4683
4606 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4684 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4607 if (err) 4685 if (err)
@@ -4618,13 +4696,13 @@ do_more:
4618 err = -ENOMEM; 4696 err = -ENOMEM;
4619 goto error_return; 4697 goto error_return;
4620 } 4698 }
4621 new_entry->start_blk = bit; 4699 new_entry->start_cluster = bit;
4622 new_entry->group = block_group; 4700 new_entry->group = block_group;
4623 new_entry->count = count; 4701 new_entry->count = count_clusters;
4624 new_entry->t_tid = handle->h_transaction->t_tid; 4702 new_entry->t_tid = handle->h_transaction->t_tid;
4625 4703
4626 ext4_lock_group(sb, block_group); 4704 ext4_lock_group(sb, block_group);
4627 mb_clear_bits(bitmap_bh->b_data, bit, count); 4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4628 ext4_mb_free_metadata(handle, &e4b, new_entry); 4706 ext4_mb_free_metadata(handle, &e4b, new_entry);
4629 } else { 4707 } else {
4630 /* need to update group_info->bb_free and bitmap 4708 /* need to update group_info->bb_free and bitmap
@@ -4632,25 +4710,29 @@ do_more:
4632 * them with group lock_held 4710 * them with group lock_held
4633 */ 4711 */
4634 ext4_lock_group(sb, block_group); 4712 ext4_lock_group(sb, block_group);
4635 mb_clear_bits(bitmap_bh->b_data, bit, count); 4713 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4636 mb_free_blocks(inode, &e4b, bit, count); 4714 mb_free_blocks(inode, &e4b, bit, count_clusters);
4637 } 4715 }
4638 4716
4639 ret = ext4_free_blks_count(sb, gdp) + count; 4717 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4640 ext4_free_blks_set(sb, gdp, ret); 4718 ext4_free_group_clusters_set(sb, gdp, ret);
4641 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4719 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4642 ext4_unlock_group(sb, block_group); 4720 ext4_unlock_group(sb, block_group);
4643 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4721 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4644 4722
4645 if (sbi->s_log_groups_per_flex) { 4723 if (sbi->s_log_groups_per_flex) {
4646 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4724 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4647 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); 4725 atomic_add(count_clusters,
4726 &sbi->s_flex_groups[flex_group].free_clusters);
4648 } 4727 }
4649 4728
4650 ext4_mb_unload_buddy(&e4b); 4729 ext4_mb_unload_buddy(&e4b);
4651 4730
4652 freed += count; 4731 freed += count;
4653 4732
4733 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4734 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4735
4654 /* We dirtied the bitmap block */ 4736 /* We dirtied the bitmap block */
4655 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4737 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4656 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4738 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4669,8 +4751,6 @@ do_more:
4669 } 4751 }
4670 ext4_mark_super_dirty(sb); 4752 ext4_mark_super_dirty(sb);
4671error_return: 4753error_return:
4672 if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4673 dquot_free_block(inode, freed);
4674 brelse(bitmap_bh); 4754 brelse(bitmap_bh);
4675 ext4_std_error(sb, err); 4755 ext4_std_error(sb, err);
4676 return; 4756 return;
@@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4778 ext4_lock_group(sb, block_group); 4858 ext4_lock_group(sb, block_group);
4779 mb_clear_bits(bitmap_bh->b_data, bit, count); 4859 mb_clear_bits(bitmap_bh->b_data, bit, count);
4780 mb_free_blocks(NULL, &e4b, bit, count); 4860 mb_free_blocks(NULL, &e4b, bit, count);
4781 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); 4861 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4782 ext4_free_blks_set(sb, desc, blk_free_count); 4862 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4783 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 4863 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
4784 ext4_unlock_group(sb, block_group); 4864 ext4_unlock_group(sb, block_group);
4785 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); 4865 percpu_counter_add(&sbi->s_freeclusters_counter,
4866 EXT4_B2C(sbi, blocks_freed));
4786 4867
4787 if (sbi->s_log_groups_per_flex) { 4868 if (sbi->s_log_groups_per_flex) {
4788 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4869 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4789 atomic_add(blocks_freed, 4870 atomic_add(EXT4_B2C(sbi, blocks_freed),
4790 &sbi->s_flex_groups[flex_group].free_blocks); 4871 &sbi->s_flex_groups[flex_group].free_clusters);
4791 } 4872 }
4792 4873
4793 ext4_mb_unload_buddy(&e4b); 4874 ext4_mb_unload_buddy(&e4b);
@@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4948 struct ext4_group_info *grp; 5029 struct ext4_group_info *grp;
4949 ext4_group_t first_group, last_group; 5030 ext4_group_t first_group, last_group;
4950 ext4_group_t group, ngroups = ext4_get_groups_count(sb); 5031 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4951 ext4_grpblk_t cnt = 0, first_block, last_block; 5032 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
4952 uint64_t start, len, minlen, trimmed = 0; 5033 uint64_t start, len, minlen, trimmed = 0;
4953 ext4_fsblk_t first_data_blk = 5034 ext4_fsblk_t first_data_blk =
4954 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 5035 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4958 len = range->len >> sb->s_blocksize_bits; 5039 len = range->len >> sb->s_blocksize_bits;
4959 minlen = range->minlen >> sb->s_blocksize_bits; 5040 minlen = range->minlen >> sb->s_blocksize_bits;
4960 5041
4961 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) 5042 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
4962 return -EINVAL; 5043 return -EINVAL;
4963 if (start + len <= first_data_blk) 5044 if (start + len <= first_data_blk)
4964 goto out; 5045 goto out;
@@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4969 5050
4970 /* Determine first and last group to examine based on start and len */ 5051 /* Determine first and last group to examine based on start and len */
4971 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 5052 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4972 &first_group, &first_block); 5053 &first_group, &first_cluster);
4973 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 5054 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4974 &last_group, &last_block); 5055 &last_group, &last_cluster);
4975 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; 5056 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4976 last_block = EXT4_BLOCKS_PER_GROUP(sb); 5057 last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
4977 5058
4978 if (first_group > last_group) 5059 if (first_group > last_group)
4979 return -EINVAL; 5060 return -EINVAL;
@@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4993 * change it for the last group in which case start + 5074 * change it for the last group in which case start +
4994 * len < EXT4_BLOCKS_PER_GROUP(sb). 5075 * len < EXT4_BLOCKS_PER_GROUP(sb).
4995 */ 5076 */
4996 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) 5077 if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
4997 last_block = first_block + len; 5078 last_cluster = first_cluster + len;
4998 len -= last_block - first_block; 5079 len -= last_cluster - first_cluster;
4999 5080
5000 if (grp->bb_free >= minlen) { 5081 if (grp->bb_free >= minlen) {
5001 cnt = ext4_trim_all_free(sb, group, first_block, 5082 cnt = ext4_trim_all_free(sb, group, first_cluster,
5002 last_block, minlen); 5083 last_cluster, minlen);
5003 if (cnt < 0) { 5084 if (cnt < 0) {
5004 ret = cnt; 5085 ret = cnt;
5005 break; 5086 break;
5006 } 5087 }
5007 } 5088 }
5008 trimmed += cnt; 5089 trimmed += cnt;
5009 first_block = 0; 5090 first_cluster = 0;
5010 } 5091 }
5011 range->len = trimmed * sb->s_blocksize; 5092 range->len = trimmed * sb->s_blocksize;
5012 5093
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 9d4a636b546c..47705f3285e3 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -106,7 +106,7 @@ struct ext4_free_data {
106 ext4_group_t group; 106 ext4_group_t group;
107 107
108 /* free block extent */ 108 /* free block extent */
109 ext4_grpblk_t start_blk; 109 ext4_grpblk_t start_cluster;
110 ext4_grpblk_t count; 110 ext4_grpblk_t count;
111 111
112 /* transaction which freed this extent */ 112 /* transaction which freed this extent */
@@ -139,9 +139,9 @@ enum {
139 139
140struct ext4_free_extent { 140struct ext4_free_extent {
141 ext4_lblk_t fe_logical; 141 ext4_lblk_t fe_logical;
142 ext4_grpblk_t fe_start; 142 ext4_grpblk_t fe_start; /* In cluster units */
143 ext4_group_t fe_group; 143 ext4_group_t fe_group;
144 ext4_grpblk_t fe_len; 144 ext4_grpblk_t fe_len; /* In cluster units */
145}; 145};
146 146
147/* 147/*
@@ -175,7 +175,7 @@ struct ext4_allocation_context {
175 /* the best found extent */ 175 /* the best found extent */
176 struct ext4_free_extent ac_b_ex; 176 struct ext4_free_extent ac_b_ex;
177 177
178 /* copy of the bext found extent taken before preallocation efforts */ 178 /* copy of the best found extent taken before preallocation efforts */
179 struct ext4_free_extent ac_f_ex; 179 struct ext4_free_extent ac_f_ex;
180 180
181 /* number of iterations done. we have to track to limit searching */ 181 /* number of iterations done. we have to track to limit searching */
@@ -216,6 +216,7 @@ struct ext4_buddy {
216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
217 struct ext4_free_extent *fex) 217 struct ext4_free_extent *fex)
218{ 218{
219 return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; 219 return ext4_group_first_block_no(sb, fex->fe_group) +
220 (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
220} 221}
221#endif 222#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b57b98fb44d1..16ac228dbec6 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -15,19 +15,18 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
18#include "ext4_extents.h"
19 18
20/* 19/*
21 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
22 * represented by a single extent 21 * represented by a single extent
23 */ 22 */
24struct list_blocks_struct { 23struct migrate_struct {
25 ext4_lblk_t first_block, last_block; 24 ext4_lblk_t first_block, last_block, curr_block;
26 ext4_fsblk_t first_pblock, last_pblock; 25 ext4_fsblk_t first_pblock, last_pblock;
27}; 26};
28 27
29static int finish_range(handle_t *handle, struct inode *inode, 28static int finish_range(handle_t *handle, struct inode *inode,
30 struct list_blocks_struct *lb) 29 struct migrate_struct *lb)
31 30
32{ 31{
33 int retval = 0, needed; 32 int retval = 0, needed;
@@ -87,8 +86,7 @@ err_out:
87} 86}
88 87
89static int update_extent_range(handle_t *handle, struct inode *inode, 88static int update_extent_range(handle_t *handle, struct inode *inode,
90 ext4_fsblk_t pblock, ext4_lblk_t blk_num, 89 ext4_fsblk_t pblock, struct migrate_struct *lb)
91 struct list_blocks_struct *lb)
92{ 90{
93 int retval; 91 int retval;
94 /* 92 /*
@@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
96 */ 94 */
97 if (lb->first_pblock && 95 if (lb->first_pblock &&
98 (lb->last_pblock+1 == pblock) && 96 (lb->last_pblock+1 == pblock) &&
99 (lb->last_block+1 == blk_num)) { 97 (lb->last_block+1 == lb->curr_block)) {
100 lb->last_pblock = pblock; 98 lb->last_pblock = pblock;
101 lb->last_block = blk_num; 99 lb->last_block = lb->curr_block;
100 lb->curr_block++;
102 return 0; 101 return 0;
103 } 102 }
104 /* 103 /*
@@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
106 */ 105 */
107 retval = finish_range(handle, inode, lb); 106 retval = finish_range(handle, inode, lb);
108 lb->first_pblock = lb->last_pblock = pblock; 107 lb->first_pblock = lb->last_pblock = pblock;
109 lb->first_block = lb->last_block = blk_num; 108 lb->first_block = lb->last_block = lb->curr_block;
110 109 lb->curr_block++;
111 return retval; 110 return retval;
112} 111}
113 112
114static int update_ind_extent_range(handle_t *handle, struct inode *inode, 113static int update_ind_extent_range(handle_t *handle, struct inode *inode,
115 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 114 ext4_fsblk_t pblock,
116 struct list_blocks_struct *lb) 115 struct migrate_struct *lb)
117{ 116{
118 struct buffer_head *bh; 117 struct buffer_head *bh;
119 __le32 *i_data; 118 __le32 *i_data;
120 int i, retval = 0; 119 int i, retval = 0;
121 ext4_lblk_t blk_count = *blk_nump;
122 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 120 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
123 121
124 if (!pblock) {
125 /* Only update the file block number */
126 *blk_nump += max_entries;
127 return 0;
128 }
129
130 bh = sb_bread(inode->i_sb, pblock); 122 bh = sb_bread(inode->i_sb, pblock);
131 if (!bh) 123 if (!bh)
132 return -EIO; 124 return -EIO;
133 125
134 i_data = (__le32 *)bh->b_data; 126 i_data = (__le32 *)bh->b_data;
135 for (i = 0; i < max_entries; i++, blk_count++) { 127 for (i = 0; i < max_entries; i++) {
136 if (i_data[i]) { 128 if (i_data[i]) {
137 retval = update_extent_range(handle, inode, 129 retval = update_extent_range(handle, inode,
138 le32_to_cpu(i_data[i]), 130 le32_to_cpu(i_data[i]), lb);
139 blk_count, lb);
140 if (retval) 131 if (retval)
141 break; 132 break;
133 } else {
134 lb->curr_block++;
142 } 135 }
143 } 136 }
144
145 /* Update the file block number */
146 *blk_nump = blk_count;
147 put_bh(bh); 137 put_bh(bh);
148 return retval; 138 return retval;
149 139
150} 140}
151 141
152static int update_dind_extent_range(handle_t *handle, struct inode *inode, 142static int update_dind_extent_range(handle_t *handle, struct inode *inode,
153 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 143 ext4_fsblk_t pblock,
154 struct list_blocks_struct *lb) 144 struct migrate_struct *lb)
155{ 145{
156 struct buffer_head *bh; 146 struct buffer_head *bh;
157 __le32 *i_data; 147 __le32 *i_data;
158 int i, retval = 0; 148 int i, retval = 0;
159 ext4_lblk_t blk_count = *blk_nump;
160 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 149 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
161 150
162 if (!pblock) {
163 /* Only update the file block number */
164 *blk_nump += max_entries * max_entries;
165 return 0;
166 }
167 bh = sb_bread(inode->i_sb, pblock); 151 bh = sb_bread(inode->i_sb, pblock);
168 if (!bh) 152 if (!bh)
169 return -EIO; 153 return -EIO;
@@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
172 for (i = 0; i < max_entries; i++) { 156 for (i = 0; i < max_entries; i++) {
173 if (i_data[i]) { 157 if (i_data[i]) {
174 retval = update_ind_extent_range(handle, inode, 158 retval = update_ind_extent_range(handle, inode,
175 le32_to_cpu(i_data[i]), 159 le32_to_cpu(i_data[i]), lb);
176 &blk_count, lb);
177 if (retval) 160 if (retval)
178 break; 161 break;
179 } else { 162 } else {
180 /* Only update the file block number */ 163 /* Only update the file block number */
181 blk_count += max_entries; 164 lb->curr_block += max_entries;
182 } 165 }
183 } 166 }
184
185 /* Update the file block number */
186 *blk_nump = blk_count;
187 put_bh(bh); 167 put_bh(bh);
188 return retval; 168 return retval;
189 169
190} 170}
191 171
192static int update_tind_extent_range(handle_t *handle, struct inode *inode, 172static int update_tind_extent_range(handle_t *handle, struct inode *inode,
193 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 173 ext4_fsblk_t pblock,
194 struct list_blocks_struct *lb) 174 struct migrate_struct *lb)
195{ 175{
196 struct buffer_head *bh; 176 struct buffer_head *bh;
197 __le32 *i_data; 177 __le32 *i_data;
198 int i, retval = 0; 178 int i, retval = 0;
199 ext4_lblk_t blk_count = *blk_nump;
200 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 179 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
201 180
202 if (!pblock) {
203 /* Only update the file block number */
204 *blk_nump += max_entries * max_entries * max_entries;
205 return 0;
206 }
207 bh = sb_bread(inode->i_sb, pblock); 181 bh = sb_bread(inode->i_sb, pblock);
208 if (!bh) 182 if (!bh)
209 return -EIO; 183 return -EIO;
@@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
212 for (i = 0; i < max_entries; i++) { 186 for (i = 0; i < max_entries; i++) {
213 if (i_data[i]) { 187 if (i_data[i]) {
214 retval = update_dind_extent_range(handle, inode, 188 retval = update_dind_extent_range(handle, inode,
215 le32_to_cpu(i_data[i]), 189 le32_to_cpu(i_data[i]), lb);
216 &blk_count, lb);
217 if (retval) 190 if (retval)
218 break; 191 break;
219 } else 192 } else {
220 /* Only update the file block number */ 193 /* Only update the file block number */
221 blk_count += max_entries * max_entries; 194 lb->curr_block += max_entries * max_entries;
195 }
222 } 196 }
223 /* Update the file block number */
224 *blk_nump = blk_count;
225 put_bh(bh); 197 put_bh(bh);
226 return retval; 198 return retval;
227 199
@@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode)
462 handle_t *handle; 434 handle_t *handle;
463 int retval = 0, i; 435 int retval = 0, i;
464 __le32 *i_data; 436 __le32 *i_data;
465 ext4_lblk_t blk_count = 0;
466 struct ext4_inode_info *ei; 437 struct ext4_inode_info *ei;
467 struct inode *tmp_inode = NULL; 438 struct inode *tmp_inode = NULL;
468 struct list_blocks_struct lb; 439 struct migrate_struct lb;
469 unsigned long max_entries; 440 unsigned long max_entries;
470 __u32 goal; 441 __u32 goal;
442 uid_t owner[2];
471 443
472 /* 444 /*
473 * If the filesystem does not support extents, or the inode 445 * If the filesystem does not support extents, or the inode
@@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode)
495 } 467 }
496 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 468 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
497 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 469 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
470 owner[0] = inode->i_uid;
471 owner[1] = inode->i_gid;
498 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 472 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
499 S_IFREG, NULL, goal); 473 S_IFREG, NULL, goal, owner);
500 if (IS_ERR(tmp_inode)) { 474 if (IS_ERR(tmp_inode)) {
501 retval = -ENOMEM; 475 retval = PTR_ERR(inode);
502 ext4_journal_stop(handle); 476 ext4_journal_stop(handle);
503 return retval; 477 return retval;
504 } 478 }
@@ -507,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
507 * Set the i_nlink to zero so it will be deleted later 481 * Set the i_nlink to zero so it will be deleted later
508 * when we drop inode reference. 482 * when we drop inode reference.
509 */ 483 */
510 tmp_inode->i_nlink = 0; 484 clear_nlink(tmp_inode);
511 485
512 ext4_ext_tree_init(handle, tmp_inode); 486 ext4_ext_tree_init(handle, tmp_inode);
513 ext4_orphan_add(handle, tmp_inode); 487 ext4_orphan_add(handle, tmp_inode);
@@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode)
551 525
552 /* 32 bit block address 4 bytes */ 526 /* 32 bit block address 4 bytes */
553 max_entries = inode->i_sb->s_blocksize >> 2; 527 max_entries = inode->i_sb->s_blocksize >> 2;
554 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 528 for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
555 if (i_data[i]) { 529 if (i_data[i]) {
556 retval = update_extent_range(handle, tmp_inode, 530 retval = update_extent_range(handle, tmp_inode,
557 le32_to_cpu(i_data[i]), 531 le32_to_cpu(i_data[i]), &lb);
558 blk_count, &lb);
559 if (retval) 532 if (retval)
560 goto err_out; 533 goto err_out;
561 } 534 } else
535 lb.curr_block++;
562 } 536 }
563 if (i_data[EXT4_IND_BLOCK]) { 537 if (i_data[EXT4_IND_BLOCK]) {
564 retval = update_ind_extent_range(handle, tmp_inode, 538 retval = update_ind_extent_range(handle, tmp_inode,
565 le32_to_cpu(i_data[EXT4_IND_BLOCK]), 539 le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
566 &blk_count, &lb);
567 if (retval) 540 if (retval)
568 goto err_out; 541 goto err_out;
569 } else 542 } else
570 blk_count += max_entries; 543 lb.curr_block += max_entries;
571 if (i_data[EXT4_DIND_BLOCK]) { 544 if (i_data[EXT4_DIND_BLOCK]) {
572 retval = update_dind_extent_range(handle, tmp_inode, 545 retval = update_dind_extent_range(handle, tmp_inode,
573 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), 546 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
574 &blk_count, &lb);
575 if (retval) 547 if (retval)
576 goto err_out; 548 goto err_out;
577 } else 549 } else
578 blk_count += max_entries * max_entries; 550 lb.curr_block += max_entries * max_entries;
579 if (i_data[EXT4_TIND_BLOCK]) { 551 if (i_data[EXT4_TIND_BLOCK]) {
580 retval = update_tind_extent_range(handle, tmp_inode, 552 retval = update_tind_extent_range(handle, tmp_inode,
581 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), 553 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
582 &blk_count, &lb);
583 if (retval) 554 if (retval)
584 goto err_out; 555 goto err_out;
585 } 556 }
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 9bdef3f537c5..7ea4ba4eff2a 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -109,7 +109,7 @@ static int kmmpd(void *data)
109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
110 bdevname(bh->b_bdev, mmp->mmp_bdevname); 110 bdevname(bh->b_bdev, mmp->mmp_bdevname);
111 111
112 memcpy(mmp->mmp_nodename, init_utsname()->sysname, 112 memcpy(mmp->mmp_nodename, init_utsname()->nodename,
113 sizeof(mmp->mmp_nodename)); 113 sizeof(mmp->mmp_nodename));
114 114
115 while (!kthread_should_stop()) { 115 while (!kthread_should_stop()) {
@@ -125,8 +125,9 @@ static int kmmpd(void *data)
125 * Don't spew too many error messages. Print one every 125 * Don't spew too many error messages. Print one every
126 * (s_mmp_update_interval * 60) seconds. 126 * (s_mmp_update_interval * 60) seconds.
127 */ 127 */
128 if (retval && (failed_writes % 60) == 0) { 128 if (retval) {
129 ext4_error(sb, "Error writing to MMP block"); 129 if ((failed_writes % 60) == 0)
130 ext4_error(sb, "Error writing to MMP block");
130 failed_writes++; 131 failed_writes++;
131 } 132 }
132 133
@@ -295,7 +296,8 @@ skip:
295 /* 296 /*
296 * write a new random sequence number. 297 * write a new random sequence number.
297 */ 298 */
298 mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); 299 seq = mmp_new_seq();
300 mmp->mmp_seq = cpu_to_le32(seq);
299 301
300 retval = write_mmp_block(bh); 302 retval = write_mmp_block(bh);
301 if (retval) 303 if (retval)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f57455a1b1b2..c5826c623e7a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -17,7 +17,6 @@
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
20#include "ext4_extents.h"
21#include "ext4.h" 20#include "ext4.h"
22 21
23/** 22/**
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924faeb6c8..aa4c782c9dd7 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1586 dxtrace(dx_show_index("node", frames[1].entries)); 1586 dxtrace(dx_show_index("node", frames[1].entries));
1587 dxtrace(dx_show_index("node", 1587 dxtrace(dx_show_index("node",
1588 ((struct dx_node *) bh2->b_data)->entries)); 1588 ((struct dx_node *) bh2->b_data)->entries));
1589 err = ext4_handle_dirty_metadata(handle, inode, bh2); 1589 err = ext4_handle_dirty_metadata(handle, dir, bh2);
1590 if (err) 1590 if (err)
1591 goto journal_error; 1591 goto journal_error;
1592 brelse (bh2); 1592 brelse (bh2);
@@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1612 if (err) 1612 if (err)
1613 goto journal_error; 1613 goto journal_error;
1614 } 1614 }
1615 err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); 1615 err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
1616 if (err) { 1616 if (err) {
1617 ext4_std_error(inode->i_sb, err); 1617 ext4_std_error(inode->i_sb, err);
1618 goto cleanup; 1618 goto cleanup;
@@ -1694,7 +1694,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
1694 if (is_dx(inode) && inode->i_nlink > 1) { 1694 if (is_dx(inode) && inode->i_nlink > 1) {
1695 /* limit is 16-bit i_links_count */ 1695 /* limit is 16-bit i_links_count */
1696 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { 1696 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
1697 inode->i_nlink = 1; 1697 set_nlink(inode, 1);
1698 EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, 1698 EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
1699 EXT4_FEATURE_RO_COMPAT_DIR_NLINK); 1699 EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
1700 } 1700 }
@@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
1707 */ 1707 */
1708static void ext4_dec_count(handle_t *handle, struct inode *inode) 1708static void ext4_dec_count(handle_t *handle, struct inode *inode)
1709{ 1709{
1710 drop_nlink(inode); 1710 if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
1711 if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) 1711 drop_nlink(inode);
1712 inc_nlink(inode);
1713} 1712}
1714 1713
1715 1714
@@ -1756,7 +1755,7 @@ retry:
1756 if (IS_DIRSYNC(dir)) 1755 if (IS_DIRSYNC(dir))
1757 ext4_handle_sync(handle); 1756 ext4_handle_sync(handle);
1758 1757
1759 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); 1758 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
1760 err = PTR_ERR(inode); 1759 err = PTR_ERR(inode);
1761 if (!IS_ERR(inode)) { 1760 if (!IS_ERR(inode)) {
1762 inode->i_op = &ext4_file_inode_operations; 1761 inode->i_op = &ext4_file_inode_operations;
@@ -1792,7 +1791,7 @@ retry:
1792 if (IS_DIRSYNC(dir)) 1791 if (IS_DIRSYNC(dir))
1793 ext4_handle_sync(handle); 1792 ext4_handle_sync(handle);
1794 1793
1795 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); 1794 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
1796 err = PTR_ERR(inode); 1795 err = PTR_ERR(inode);
1797 if (!IS_ERR(inode)) { 1796 if (!IS_ERR(inode)) {
1798 init_special_inode(inode, inode->i_mode, rdev); 1797 init_special_inode(inode, inode->i_mode, rdev);
@@ -1832,7 +1831,7 @@ retry:
1832 ext4_handle_sync(handle); 1831 ext4_handle_sync(handle);
1833 1832
1834 inode = ext4_new_inode(handle, dir, S_IFDIR | mode, 1833 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1835 &dentry->d_name, 0); 1834 &dentry->d_name, 0, NULL);
1836 err = PTR_ERR(inode); 1835 err = PTR_ERR(inode);
1837 if (IS_ERR(inode)) 1836 if (IS_ERR(inode))
1838 goto out_stop; 1837 goto out_stop;
@@ -1861,9 +1860,9 @@ retry:
1861 de->name_len = 2; 1860 de->name_len = 2;
1862 strcpy(de->name, ".."); 1861 strcpy(de->name, "..");
1863 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1862 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1864 inode->i_nlink = 2; 1863 set_nlink(inode, 2);
1865 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); 1864 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1866 err = ext4_handle_dirty_metadata(handle, dir, dir_block); 1865 err = ext4_handle_dirty_metadata(handle, inode, dir_block);
1867 if (err) 1866 if (err)
1868 goto out_clear_inode; 1867 goto out_clear_inode;
1869 err = ext4_mark_inode_dirty(handle, inode); 1868 err = ext4_mark_inode_dirty(handle, inode);
@@ -2214,7 +2213,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2214 ext4_warning(inode->i_sb, 2213 ext4_warning(inode->i_sb,
2215 "Deleting nonexistent file (%lu), %d", 2214 "Deleting nonexistent file (%lu), %d",
2216 inode->i_ino, inode->i_nlink); 2215 inode->i_ino, inode->i_nlink);
2217 inode->i_nlink = 1; 2216 set_nlink(inode, 1);
2218 } 2217 }
2219 retval = ext4_delete_entry(handle, dir, de, bh); 2218 retval = ext4_delete_entry(handle, dir, de, bh);
2220 if (retval) 2219 if (retval)
@@ -2279,7 +2278,7 @@ retry:
2279 ext4_handle_sync(handle); 2278 ext4_handle_sync(handle);
2280 2279
2281 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, 2280 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2282 &dentry->d_name, 0); 2281 &dentry->d_name, 0, NULL);
2283 err = PTR_ERR(inode); 2282 err = PTR_ERR(inode);
2284 if (IS_ERR(inode)) 2283 if (IS_ERR(inode))
2285 goto out_stop; 2284 goto out_stop;
@@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2530 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2529 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2531 cpu_to_le32(new_dir->i_ino); 2530 cpu_to_le32(new_dir->i_ino);
2532 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2531 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2533 retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2532 retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
2534 if (retval) { 2533 if (retval) {
2535 ext4_std_error(old_dir->i_sb, retval); 2534 ext4_std_error(old_dir->i_sb, retval);
2536 goto end_rename; 2535 goto end_rename;
@@ -2539,7 +2538,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2539 if (new_inode) { 2538 if (new_inode) {
2540 /* checked empty_dir above, can't have another parent, 2539 /* checked empty_dir above, can't have another parent,
2541 * ext4_dec_count() won't work for many-linked dirs */ 2540 * ext4_dec_count() won't work for many-linked dirs */
2542 new_inode->i_nlink = 0; 2541 clear_nlink(new_inode);
2543 } else { 2542 } else {
2544 ext4_inc_count(handle, new_dir); 2543 ext4_inc_count(handle, new_dir);
2545 ext4_update_dx_flag(new_dir); 2544 ext4_update_dx_flag(new_dir);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 92f38ee13f8a..7ce1d0b19c94 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page)
70void ext4_free_io_end(ext4_io_end_t *io) 70void ext4_free_io_end(ext4_io_end_t *io)
71{ 71{
72 int i; 72 int i;
73 wait_queue_head_t *wq;
74 73
75 BUG_ON(!io); 74 BUG_ON(!io);
76 if (io->page) 75 if (io->page)
@@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io)
78 for (i = 0; i < io->num_io_pages; i++) 77 for (i = 0; i < io->num_io_pages; i++)
79 put_io_page(io->pages[i]); 78 put_io_page(io->pages[i]);
80 io->num_io_pages = 0; 79 io->num_io_pages = 0;
81 wq = ext4_ioend_wq(io->inode); 80 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 81 wake_up_all(ext4_ioend_wq(io->inode));
83 waitqueue_active(wq))
84 wake_up_all(wq);
85 kmem_cache_free(io_end_cachep, io); 82 kmem_cache_free(io_end_cachep, io);
86} 83}
87 84
88/* 85/*
89 * check a range of space and convert unwritten extents to written. 86 * check a range of space and convert unwritten extents to written.
87 *
88 * Called with inode->i_mutex; we depend on this when we manipulate
89 * io->flag, since we could otherwise race with ext4_flush_completed_IO()
90 */ 90 */
91int ext4_end_io_nolock(ext4_io_end_t *io) 91int ext4_end_io_nolock(ext4_io_end_t *io)
92{ 92{
93 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
94 loff_t offset = io->offset; 94 loff_t offset = io->offset;
95 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
97 int ret = 0; 96 int ret = 0;
98 97
99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 98 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
100 "list->prev 0x%p\n", 99 "list->prev 0x%p\n",
101 io, inode->i_ino, io->list.next, io->list.prev); 100 io, inode->i_ino, io->list.next, io->list.prev);
102 101
103 if (list_empty(&io->list))
104 return ret;
105
106 if (!(io->flag & EXT4_IO_END_UNWRITTEN))
107 return ret;
108
109 ret = ext4_convert_unwritten_extents(inode, offset, size); 102 ret = ext4_convert_unwritten_extents(inode, offset, size);
110 if (ret < 0) { 103 if (ret < 0) {
111 printk(KERN_EMERG "%s: failed to convert unwritten " 104 ext4_msg(inode->i_sb, KERN_EMERG,
112 "extents to written extents, error is %d " 105 "failed to convert unwritten extents to written "
113 "io is still on inode %lu aio dio list\n", 106 "extents -- potential data loss! "
114 __func__, ret, inode->i_ino); 107 "(inode %lu, offset %llu, size %zd, error %d)",
115 return ret; 108 inode->i_ino, offset, size, ret);
116 } 109 }
117 110
118 if (io->iocb) 111 if (io->iocb)
119 aio_complete(io->iocb, io->result, 0); 112 aio_complete(io->iocb, io->result, 0);
120 /* clear the DIO AIO unwritten flag */
121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130 113
114 /* Wake up anyone waiting on unwritten extent conversion */
115 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
116 wake_up_all(ext4_ioend_wq(io->inode));
131 return ret; 117 return ret;
132} 118}
133 119
@@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work)
140 struct inode *inode = io->inode; 126 struct inode *inode = io->inode;
141 struct ext4_inode_info *ei = EXT4_I(inode); 127 struct ext4_inode_info *ei = EXT4_I(inode);
142 unsigned long flags; 128 unsigned long flags;
143 int ret; 129
130 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
131 if (list_empty(&io->list)) {
132 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
133 goto free;
134 }
144 135
145 if (!mutex_trylock(&inode->i_mutex)) { 136 if (!mutex_trylock(&inode->i_mutex)) {
137 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
146 /* 138 /*
147 * Requeue the work instead of waiting so that the work 139 * Requeue the work instead of waiting so that the work
148 * items queued after this can be processed. 140 * items queued after this can be processed.
@@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work)
159 io->flag |= EXT4_IO_END_QUEUED; 151 io->flag |= EXT4_IO_END_QUEUED;
160 return; 152 return;
161 } 153 }
162 ret = ext4_end_io_nolock(io); 154 list_del_init(&io->list);
163 if (ret < 0) {
164 mutex_unlock(&inode->i_mutex);
165 return;
166 }
167
168 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
169 if (!list_empty(&io->list))
170 list_del_init(&io->list);
171 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
156 (void) ext4_end_io_nolock(io);
172 mutex_unlock(&inode->i_mutex); 157 mutex_unlock(&inode->i_mutex);
158free:
173 ext4_free_io_end(io); 159 ext4_free_io_end(io);
174} 160}
175 161
@@ -350,10 +336,8 @@ submit_and_retry:
350 if ((io_end->num_io_pages >= MAX_IO_PAGES) && 336 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
351 (io_end->pages[io_end->num_io_pages-1] != io_page)) 337 (io_end->pages[io_end->num_io_pages-1] != io_page))
352 goto submit_and_retry; 338 goto submit_and_retry;
353 if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 339 if (buffer_uninit(bh))
354 io_end->flag |= EXT4_IO_END_UNWRITTEN; 340 ext4_set_io_unwritten_flag(inode, io_end);
355 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
356 }
357 io->io_end->size += bh->b_size; 341 io->io_end->size += bh->b_size;
358 io->io_next_block++; 342 io->io_next_block++;
359 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 343 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 707d3f16f7ce..996780ab4f4e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
878 ext4_free_blks_set(sb, gdp, input->free_blocks_count); 878 ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); 880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
@@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
937 input->reserved_blocks); 937 input->reserved_blocks);
938 938
939 /* Update the free space counts */ 939 /* Update the free space counts */
940 percpu_counter_add(&sbi->s_freeblocks_counter, 940 percpu_counter_add(&sbi->s_freeclusters_counter,
941 input->free_blocks_count); 941 EXT4_B2C(sbi, input->free_blocks_count));
942 percpu_counter_add(&sbi->s_freeinodes_counter, 942 percpu_counter_add(&sbi->s_freeinodes_counter,
943 EXT4_INODES_PER_GROUP(sb)); 943 EXT4_INODES_PER_GROUP(sb));
944 944
@@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
946 sbi->s_log_groups_per_flex) { 946 sbi->s_log_groups_per_flex) {
947 ext4_group_t flex_group; 947 ext4_group_t flex_group;
948 flex_group = ext4_flex_group(sbi, input->group); 948 flex_group = ext4_flex_group(sbi, input->group);
949 atomic_add(input->free_blocks_count, 949 atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
950 &sbi->s_flex_groups[flex_group].free_blocks); 950 &sbi->s_flex_groups[flex_group].free_clusters);
951 atomic_add(EXT4_INODES_PER_GROUP(sb), 951 atomic_add(EXT4_INODES_PER_GROUP(sb),
952 &sbi->s_flex_groups[flex_group].free_inodes); 952 &sbi->s_flex_groups[flex_group].free_inodes);
953 } 953 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 44d0c8db2239..9953d80145ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,6 +45,7 @@
45#include <linux/freezer.h> 45#include <linux/freezer.h>
46 46
47#include "ext4.h" 47#include "ext4.h"
48#include "ext4_extents.h"
48#include "ext4_jbd2.h" 49#include "ext4_jbd2.h"
49#include "xattr.h" 50#include "xattr.h"
50#include "acl.h" 51#include "acl.h"
@@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
163 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 164 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
164} 165}
165 166
166__u32 ext4_free_blks_count(struct super_block *sb, 167__u32 ext4_free_group_clusters(struct super_block *sb,
167 struct ext4_group_desc *bg) 168 struct ext4_group_desc *bg)
168{ 169{
169 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 170 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
170 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 171 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
@@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb,
219 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 220 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
220} 221}
221 222
222void ext4_free_blks_set(struct super_block *sb, 223void ext4_free_group_clusters_set(struct super_block *sb,
223 struct ext4_group_desc *bg, __u32 count) 224 struct ext4_group_desc *bg, __u32 count)
224{ 225{
225 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 226 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
226 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 227 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
@@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func,
414 ext4_commit_super(sb, 1); 415 ext4_commit_super(sb, 1);
415} 416}
416 417
418/*
419 * The del_gendisk() function uninitializes the disk-specific data
420 * structures, including the bdi structure, without telling anyone
421 * else. Once this happens, any attempt to call mark_buffer_dirty()
422 * (for example, by ext4_commit_super), will cause a kernel OOPS.
423 * This is a kludge to prevent these oops until we can put in a proper
424 * hook in del_gendisk() to inform the VFS and file system layers.
425 */
426static int block_device_ejected(struct super_block *sb)
427{
428 struct inode *bd_inode = sb->s_bdev->bd_inode;
429 struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
430
431 return bdi->dev == NULL;
432}
433
417 434
418/* Deal with the reporting of failure conditions on a filesystem such as 435/* Deal with the reporting of failure conditions on a filesystem such as
419 * inconsistencies detected or read IO failures. 436 * inconsistencies detected or read IO failures.
@@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb)
821 brelse(sbi->s_group_desc[i]); 838 brelse(sbi->s_group_desc[i]);
822 ext4_kvfree(sbi->s_group_desc); 839 ext4_kvfree(sbi->s_group_desc);
823 ext4_kvfree(sbi->s_flex_groups); 840 ext4_kvfree(sbi->s_flex_groups);
824 percpu_counter_destroy(&sbi->s_freeblocks_counter); 841 percpu_counter_destroy(&sbi->s_freeclusters_counter);
825 percpu_counter_destroy(&sbi->s_freeinodes_counter); 842 percpu_counter_destroy(&sbi->s_freeinodes_counter);
826 percpu_counter_destroy(&sbi->s_dirs_counter); 843 percpu_counter_destroy(&sbi->s_dirs_counter);
827 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 844 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
828 brelse(sbi->s_sbh); 845 brelse(sbi->s_sbh);
829#ifdef CONFIG_QUOTA 846#ifdef CONFIG_QUOTA
830 for (i = 0; i < MAXQUOTAS; i++) 847 for (i = 0; i < MAXQUOTAS; i++)
@@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1057 seq_puts(seq, ",nouid32"); 1074 seq_puts(seq, ",nouid32");
1058 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1075 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
1059 seq_puts(seq, ",debug"); 1076 seq_puts(seq, ",debug");
1060 if (test_opt(sb, OLDALLOC))
1061 seq_puts(seq, ",oldalloc");
1062#ifdef CONFIG_EXT4_FS_XATTR 1077#ifdef CONFIG_EXT4_FS_XATTR
1063 if (test_opt(sb, XATTR_USER)) 1078 if (test_opt(sb, XATTR_USER))
1064 seq_puts(seq, ",user_xattr"); 1079 seq_puts(seq, ",user_xattr");
@@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb,
1567 set_opt(sb, DEBUG); 1582 set_opt(sb, DEBUG);
1568 break; 1583 break;
1569 case Opt_oldalloc: 1584 case Opt_oldalloc:
1570 set_opt(sb, OLDALLOC); 1585 ext4_msg(sb, KERN_WARNING,
1586 "Ignoring deprecated oldalloc option");
1571 break; 1587 break;
1572 case Opt_orlov: 1588 case Opt_orlov:
1573 clear_opt(sb, OLDALLOC); 1589 ext4_msg(sb, KERN_WARNING,
1590 "Ignoring deprecated orlov option");
1574 break; 1591 break;
1575#ifdef CONFIG_EXT4_FS_XATTR 1592#ifdef CONFIG_EXT4_FS_XATTR
1576 case Opt_user_xattr: 1593 case Opt_user_xattr:
@@ -1801,6 +1818,7 @@ set_qf_format:
1801 break; 1818 break;
1802 case Opt_nodelalloc: 1819 case Opt_nodelalloc:
1803 clear_opt(sb, DELALLOC); 1820 clear_opt(sb, DELALLOC);
1821 clear_opt2(sb, EXPLICIT_DELALLOC);
1804 break; 1822 break;
1805 case Opt_mblk_io_submit: 1823 case Opt_mblk_io_submit:
1806 set_opt(sb, MBLK_IO_SUBMIT); 1824 set_opt(sb, MBLK_IO_SUBMIT);
@@ -1817,6 +1835,7 @@ set_qf_format:
1817 break; 1835 break;
1818 case Opt_delalloc: 1836 case Opt_delalloc:
1819 set_opt(sb, DELALLOC); 1837 set_opt(sb, DELALLOC);
1838 set_opt2(sb, EXPLICIT_DELALLOC);
1820 break; 1839 break;
1821 case Opt_block_validity: 1840 case Opt_block_validity:
1822 set_opt(sb, BLOCK_VALIDITY); 1841 set_opt(sb, BLOCK_VALIDITY);
@@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1935 res = MS_RDONLY; 1954 res = MS_RDONLY;
1936 } 1955 }
1937 if (read_only) 1956 if (read_only)
1938 return res; 1957 goto done;
1939 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1958 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1940 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1959 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1941 "running e2fsck is recommended"); 1960 "running e2fsck is recommended");
@@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1966 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1985 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1967 1986
1968 ext4_commit_super(sb, 1); 1987 ext4_commit_super(sb, 1);
1988done:
1969 if (test_opt(sb, DEBUG)) 1989 if (test_opt(sb, DEBUG))
1970 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1990 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1971 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1991 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
@@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
2015 flex_group = ext4_flex_group(sbi, i); 2035 flex_group = ext4_flex_group(sbi, i);
2016 atomic_add(ext4_free_inodes_count(sb, gdp), 2036 atomic_add(ext4_free_inodes_count(sb, gdp),
2017 &sbi->s_flex_groups[flex_group].free_inodes); 2037 &sbi->s_flex_groups[flex_group].free_inodes);
2018 atomic_add(ext4_free_blks_count(sb, gdp), 2038 atomic_add(ext4_free_group_clusters(sb, gdp),
2019 &sbi->s_flex_groups[flex_group].free_blocks); 2039 &sbi->s_flex_groups[flex_group].free_clusters);
2020 atomic_add(ext4_used_dirs_count(sb, gdp), 2040 atomic_add(ext4_used_dirs_count(sb, gdp),
2021 &sbi->s_flex_groups[flex_group].used_dirs); 2041 &sbi->s_flex_groups[flex_group].used_dirs);
2022 } 2042 }
@@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb,
2134 if (NULL != first_not_zeroed) 2154 if (NULL != first_not_zeroed)
2135 *first_not_zeroed = grp; 2155 *first_not_zeroed = grp;
2136 2156
2137 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2157 ext4_free_blocks_count_set(sbi->s_es,
2158 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2138 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2159 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2139 return 1; 2160 return 1;
2140} 2161}
@@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2454 char *buf) 2475 char *buf)
2455{ 2476{
2456 return snprintf(buf, PAGE_SIZE, "%llu\n", 2477 return snprintf(buf, PAGE_SIZE, "%llu\n",
2457 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2478 (s64) EXT4_C2B(sbi,
2479 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2458} 2480}
2459 2481
2460static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2482static ssize_t session_write_kbytes_show(struct ext4_attr *a,
@@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2682 return 0; 2704 return 0;
2683 } 2705 }
2684 } 2706 }
2707 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2708 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2709 ext4_msg(sb, KERN_ERR,
2710 "Can't support bigalloc feature without "
2711 "extents feature\n");
2712 return 0;
2713 }
2685 return 1; 2714 return 1;
2686} 2715}
2687 2716
@@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3087 char *cp; 3116 char *cp;
3088 const char *descr; 3117 const char *descr;
3089 int ret = -ENOMEM; 3118 int ret = -ENOMEM;
3090 int blocksize; 3119 int blocksize, clustersize;
3091 unsigned int db_count; 3120 unsigned int db_count;
3092 unsigned int i; 3121 unsigned int i;
3093 int needs_recovery, has_huge_files; 3122 int needs_recovery, has_huge_files, has_bigalloc;
3094 __u64 blocks_count; 3123 __u64 blocks_count;
3095 int err; 3124 int err;
3096 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3125 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3224 &journal_ioprio, NULL, 0)) 3253 &journal_ioprio, NULL, 0))
3225 goto failed_mount; 3254 goto failed_mount;
3226 3255
3256 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3257 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3258 "with data=journal disables delayed "
3259 "allocation and O_DIRECT support!\n");
3260 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3261 ext4_msg(sb, KERN_ERR, "can't mount with "
3262 "both data=journal and delalloc");
3263 goto failed_mount;
3264 }
3265 if (test_opt(sb, DIOREAD_NOLOCK)) {
3266 ext4_msg(sb, KERN_ERR, "can't mount with "
3267 "both data=journal and delalloc");
3268 goto failed_mount;
3269 }
3270 if (test_opt(sb, DELALLOC))
3271 clear_opt(sb, DELALLOC);
3272 }
3273
3274 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3275 if (test_opt(sb, DIOREAD_NOLOCK)) {
3276 if (blocksize < PAGE_SIZE) {
3277 ext4_msg(sb, KERN_ERR, "can't mount with "
3278 "dioread_nolock if block size != PAGE_SIZE");
3279 goto failed_mount;
3280 }
3281 }
3282
3227 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3283 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3228 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3284 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3229 3285
@@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3265 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3321 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3266 goto failed_mount; 3322 goto failed_mount;
3267 3323
3268 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3269
3270 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3324 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3271 blocksize > EXT4_MAX_BLOCK_SIZE) { 3325 blocksize > EXT4_MAX_BLOCK_SIZE) {
3272 ext4_msg(sb, KERN_ERR, 3326 ext4_msg(sb, KERN_ERR,
@@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3369 sb->s_dirt = 1; 3423 sb->s_dirt = 1;
3370 } 3424 }
3371 3425
3372 if (sbi->s_blocks_per_group > blocksize * 8) { 3426 /* Handle clustersize */
3373 ext4_msg(sb, KERN_ERR, 3427 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3374 "#blocks per group too big: %lu", 3428 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3375 sbi->s_blocks_per_group); 3429 EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3376 goto failed_mount; 3430 if (has_bigalloc) {
3431 if (clustersize < blocksize) {
3432 ext4_msg(sb, KERN_ERR,
3433 "cluster size (%d) smaller than "
3434 "block size (%d)", clustersize, blocksize);
3435 goto failed_mount;
3436 }
3437 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3438 le32_to_cpu(es->s_log_block_size);
3439 sbi->s_clusters_per_group =
3440 le32_to_cpu(es->s_clusters_per_group);
3441 if (sbi->s_clusters_per_group > blocksize * 8) {
3442 ext4_msg(sb, KERN_ERR,
3443 "#clusters per group too big: %lu",
3444 sbi->s_clusters_per_group);
3445 goto failed_mount;
3446 }
3447 if (sbi->s_blocks_per_group !=
3448 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3449 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3450 "clusters per group (%lu) inconsistent",
3451 sbi->s_blocks_per_group,
3452 sbi->s_clusters_per_group);
3453 goto failed_mount;
3454 }
3455 } else {
3456 if (clustersize != blocksize) {
3457 ext4_warning(sb, "fragment/cluster size (%d) != "
3458 "block size (%d)", clustersize,
3459 blocksize);
3460 clustersize = blocksize;
3461 }
3462 if (sbi->s_blocks_per_group > blocksize * 8) {
3463 ext4_msg(sb, KERN_ERR,
3464 "#blocks per group too big: %lu",
3465 sbi->s_blocks_per_group);
3466 goto failed_mount;
3467 }
3468 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3469 sbi->s_cluster_bits = 0;
3377 } 3470 }
3471 sbi->s_cluster_ratio = clustersize / blocksize;
3472
3378 if (sbi->s_inodes_per_group > blocksize * 8) { 3473 if (sbi->s_inodes_per_group > blocksize * 8) {
3379 ext4_msg(sb, KERN_ERR, 3474 ext4_msg(sb, KERN_ERR,
3380 "#inodes per group too big: %lu", 3475 "#inodes per group too big: %lu",
@@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3446 goto failed_mount; 3541 goto failed_mount;
3447 } 3542 }
3448 3543
3449#ifdef CONFIG_PROC_FS
3450 if (ext4_proc_root) 3544 if (ext4_proc_root)
3451 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3545 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3452#endif
3453 3546
3454 bgl_lock_init(sbi->s_blockgroup_lock); 3547 bgl_lock_init(sbi->s_blockgroup_lock);
3455 3548
@@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3483 sbi->s_err_report.function = print_daily_error_info; 3576 sbi->s_err_report.function = print_daily_error_info;
3484 sbi->s_err_report.data = (unsigned long) sb; 3577 sbi->s_err_report.data = (unsigned long) sb;
3485 3578
3486 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3579 err = percpu_counter_init(&sbi->s_freeclusters_counter,
3487 ext4_count_free_blocks(sb)); 3580 ext4_count_free_clusters(sb));
3488 if (!err) { 3581 if (!err) {
3489 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3582 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3490 ext4_count_free_inodes(sb)); 3583 ext4_count_free_inodes(sb));
@@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3494 ext4_count_dirs(sb)); 3587 ext4_count_dirs(sb));
3495 } 3588 }
3496 if (!err) { 3589 if (!err) {
3497 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3590 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3498 } 3591 }
3499 if (err) { 3592 if (err) {
3500 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3593 ext4_msg(sb, KERN_ERR, "insufficient memory");
@@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3609 * The journal may have updated the bg summary counts, so we 3702 * The journal may have updated the bg summary counts, so we
3610 * need to update the global counters. 3703 * need to update the global counters.
3611 */ 3704 */
3612 percpu_counter_set(&sbi->s_freeblocks_counter, 3705 percpu_counter_set(&sbi->s_freeclusters_counter,
3613 ext4_count_free_blocks(sb)); 3706 ext4_count_free_clusters(sb));
3614 percpu_counter_set(&sbi->s_freeinodes_counter, 3707 percpu_counter_set(&sbi->s_freeinodes_counter,
3615 ext4_count_free_inodes(sb)); 3708 ext4_count_free_inodes(sb));
3616 percpu_counter_set(&sbi->s_dirs_counter, 3709 percpu_counter_set(&sbi->s_dirs_counter,
3617 ext4_count_dirs(sb)); 3710 ext4_count_dirs(sb));
3618 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3711 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
3619 3712
3620no_journal: 3713no_journal:
3621 /* 3714 /*
@@ -3679,25 +3772,6 @@ no_journal:
3679 "available"); 3772 "available");
3680 } 3773 }
3681 3774
3682 if (test_opt(sb, DELALLOC) &&
3683 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
3684 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
3685 "requested data journaling mode");
3686 clear_opt(sb, DELALLOC);
3687 }
3688 if (test_opt(sb, DIOREAD_NOLOCK)) {
3689 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3690 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3691 "option - requested data journaling mode");
3692 clear_opt(sb, DIOREAD_NOLOCK);
3693 }
3694 if (sb->s_blocksize < PAGE_SIZE) {
3695 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3696 "option - block size is too small");
3697 clear_opt(sb, DIOREAD_NOLOCK);
3698 }
3699 }
3700
3701 err = ext4_setup_system_zone(sb); 3775 err = ext4_setup_system_zone(sb);
3702 if (err) { 3776 if (err) {
3703 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3777 ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -3710,22 +3784,19 @@ no_journal:
3710 if (err) { 3784 if (err) {
3711 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 3785 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3712 err); 3786 err);
3713 goto failed_mount4; 3787 goto failed_mount5;
3714 } 3788 }
3715 3789
3716 err = ext4_register_li_request(sb, first_not_zeroed); 3790 err = ext4_register_li_request(sb, first_not_zeroed);
3717 if (err) 3791 if (err)
3718 goto failed_mount4; 3792 goto failed_mount6;
3719 3793
3720 sbi->s_kobj.kset = ext4_kset; 3794 sbi->s_kobj.kset = ext4_kset;
3721 init_completion(&sbi->s_kobj_unregister); 3795 init_completion(&sbi->s_kobj_unregister);
3722 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3796 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
3723 "%s", sb->s_id); 3797 "%s", sb->s_id);
3724 if (err) { 3798 if (err)
3725 ext4_mb_release(sb); 3799 goto failed_mount7;
3726 ext4_ext_release(sb);
3727 goto failed_mount4;
3728 };
3729 3800
3730 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3801 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
3731 ext4_orphan_cleanup(sb, es); 3802 ext4_orphan_cleanup(sb, es);
@@ -3759,13 +3830,19 @@ cantfind_ext4:
3759 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3830 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
3760 goto failed_mount; 3831 goto failed_mount;
3761 3832
3833failed_mount7:
3834 ext4_unregister_li_request(sb);
3835failed_mount6:
3836 ext4_ext_release(sb);
3837failed_mount5:
3838 ext4_mb_release(sb);
3839 ext4_release_system_zone(sb);
3762failed_mount4: 3840failed_mount4:
3763 iput(root); 3841 iput(root);
3764 sb->s_root = NULL; 3842 sb->s_root = NULL;
3765 ext4_msg(sb, KERN_ERR, "mount failed"); 3843 ext4_msg(sb, KERN_ERR, "mount failed");
3766 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3844 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
3767failed_mount_wq: 3845failed_mount_wq:
3768 ext4_release_system_zone(sb);
3769 if (sbi->s_journal) { 3846 if (sbi->s_journal) {
3770 jbd2_journal_destroy(sbi->s_journal); 3847 jbd2_journal_destroy(sbi->s_journal);
3771 sbi->s_journal = NULL; 3848 sbi->s_journal = NULL;
@@ -3774,10 +3851,10 @@ failed_mount3:
3774 del_timer(&sbi->s_err_report); 3851 del_timer(&sbi->s_err_report);
3775 if (sbi->s_flex_groups) 3852 if (sbi->s_flex_groups)
3776 ext4_kvfree(sbi->s_flex_groups); 3853 ext4_kvfree(sbi->s_flex_groups);
3777 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3854 percpu_counter_destroy(&sbi->s_freeclusters_counter);
3778 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3855 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3779 percpu_counter_destroy(&sbi->s_dirs_counter); 3856 percpu_counter_destroy(&sbi->s_dirs_counter);
3780 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3857 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
3781 if (sbi->s_mmp_tsk) 3858 if (sbi->s_mmp_tsk)
3782 kthread_stop(sbi->s_mmp_tsk); 3859 kthread_stop(sbi->s_mmp_tsk);
3783failed_mount2: 3860failed_mount2:
@@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4064 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4141 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4065 int error = 0; 4142 int error = 0;
4066 4143
4067 if (!sbh) 4144 if (!sbh || block_device_ejected(sb))
4068 return error; 4145 return error;
4069 if (buffer_write_io_error(sbh)) { 4146 if (buffer_write_io_error(sbh)) {
4070 /* 4147 /*
@@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4100 else 4177 else
4101 es->s_kbytes_written = 4178 es->s_kbytes_written =
4102 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4179 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4103 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 4180 ext4_free_blocks_count_set(es,
4104 &EXT4_SB(sb)->s_freeblocks_counter)); 4181 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4182 &EXT4_SB(sb)->s_freeclusters_counter)));
4105 es->s_free_inodes_count = 4183 es->s_free_inodes_count =
4106 cpu_to_le32(percpu_counter_sum_positive( 4184 cpu_to_le32(percpu_counter_sum_positive(
4107 &EXT4_SB(sb)->s_freeinodes_counter)); 4185 &EXT4_SB(sb)->s_freeinodes_counter));
@@ -4506,16 +4584,34 @@ restore_opts:
4506 return err; 4584 return err;
4507} 4585}
4508 4586
4587/*
4588 * Note: calculating the overhead so we can be compatible with
4589 * historical BSD practice is quite difficult in the face of
4590 * clusters/bigalloc. This is because multiple metadata blocks from
4591 * different block group can end up in the same allocation cluster.
4592 * Calculating the exact overhead in the face of clustered allocation
4593 * requires either O(all block bitmaps) in memory or O(number of block
4594 * groups**2) in time. We will still calculate the superblock for
4595 * older file systems --- and if we come across with a bigalloc file
4596 * system with zero in s_overhead_clusters the estimate will be close to
4597 * correct especially for very large cluster sizes --- but for newer
4598 * file systems, it's better to calculate this figure once at mkfs
4599 * time, and store it in the superblock. If the superblock value is
4600 * present (even for non-bigalloc file systems), we will use it.
4601 */
4509static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 4602static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4510{ 4603{
4511 struct super_block *sb = dentry->d_sb; 4604 struct super_block *sb = dentry->d_sb;
4512 struct ext4_sb_info *sbi = EXT4_SB(sb); 4605 struct ext4_sb_info *sbi = EXT4_SB(sb);
4513 struct ext4_super_block *es = sbi->s_es; 4606 struct ext4_super_block *es = sbi->s_es;
4607 struct ext4_group_desc *gdp;
4514 u64 fsid; 4608 u64 fsid;
4515 s64 bfree; 4609 s64 bfree;
4516 4610
4517 if (test_opt(sb, MINIX_DF)) { 4611 if (test_opt(sb, MINIX_DF)) {
4518 sbi->s_overhead_last = 0; 4612 sbi->s_overhead_last = 0;
4613 } else if (es->s_overhead_clusters) {
4614 sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
4519 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 4615 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
4520 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 4616 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4521 ext4_fsblk_t overhead = 0; 4617 ext4_fsblk_t overhead = 0;
@@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4530 * All of the blocks before first_data_block are 4626 * All of the blocks before first_data_block are
4531 * overhead 4627 * overhead
4532 */ 4628 */
4533 overhead = le32_to_cpu(es->s_first_data_block); 4629 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4534 4630
4535 /* 4631 /*
4536 * Add the overhead attributed to the superblock and 4632 * Add the overhead found in each block group
4537 * block group descriptors. If the sparse superblocks
4538 * feature is turned on, then not all groups have this.
4539 */ 4633 */
4540 for (i = 0; i < ngroups; i++) { 4634 for (i = 0; i < ngroups; i++) {
4541 overhead += ext4_bg_has_super(sb, i) + 4635 gdp = ext4_get_group_desc(sb, i, NULL);
4542 ext4_bg_num_gdb(sb, i); 4636 overhead += ext4_num_overhead_clusters(sb, i, gdp);
4543 cond_resched(); 4637 cond_resched();
4544 } 4638 }
4545
4546 /*
4547 * Every block group has an inode bitmap, a block
4548 * bitmap, and an inode table.
4549 */
4550 overhead += ngroups * (2 + sbi->s_itb_per_group);
4551 sbi->s_overhead_last = overhead; 4639 sbi->s_overhead_last = overhead;
4552 smp_wmb(); 4640 smp_wmb();
4553 sbi->s_blocks_last = ext4_blocks_count(es); 4641 sbi->s_blocks_last = ext4_blocks_count(es);
@@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4555 4643
4556 buf->f_type = EXT4_SUPER_MAGIC; 4644 buf->f_type = EXT4_SUPER_MAGIC;
4557 buf->f_bsize = sb->s_blocksize; 4645 buf->f_bsize = sb->s_blocksize;
4558 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 4646 buf->f_blocks = (ext4_blocks_count(es) -
4559 bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 4647 EXT4_C2B(sbi, sbi->s_overhead_last));
4560 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 4648 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
4649 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
4561 /* prevent underflow in case that few free space is available */ 4650 /* prevent underflow in case that few free space is available */
4562 buf->f_bfree = max_t(s64, bfree, 0); 4651 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
4563 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 4652 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
4564 if (buf->f_bfree < ext4_r_blocks_count(es)) 4653 if (buf->f_bfree < ext4_r_blocks_count(es))
4565 buf->f_bavail = 0; 4654 buf->f_bavail = 0;
@@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void)
4980 return err; 5069 return err;
4981 err = ext4_init_system_zone(); 5070 err = ext4_init_system_zone();
4982 if (err) 5071 if (err)
4983 goto out7; 5072 goto out6;
4984 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 5073 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4985 if (!ext4_kset) 5074 if (!ext4_kset)
4986 goto out6;
4987 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4988 if (!ext4_proc_root)
4989 goto out5; 5075 goto out5;
5076 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4990 5077
4991 err = ext4_init_feat_adverts(); 5078 err = ext4_init_feat_adverts();
4992 if (err) 5079 if (err)
@@ -5022,12 +5109,12 @@ out2:
5022out3: 5109out3:
5023 ext4_exit_feat_adverts(); 5110 ext4_exit_feat_adverts();
5024out4: 5111out4:
5025 remove_proc_entry("fs/ext4", NULL); 5112 if (ext4_proc_root)
5026out5: 5113 remove_proc_entry("fs/ext4", NULL);
5027 kset_unregister(ext4_kset); 5114 kset_unregister(ext4_kset);
5028out6: 5115out5:
5029 ext4_exit_system_zone(); 5116 ext4_exit_system_zone();
5030out7: 5117out6:
5031 ext4_exit_pageio(); 5118 ext4_exit_pageio();
5032 return err; 5119 return err;
5033} 5120}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc97250..93a00d89a220 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,14 @@ inserted:
820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
822 822
823 /*
824 * take i_data_sem because we will test
825 * i_delalloc_reserved_flag in ext4_mb_new_blocks
826 */
827 down_read((&EXT4_I(inode)->i_data_sem));
823 block = ext4_new_meta_blocks(handle, inode, goal, 0, 828 block = ext4_new_meta_blocks(handle, inode, goal, 0,
824 NULL, &error); 829 NULL, &error);
830 up_read((&EXT4_I(inode)->i_data_sem));
825 if (error) 831 if (error)
826 goto cleanup; 832 goto cleanup;
827 833
@@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
985 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); 991 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
986 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); 992 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
987 993
988 error = ext4_get_inode_loc(inode, &is.iloc); 994 error = ext4_reserve_inode_write(handle, inode, &is.iloc);
989 if (error)
990 goto cleanup;
991
992 error = ext4_journal_get_write_access(handle, is.iloc.bh);
993 if (error) 995 if (error)
994 goto cleanup; 996 goto cleanup;
995 997
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1726d7303047..808cac7edcfb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -379,7 +379,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
379 return error; 379 return error;
380 MSDOS_I(inode)->mmu_private = inode->i_size; 380 MSDOS_I(inode)->mmu_private = inode->i_size;
381 381
382 inode->i_nlink = fat_subdirs(inode); 382 set_nlink(inode, fat_subdirs(inode));
383 } else { /* not a directory */ 383 } else { /* not a directory */
384 inode->i_generation |= 1; 384 inode->i_generation |= 1;
385 inode->i_mode = fat_make_mode(sbi, de->attr, 385 inode->i_mode = fat_make_mode(sbi, de->attr,
@@ -1233,7 +1233,7 @@ static int fat_read_root(struct inode *inode)
1233 fat_save_attrs(inode, ATTR_DIR); 1233 fat_save_attrs(inode, ATTR_DIR);
1234 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; 1234 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
1235 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1235 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1236 inode->i_nlink = fat_subdirs(inode)+2; 1236 set_nlink(inode, fat_subdirs(inode)+2);
1237 1237
1238 return 0; 1238 return 0;
1239} 1239}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 66e83b845455..216b419f30e2 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -387,7 +387,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
387 /* the directory was completed, just return a error */ 387 /* the directory was completed, just return a error */
388 goto out; 388 goto out;
389 } 389 }
390 inode->i_nlink = 2; 390 set_nlink(inode, 2);
391 inode->i_mtime = inode->i_atime = inode->i_ctime = ts; 391 inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
392 /* timestamp is already written, so mark_inode_dirty() is unneeded. */ 392 /* timestamp is already written, so mark_inode_dirty() is unneeded. */
393 393
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index bb3f29c3557b..a87a65663c25 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -900,7 +900,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
900 goto out; 900 goto out;
901 } 901 }
902 inode->i_version++; 902 inode->i_version++;
903 inode->i_nlink = 2; 903 set_nlink(inode, 2);
904 inode->i_mtime = inode->i_atime = inode->i_ctime = ts; 904 inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
905 /* timestamp is already written, so mark_inode_dirty() is unneeded. */ 905 /* timestamp is already written, so mark_inode_dirty() is unneeded. */
906 906
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 1a4311437a8b..7b2af5abe2fa 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -227,7 +227,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
227 ip->i_uid = (uid_t)vip->vii_uid; 227 ip->i_uid = (uid_t)vip->vii_uid;
228 ip->i_gid = (gid_t)vip->vii_gid; 228 ip->i_gid = (gid_t)vip->vii_gid;
229 229
230 ip->i_nlink = vip->vii_nlink; 230 set_nlink(ip, vip->vii_nlink);
231 ip->i_size = vip->vii_size; 231 ip->i_size = vip->vii_size;
232 232
233 ip->i_atime.tv_sec = vip->vii_atime; 233 ip->i_atime.tv_sec = vip->vii_atime;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 85542a7daf40..42593c587d48 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -231,7 +231,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
231 if (iop) 231 if (iop)
232 inode->i_op = iop; 232 inode->i_op = iop;
233 inode->i_fop = fop; 233 inode->i_fop = fop;
234 inode->i_nlink = nlink; 234 set_nlink(inode, nlink);
235 inode->i_private = fc; 235 inode->i_private = fc;
236 d_add(dentry, inode); 236 d_add(dentry, inode);
237 return dentry; 237 return dentry;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index add96f6ffda5..3e6d72756479 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -151,7 +151,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
151 151
152 inode->i_ino = attr->ino; 152 inode->i_ino = attr->ino;
153 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 153 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
154 inode->i_nlink = attr->nlink; 154 set_nlink(inode, attr->nlink);
155 inode->i_uid = attr->uid; 155 inode->i_uid = attr->uid;
156 inode->i_gid = attr->gid; 156 inode->i_gid = attr->gid;
157 inode->i_blocks = attr->blocks; 157 inode->i_blocks = attr->blocks;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78418b4fa857..1656df7aacd2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -299,7 +299,7 @@ static void gfs2_set_nlink(struct inode *inode, u32 nlink)
299 if (nlink == 0) 299 if (nlink == 0)
300 clear_nlink(inode); 300 clear_nlink(inode);
301 else 301 else
302 inode->i_nlink = nlink; 302 set_nlink(inode, nlink);
303 } 303 }
304} 304}
305 305
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index b4d70b13be92..bce4eef91a06 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -198,7 +198,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
198 198
199 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 199 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
200 if (res) { 200 if (res) {
201 inode->i_nlink = 0; 201 clear_nlink(inode);
202 hfs_delete_inode(inode); 202 hfs_delete_inode(inode);
203 iput(inode); 203 iput(inode);
204 return res; 204 return res;
@@ -227,7 +227,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
227 227
228 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 228 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
229 if (res) { 229 if (res) {
230 inode->i_nlink = 0; 230 clear_nlink(inode);
231 hfs_delete_inode(inode); 231 hfs_delete_inode(inode);
232 iput(inode); 232 iput(inode);
233 return res; 233 return res;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 96a1b625fc74..a1a9fdcd2a00 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -183,7 +183,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
183 inode->i_mode = mode; 183 inode->i_mode = mode;
184 inode->i_uid = current_fsuid(); 184 inode->i_uid = current_fsuid();
185 inode->i_gid = current_fsgid(); 185 inode->i_gid = current_fsgid();
186 inode->i_nlink = 1; 186 set_nlink(inode, 1);
187 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 187 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
188 HFS_I(inode)->flags = 0; 188 HFS_I(inode)->flags = 0;
189 HFS_I(inode)->rsrc_inode = NULL; 189 HFS_I(inode)->rsrc_inode = NULL;
@@ -313,7 +313,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
313 /* Initialize the inode */ 313 /* Initialize the inode */
314 inode->i_uid = hsb->s_uid; 314 inode->i_uid = hsb->s_uid;
315 inode->i_gid = hsb->s_gid; 315 inode->i_gid = hsb->s_gid;
316 inode->i_nlink = 1; 316 set_nlink(inode, 1);
317 317
318 if (idata->key) 318 if (idata->key)
319 HFS_I(inode)->cat_key = *idata->key; 319 HFS_I(inode)->cat_key = *idata->key;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 25b2443a004c..4536cd3f15ae 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -415,7 +415,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
415 goto out; 415 goto out;
416 416
417out_err: 417out_err:
418 inode->i_nlink = 0; 418 clear_nlink(inode);
419 hfsplus_delete_inode(inode); 419 hfsplus_delete_inode(inode);
420 iput(inode); 420 iput(inode);
421out: 421out:
@@ -440,7 +440,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
440 440
441 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); 441 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
442 if (res) { 442 if (res) {
443 inode->i_nlink = 0; 443 clear_nlink(inode);
444 hfsplus_delete_inode(inode); 444 hfsplus_delete_inode(inode);
445 iput(inode); 445 iput(inode);
446 goto out; 446 goto out;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 4cc1e3a36ec7..40e1413be4cf 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -391,7 +391,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
391 inode->i_mode = mode; 391 inode->i_mode = mode;
392 inode->i_uid = current_fsuid(); 392 inode->i_uid = current_fsuid();
393 inode->i_gid = current_fsgid(); 393 inode->i_gid = current_fsgid();
394 inode->i_nlink = 1; 394 set_nlink(inode, 1);
395 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 395 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
396 396
397 hip = HFSPLUS_I(inode); 397 hip = HFSPLUS_I(inode);
@@ -512,7 +512,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
512 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, 512 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
513 sizeof(struct hfsplus_cat_folder)); 513 sizeof(struct hfsplus_cat_folder));
514 hfsplus_get_perms(inode, &folder->permissions, 1); 514 hfsplus_get_perms(inode, &folder->permissions, 1);
515 inode->i_nlink = 1; 515 set_nlink(inode, 1);
516 inode->i_size = 2 + be32_to_cpu(folder->valence); 516 inode->i_size = 2 + be32_to_cpu(folder->valence);
517 inode->i_atime = hfsp_mt2ut(folder->access_date); 517 inode->i_atime = hfsp_mt2ut(folder->access_date);
518 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); 518 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
@@ -532,11 +532,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
532 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? 532 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ?
533 &file->rsrc_fork : &file->data_fork); 533 &file->rsrc_fork : &file->data_fork);
534 hfsplus_get_perms(inode, &file->permissions, 0); 534 hfsplus_get_perms(inode, &file->permissions, 0);
535 inode->i_nlink = 1; 535 set_nlink(inode, 1);
536 if (S_ISREG(inode->i_mode)) { 536 if (S_ISREG(inode->i_mode)) {
537 if (file->permissions.dev) 537 if (file->permissions.dev)
538 inode->i_nlink = 538 set_nlink(inode,
539 be32_to_cpu(file->permissions.dev); 539 be32_to_cpu(file->permissions.dev));
540 inode->i_op = &hfsplus_file_inode_operations; 540 inode->i_op = &hfsplus_file_inode_operations;
541 inode->i_fop = &hfsplus_file_operations; 541 inode->i_fop = &hfsplus_file_operations;
542 inode->i_mapping->a_ops = &hfsplus_aops; 542 inode->i_mapping->a_ops = &hfsplus_aops;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 0d22afdd4611..2f72da5ae686 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -541,7 +541,7 @@ static int read_name(struct inode *ino, char *name)
541 541
542 ino->i_ino = st.ino; 542 ino->i_ino = st.ino;
543 ino->i_mode = st.mode; 543 ino->i_mode = st.mode;
544 ino->i_nlink = st.nlink; 544 set_nlink(ino, st.nlink);
545 ino->i_uid = st.uid; 545 ino->i_uid = st.uid;
546 ino->i_gid = st.gid; 546 ino->i_gid = st.gid;
547 ino->i_atime = st.atime; 547 ino->i_atime = st.atime;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index d51a98384bc0..dd7bc38a3825 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -16,7 +16,6 @@
16#include <sys/vfs.h> 16#include <sys/vfs.h>
17#include "hostfs.h" 17#include "hostfs.h"
18#include "os.h" 18#include "os.h"
19#include "user.h"
20#include <utime.h> 19#include <utime.h>
21 20
22static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) 21static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 96a8ed91cedd..2fa0089a02a8 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -247,7 +247,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
247 result->i_mode &= ~0111; 247 result->i_mode &= ~0111;
248 result->i_op = &hpfs_file_iops; 248 result->i_op = &hpfs_file_iops;
249 result->i_fop = &hpfs_file_ops; 249 result->i_fop = &hpfs_file_ops;
250 result->i_nlink = 1; 250 set_nlink(result, 1);
251 } 251 }
252 unlock_new_inode(result); 252 unlock_new_inode(result);
253 } 253 }
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 338cd8368451..3b2cec29972b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -53,7 +53,7 @@ void hpfs_read_inode(struct inode *i)
53 i->i_mode &= ~0111; 53 i->i_mode &= ~0111;
54 i->i_op = &hpfs_file_iops; 54 i->i_op = &hpfs_file_iops;
55 i->i_fop = &hpfs_file_ops; 55 i->i_fop = &hpfs_file_ops;
56 i->i_nlink = 0;*/ 56 clear_nlink(i);*/
57 make_bad_inode(i); 57 make_bad_inode(i);
58 return; 58 return;
59 } 59 }
@@ -77,7 +77,7 @@ void hpfs_read_inode(struct inode *i)
77 i->i_mode = S_IFLNK | 0777; 77 i->i_mode = S_IFLNK | 0777;
78 i->i_op = &page_symlink_inode_operations; 78 i->i_op = &page_symlink_inode_operations;
79 i->i_data.a_ops = &hpfs_symlink_aops; 79 i->i_data.a_ops = &hpfs_symlink_aops;
80 i->i_nlink = 1; 80 set_nlink(i, 1);
81 i->i_size = ea_size; 81 i->i_size = ea_size;
82 i->i_blocks = 1; 82 i->i_blocks = 1;
83 brelse(bh); 83 brelse(bh);
@@ -101,7 +101,7 @@ void hpfs_read_inode(struct inode *i)
101 } 101 }
102 if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { 102 if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
103 brelse(bh); 103 brelse(bh);
104 i->i_nlink = 1; 104 set_nlink(i, 1);
105 i->i_size = 0; 105 i->i_size = 0;
106 i->i_blocks = 1; 106 i->i_blocks = 1;
107 init_special_inode(i, mode, 107 init_special_inode(i, mode,
@@ -125,13 +125,13 @@ void hpfs_read_inode(struct inode *i)
125 hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL); 125 hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL);
126 i->i_blocks = 4 * n_dnodes; 126 i->i_blocks = 4 * n_dnodes;
127 i->i_size = 2048 * n_dnodes; 127 i->i_size = 2048 * n_dnodes;
128 i->i_nlink = 2 + n_subdirs; 128 set_nlink(i, 2 + n_subdirs);
129 } else { 129 } else {
130 i->i_mode |= S_IFREG; 130 i->i_mode |= S_IFREG;
131 if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111; 131 if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111;
132 i->i_op = &hpfs_file_iops; 132 i->i_op = &hpfs_file_iops;
133 i->i_fop = &hpfs_file_ops; 133 i->i_fop = &hpfs_file_ops;
134 i->i_nlink = 1; 134 set_nlink(i, 1);
135 i->i_size = le32_to_cpu(fnode->file_size); 135 i->i_size = le32_to_cpu(fnode->file_size);
136 i->i_blocks = ((i->i_size + 511) >> 9) + 1; 136 i->i_blocks = ((i->i_size + 511) >> 9) + 1;
137 i->i_data.a_ops = &hpfs_aops; 137 i->i_data.a_ops = &hpfs_aops;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 2df69e2f07cf..ea91fcb0ef9b 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -56,7 +56,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
56 result->i_fop = &hpfs_dir_ops; 56 result->i_fop = &hpfs_dir_ops;
57 result->i_blocks = 4; 57 result->i_blocks = 4;
58 result->i_size = 2048; 58 result->i_size = 2048;
59 result->i_nlink = 2; 59 set_nlink(result, 2);
60 if (dee.read_only) 60 if (dee.read_only)
61 result->i_mode &= ~0222; 61 result->i_mode &= ~0222;
62 62
@@ -150,7 +150,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
150 result->i_mode &= ~0111; 150 result->i_mode &= ~0111;
151 result->i_op = &hpfs_file_iops; 151 result->i_op = &hpfs_file_iops;
152 result->i_fop = &hpfs_file_ops; 152 result->i_fop = &hpfs_file_ops;
153 result->i_nlink = 1; 153 set_nlink(result, 1);
154 hpfs_i(result)->i_parent_dir = dir->i_ino; 154 hpfs_i(result)->i_parent_dir = dir->i_ino;
155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); 155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
156 result->i_ctime.tv_nsec = 0; 156 result->i_ctime.tv_nsec = 0;
@@ -242,7 +242,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
242 hpfs_i(result)->i_ea_size = 0; 242 hpfs_i(result)->i_ea_size = 0;
243 result->i_uid = current_fsuid(); 243 result->i_uid = current_fsuid();
244 result->i_gid = current_fsgid(); 244 result->i_gid = current_fsgid();
245 result->i_nlink = 1; 245 set_nlink(result, 1);
246 result->i_size = 0; 246 result->i_size = 0;
247 result->i_blocks = 1; 247 result->i_blocks = 1;
248 init_special_inode(result, mode, rdev); 248 init_special_inode(result, mode, rdev);
@@ -318,7 +318,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
318 result->i_uid = current_fsuid(); 318 result->i_uid = current_fsuid();
319 result->i_gid = current_fsgid(); 319 result->i_gid = current_fsgid();
320 result->i_blocks = 1; 320 result->i_blocks = 1;
321 result->i_nlink = 1; 321 set_nlink(result, 1);
322 result->i_size = strlen(symlink); 322 result->i_size = strlen(symlink);
323 result->i_op = &page_symlink_inode_operations; 323 result->i_op = &page_symlink_inode_operations;
324 result->i_data.a_ops = &hpfs_symlink_aops; 324 result->i_data.a_ops = &hpfs_symlink_aops;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 970ea987b3f6..f590b1160c6c 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -702,7 +702,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
702 inode->i_ctime = proc_ino->i_ctime; 702 inode->i_ctime = proc_ino->i_ctime;
703 inode->i_ino = proc_ino->i_ino; 703 inode->i_ino = proc_ino->i_ino;
704 inode->i_mode = proc_ino->i_mode; 704 inode->i_mode = proc_ino->i_mode;
705 inode->i_nlink = proc_ino->i_nlink; 705 set_nlink(inode, proc_ino->i_nlink);
706 inode->i_size = proc_ino->i_size; 706 inode->i_size = proc_ino->i_size;
707 inode->i_blocks = proc_ino->i_blocks; 707 inode->i_blocks = proc_ino->i_blocks;
708 708
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ec889538e5a6..0be5a78598d0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -970,7 +970,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
970 970
971 d_instantiate(path.dentry, inode); 971 d_instantiate(path.dentry, inode);
972 inode->i_size = size; 972 inode->i_size = size;
973 inode->i_nlink = 0; 973 clear_nlink(inode);
974 974
975 error = -ENFILE; 975 error = -ENFILE;
976 file = alloc_file(&path, FMODE_WRITE | FMODE_READ, 976 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
diff --git a/fs/inode.c b/fs/inode.c
index ecbb68dc7e2a..ee4e66b998f4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,7 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
142 atomic_set(&inode->i_count, 1); 142 atomic_set(&inode->i_count, 1);
143 inode->i_op = &empty_iops; 143 inode->i_op = &empty_iops;
144 inode->i_fop = &empty_fops; 144 inode->i_fop = &empty_fops;
145 inode->i_nlink = 1; 145 inode->__i_nlink = 1;
146 inode->i_opflags = 0; 146 inode->i_opflags = 0;
147 inode->i_uid = 0; 147 inode->i_uid = 0;
148 inode->i_gid = 0; 148 inode->i_gid = 0;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a5d03672d04e..562adabef985 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1319,7 +1319,7 @@ static int isofs_read_inode(struct inode *inode)
1319 inode->i_mode = S_IFDIR | sbi->s_dmode; 1319 inode->i_mode = S_IFDIR | sbi->s_dmode;
1320 else 1320 else
1321 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1321 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1322 inode->i_nlink = 1; /* 1322 set_nlink(inode, 1); /*
1323 * Set to 1. We know there are 2, but 1323 * Set to 1. We know there are 2, but
1324 * the find utility tries to optimize 1324 * the find utility tries to optimize
1325 * if it is 2, and it screws up. It is 1325 * if it is 2, and it screws up. It is
@@ -1337,7 +1337,7 @@ static int isofs_read_inode(struct inode *inode)
1337 */ 1337 */
1338 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO; 1338 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
1339 } 1339 }
1340 inode->i_nlink = 1; 1340 set_nlink(inode, 1);
1341 } 1341 }
1342 inode->i_uid = sbi->s_uid; 1342 inode->i_uid = sbi->s_uid;
1343 inode->i_gid = sbi->s_gid; 1343 inode->i_gid = sbi->s_gid;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 1fbc7de88f50..70e79d0c756a 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -363,7 +363,7 @@ repeat:
363 break; 363 break;
364 case SIG('P', 'X'): 364 case SIG('P', 'X'):
365 inode->i_mode = isonum_733(rr->u.PX.mode); 365 inode->i_mode = isonum_733(rr->u.PX.mode);
366 inode->i_nlink = isonum_733(rr->u.PX.n_links); 366 set_nlink(inode, isonum_733(rr->u.PX.n_links));
367 inode->i_uid = isonum_733(rr->u.PX.uid); 367 inode->i_uid = isonum_733(rr->u.PX.uid);
368 inode->i_gid = isonum_733(rr->u.PX.gid); 368 inode->i_gid = isonum_733(rr->u.PX.gid);
369 break; 369 break;
@@ -496,7 +496,7 @@ repeat:
496 goto out; 496 goto out;
497 } 497 }
498 inode->i_mode = reloc->i_mode; 498 inode->i_mode = reloc->i_mode;
499 inode->i_nlink = reloc->i_nlink; 499 set_nlink(inode, reloc->i_nlink);
500 inode->i_uid = reloc->i_uid; 500 inode->i_uid = reloc->i_uid;
501 inode->i_gid = reloc->i_gid; 501 inode->i_gid = reloc->i_gid;
502 inode->i_rdev = reloc->i_rdev; 502 inode->i_rdev = reloc->i_rdev;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 9fe061fb8779..fea8dd661d2b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal)
1135 goto out; 1135 goto out;
1136 } 1136 }
1137 1137
1138 if (be32_to_cpu(sb->s_first) == 0 ||
1139 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1140 printk(KERN_WARNING
1141 "JBD: Invalid start block of journal: %u\n",
1142 be32_to_cpu(sb->s_first));
1143 goto out;
1144 }
1145
1138 return 0; 1146 return 0;
1139 1147
1140out: 1148out:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979821a4..68d704db787f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
352 J_ASSERT(commit_transaction->t_state == T_RUNNING); 352 J_ASSERT(commit_transaction->t_state == T_RUNNING);
353 353
354 trace_jbd2_start_commit(journal, commit_transaction); 354 trace_jbd2_start_commit(journal, commit_transaction);
355 jbd_debug(1, "JBD: starting commit of transaction %d\n", 355 jbd_debug(1, "JBD2: starting commit of transaction %d\n",
356 commit_transaction->t_tid); 356 commit_transaction->t_tid);
357 357
358 write_lock(&journal->j_state_lock); 358 write_lock(&journal->j_state_lock);
@@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
427 __jbd2_journal_clean_checkpoint_list(journal); 427 __jbd2_journal_clean_checkpoint_list(journal);
428 spin_unlock(&journal->j_list_lock); 428 spin_unlock(&journal->j_list_lock);
429 429
430 jbd_debug (3, "JBD: commit phase 1\n"); 430 jbd_debug(3, "JBD2: commit phase 1\n");
431 431
432 /* 432 /*
433 * Switch to a new revoke table. 433 * Switch to a new revoke table.
@@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
447 wake_up(&journal->j_wait_transaction_locked); 447 wake_up(&journal->j_wait_transaction_locked);
448 write_unlock(&journal->j_state_lock); 448 write_unlock(&journal->j_state_lock);
449 449
450 jbd_debug (3, "JBD: commit phase 2\n"); 450 jbd_debug(3, "JBD2: commit phase 2\n");
451 451
452 /* 452 /*
453 * Now start flushing things to disk, in the order they appear 453 * Now start flushing things to disk, in the order they appear
@@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
462 WRITE_SYNC); 462 WRITE_SYNC);
463 blk_finish_plug(&plug); 463 blk_finish_plug(&plug);
464 464
465 jbd_debug(3, "JBD: commit phase 2\n"); 465 jbd_debug(3, "JBD2: commit phase 2\n");
466 466
467 /* 467 /*
468 * Way to go: we have now written out all of the data for a 468 * Way to go: we have now written out all of the data for a
@@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
522 522
523 J_ASSERT (bufs == 0); 523 J_ASSERT (bufs == 0);
524 524
525 jbd_debug(4, "JBD: get descriptor\n"); 525 jbd_debug(4, "JBD2: get descriptor\n");
526 526
527 descriptor = jbd2_journal_get_descriptor_buffer(journal); 527 descriptor = jbd2_journal_get_descriptor_buffer(journal);
528 if (!descriptor) { 528 if (!descriptor) {
@@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
531 } 531 }
532 532
533 bh = jh2bh(descriptor); 533 bh = jh2bh(descriptor);
534 jbd_debug(4, "JBD: got buffer %llu (%p)\n", 534 jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
535 (unsigned long long)bh->b_blocknr, bh->b_data); 535 (unsigned long long)bh->b_blocknr, bh->b_data);
536 header = (journal_header_t *)&bh->b_data[0]; 536 header = (journal_header_t *)&bh->b_data[0];
537 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 537 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
@@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
625 commit_transaction->t_buffers == NULL || 625 commit_transaction->t_buffers == NULL ||
626 space_left < tag_bytes + 16) { 626 space_left < tag_bytes + 16) {
627 627
628 jbd_debug(4, "JBD: Submit %d IOs\n", bufs); 628 jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
629 629
630 /* Write an end-of-descriptor marker before 630 /* Write an end-of-descriptor marker before
631 submitting the IOs. "tag" still points to 631 submitting the IOs. "tag" still points to
@@ -707,7 +707,7 @@ start_journal_io:
707 so we incur less scheduling load. 707 so we incur less scheduling load.
708 */ 708 */
709 709
710 jbd_debug(3, "JBD: commit phase 3\n"); 710 jbd_debug(3, "JBD2: commit phase 3\n");
711 711
712 /* 712 /*
713 * akpm: these are BJ_IO, and j_list_lock is not needed. 713 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -771,7 +771,7 @@ wait_for_iobuf:
771 771
772 J_ASSERT (commit_transaction->t_shadow_list == NULL); 772 J_ASSERT (commit_transaction->t_shadow_list == NULL);
773 773
774 jbd_debug(3, "JBD: commit phase 4\n"); 774 jbd_debug(3, "JBD2: commit phase 4\n");
775 775
776 /* Here we wait for the revoke record and descriptor record buffers */ 776 /* Here we wait for the revoke record and descriptor record buffers */
777 wait_for_ctlbuf: 777 wait_for_ctlbuf:
@@ -801,7 +801,7 @@ wait_for_iobuf:
801 if (err) 801 if (err)
802 jbd2_journal_abort(journal, err); 802 jbd2_journal_abort(journal, err);
803 803
804 jbd_debug(3, "JBD: commit phase 5\n"); 804 jbd_debug(3, "JBD2: commit phase 5\n");
805 write_lock(&journal->j_state_lock); 805 write_lock(&journal->j_state_lock);
806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); 806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
807 commit_transaction->t_state = T_COMMIT_JFLUSH; 807 commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -830,7 +830,7 @@ wait_for_iobuf:
830 transaction can be removed from any checkpoint list it was on 830 transaction can be removed from any checkpoint list it was on
831 before. */ 831 before. */
832 832
833 jbd_debug(3, "JBD: commit phase 6\n"); 833 jbd_debug(3, "JBD2: commit phase 6\n");
834 834
835 J_ASSERT(list_empty(&commit_transaction->t_inode_list)); 835 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
836 J_ASSERT(commit_transaction->t_buffers == NULL); 836 J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -964,7 +964,7 @@ restart_loop:
964 964
965 /* Done with this transaction! */ 965 /* Done with this transaction! */
966 966
967 jbd_debug(3, "JBD: commit phase 7\n"); 967 jbd_debug(3, "JBD2: commit phase 7\n");
968 968
969 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); 969 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
970 970
@@ -1039,7 +1039,7 @@ restart_loop:
1039 journal->j_commit_callback(journal, commit_transaction); 1039 journal->j_commit_callback(journal, commit_transaction);
1040 1040
1041 trace_jbd2_end_commit(journal, commit_transaction); 1041 trace_jbd2_end_commit(journal, commit_transaction);
1042 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1042 jbd_debug(1, "JBD2: commit %d complete, head %d\n",
1043 journal->j_commit_sequence, journal->j_tail_sequence); 1043 journal->j_commit_sequence, journal->j_tail_sequence);
1044 if (to_free) 1044 if (to_free)
1045 kfree(commit_transaction); 1045 kfree(commit_transaction);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f24df13adc4e..0fa0123151d3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
491 */ 491 */
492 492
493 journal->j_commit_request = target; 493 journal->j_commit_request = target;
494 jbd_debug(1, "JBD: requesting commit %d/%d\n", 494 jbd_debug(1, "JBD2: requesting commit %d/%d\n",
495 journal->j_commit_request, 495 journal->j_commit_request,
496 journal->j_commit_sequence); 496 journal->j_commit_sequence);
497 wake_up(&journal->j_wait_commit); 497 wake_up(&journal->j_wait_commit);
@@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
500 /* This should never happen, but if it does, preserve 500 /* This should never happen, but if it does, preserve
501 the evidence before kjournald goes into a loop and 501 the evidence before kjournald goes into a loop and
502 increments j_commit_sequence beyond all recognition. */ 502 increments j_commit_sequence beyond all recognition. */
503 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", 503 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
504 journal->j_commit_request, 504 journal->j_commit_request,
505 journal->j_commit_sequence, 505 journal->j_commit_sequence,
506 target, journal->j_running_transaction ? 506 target, journal->j_running_transaction ?
@@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
645 } 645 }
646#endif 646#endif
647 while (tid_gt(tid, journal->j_commit_sequence)) { 647 while (tid_gt(tid, journal->j_commit_sequence)) {
648 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 648 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
649 tid, journal->j_commit_sequence); 649 tid, journal->j_commit_sequence);
650 wake_up(&journal->j_wait_commit); 650 wake_up(&journal->j_wait_commit);
651 read_unlock(&journal->j_state_lock); 651 read_unlock(&journal->j_state_lock);
@@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal)
1093 first = be32_to_cpu(sb->s_first); 1093 first = be32_to_cpu(sb->s_first);
1094 last = be32_to_cpu(sb->s_maxlen); 1094 last = be32_to_cpu(sb->s_maxlen);
1095 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 1095 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
1096 printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", 1096 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
1097 first, last); 1097 first, last);
1098 journal_fail_superblock(journal); 1098 journal_fail_superblock(journal);
1099 return -EINVAL; 1099 return -EINVAL;
@@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1139 */ 1139 */
1140 if (sb->s_start == 0 && journal->j_tail_sequence == 1140 if (sb->s_start == 0 && journal->j_tail_sequence ==
1141 journal->j_transaction_sequence) { 1141 journal->j_transaction_sequence) {
1142 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 1142 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1143 "(start %ld, seq %d, errno %d)\n", 1143 "(start %ld, seq %d, errno %d)\n",
1144 journal->j_tail, journal->j_tail_sequence, 1144 journal->j_tail, journal->j_tail_sequence,
1145 journal->j_errno); 1145 journal->j_errno);
@@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1163 } 1163 }
1164 1164
1165 read_lock(&journal->j_state_lock); 1165 read_lock(&journal->j_state_lock);
1166 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1166 jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
1167 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1167 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1168 1168
1169 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1169 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal)
1216 ll_rw_block(READ, 1, &bh); 1216 ll_rw_block(READ, 1, &bh);
1217 wait_on_buffer(bh); 1217 wait_on_buffer(bh);
1218 if (!buffer_uptodate(bh)) { 1218 if (!buffer_uptodate(bh)) {
1219 printk (KERN_ERR 1219 printk(KERN_ERR
1220 "JBD: IO error reading journal superblock\n"); 1220 "JBD2: IO error reading journal superblock\n");
1221 goto out; 1221 goto out;
1222 } 1222 }
1223 } 1223 }
@@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal)
1228 1228
1229 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1229 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
1230 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1230 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
1231 printk(KERN_WARNING "JBD: no valid journal superblock found\n"); 1231 printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
1232 goto out; 1232 goto out;
1233 } 1233 }
1234 1234
@@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal)
1240 journal->j_format_version = 2; 1240 journal->j_format_version = 2;
1241 break; 1241 break;
1242 default: 1242 default:
1243 printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); 1243 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
1244 goto out; 1244 goto out;
1245 } 1245 }
1246 1246
1247 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1247 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
1248 journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1248 journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
1249 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1249 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
1250 printk (KERN_WARNING "JBD: journal file too short\n"); 1250 printk(KERN_WARNING "JBD2: journal file too short\n");
1251 goto out;
1252 }
1253
1254 if (be32_to_cpu(sb->s_first) == 0 ||
1255 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1256 printk(KERN_WARNING
1257 "JBD2: Invalid start block of journal: %u\n",
1258 be32_to_cpu(sb->s_first));
1251 goto out; 1259 goto out;
1252 } 1260 }
1253 1261
@@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal)
1310 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1318 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
1311 (sb->s_feature_incompat & 1319 (sb->s_feature_incompat &
1312 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1320 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
1313 printk (KERN_WARNING 1321 printk(KERN_WARNING
1314 "JBD: Unrecognised features on journal\n"); 1322 "JBD2: Unrecognised features on journal\n");
1315 return -EINVAL; 1323 return -EINVAL;
1316 } 1324 }
1317 } 1325 }
@@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal)
1346 return 0; 1354 return 0;
1347 1355
1348recovery_error: 1356recovery_error:
1349 printk (KERN_WARNING "JBD: recovery failed\n"); 1357 printk(KERN_WARNING "JBD2: recovery failed\n");
1350 return -EIO; 1358 return -EIO;
1351} 1359}
1352 1360
@@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
1577 struct buffer_head *bh; 1585 struct buffer_head *bh;
1578 1586
1579 printk(KERN_WARNING 1587 printk(KERN_WARNING
1580 "JBD: Converting superblock from version 1 to 2.\n"); 1588 "JBD2: Converting superblock from version 1 to 2.\n");
1581 1589
1582 /* Pre-initialise new fields to zero */ 1590 /* Pre-initialise new fields to zero */
1583 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1591 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
@@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1694 if (!journal->j_tail) 1702 if (!journal->j_tail)
1695 goto no_recovery; 1703 goto no_recovery;
1696 1704
1697 printk (KERN_WARNING "JBD: %s recovery information on journal\n", 1705 printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
1698 write ? "Clearing" : "Ignoring"); 1706 write ? "Clearing" : "Ignoring");
1699 1707
1700 err = jbd2_journal_skip_recovery(journal); 1708 err = jbd2_journal_skip_recovery(journal);
@@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void)
2020 retval = 0; 2028 retval = 0;
2021 if (!jbd2_journal_head_cache) { 2029 if (!jbd2_journal_head_cache) {
2022 retval = -ENOMEM; 2030 retval = -ENOMEM;
2023 printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 2031 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
2024 } 2032 }
2025 return retval; 2033 return retval;
2026} 2034}
@@ -2383,7 +2391,7 @@ static void __exit journal_exit(void)
2383#ifdef CONFIG_JBD2_DEBUG 2391#ifdef CONFIG_JBD2_DEBUG
2384 int n = atomic_read(&nr_journal_heads); 2392 int n = atomic_read(&nr_journal_heads);
2385 if (n) 2393 if (n)
2386 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 2394 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
2387#endif 2395#endif
2388 jbd2_remove_debugfs_entry(); 2396 jbd2_remove_debugfs_entry();
2389 jbd2_remove_jbd_stats_proc_entry(); 2397 jbd2_remove_jbd_stats_proc_entry();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 1cad869494f0..da6d7baf1390 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
89 err = jbd2_journal_bmap(journal, next, &blocknr); 89 err = jbd2_journal_bmap(journal, next, &blocknr);
90 90
91 if (err) { 91 if (err) {
92 printk (KERN_ERR "JBD: bad block at offset %u\n", 92 printk(KERN_ERR "JBD2: bad block at offset %u\n",
93 next); 93 next);
94 goto failed; 94 goto failed;
95 } 95 }
@@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
138 *bhp = NULL; 138 *bhp = NULL;
139 139
140 if (offset >= journal->j_maxlen) { 140 if (offset >= journal->j_maxlen) {
141 printk(KERN_ERR "JBD: corrupted journal superblock\n"); 141 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
142 return -EIO; 142 return -EIO;
143 } 143 }
144 144
145 err = jbd2_journal_bmap(journal, offset, &blocknr); 145 err = jbd2_journal_bmap(journal, offset, &blocknr);
146 146
147 if (err) { 147 if (err) {
148 printk (KERN_ERR "JBD: bad block at offset %u\n", 148 printk(KERN_ERR "JBD2: bad block at offset %u\n",
149 offset); 149 offset);
150 return err; 150 return err;
151 } 151 }
@@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
163 } 163 }
164 164
165 if (!buffer_uptodate(bh)) { 165 if (!buffer_uptodate(bh)) {
166 printk (KERN_ERR "JBD: Failed to read block at offset %u\n", 166 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
167 offset); 167 offset);
168 brelse(bh); 168 brelse(bh);
169 return -EIO; 169 return -EIO;
@@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal)
251 if (!err) 251 if (!err)
252 err = do_one_pass(journal, &info, PASS_REPLAY); 252 err = do_one_pass(journal, &info, PASS_REPLAY);
253 253
254 jbd_debug(1, "JBD: recovery, exit status %d, " 254 jbd_debug(1, "JBD2: recovery, exit status %d, "
255 "recovered transactions %u to %u\n", 255 "recovered transactions %u to %u\n",
256 err, info.start_transaction, info.end_transaction); 256 err, info.start_transaction, info.end_transaction);
257 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", 257 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259 259
260 /* Restart the log at the next transaction ID, thus invalidating 260 /* Restart the log at the next transaction ID, thus invalidating
@@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
293 err = do_one_pass(journal, &info, PASS_SCAN); 293 err = do_one_pass(journal, &info, PASS_SCAN);
294 294
295 if (err) { 295 if (err) {
296 printk(KERN_ERR "JBD: error %d scanning journal\n", err); 296 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
297 ++journal->j_transaction_sequence; 297 ++journal->j_transaction_sequence;
298 } else { 298 } else {
299#ifdef CONFIG_JBD2_DEBUG 299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction - 300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence); 301 be32_to_cpu(journal->j_superblock->s_sequence);
302 jbd_debug(1, 302 jbd_debug(1,
303 "JBD: ignoring %d transaction%s from the journal.\n", 303 "JBD2: ignoring %d transaction%s from the journal.\n",
304 dropped, (dropped == 1) ? "" : "s"); 304 dropped, (dropped == 1) ? "" : "s");
305#endif 305#endif
306 journal->j_transaction_sequence = ++info.end_transaction; 306 journal->j_transaction_sequence = ++info.end_transaction;
@@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
338 wrap(journal, *next_log_block); 338 wrap(journal, *next_log_block);
339 err = jread(&obh, journal, io_block); 339 err = jread(&obh, journal, io_block);
340 if (err) { 340 if (err) {
341 printk(KERN_ERR "JBD: IO error %d recovering block " 341 printk(KERN_ERR "JBD2: IO error %d recovering block "
342 "%lu in log\n", err, io_block); 342 "%lu in log\n", err, io_block);
343 return 1; 343 return 1;
344 } else { 344 } else {
@@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal,
411 * either the next descriptor block or the final commit 411 * either the next descriptor block or the final commit
412 * record. */ 412 * record. */
413 413
414 jbd_debug(3, "JBD: checking block %ld\n", next_log_block); 414 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
415 err = jread(&bh, journal, next_log_block); 415 err = jread(&bh, journal, next_log_block);
416 if (err) 416 if (err)
417 goto failed; 417 goto failed;
@@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal,
491 /* Recover what we can, but 491 /* Recover what we can, but
492 * report failure at the end. */ 492 * report failure at the end. */
493 success = err; 493 success = err;
494 printk (KERN_ERR 494 printk(KERN_ERR
495 "JBD: IO error %d recovering " 495 "JBD2: IO error %d recovering "
496 "block %ld in log\n", 496 "block %ld in log\n",
497 err, io_block); 497 err, io_block);
498 } else { 498 } else {
@@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal,
520 journal->j_blocksize); 520 journal->j_blocksize);
521 if (nbh == NULL) { 521 if (nbh == NULL) {
522 printk(KERN_ERR 522 printk(KERN_ERR
523 "JBD: Out of memory " 523 "JBD2: Out of memory "
524 "during recovery.\n"); 524 "during recovery.\n");
525 err = -ENOMEM; 525 err = -ENOMEM;
526 brelse(bh); 526 brelse(bh);
@@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal,
689 /* It's really bad news if different passes end up at 689 /* It's really bad news if different passes end up at
690 * different places (but possible due to IO errors). */ 690 * different places (but possible due to IO errors). */
691 if (info->end_transaction != next_commit_ID) { 691 if (info->end_transaction != next_commit_ID) {
692 printk (KERN_ERR "JBD: recovery pass %d ended at " 692 printk(KERN_ERR "JBD2: recovery pass %d ended at "
693 "transaction %u, expected %u\n", 693 "transaction %u, expected %u\n",
694 pass, next_commit_ID, info->end_transaction); 694 pass, next_commit_ID, info->end_transaction);
695 if (!success) 695 if (!success)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d7109414cdd..a0e41a4c080e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -27,6 +27,7 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
30#include <linux/bug.h>
30#include <linux/module.h> 31#include <linux/module.h>
31 32
32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction,
115 */ 116 */
116 117
117static int start_this_handle(journal_t *journal, handle_t *handle, 118static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 119 gfp_t gfp_mask)
119{ 120{
120 transaction_t *transaction, *new_transaction = NULL; 121 transaction_t *transaction, *new_transaction = NULL;
121 tid_t tid; 122 tid_t tid;
@@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
124 unsigned long ts = jiffies; 125 unsigned long ts = jiffies;
125 126
126 if (nblocks > journal->j_max_transaction_buffers) { 127 if (nblocks > journal->j_max_transaction_buffers) {
127 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 128 printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
128 current->comm, nblocks, 129 current->comm, nblocks,
129 journal->j_max_transaction_buffers); 130 journal->j_max_transaction_buffers);
130 return -ENOSPC; 131 return -ENOSPC;
@@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks)
320 * Return a pointer to a newly allocated handle, or an ERR_PTR() value 321 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
321 * on failure. 322 * on failure.
322 */ 323 */
323handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) 324handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
324{ 325{
325 handle_t *handle = journal_current_handle(); 326 handle_t *handle = journal_current_handle();
326 int err; 327 int err;
@@ -443,7 +444,7 @@ out:
443 * transaction capabable of guaranteeing the requested number of 444 * transaction capabable of guaranteeing the requested number of
444 * credits. 445 * credits.
445 */ 446 */
446int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) 447int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
447{ 448{
448 transaction_t *transaction = handle->h_transaction; 449 transaction_t *transaction = handle->h_transaction;
449 journal_t *journal = transaction->t_journal; 450 journal_t *journal = transaction->t_journal;
@@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh)
563 char b[BDEVNAME_SIZE]; 564 char b[BDEVNAME_SIZE];
564 565
565 printk(KERN_WARNING 566 printk(KERN_WARNING
566 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " 567 "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
567 "There's a risk of filesystem corruption in case of system " 568 "There's a risk of filesystem corruption in case of system "
568 "crash.\n", 569 "crash.\n",
569 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); 570 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
@@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
1049 * mark dirty metadata which needs to be journaled as part of the current 1050 * mark dirty metadata which needs to be journaled as part of the current
1050 * transaction. 1051 * transaction.
1051 * 1052 *
1053 * The buffer must have previously had jbd2_journal_get_write_access()
1054 * called so that it has a valid journal_head attached to the buffer
1055 * head.
1056 *
1052 * The buffer is placed on the transaction's metadata list and is marked 1057 * The buffer is placed on the transaction's metadata list and is marked
1053 * as belonging to the transaction. 1058 * as belonging to the transaction.
1054 * 1059 *
@@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1065 transaction_t *transaction = handle->h_transaction; 1070 transaction_t *transaction = handle->h_transaction;
1066 journal_t *journal = transaction->t_journal; 1071 journal_t *journal = transaction->t_journal;
1067 struct journal_head *jh = bh2jh(bh); 1072 struct journal_head *jh = bh2jh(bh);
1073 int ret = 0;
1068 1074
1069 jbd_debug(5, "journal_head %p\n", jh); 1075 jbd_debug(5, "journal_head %p\n", jh);
1070 JBUFFER_TRACE(jh, "entry"); 1076 JBUFFER_TRACE(jh, "entry");
1071 if (is_handle_aborted(handle)) 1077 if (is_handle_aborted(handle))
1072 goto out; 1078 goto out;
1079 if (!buffer_jbd(bh)) {
1080 ret = -EUCLEAN;
1081 goto out;
1082 }
1073 1083
1074 jbd_lock_bh_state(bh); 1084 jbd_lock_bh_state(bh);
1075 1085
@@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1093 */ 1103 */
1094 if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { 1104 if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
1095 JBUFFER_TRACE(jh, "fastpath"); 1105 JBUFFER_TRACE(jh, "fastpath");
1096 J_ASSERT_JH(jh, jh->b_transaction == 1106 if (unlikely(jh->b_transaction !=
1097 journal->j_running_transaction); 1107 journal->j_running_transaction)) {
1108 printk(KERN_EMERG "JBD: %s: "
1109 "jh->b_transaction (%llu, %p, %u) != "
1110 "journal->j_running_transaction (%p, %u)",
1111 journal->j_devname,
1112 (unsigned long long) bh->b_blocknr,
1113 jh->b_transaction,
1114 jh->b_transaction ? jh->b_transaction->t_tid : 0,
1115 journal->j_running_transaction,
1116 journal->j_running_transaction ?
1117 journal->j_running_transaction->t_tid : 0);
1118 ret = -EINVAL;
1119 }
1098 goto out_unlock_bh; 1120 goto out_unlock_bh;
1099 } 1121 }
1100 1122
@@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1108 */ 1130 */
1109 if (jh->b_transaction != transaction) { 1131 if (jh->b_transaction != transaction) {
1110 JBUFFER_TRACE(jh, "already on other transaction"); 1132 JBUFFER_TRACE(jh, "already on other transaction");
1111 J_ASSERT_JH(jh, jh->b_transaction == 1133 if (unlikely(jh->b_transaction !=
1112 journal->j_committing_transaction); 1134 journal->j_committing_transaction)) {
1113 J_ASSERT_JH(jh, jh->b_next_transaction == transaction); 1135 printk(KERN_EMERG "JBD: %s: "
1136 "jh->b_transaction (%llu, %p, %u) != "
1137 "journal->j_committing_transaction (%p, %u)",
1138 journal->j_devname,
1139 (unsigned long long) bh->b_blocknr,
1140 jh->b_transaction,
1141 jh->b_transaction ? jh->b_transaction->t_tid : 0,
1142 journal->j_committing_transaction,
1143 journal->j_committing_transaction ?
1144 journal->j_committing_transaction->t_tid : 0);
1145 ret = -EINVAL;
1146 }
1147 if (unlikely(jh->b_next_transaction != transaction)) {
1148 printk(KERN_EMERG "JBD: %s: "
1149 "jh->b_next_transaction (%llu, %p, %u) != "
1150 "transaction (%p, %u)",
1151 journal->j_devname,
1152 (unsigned long long) bh->b_blocknr,
1153 jh->b_next_transaction,
1154 jh->b_next_transaction ?
1155 jh->b_next_transaction->t_tid : 0,
1156 transaction, transaction->t_tid);
1157 ret = -EINVAL;
1158 }
1114 /* And this case is illegal: we can't reuse another 1159 /* And this case is illegal: we can't reuse another
1115 * transaction's data buffer, ever. */ 1160 * transaction's data buffer, ever. */
1116 goto out_unlock_bh; 1161 goto out_unlock_bh;
@@ -1127,7 +1172,8 @@ out_unlock_bh:
1127 jbd_unlock_bh_state(bh); 1172 jbd_unlock_bh_state(bh);
1128out: 1173out:
1129 JBUFFER_TRACE(jh, "exit"); 1174 JBUFFER_TRACE(jh, "exit");
1130 return 0; 1175 WARN_ON(ret); /* All errors are bugs, so dump the stack */
1176 return ret;
1131} 1177}
1132 1178
1133/* 1179/*
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 9659b7c00468..be6169bd8acd 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -245,7 +245,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
245 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 245 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
246 dentry->d_name.len, dead_f, now); 246 dentry->d_name.len, dead_f, now);
247 if (dead_f->inocache) 247 if (dead_f->inocache)
248 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink; 248 set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
249 if (!ret) 249 if (!ret)
250 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 250 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
251 return ret; 251 return ret;
@@ -278,7 +278,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
278 278
279 if (!ret) { 279 if (!ret) {
280 mutex_lock(&f->sem); 280 mutex_lock(&f->sem);
281 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink; 281 set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
282 mutex_unlock(&f->sem); 282 mutex_unlock(&f->sem);
283 d_instantiate(dentry, old_dentry->d_inode); 283 d_instantiate(dentry, old_dentry->d_inode);
284 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 284 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -497,7 +497,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
497 f = JFFS2_INODE_INFO(inode); 497 f = JFFS2_INODE_INFO(inode);
498 498
499 /* Directories get nlink 2 at start */ 499 /* Directories get nlink 2 at start */
500 inode->i_nlink = 2; 500 set_nlink(inode, 2);
501 /* but ic->pino_nlink is the parent ino# */ 501 /* but ic->pino_nlink is the parent ino# */
502 f->inocache->pino_nlink = dir_i->i_ino; 502 f->inocache->pino_nlink = dir_i->i_ino;
503 503
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bbcb9755dd2b..7286e44ac665 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -278,7 +278,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
278 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 278 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
279 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 279 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
280 280
281 inode->i_nlink = f->inocache->pino_nlink; 281 set_nlink(inode, f->inocache->pino_nlink);
282 282
283 inode->i_blocks = (inode->i_size + 511) >> 9; 283 inode->i_blocks = (inode->i_size + 511) >> 9;
284 284
@@ -291,7 +291,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
291 case S_IFDIR: 291 case S_IFDIR:
292 { 292 {
293 struct jffs2_full_dirent *fd; 293 struct jffs2_full_dirent *fd;
294 inode->i_nlink = 2; /* parent and '.' */ 294 set_nlink(inode, 2); /* parent and '.' */
295 295
296 for (fd=f->dents; fd; fd = fd->next) { 296 for (fd=f->dents; fd; fd = fd->next) {
297 if (fd->type == DT_DIR && fd->ino) 297 if (fd->type == DT_DIR && fd->ino)
@@ -453,7 +453,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
453 iput(inode); 453 iput(inode);
454 return ERR_PTR(ret); 454 return ERR_PTR(ret);
455 } 455 }
456 inode->i_nlink = 1; 456 set_nlink(inode, 1);
457 inode->i_ino = je32_to_cpu(ri->ino); 457 inode->i_ino = je32_to_cpu(ri->ino);
458 inode->i_mode = jemode_to_cpu(ri->mode); 458 inode->i_mode = jemode_to_cpu(ri->mode);
459 inode->i_gid = je16_to_cpu(ri->gid); 459 inode->i_gid = je16_to_cpu(ri->gid);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index b78b2f978f04..1b6f15f191b3 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -457,7 +457,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
457 /* read the page of fixed disk inode (AIT) in raw mode */ 457 /* read the page of fixed disk inode (AIT) in raw mode */
458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
459 if (mp == NULL) { 459 if (mp == NULL) {
460 ip->i_nlink = 1; /* Don't want iput() deleting it */ 460 set_nlink(ip, 1); /* Don't want iput() deleting it */
461 iput(ip); 461 iput(ip);
462 return (NULL); 462 return (NULL);
463 } 463 }
@@ -469,7 +469,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
469 /* copy on-disk inode to in-memory inode */ 469 /* copy on-disk inode to in-memory inode */
470 if ((copy_from_dinode(dp, ip)) != 0) { 470 if ((copy_from_dinode(dp, ip)) != 0) {
471 /* handle bad return by returning NULL for ip */ 471 /* handle bad return by returning NULL for ip */
472 ip->i_nlink = 1; /* Don't want iput() deleting it */ 472 set_nlink(ip, 1); /* Don't want iput() deleting it */
473 iput(ip); 473 iput(ip);
474 /* release the page */ 474 /* release the page */
475 release_metapage(mp); 475 release_metapage(mp);
@@ -3076,7 +3076,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3076 ip->i_mode |= 0001; 3076 ip->i_mode |= 0001;
3077 } 3077 }
3078 } 3078 }
3079 ip->i_nlink = le32_to_cpu(dip->di_nlink); 3079 set_nlink(ip, le32_to_cpu(dip->di_nlink));
3080 3080
3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); 3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
3082 if (sbi->uid == -1) 3082 if (sbi->uid == -1)
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 2686531e235a..c1a3e603279c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -157,7 +157,7 @@ fail_drop:
157 dquot_drop(inode); 157 dquot_drop(inode);
158 inode->i_flags |= S_NOQUOTA; 158 inode->i_flags |= S_NOQUOTA;
159fail_unlock: 159fail_unlock:
160 inode->i_nlink = 0; 160 clear_nlink(inode);
161 unlock_new_inode(inode); 161 unlock_new_inode(inode);
162fail_put: 162fail_put:
163 iput(inode); 163 iput(inode);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index e17545e15664..a112ad96e474 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -172,7 +172,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
172 mutex_unlock(&JFS_IP(dip)->commit_mutex); 172 mutex_unlock(&JFS_IP(dip)->commit_mutex);
173 if (rc) { 173 if (rc) {
174 free_ea_wmap(ip); 174 free_ea_wmap(ip);
175 ip->i_nlink = 0; 175 clear_nlink(ip);
176 unlock_new_inode(ip); 176 unlock_new_inode(ip);
177 iput(ip); 177 iput(ip);
178 } else { 178 } else {
@@ -292,7 +292,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
292 goto out3; 292 goto out3;
293 } 293 }
294 294
295 ip->i_nlink = 2; /* for '.' */ 295 set_nlink(ip, 2); /* for '.' */
296 ip->i_op = &jfs_dir_inode_operations; 296 ip->i_op = &jfs_dir_inode_operations;
297 ip->i_fop = &jfs_dir_operations; 297 ip->i_fop = &jfs_dir_operations;
298 298
@@ -311,7 +311,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
311 mutex_unlock(&JFS_IP(dip)->commit_mutex); 311 mutex_unlock(&JFS_IP(dip)->commit_mutex);
312 if (rc) { 312 if (rc) {
313 free_ea_wmap(ip); 313 free_ea_wmap(ip);
314 ip->i_nlink = 0; 314 clear_nlink(ip);
315 unlock_new_inode(ip); 315 unlock_new_inode(ip);
316 iput(ip); 316 iput(ip);
317 } else { 317 } else {
@@ -844,7 +844,7 @@ static int jfs_link(struct dentry *old_dentry,
844 rc = txCommit(tid, 2, &iplist[0], 0); 844 rc = txCommit(tid, 2, &iplist[0], 0);
845 845
846 if (rc) { 846 if (rc) {
847 ip->i_nlink--; /* never instantiated */ 847 drop_nlink(ip); /* never instantiated */
848 iput(ip); 848 iput(ip);
849 } else 849 } else
850 d_instantiate(dentry, ip); 850 d_instantiate(dentry, ip);
@@ -1048,7 +1048,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1048 mutex_unlock(&JFS_IP(dip)->commit_mutex); 1048 mutex_unlock(&JFS_IP(dip)->commit_mutex);
1049 if (rc) { 1049 if (rc) {
1050 free_ea_wmap(ip); 1050 free_ea_wmap(ip);
1051 ip->i_nlink = 0; 1051 clear_nlink(ip);
1052 unlock_new_inode(ip); 1052 unlock_new_inode(ip);
1053 iput(ip); 1053 iput(ip);
1054 } else { 1054 } else {
@@ -1433,7 +1433,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1433 mutex_unlock(&JFS_IP(dir)->commit_mutex); 1433 mutex_unlock(&JFS_IP(dir)->commit_mutex);
1434 if (rc) { 1434 if (rc) {
1435 free_ea_wmap(ip); 1435 free_ea_wmap(ip);
1436 ip->i_nlink = 0; 1436 clear_nlink(ip);
1437 unlock_new_inode(ip); 1437 unlock_new_inode(ip);
1438 iput(ip); 1438 iput(ip);
1439 } else { 1439 } else {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 06c8a67cbe76..a44eff076c17 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -485,7 +485,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
485 goto out_unload; 485 goto out_unload;
486 } 486 }
487 inode->i_ino = 0; 487 inode->i_ino = 0;
488 inode->i_nlink = 1;
489 inode->i_size = sb->s_bdev->bd_inode->i_size; 488 inode->i_size = sb->s_bdev->bd_inode->i_size;
490 inode->i_mapping->a_ops = &jfs_metapage_aops; 489 inode->i_mapping->a_ops = &jfs_metapage_aops;
491 insert_inode_hash(inode); 490 insert_inode_hash(inode);
diff --git a/fs/libfs.c b/fs/libfs.c
index c18e9a1235b6..f6d411eef1e7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -490,7 +490,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
490 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 490 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
491 inode->i_op = &simple_dir_inode_operations; 491 inode->i_op = &simple_dir_inode_operations;
492 inode->i_fop = &simple_dir_operations; 492 inode->i_fop = &simple_dir_operations;
493 inode->i_nlink = 2; 493 set_nlink(inode, 2);
494 root = d_alloc_root(inode); 494 root = d_alloc_root(inode);
495 if (!root) { 495 if (!root) {
496 iput(inode); 496 iput(inode);
@@ -510,8 +510,10 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
510 if (!dentry) 510 if (!dentry)
511 goto out; 511 goto out;
512 inode = new_inode(s); 512 inode = new_inode(s);
513 if (!inode) 513 if (!inode) {
514 dput(dentry);
514 goto out; 515 goto out;
516 }
515 inode->i_mode = S_IFREG | files->mode; 517 inode->i_mode = S_IFREG | files->mode;
516 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 518 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
517 inode->i_fop = files->ops; 519 inode->i_fop = files->ops;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b3ff3d894165..b7d7f67cee5a 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -197,7 +197,7 @@ static int logfs_remove_inode(struct inode *inode)
197{ 197{
198 int ret; 198 int ret;
199 199
200 inode->i_nlink--; 200 drop_nlink(inode);
201 ret = write_inode(inode); 201 ret = write_inode(inode);
202 LOGFS_BUG_ON(ret, inode->i_sb); 202 LOGFS_BUG_ON(ret, inode->i_sb);
203 return ret; 203 return ret;
@@ -433,7 +433,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
433 433
434 ta = kzalloc(sizeof(*ta), GFP_KERNEL); 434 ta = kzalloc(sizeof(*ta), GFP_KERNEL);
435 if (!ta) { 435 if (!ta) {
436 inode->i_nlink--; 436 drop_nlink(inode);
437 iput(inode); 437 iput(inode);
438 return -ENOMEM; 438 return -ENOMEM;
439 } 439 }
@@ -456,7 +456,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
456 abort_transaction(inode, ta); 456 abort_transaction(inode, ta);
457 li->li_flags |= LOGFS_IF_STILLBORN; 457 li->li_flags |= LOGFS_IF_STILLBORN;
458 /* FIXME: truncate symlink */ 458 /* FIXME: truncate symlink */
459 inode->i_nlink--; 459 drop_nlink(inode);
460 iput(inode); 460 iput(inode);
461 goto out; 461 goto out;
462 } 462 }
@@ -563,7 +563,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
563 563
564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
565 ihold(inode); 565 ihold(inode);
566 inode->i_nlink++; 566 inc_nlink(inode);
567 mark_inode_dirty_sync(inode); 567 mark_inode_dirty_sync(inode);
568 568
569 return __logfs_create(dir, dentry, inode, NULL, 0); 569 return __logfs_create(dir, dentry, inode, NULL, 0);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index edfea7a3a747..7e441ad5f792 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -93,7 +93,7 @@ static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
93 /* inode->i_nlink == 0 can be true when called from 93 /* inode->i_nlink == 0 can be true when called from
94 * block validator */ 94 * block validator */
95 /* set i_nlink to 0 to prevent caching */ 95 /* set i_nlink to 0 to prevent caching */
96 inode->i_nlink = 0; 96 clear_nlink(inode);
97 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE; 97 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
98 iget_failed(inode); 98 iget_failed(inode);
99 if (!err) 99 if (!err)
@@ -199,7 +199,6 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
199 inode->i_blocks = 0; 199 inode->i_blocks = 0;
200 inode->i_ctime = CURRENT_TIME; 200 inode->i_ctime = CURRENT_TIME;
201 inode->i_mtime = CURRENT_TIME; 201 inode->i_mtime = CURRENT_TIME;
202 inode->i_nlink = 1;
203 li->li_refcount = 1; 202 li->li_refcount = 1;
204 INIT_LIST_HEAD(&li->li_freeing_list); 203 INIT_LIST_HEAD(&li->li_freeing_list);
205 204
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index d8d09380c7de..2ac4217b7901 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -126,7 +126,7 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
126 inode->i_atime = be64_to_timespec(di->di_atime); 126 inode->i_atime = be64_to_timespec(di->di_atime);
127 inode->i_ctime = be64_to_timespec(di->di_ctime); 127 inode->i_ctime = be64_to_timespec(di->di_ctime);
128 inode->i_mtime = be64_to_timespec(di->di_mtime); 128 inode->i_mtime = be64_to_timespec(di->di_mtime);
129 inode->i_nlink = be32_to_cpu(di->di_refcount); 129 set_nlink(inode, be32_to_cpu(di->di_refcount));
130 inode->i_generation = be32_to_cpu(di->di_generation); 130 inode->i_generation = be32_to_cpu(di->di_generation);
131 131
132 switch (inode->i_mode & S_IFMT) { 132 switch (inode->i_mode & S_IFMT) {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e7d23e25bf1d..64cdcd662ffc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -446,7 +446,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
446 inode->i_mode = raw_inode->i_mode; 446 inode->i_mode = raw_inode->i_mode;
447 inode->i_uid = (uid_t)raw_inode->i_uid; 447 inode->i_uid = (uid_t)raw_inode->i_uid;
448 inode->i_gid = (gid_t)raw_inode->i_gid; 448 inode->i_gid = (gid_t)raw_inode->i_gid;
449 inode->i_nlink = raw_inode->i_nlinks; 449 set_nlink(inode, raw_inode->i_nlinks);
450 inode->i_size = raw_inode->i_size; 450 inode->i_size = raw_inode->i_size;
451 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; 451 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
452 inode->i_mtime.tv_nsec = 0; 452 inode->i_mtime.tv_nsec = 0;
@@ -479,7 +479,7 @@ static struct inode *V2_minix_iget(struct inode *inode)
479 inode->i_mode = raw_inode->i_mode; 479 inode->i_mode = raw_inode->i_mode;
480 inode->i_uid = (uid_t)raw_inode->i_uid; 480 inode->i_uid = (uid_t)raw_inode->i_uid;
481 inode->i_gid = (gid_t)raw_inode->i_gid; 481 inode->i_gid = (gid_t)raw_inode->i_gid;
482 inode->i_nlink = raw_inode->i_nlinks; 482 set_nlink(inode, raw_inode->i_nlinks);
483 inode->i_size = raw_inode->i_size; 483 inode->i_size = raw_inode->i_size;
484 inode->i_mtime.tv_sec = raw_inode->i_mtime; 484 inode->i_mtime.tv_sec = raw_inode->i_mtime;
485 inode->i_atime.tv_sec = raw_inode->i_atime; 485 inode->i_atime.tv_sec = raw_inode->i_atime;
diff --git a/fs/namei.c b/fs/namei.c
index 7657be4352bf..ac6d214da827 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -137,7 +137,7 @@ static int do_getname(const char __user *filename, char *page)
137 return retval; 137 return retval;
138} 138}
139 139
140static char *getname_flags(const char __user * filename, int flags) 140static char *getname_flags(const char __user *filename, int flags, int *empty)
141{ 141{
142 char *tmp, *result; 142 char *tmp, *result;
143 143
@@ -148,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags)
148 148
149 result = tmp; 149 result = tmp;
150 if (retval < 0) { 150 if (retval < 0) {
151 if (retval == -ENOENT && empty)
152 *empty = 1;
151 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { 153 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
152 __putname(tmp); 154 __putname(tmp);
153 result = ERR_PTR(retval); 155 result = ERR_PTR(retval);
@@ -160,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags)
160 162
161char *getname(const char __user * filename) 163char *getname(const char __user * filename)
162{ 164{
163 return getname_flags(filename, 0); 165 return getname_flags(filename, 0, 0);
164} 166}
165 167
166#ifdef CONFIG_AUDITSYSCALL 168#ifdef CONFIG_AUDITSYSCALL
@@ -1798,11 +1800,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1798 return __lookup_hash(&this, base, NULL); 1800 return __lookup_hash(&this, base, NULL);
1799} 1801}
1800 1802
1801int user_path_at(int dfd, const char __user *name, unsigned flags, 1803int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1802 struct path *path) 1804 struct path *path, int *empty)
1803{ 1805{
1804 struct nameidata nd; 1806 struct nameidata nd;
1805 char *tmp = getname_flags(name, flags); 1807 char *tmp = getname_flags(name, flags, empty);
1806 int err = PTR_ERR(tmp); 1808 int err = PTR_ERR(tmp);
1807 if (!IS_ERR(tmp)) { 1809 if (!IS_ERR(tmp)) {
1808 1810
@@ -1816,6 +1818,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1816 return err; 1818 return err;
1817} 1819}
1818 1820
1821int user_path_at(int dfd, const char __user *name, unsigned flags,
1822 struct path *path)
1823{
1824 return user_path_at_empty(dfd, name, flags, path, 0);
1825}
1826
1819static int user_path_parent(int dfd, const char __user *path, 1827static int user_path_parent(int dfd, const char __user *path,
1820 struct nameidata *nd, char **name) 1828 struct nameidata *nd, char **name)
1821{ 1829{
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 202f370526a7..5b5fa33b6b9d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -228,7 +228,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
228 228
229 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); 229 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
230 230
231 inode->i_nlink = 1; 231 set_nlink(inode, 1);
232 inode->i_uid = server->m.uid; 232 inode->i_uid = server->m.uid;
233 inode->i_gid = server->m.gid; 233 inode->i_gid = server->m.gid;
234 234
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4dc6d078f108..c07a55aec838 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -320,7 +320,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); 320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
321 inode->i_version = 0; 321 inode->i_version = 0;
322 inode->i_size = 0; 322 inode->i_size = 0;
323 inode->i_nlink = 0; 323 clear_nlink(inode);
324 inode->i_uid = -2; 324 inode->i_uid = -2;
325 inode->i_gid = -2; 325 inode->i_gid = -2;
326 inode->i_blocks = 0; 326 inode->i_blocks = 0;
@@ -355,7 +355,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
355 | NFS_INO_INVALID_DATA 355 | NFS_INO_INVALID_DATA
356 | NFS_INO_REVAL_PAGECACHE; 356 | NFS_INO_REVAL_PAGECACHE;
357 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 357 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
358 inode->i_nlink = fattr->nlink; 358 set_nlink(inode, fattr->nlink);
359 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 359 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
361 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 361 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
@@ -1361,7 +1361,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1361 invalid |= NFS_INO_INVALID_ATTR; 1361 invalid |= NFS_INO_INVALID_ATTR;
1362 if (S_ISDIR(inode->i_mode)) 1362 if (S_ISDIR(inode->i_mode))
1363 invalid |= NFS_INO_INVALID_DATA; 1363 invalid |= NFS_INO_INVALID_DATA;
1364 inode->i_nlink = fattr->nlink; 1364 set_nlink(inode, fattr->nlink);
1365 } 1365 }
1366 } else if (server->caps & NFS_CAP_NLINK) 1366 } else if (server->caps & NFS_CAP_NLINK)
1367 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1367 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 666628b395f1..b50ffb72e5b3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -354,7 +354,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
354 354
355 failed_acl: 355 failed_acl:
356 failed_bmap: 356 failed_bmap:
357 inode->i_nlink = 0; 357 clear_nlink(inode);
358 iput(inode); /* raw_inode will be deleted through 358 iput(inode); /* raw_inode will be deleted through
359 generic_delete_inode() */ 359 generic_delete_inode() */
360 goto failed; 360 goto failed;
@@ -396,7 +396,7 @@ int nilfs_read_inode_common(struct inode *inode,
396 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 396 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
397 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); 397 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
398 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); 398 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
399 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 399 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
400 inode->i_size = le64_to_cpu(raw_inode->i_size); 400 inode->i_size = le64_to_cpu(raw_inode->i_size);
401 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 401 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
402 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 402 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index a3141990061e..768982de10e4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -289,7 +289,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
289 nilfs_warning(inode->i_sb, __func__, 289 nilfs_warning(inode->i_sb, __func__,
290 "deleting nonexistent file (%lu), %d\n", 290 "deleting nonexistent file (%lu), %d\n",
291 inode->i_ino, inode->i_nlink); 291 inode->i_ino, inode->i_nlink);
292 inode->i_nlink = 1; 292 set_nlink(inode, 1);
293 } 293 }
294 err = nilfs_delete_entry(de, page); 294 err = nilfs_delete_entry(de, page);
295 if (err) 295 if (err)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 1371487da955..97e2dacbc867 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -612,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
612 * might be tricky due to vfs interactions. Need to think about this 612 * might be tricky due to vfs interactions. Need to think about this
613 * some more when implementing the unlink command. 613 * some more when implementing the unlink command.
614 */ 614 */
615 vi->i_nlink = le16_to_cpu(m->link_count); 615 set_nlink(vi, le16_to_cpu(m->link_count));
616 /* 616 /*
617 * FIXME: Reparse points can have the directory bit set even though 617 * FIXME: Reparse points can have the directory bit set even though
618 * they would be S_IFLNK. Need to deal with this further below when we 618 * they would be S_IFLNK. Need to deal with this further below when we
@@ -634,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
634 vi->i_mode &= ~vol->dmask; 634 vi->i_mode &= ~vol->dmask;
635 /* Things break without this kludge! */ 635 /* Things break without this kludge! */
636 if (vi->i_nlink > 1) 636 if (vi->i_nlink > 1)
637 vi->i_nlink = 1; 637 set_nlink(vi, 1);
638 } else { 638 } else {
639 vi->i_mode |= S_IFREG; 639 vi->i_mode |= S_IFREG;
640 /* Apply the file permissions mask set in the mount options. */ 640 /* Apply the file permissions mask set in the mount options. */
@@ -1242,7 +1242,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1242 vi->i_version = base_vi->i_version; 1242 vi->i_version = base_vi->i_version;
1243 vi->i_uid = base_vi->i_uid; 1243 vi->i_uid = base_vi->i_uid;
1244 vi->i_gid = base_vi->i_gid; 1244 vi->i_gid = base_vi->i_gid;
1245 vi->i_nlink = base_vi->i_nlink; 1245 set_nlink(vi, base_vi->i_nlink);
1246 vi->i_mtime = base_vi->i_mtime; 1246 vi->i_mtime = base_vi->i_mtime;
1247 vi->i_ctime = base_vi->i_ctime; 1247 vi->i_ctime = base_vi->i_ctime;
1248 vi->i_atime = base_vi->i_atime; 1248 vi->i_atime = base_vi->i_atime;
@@ -1508,7 +1508,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1508 vi->i_version = base_vi->i_version; 1508 vi->i_version = base_vi->i_version;
1509 vi->i_uid = base_vi->i_uid; 1509 vi->i_uid = base_vi->i_uid;
1510 vi->i_gid = base_vi->i_gid; 1510 vi->i_gid = base_vi->i_gid;
1511 vi->i_nlink = base_vi->i_nlink; 1511 set_nlink(vi, base_vi->i_nlink);
1512 vi->i_mtime = base_vi->i_mtime; 1512 vi->i_mtime = base_vi->i_mtime;
1513 vi->i_ctime = base_vi->i_ctime; 1513 vi->i_ctime = base_vi->i_ctime;
1514 vi->i_atime = base_vi->i_atime; 1514 vi->i_atime = base_vi->i_atime;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8582e3f4f120..e2878b5895fb 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2292,7 +2292,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2292 ocfs2_journal_dirty(handle, di_bh); 2292 ocfs2_journal_dirty(handle, di_bh);
2293 2293
2294 i_size_write(inode, size); 2294 i_size_write(inode, size);
2295 inode->i_nlink = 2; 2295 set_nlink(inode, 2);
2296 inode->i_blocks = ocfs2_inode_sector_count(inode); 2296 inode->i_blocks = ocfs2_inode_sector_count(inode);
2297 2297
2298 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); 2298 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
@@ -2354,7 +2354,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2354 ocfs2_journal_dirty(handle, new_bh); 2354 ocfs2_journal_dirty(handle, new_bh);
2355 2355
2356 i_size_write(inode, inode->i_sb->s_blocksize); 2356 i_size_write(inode, inode->i_sb->s_blocksize);
2357 inode->i_nlink = 2; 2357 set_nlink(inode, 2);
2358 inode->i_blocks = ocfs2_inode_sector_count(inode); 2358 inode->i_blocks = ocfs2_inode_sector_count(inode);
2359 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 2359 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
2360 if (status < 0) { 2360 if (status < 0) {
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7642d7ca73e5..e1ed5e502ff2 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2092,7 +2092,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2092 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2092 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
2093 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2093 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
2094 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2094 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2095 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 2095 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2096 ocfs2_unpack_timespec(&inode->i_atime, 2096 ocfs2_unpack_timespec(&inode->i_atime,
2097 be64_to_cpu(lvb->lvb_iatime_packed)); 2097 be64_to_cpu(lvb->lvb_iatime_packed));
2098 ocfs2_unpack_timespec(&inode->i_mtime, 2098 ocfs2_unpack_timespec(&inode->i_mtime,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index b4c8bb6b8d28..a22d2c098890 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -291,7 +291,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
291 (unsigned long long)OCFS2_I(inode)->ip_blkno, 291 (unsigned long long)OCFS2_I(inode)->ip_blkno,
292 (unsigned long long)le64_to_cpu(fe->i_blkno)); 292 (unsigned long long)le64_to_cpu(fe->i_blkno));
293 293
294 inode->i_nlink = ocfs2_read_links_count(fe); 294 set_nlink(inode, ocfs2_read_links_count(fe));
295 295
296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno, 296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
297 le32_to_cpu(fe->i_flags)); 297 le32_to_cpu(fe->i_flags));
@@ -1290,7 +1290,7 @@ void ocfs2_refresh_inode(struct inode *inode,
1290 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); 1290 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
1291 ocfs2_set_inode_flags(inode); 1291 ocfs2_set_inode_flags(inode);
1292 i_size_write(inode, le64_to_cpu(fe->i_size)); 1292 i_size_write(inode, le64_to_cpu(fe->i_size));
1293 inode->i_nlink = ocfs2_read_links_count(fe); 1293 set_nlink(inode, ocfs2_read_links_count(fe));
1294 inode->i_uid = le32_to_cpu(fe->i_uid); 1294 inode->i_uid = le32_to_cpu(fe->i_uid);
1295 inode->i_gid = le32_to_cpu(fe->i_gid); 1295 inode->i_gid = le32_to_cpu(fe->i_gid);
1296 inode->i_mode = le16_to_cpu(fe->i_mode); 1296 inode->i_mode = le16_to_cpu(fe->i_mode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 53aa41ed7bf3..a8b2bfea574e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -199,9 +199,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
199 * these are used by the support functions here and in 199 * these are used by the support functions here and in
200 * callers. */ 200 * callers. */
201 if (S_ISDIR(mode)) 201 if (S_ISDIR(mode))
202 inode->i_nlink = 2; 202 set_nlink(inode, 2);
203 else
204 inode->i_nlink = 1;
205 inode_init_owner(inode, dir, mode); 203 inode_init_owner(inode, dir, mode);
206 dquot_initialize(inode); 204 dquot_initialize(inode);
207 return inode; 205 return inode;
@@ -1379,7 +1377,7 @@ static int ocfs2_rename(struct inode *old_dir,
1379 } 1377 }
1380 1378
1381 if (new_inode) { 1379 if (new_inode) {
1382 new_inode->i_nlink--; 1380 drop_nlink(new_inode);
1383 new_inode->i_ctime = CURRENT_TIME; 1381 new_inode->i_ctime = CURRENT_TIME;
1384 } 1382 }
1385 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; 1383 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
@@ -1387,9 +1385,9 @@ static int ocfs2_rename(struct inode *old_dir,
1387 if (update_dot_dot) { 1385 if (update_dot_dot) {
1388 status = ocfs2_update_entry(old_inode, handle, 1386 status = ocfs2_update_entry(old_inode, handle,
1389 &old_inode_dot_dot_res, new_dir); 1387 &old_inode_dot_dot_res, new_dir);
1390 old_dir->i_nlink--; 1388 drop_nlink(old_dir);
1391 if (new_inode) { 1389 if (new_inode) {
1392 new_inode->i_nlink--; 1390 drop_nlink(new_inode);
1393 } else { 1391 } else {
1394 inc_nlink(new_dir); 1392 inc_nlink(new_dir);
1395 mark_inode_dirty(new_dir); 1393 mark_inode_dirty(new_dir);
@@ -2018,7 +2016,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2018 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; 2016 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2019 if (S_ISDIR(inode->i_mode)) 2017 if (S_ISDIR(inode->i_mode))
2020 ocfs2_add_links_count(orphan_fe, 1); 2018 ocfs2_add_links_count(orphan_fe, 1);
2021 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2019 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2022 ocfs2_journal_dirty(handle, orphan_dir_bh); 2020 ocfs2_journal_dirty(handle, orphan_dir_bh);
2023 2021
2024 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 2022 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
@@ -2116,7 +2114,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2116 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; 2114 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2117 if (S_ISDIR(inode->i_mode)) 2115 if (S_ISDIR(inode->i_mode))
2118 ocfs2_add_links_count(orphan_fe, -1); 2116 ocfs2_add_links_count(orphan_fe, -1);
2119 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2117 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2120 ocfs2_journal_dirty(handle, orphan_dir_bh); 2118 ocfs2_journal_dirty(handle, orphan_dir_bh);
2121 2119
2122leave: 2120leave:
@@ -2282,7 +2280,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2282 goto leave; 2280 goto leave;
2283 } 2281 }
2284 2282
2285 inode->i_nlink = 0; 2283 clear_nlink(inode);
2286 /* do the real work now. */ 2284 /* do the real work now. */
2287 status = __ocfs2_mknod_locked(dir, inode, 2285 status = __ocfs2_mknod_locked(dir, inode,
2288 0, &new_di_bh, parent_di_bh, handle, 2286 0, &new_di_bh, parent_di_bh, handle,
@@ -2437,7 +2435,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2437 di = (struct ocfs2_dinode *)di_bh->b_data; 2435 di = (struct ocfs2_dinode *)di_bh->b_data;
2438 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); 2436 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2439 di->i_orphaned_slot = 0; 2437 di->i_orphaned_slot = 0;
2440 inode->i_nlink = 1; 2438 set_nlink(inode, 1);
2441 ocfs2_set_links_count(di, inode->i_nlink); 2439 ocfs2_set_links_count(di, inode->i_nlink);
2442 ocfs2_journal_dirty(handle, di_bh); 2440 ocfs2_journal_dirty(handle, di_bh);
2443 2441
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a2a5bff774e3..e4e0ff7962e2 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -242,7 +242,7 @@ found:
242 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 242 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
243 inode->i_op = &openprom_inode_operations; 243 inode->i_op = &openprom_inode_operations;
244 inode->i_fop = &openprom_operations; 244 inode->i_fop = &openprom_operations;
245 inode->i_nlink = 2; 245 set_nlink(inode, 2);
246 break; 246 break;
247 case op_inode_prop: 247 case op_inode_prop:
248 if (!strcmp(dp->name, "options") && (len == 17) && 248 if (!strcmp(dp->name, "options") && (len == 17) &&
@@ -251,7 +251,7 @@ found:
251 else 251 else
252 inode->i_mode = S_IFREG | S_IRUGO; 252 inode->i_mode = S_IFREG | S_IRUGO;
253 inode->i_fop = &openpromfs_prop_ops; 253 inode->i_fop = &openpromfs_prop_ops;
254 inode->i_nlink = 1; 254 set_nlink(inode, 1);
255 inode->i_size = ent_oi->u.prop->length; 255 inode->i_size = ent_oi->u.prop->length;
256 break; 256 break;
257 } 257 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8f0087e20e16..851ba3dcdc29 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2248,7 +2248,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2248 ei = PROC_I(inode); 2248 ei = PROC_I(inode);
2249 inode->i_mode = p->mode; 2249 inode->i_mode = p->mode;
2250 if (S_ISDIR(inode->i_mode)) 2250 if (S_ISDIR(inode->i_mode))
2251 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 2251 set_nlink(inode, 2); /* Use getattr to fix if necessary */
2252 if (p->iop) 2252 if (p->iop)
2253 inode->i_op = p->iop; 2253 inode->i_op = p->iop;
2254 if (p->fop) 2254 if (p->fop)
@@ -2642,7 +2642,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2642 2642
2643 inode->i_mode = p->mode; 2643 inode->i_mode = p->mode;
2644 if (S_ISDIR(inode->i_mode)) 2644 if (S_ISDIR(inode->i_mode))
2645 inode->i_nlink = 2; 2645 set_nlink(inode, 2);
2646 if (S_ISLNK(inode->i_mode)) 2646 if (S_ISLNK(inode->i_mode))
2647 inode->i_size = 64; 2647 inode->i_size = 64;
2648 if (p->iop) 2648 if (p->iop)
@@ -2981,8 +2981,8 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2981 inode->i_fop = &proc_tgid_base_operations; 2981 inode->i_fop = &proc_tgid_base_operations;
2982 inode->i_flags|=S_IMMUTABLE; 2982 inode->i_flags|=S_IMMUTABLE;
2983 2983
2984 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2984 set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
2985 ARRAY_SIZE(tgid_base_stuff)); 2985 ARRAY_SIZE(tgid_base_stuff)));
2986 2986
2987 d_set_d_op(dentry, &pid_dentry_operations); 2987 d_set_d_op(dentry, &pid_dentry_operations);
2988 2988
@@ -3233,8 +3233,8 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3233 inode->i_fop = &proc_tid_base_operations; 3233 inode->i_fop = &proc_tid_base_operations;
3234 inode->i_flags|=S_IMMUTABLE; 3234 inode->i_flags|=S_IMMUTABLE;
3235 3235
3236 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3236 set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
3237 ARRAY_SIZE(tid_base_stuff)); 3237 ARRAY_SIZE(tid_base_stuff)));
3238 3238
3239 d_set_d_op(dentry, &pid_dentry_operations); 3239 d_set_d_op(dentry, &pid_dentry_operations);
3240 3240
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9d99131d0d65..10090d9c7ad5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -283,7 +283,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
283 struct inode *inode = dentry->d_inode; 283 struct inode *inode = dentry->d_inode;
284 struct proc_dir_entry *de = PROC_I(inode)->pde; 284 struct proc_dir_entry *de = PROC_I(inode)->pde;
285 if (de && de->nlink) 285 if (de && de->nlink)
286 inode->i_nlink = de->nlink; 286 set_nlink(inode, de->nlink);
287 287
288 generic_fillattr(inode, stat); 288 generic_fillattr(inode, stat);
289 return 0; 289 return 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ed72d6c1c6f..7737c5468a40 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -445,7 +445,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
445 if (de->size) 445 if (de->size)
446 inode->i_size = de->size; 446 inode->i_size = de->size;
447 if (de->nlink) 447 if (de->nlink)
448 inode->i_nlink = de->nlink; 448 set_nlink(inode, de->nlink);
449 if (de->proc_iops) 449 if (de->proc_iops)
450 inode->i_op = de->proc_iops; 450 inode->i_op = de->proc_iops;
451 if (de->proc_fops) { 451 if (de->proc_fops) {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1a77dbef226f..b44113279e30 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -39,7 +39,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
39 inode->i_fop = &proc_sys_file_operations; 39 inode->i_fop = &proc_sys_file_operations;
40 } else { 40 } else {
41 inode->i_mode |= S_IFDIR; 41 inode->i_mode |= S_IFDIR;
42 inode->i_nlink = 0; 42 clear_nlink(inode);
43 inode->i_op = &proc_sys_dir_operations; 43 inode->i_op = &proc_sys_dir_operations;
44 inode->i_fop = &proc_sys_dir_file_operations; 44 inode->i_fop = &proc_sys_dir_file_operations;
45 } 45 }
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2b0646613f5a..3bdd21418432 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -379,7 +379,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
379 inode->i_mode = le16_to_cpu(raw_inode->di_mode); 379 inode->i_mode = le16_to_cpu(raw_inode->di_mode);
380 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid); 380 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid);
381 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid); 381 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid);
382 inode->i_nlink = le16_to_cpu(raw_inode->di_nlink); 382 set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
383 inode->i_size = le32_to_cpu(raw_inode->di_size); 383 inode->i_size = le32_to_cpu(raw_inode->di_size);
384 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); 384 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime);
385 inode->i_mtime.tv_nsec = 0; 385 inode->i_mtime.tv_nsec = 0;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 10b6be3ca280..aae0edb95c6c 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -363,12 +363,15 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
363 } 363 }
364 364
365 sb = quotactl_block(special); 365 sb = quotactl_block(special);
366 if (IS_ERR(sb)) 366 if (IS_ERR(sb)) {
367 return PTR_ERR(sb); 367 ret = PTR_ERR(sb);
368 goto out;
369 }
368 370
369 ret = do_quotactl(sb, type, cmds, id, addr, pathp); 371 ret = do_quotactl(sb, type, cmds, id, addr, pathp);
370 372
371 drop_super(sb); 373 drop_super(sb);
374out:
372 if (pathp && !IS_ERR(pathp)) 375 if (pathp && !IS_ERR(pathp))
373 path_put(pathp); 376 path_put(pathp);
374 return ret; 377 return ret;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9b0d4b78b4fb..950f13af0951 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1154,7 +1154,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1154 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1154 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1155 set_inode_sd_version(inode, STAT_DATA_V1); 1155 set_inode_sd_version(inode, STAT_DATA_V1);
1156 inode->i_mode = sd_v1_mode(sd); 1156 inode->i_mode = sd_v1_mode(sd);
1157 inode->i_nlink = sd_v1_nlink(sd); 1157 set_nlink(inode, sd_v1_nlink(sd));
1158 inode->i_uid = sd_v1_uid(sd); 1158 inode->i_uid = sd_v1_uid(sd);
1159 inode->i_gid = sd_v1_gid(sd); 1159 inode->i_gid = sd_v1_gid(sd);
1160 inode->i_size = sd_v1_size(sd); 1160 inode->i_size = sd_v1_size(sd);
@@ -1199,7 +1199,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1199 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); 1199 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1200 1200
1201 inode->i_mode = sd_v2_mode(sd); 1201 inode->i_mode = sd_v2_mode(sd);
1202 inode->i_nlink = sd_v2_nlink(sd); 1202 set_nlink(inode, sd_v2_nlink(sd));
1203 inode->i_uid = sd_v2_uid(sd); 1203 inode->i_uid = sd_v2_uid(sd);
1204 inode->i_size = sd_v2_size(sd); 1204 inode->i_size = sd_v2_size(sd);
1205 inode->i_gid = sd_v2_gid(sd); 1205 inode->i_gid = sd_v2_gid(sd);
@@ -1444,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
1444 /* a stale NFS handle can trigger this without it being an error */ 1444 /* a stale NFS handle can trigger this without it being an error */
1445 pathrelse(&path_to_sd); 1445 pathrelse(&path_to_sd);
1446 reiserfs_make_bad_inode(inode); 1446 reiserfs_make_bad_inode(inode);
1447 inode->i_nlink = 0; 1447 clear_nlink(inode);
1448 return; 1448 return;
1449 } 1449 }
1450 1450
@@ -1832,7 +1832,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1832#endif 1832#endif
1833 1833
1834 /* fill stat data */ 1834 /* fill stat data */
1835 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1); 1835 set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
1836 1836
1837 /* uid and gid must already be set by the caller for quota init */ 1837 /* uid and gid must already be set by the caller for quota init */
1838 1838
@@ -1987,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1987 make_bad_inode(inode); 1987 make_bad_inode(inode);
1988 1988
1989 out_inserted_sd: 1989 out_inserted_sd:
1990 inode->i_nlink = 0; 1990 clear_nlink(inode);
1991 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1991 th->t_trans_id = 0; /* so the caller can't use this handle later */
1992 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ 1992 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1993 iput(inode); 1993 iput(inode);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ef392324bbf1..80058e8ce361 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -19,7 +19,7 @@
19#include <linux/reiserfs_xattr.h> 19#include <linux/reiserfs_xattr.h>
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21 21
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); 23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
24 24
25// directory item contains array of entry headers. This performs 25// directory item contains array of entry headers. This performs
@@ -622,7 +622,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
622 dentry->d_name.len, inode, 1 /*visible */ ); 622 dentry->d_name.len, inode, 1 /*visible */ );
623 if (retval) { 623 if (retval) {
624 int err; 624 int err;
625 inode->i_nlink--; 625 drop_nlink(inode);
626 reiserfs_update_sd(&th, inode); 626 reiserfs_update_sd(&th, inode);
627 err = journal_end(&th, dir->i_sb, jbegin_count); 627 err = journal_end(&th, dir->i_sb, jbegin_count);
628 if (err) 628 if (err)
@@ -702,7 +702,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
702 dentry->d_name.len, inode, 1 /*visible */ ); 702 dentry->d_name.len, inode, 1 /*visible */ );
703 if (retval) { 703 if (retval) {
704 int err; 704 int err;
705 inode->i_nlink--; 705 drop_nlink(inode);
706 reiserfs_update_sd(&th, inode); 706 reiserfs_update_sd(&th, inode);
707 err = journal_end(&th, dir->i_sb, jbegin_count); 707 err = journal_end(&th, dir->i_sb, jbegin_count);
708 if (err) 708 if (err)
@@ -787,7 +787,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
787 dentry->d_name.len, inode, 1 /*visible */ ); 787 dentry->d_name.len, inode, 1 /*visible */ );
788 if (retval) { 788 if (retval) {
789 int err; 789 int err;
790 inode->i_nlink = 0; 790 clear_nlink(inode);
791 DEC_DIR_INODE_NLINK(dir); 791 DEC_DIR_INODE_NLINK(dir);
792 reiserfs_update_sd(&th, inode); 792 reiserfs_update_sd(&th, inode);
793 err = journal_end(&th, dir->i_sb, jbegin_count); 793 err = journal_end(&th, dir->i_sb, jbegin_count);
@@ -964,7 +964,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
964 reiserfs_warning(inode->i_sb, "reiserfs-7042", 964 reiserfs_warning(inode->i_sb, "reiserfs-7042",
965 "deleting nonexistent file (%lu), %d", 965 "deleting nonexistent file (%lu), %d",
966 inode->i_ino, inode->i_nlink); 966 inode->i_ino, inode->i_nlink);
967 inode->i_nlink = 1; 967 set_nlink(inode, 1);
968 } 968 }
969 969
970 drop_nlink(inode); 970 drop_nlink(inode);
@@ -1086,7 +1086,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
1086 dentry->d_name.len, inode, 1 /*visible */ ); 1086 dentry->d_name.len, inode, 1 /*visible */ );
1087 if (retval) { 1087 if (retval) {
1088 int err; 1088 int err;
1089 inode->i_nlink--; 1089 drop_nlink(inode);
1090 reiserfs_update_sd(&th, inode); 1090 reiserfs_update_sd(&th, inode);
1091 err = journal_end(&th, parent_dir->i_sb, jbegin_count); 1091 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1092 if (err) 1092 if (err)
@@ -1129,7 +1129,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1129 1129
1130 retval = journal_begin(&th, dir->i_sb, jbegin_count); 1130 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1131 if (retval) { 1131 if (retval) {
1132 inode->i_nlink--; 1132 drop_nlink(inode);
1133 reiserfs_write_unlock(dir->i_sb); 1133 reiserfs_write_unlock(dir->i_sb);
1134 return retval; 1134 return retval;
1135 } 1135 }
@@ -1144,7 +1144,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1144 1144
1145 if (retval) { 1145 if (retval) {
1146 int err; 1146 int err;
1147 inode->i_nlink--; 1147 drop_nlink(inode);
1148 err = journal_end(&th, dir->i_sb, jbegin_count); 1148 err = journal_end(&th, dir->i_sb, jbegin_count);
1149 reiserfs_write_unlock(dir->i_sb); 1149 reiserfs_write_unlock(dir->i_sb);
1150 return err ? err : retval; 1150 return err ? err : retval;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2305e3121cb1..8b4089f30408 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -337,7 +337,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
337 inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK; 337 inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK;
338 inode->i_dataoffset = pos + inode->i_metasize; 338 inode->i_dataoffset = pos + inode->i_metasize;
339 339
340 i->i_nlink = 1; /* Hard to decide.. */ 340 set_nlink(i, 1); /* Hard to decide.. */
341 i->i_size = be32_to_cpu(ri.size); 341 i->i_size = be32_to_cpu(ri.size);
342 i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; 342 i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
343 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; 343 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 04bebcaa2373..fd7b3b3bda13 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -159,7 +159,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
159 frag_offset = 0; 159 frag_offset = 0;
160 } 160 }
161 161
162 inode->i_nlink = 1; 162 set_nlink(inode, 1);
163 inode->i_size = le32_to_cpu(sqsh_ino->file_size); 163 inode->i_size = le32_to_cpu(sqsh_ino->file_size);
164 inode->i_fop = &generic_ro_fops; 164 inode->i_fop = &generic_ro_fops;
165 inode->i_mode |= S_IFREG; 165 inode->i_mode |= S_IFREG;
@@ -203,7 +203,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
203 } 203 }
204 204
205 xattr_id = le32_to_cpu(sqsh_ino->xattr); 205 xattr_id = le32_to_cpu(sqsh_ino->xattr);
206 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 206 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
207 inode->i_size = le64_to_cpu(sqsh_ino->file_size); 207 inode->i_size = le64_to_cpu(sqsh_ino->file_size);
208 inode->i_op = &squashfs_inode_ops; 208 inode->i_op = &squashfs_inode_ops;
209 inode->i_fop = &generic_ro_fops; 209 inode->i_fop = &generic_ro_fops;
@@ -232,7 +232,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
232 if (err < 0) 232 if (err < 0)
233 goto failed_read; 233 goto failed_read;
234 234
235 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 235 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
236 inode->i_size = le16_to_cpu(sqsh_ino->file_size); 236 inode->i_size = le16_to_cpu(sqsh_ino->file_size);
237 inode->i_op = &squashfs_dir_inode_ops; 237 inode->i_op = &squashfs_dir_inode_ops;
238 inode->i_fop = &squashfs_dir_ops; 238 inode->i_fop = &squashfs_dir_ops;
@@ -257,7 +257,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
257 goto failed_read; 257 goto failed_read;
258 258
259 xattr_id = le32_to_cpu(sqsh_ino->xattr); 259 xattr_id = le32_to_cpu(sqsh_ino->xattr);
260 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 260 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
261 inode->i_size = le32_to_cpu(sqsh_ino->file_size); 261 inode->i_size = le32_to_cpu(sqsh_ino->file_size);
262 inode->i_op = &squashfs_dir_inode_ops; 262 inode->i_op = &squashfs_dir_inode_ops;
263 inode->i_fop = &squashfs_dir_ops; 263 inode->i_fop = &squashfs_dir_ops;
@@ -284,7 +284,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
284 if (err < 0) 284 if (err < 0)
285 goto failed_read; 285 goto failed_read;
286 286
287 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 287 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
288 inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); 288 inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
289 inode->i_op = &squashfs_symlink_inode_ops; 289 inode->i_op = &squashfs_symlink_inode_ops;
290 inode->i_data.a_ops = &squashfs_symlink_aops; 290 inode->i_data.a_ops = &squashfs_symlink_aops;
@@ -325,7 +325,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
325 inode->i_mode |= S_IFCHR; 325 inode->i_mode |= S_IFCHR;
326 else 326 else
327 inode->i_mode |= S_IFBLK; 327 inode->i_mode |= S_IFBLK;
328 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 328 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
329 rdev = le32_to_cpu(sqsh_ino->rdev); 329 rdev = le32_to_cpu(sqsh_ino->rdev);
330 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 330 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
331 331
@@ -349,7 +349,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
349 inode->i_mode |= S_IFBLK; 349 inode->i_mode |= S_IFBLK;
350 xattr_id = le32_to_cpu(sqsh_ino->xattr); 350 xattr_id = le32_to_cpu(sqsh_ino->xattr);
351 inode->i_op = &squashfs_inode_ops; 351 inode->i_op = &squashfs_inode_ops;
352 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 352 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
353 rdev = le32_to_cpu(sqsh_ino->rdev); 353 rdev = le32_to_cpu(sqsh_ino->rdev);
354 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 354 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
355 355
@@ -370,7 +370,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
370 inode->i_mode |= S_IFIFO; 370 inode->i_mode |= S_IFIFO;
371 else 371 else
372 inode->i_mode |= S_IFSOCK; 372 inode->i_mode |= S_IFSOCK;
373 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 373 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
374 init_special_inode(inode, inode->i_mode, 0); 374 init_special_inode(inode, inode->i_mode, 0);
375 break; 375 break;
376 } 376 }
@@ -389,7 +389,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
389 inode->i_mode |= S_IFSOCK; 389 inode->i_mode |= S_IFSOCK;
390 xattr_id = le32_to_cpu(sqsh_ino->xattr); 390 xattr_id = le32_to_cpu(sqsh_ino->xattr);
391 inode->i_op = &squashfs_inode_ops; 391 inode->i_op = &squashfs_inode_ops;
392 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 392 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
393 init_special_inode(inode, inode->i_mode, 0); 393 init_special_inode(inode, inode->i_mode, 0);
394 break; 394 break;
395 } 395 }
diff --git a/fs/stack.c b/fs/stack.c
index b4f2ab48a61f..9c11519245a6 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -71,6 +71,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
71 dest->i_ctime = src->i_ctime; 71 dest->i_ctime = src->i_ctime;
72 dest->i_blkbits = src->i_blkbits; 72 dest->i_blkbits = src->i_blkbits;
73 dest->i_flags = src->i_flags; 73 dest->i_flags = src->i_flags;
74 dest->i_nlink = src->i_nlink; 74 set_nlink(dest, src->i_nlink);
75} 75}
76EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 76EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 78a3aa83c7ea..8806b8997d2e 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -294,15 +294,16 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
294{ 294{
295 struct path path; 295 struct path path;
296 int error; 296 int error;
297 int empty = 0;
297 298
298 if (bufsiz <= 0) 299 if (bufsiz <= 0)
299 return -EINVAL; 300 return -EINVAL;
300 301
301 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path); 302 error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
302 if (!error) { 303 if (!error) {
303 struct inode *inode = path.dentry->d_inode; 304 struct inode *inode = path.dentry->d_inode;
304 305
305 error = -EINVAL; 306 error = empty ? -ENOENT : -EINVAL;
306 if (inode->i_op->readlink) { 307 if (inode->i_op->readlink) {
307 error = security_inode_readlink(path.dentry); 308 error = security_inode_readlink(path.dentry);
308 if (!error) { 309 if (!error) {
diff --git a/fs/super.c b/fs/super.c
index 32a81f3467e0..afd0f1ad45e0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -727,8 +727,13 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
727 727
728 if (sb->s_op->remount_fs) { 728 if (sb->s_op->remount_fs) {
729 retval = sb->s_op->remount_fs(sb, &flags, data); 729 retval = sb->s_op->remount_fs(sb, &flags, data);
730 if (retval) 730 if (retval) {
731 return retval; 731 if (!force)
732 return retval;
733 /* If forced remount, go ahead despite any errors */
734 WARN(1, "forced remount of a %s fs returned %i\n",
735 sb->s_type->name, retval);
736 }
732 } 737 }
733 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 738 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
734 739
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e23f28894a3a..c81b22f3ace1 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -218,7 +218,7 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
218 } 218 }
219 219
220 if (sysfs_type(sd) == SYSFS_DIR) 220 if (sysfs_type(sd) == SYSFS_DIR)
221 inode->i_nlink = sd->s_dir.subdirs + 2; 221 set_nlink(inode, sd->s_dir.subdirs + 2);
222} 222}
223 223
224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0630eb969a28..25ffb3e9a3f8 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -219,7 +219,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
219 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); 219 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
220 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid); 220 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
221 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid); 221 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
222 inode->i_nlink = fs16_to_cpu(sbi, raw_inode->i_nlink); 222 set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
223 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); 223 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
224 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); 224 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
225 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime); 225 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b28121278d46..20403dc5d437 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -129,7 +129,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
129 goto out_ino; 129 goto out_ino;
130 130
131 inode->i_flags |= (S_NOCMTIME | S_NOATIME); 131 inode->i_flags |= (S_NOCMTIME | S_NOATIME);
132 inode->i_nlink = le32_to_cpu(ino->nlink); 132 set_nlink(inode, le32_to_cpu(ino->nlink));
133 inode->i_uid = le32_to_cpu(ino->uid); 133 inode->i_uid = le32_to_cpu(ino->uid);
134 inode->i_gid = le32_to_cpu(ino->gid); 134 inode->i_gid = le32_to_cpu(ino->gid);
135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); 135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 16f19f55e63f..bf18f7a04544 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -558,10 +558,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
558 } 558 }
559 559
560 ubifs_assert(inode->i_nlink == 1); 560 ubifs_assert(inode->i_nlink == 1);
561 inode->i_nlink = 0; 561 clear_nlink(inode);
562 err = remove_xattr(c, host, inode, &nm); 562 err = remove_xattr(c, host, inode, &nm);
563 if (err) 563 if (err)
564 inode->i_nlink = 1; 564 set_nlink(inode, 1);
565 565
566 /* If @i_nlink is 0, 'iput()' will delete the inode */ 566 /* If @i_nlink is 0, 'iput()' will delete the inode */
567 iput(inode); 567 iput(inode);
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 95518a9f589e..987585bb0a1d 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -59,8 +59,8 @@ static int __load_block_bitmap(struct super_block *sb,
59 int nr_groups = bitmap->s_nr_groups; 59 int nr_groups = bitmap->s_nr_groups;
60 60
61 if (block_group >= nr_groups) { 61 if (block_group >= nr_groups) {
62 udf_debug("block_group (%d) > nr_groups (%d)\n", block_group, 62 udf_debug("block_group (%d) > nr_groups (%d)\n",
63 nr_groups); 63 block_group, nr_groups);
64 } 64 }
65 65
66 if (bitmap->s_block_bitmap[block_group]) { 66 if (bitmap->s_block_bitmap[block_group]) {
@@ -126,8 +126,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
126 if (bloc->logicalBlockNum + count < count || 126 if (bloc->logicalBlockNum + count < count ||
127 (bloc->logicalBlockNum + count) > partmap->s_partition_len) { 127 (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
128 udf_debug("%d < %d || %d + %d > %d\n", 128 udf_debug("%d < %d || %d + %d > %d\n",
129 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, 129 bloc->logicalBlockNum, 0,
130 count, partmap->s_partition_len); 130 bloc->logicalBlockNum, count,
131 partmap->s_partition_len);
131 goto error_return; 132 goto error_return;
132 } 133 }
133 134
@@ -155,7 +156,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
155 if (udf_set_bit(bit + i, bh->b_data)) { 156 if (udf_set_bit(bit + i, bh->b_data)) {
156 udf_debug("bit %ld already set\n", bit + i); 157 udf_debug("bit %ld already set\n", bit + i);
157 udf_debug("byte=%2x\n", 158 udf_debug("byte=%2x\n",
158 ((char *)bh->b_data)[(bit + i) >> 3]); 159 ((char *)bh->b_data)[(bit + i) >> 3]);
159 } 160 }
160 } 161 }
161 udf_add_free_space(sb, sbi->s_partition, count); 162 udf_add_free_space(sb, sbi->s_partition, count);
@@ -369,7 +370,8 @@ static void udf_table_free_blocks(struct super_block *sb,
369 if (bloc->logicalBlockNum + count < count || 370 if (bloc->logicalBlockNum + count < count ||
370 (bloc->logicalBlockNum + count) > partmap->s_partition_len) { 371 (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
371 udf_debug("%d < %d || %d + %d > %d\n", 372 udf_debug("%d < %d || %d + %d > %d\n",
372 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count, 373 bloc->logicalBlockNum, 0,
374 bloc->logicalBlockNum, count,
373 partmap->s_partition_len); 375 partmap->s_partition_len);
374 goto error_return; 376 goto error_return;
375 } 377 }
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2ffdb6733af1..3e44f575fb9c 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -162,8 +162,8 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
162 int padlen; 162 int padlen;
163 163
164 if ((!buffer) || (!offset)) { 164 if ((!buffer) || (!offset)) {
165 udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer, 165 udf_debug("invalidparms, buffer=%p, offset=%p\n",
166 offset); 166 buffer, offset);
167 return NULL; 167 return NULL;
168 } 168 }
169 169
@@ -201,7 +201,7 @@ struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offs
201 struct short_ad *sa; 201 struct short_ad *sa;
202 202
203 if ((!ptr) || (!offset)) { 203 if ((!ptr) || (!offset)) {
204 printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); 204 pr_err("%s: invalidparms\n", __func__);
205 return NULL; 205 return NULL;
206 } 206 }
207 207
@@ -223,7 +223,7 @@ struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset
223 struct long_ad *la; 223 struct long_ad *la;
224 224
225 if ((!ptr) || (!offset)) { 225 if ((!ptr) || (!offset)) {
226 printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); 226 pr_err("%s: invalidparms\n", __func__);
227 return NULL; 227 return NULL;
228 } 228 }
229 229
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d1358ed80c1..4fd1d809738c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -37,6 +37,7 @@
37#include <linux/writeback.h> 37#include <linux/writeback.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 39#include <linux/crc-itu-t.h>
40#include <linux/mpage.h>
40 41
41#include "udf_i.h" 42#include "udf_i.h"
42#include "udf_sb.h" 43#include "udf_sb.h"
@@ -83,12 +84,10 @@ void udf_evict_inode(struct inode *inode)
83 end_writeback(inode); 84 end_writeback(inode);
84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 85 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
85 inode->i_size != iinfo->i_lenExtents) { 86 inode->i_size != iinfo->i_lenExtents) {
86 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " 87 udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
87 "inode size %llu different from extent length %llu. " 88 inode->i_ino, inode->i_mode,
88 "Filesystem need not be standards compliant.\n", 89 (unsigned long long)inode->i_size,
89 inode->i_sb->s_id, inode->i_ino, inode->i_mode, 90 (unsigned long long)iinfo->i_lenExtents);
90 (unsigned long long)inode->i_size,
91 (unsigned long long)iinfo->i_lenExtents);
92 } 91 }
93 kfree(iinfo->i_ext.i_data); 92 kfree(iinfo->i_ext.i_data);
94 iinfo->i_ext.i_data = NULL; 93 iinfo->i_ext.i_data = NULL;
@@ -104,7 +103,13 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc)
104 103
105static int udf_readpage(struct file *file, struct page *page) 104static int udf_readpage(struct file *file, struct page *page)
106{ 105{
107 return block_read_full_page(page, udf_get_block); 106 return mpage_readpage(page, udf_get_block);
107}
108
109static int udf_readpages(struct file *file, struct address_space *mapping,
110 struct list_head *pages, unsigned nr_pages)
111{
112 return mpage_readpages(mapping, pages, nr_pages, udf_get_block);
108} 113}
109 114
110static int udf_write_begin(struct file *file, struct address_space *mapping, 115static int udf_write_begin(struct file *file, struct address_space *mapping,
@@ -139,6 +144,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
139 144
140const struct address_space_operations udf_aops = { 145const struct address_space_operations udf_aops = {
141 .readpage = udf_readpage, 146 .readpage = udf_readpage,
147 .readpages = udf_readpages,
142 .writepage = udf_writepage, 148 .writepage = udf_writepage,
143 .write_begin = udf_write_begin, 149 .write_begin = udf_write_begin,
144 .write_end = generic_write_end, 150 .write_end = generic_write_end,
@@ -1169,16 +1175,15 @@ static void __udf_read_inode(struct inode *inode)
1169 */ 1175 */
1170 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); 1176 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
1171 if (!bh) { 1177 if (!bh) {
1172 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", 1178 udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
1173 inode->i_ino);
1174 make_bad_inode(inode); 1179 make_bad_inode(inode);
1175 return; 1180 return;
1176 } 1181 }
1177 1182
1178 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && 1183 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
1179 ident != TAG_IDENT_USE) { 1184 ident != TAG_IDENT_USE) {
1180 printk(KERN_ERR "udf: udf_read_inode(ino %ld) " 1185 udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
1181 "failed ident=%d\n", inode->i_ino, ident); 1186 inode->i_ino, ident);
1182 brelse(bh); 1187 brelse(bh);
1183 make_bad_inode(inode); 1188 make_bad_inode(inode);
1184 return; 1189 return;
@@ -1218,8 +1223,8 @@ static void __udf_read_inode(struct inode *inode)
1218 } 1223 }
1219 brelse(ibh); 1224 brelse(ibh);
1220 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { 1225 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
1221 printk(KERN_ERR "udf: unsupported strategy type: %d\n", 1226 udf_err(inode->i_sb, "unsupported strategy type: %d\n",
1222 le16_to_cpu(fe->icbTag.strategyType)); 1227 le16_to_cpu(fe->icbTag.strategyType));
1223 brelse(bh); 1228 brelse(bh);
1224 make_bad_inode(inode); 1229 make_bad_inode(inode);
1225 return; 1230 return;
@@ -1236,6 +1241,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1236 int offset; 1241 int offset;
1237 struct udf_sb_info *sbi = UDF_SB(inode->i_sb); 1242 struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
1238 struct udf_inode_info *iinfo = UDF_I(inode); 1243 struct udf_inode_info *iinfo = UDF_I(inode);
1244 unsigned int link_count;
1239 1245
1240 fe = (struct fileEntry *)bh->b_data; 1246 fe = (struct fileEntry *)bh->b_data;
1241 efe = (struct extendedFileEntry *)bh->b_data; 1247 efe = (struct extendedFileEntry *)bh->b_data;
@@ -1318,9 +1324,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1318 inode->i_mode &= ~sbi->s_umask; 1324 inode->i_mode &= ~sbi->s_umask;
1319 read_unlock(&sbi->s_cred_lock); 1325 read_unlock(&sbi->s_cred_lock);
1320 1326
1321 inode->i_nlink = le16_to_cpu(fe->fileLinkCount); 1327 link_count = le16_to_cpu(fe->fileLinkCount);
1322 if (!inode->i_nlink) 1328 if (!link_count)
1323 inode->i_nlink = 1; 1329 link_count = 1;
1330 set_nlink(inode, link_count);
1324 1331
1325 inode->i_size = le64_to_cpu(fe->informationLength); 1332 inode->i_size = le64_to_cpu(fe->informationLength);
1326 iinfo->i_lenExtents = inode->i_size; 1333 iinfo->i_lenExtents = inode->i_size;
@@ -1413,9 +1420,8 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1413 udf_debug("METADATA BITMAP FILE-----\n"); 1420 udf_debug("METADATA BITMAP FILE-----\n");
1414 break; 1421 break;
1415 default: 1422 default:
1416 printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " 1423 udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
1417 "file type=%d\n", inode->i_ino, 1424 inode->i_ino, fe->icbTag.fileType);
1418 fe->icbTag.fileType);
1419 make_bad_inode(inode); 1425 make_bad_inode(inode);
1420 return; 1426 return;
1421 } 1427 }
@@ -1438,8 +1444,8 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
1438 iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL); 1444 iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
1439 1445
1440 if (!iinfo->i_ext.i_data) { 1446 if (!iinfo->i_ext.i_data) {
1441 printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) " 1447 udf_err(inode->i_sb, "(ino %ld) no free memory\n",
1442 "no free memory\n", inode->i_ino); 1448 inode->i_ino);
1443 return -ENOMEM; 1449 return -ENOMEM;
1444 } 1450 }
1445 1451
@@ -1689,9 +1695,8 @@ out:
1689 if (do_sync) { 1695 if (do_sync) {
1690 sync_dirty_buffer(bh); 1696 sync_dirty_buffer(bh);
1691 if (buffer_write_io_error(bh)) { 1697 if (buffer_write_io_error(bh)) {
1692 printk(KERN_WARNING "IO error syncing udf inode " 1698 udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n",
1693 "[%s:%08lx]\n", inode->i_sb->s_id, 1699 inode->i_ino);
1694 inode->i_ino);
1695 err = -EIO; 1700 err = -EIO;
1696 } 1701 }
1697 } 1702 }
@@ -1982,8 +1987,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
1982 *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; 1987 *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK;
1983 break; 1988 break;
1984 default: 1989 default:
1985 udf_debug("alloc_type = %d unsupported\n", 1990 udf_debug("alloc_type = %d unsupported\n", iinfo->i_alloc_type);
1986 iinfo->i_alloc_type);
1987 return -1; 1991 return -1;
1988 } 1992 }
1989 1993
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 43e24a3b8e10..6583fe9b0645 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -38,7 +38,7 @@ unsigned int udf_get_last_session(struct super_block *sb)
38 38
39 if (i == 0) { 39 if (i == 0) {
40 udf_debug("XA disk: %s, vol_desc_start=%d\n", 40 udf_debug("XA disk: %s, vol_desc_start=%d\n",
41 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); 41 ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ 42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
43 vol_desc_start = ms_info.addr.lba; 43 vol_desc_start = ms_info.addr.lba;
44 } else { 44 } else {
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 9215700c00a4..c175b4dabc14 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -204,6 +204,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
204{ 204{
205 struct tag *tag_p; 205 struct tag *tag_p;
206 struct buffer_head *bh = NULL; 206 struct buffer_head *bh = NULL;
207 u8 checksum;
207 208
208 /* Read the block */ 209 /* Read the block */
209 if (block == 0xFFFFFFFF) 210 if (block == 0xFFFFFFFF)
@@ -211,8 +212,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
211 212
212 bh = udf_tread(sb, block); 213 bh = udf_tread(sb, block);
213 if (!bh) { 214 if (!bh) {
214 udf_debug("block=%d, location=%d: read failed\n", 215 udf_err(sb, "read failed, block=%u, location=%d\n",
215 block, location); 216 block, location);
216 return NULL; 217 return NULL;
217 } 218 }
218 219
@@ -227,16 +228,18 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
227 } 228 }
228 229
229 /* Verify the tag checksum */ 230 /* Verify the tag checksum */
230 if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) { 231 checksum = udf_tag_checksum(tag_p);
231 printk(KERN_ERR "udf: tag checksum failed block %d\n", block); 232 if (checksum != tag_p->tagChecksum) {
233 udf_err(sb, "tag checksum failed, block %u: 0x%02x != 0x%02x\n",
234 block, checksum, tag_p->tagChecksum);
232 goto error_out; 235 goto error_out;
233 } 236 }
234 237
235 /* Verify the tag version */ 238 /* Verify the tag version */
236 if (tag_p->descVersion != cpu_to_le16(0x0002U) && 239 if (tag_p->descVersion != cpu_to_le16(0x0002U) &&
237 tag_p->descVersion != cpu_to_le16(0x0003U)) { 240 tag_p->descVersion != cpu_to_le16(0x0003U)) {
238 udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n", 241 udf_err(sb, "tag version 0x%04x != 0x0002 || 0x0003, block %u\n",
239 le16_to_cpu(tag_p->descVersion), block); 242 le16_to_cpu(tag_p->descVersion), block);
240 goto error_out; 243 goto error_out;
241 } 244 }
242 245
@@ -248,8 +251,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
248 return bh; 251 return bh;
249 252
250 udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block, 253 udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block,
251 le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); 254 le16_to_cpu(tag_p->descCRC),
252 255 le16_to_cpu(tag_p->descCRCLength));
253error_out: 256error_out:
254 brelse(bh); 257 brelse(bh);
255 return NULL; 258 return NULL;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f1dce848ef96..4639e137222f 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -577,8 +577,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
577 577
578 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 578 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
579 if (!fi) { 579 if (!fi) {
580 inode->i_nlink--; 580 inode_dec_link_count(inode);
581 mark_inode_dirty(inode);
582 iput(inode); 581 iput(inode);
583 return err; 582 return err;
584 } 583 }
@@ -618,8 +617,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
618 init_special_inode(inode, mode, rdev); 617 init_special_inode(inode, mode, rdev);
619 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 618 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
620 if (!fi) { 619 if (!fi) {
621 inode->i_nlink--; 620 inode_dec_link_count(inode);
622 mark_inode_dirty(inode);
623 iput(inode); 621 iput(inode);
624 return err; 622 return err;
625 } 623 }
@@ -665,12 +663,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
665 inode->i_fop = &udf_dir_operations; 663 inode->i_fop = &udf_dir_operations;
666 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); 664 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
667 if (!fi) { 665 if (!fi) {
668 inode->i_nlink--; 666 inode_dec_link_count(inode);
669 mark_inode_dirty(inode);
670 iput(inode); 667 iput(inode);
671 goto out; 668 goto out;
672 } 669 }
673 inode->i_nlink = 2; 670 set_nlink(inode, 2);
674 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 671 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
675 cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location); 672 cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location);
676 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 673 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
@@ -683,7 +680,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
683 680
684 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 681 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
685 if (!fi) { 682 if (!fi) {
686 inode->i_nlink = 0; 683 clear_nlink(inode);
687 mark_inode_dirty(inode); 684 mark_inode_dirty(inode);
688 iput(inode); 685 iput(inode);
689 goto out; 686 goto out;
@@ -799,9 +796,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
799 if (retval) 796 if (retval)
800 goto end_rmdir; 797 goto end_rmdir;
801 if (inode->i_nlink != 2) 798 if (inode->i_nlink != 2)
802 udf_warning(inode->i_sb, "udf_rmdir", 799 udf_warn(inode->i_sb, "empty directory has nlink != 2 (%d)\n",
803 "empty directory has nlink != 2 (%d)", 800 inode->i_nlink);
804 inode->i_nlink);
805 clear_nlink(inode); 801 clear_nlink(inode);
806 inode->i_size = 0; 802 inode->i_size = 0;
807 inode_dec_link_count(dir); 803 inode_dec_link_count(dir);
@@ -840,7 +836,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
840 if (!inode->i_nlink) { 836 if (!inode->i_nlink) {
841 udf_debug("Deleting nonexistent file (%lu), %d\n", 837 udf_debug("Deleting nonexistent file (%lu), %d\n",
842 inode->i_ino, inode->i_nlink); 838 inode->i_ino, inode->i_nlink);
843 inode->i_nlink = 1; 839 set_nlink(inode, 1);
844 } 840 }
845 retval = udf_delete_entry(dir, fi, &fibh, &cfi); 841 retval = udf_delete_entry(dir, fi, &fibh, &cfi);
846 if (retval) 842 if (retval)
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index a71090ea0e07..d6caf01a2097 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -33,8 +33,8 @@ uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
33 struct udf_sb_info *sbi = UDF_SB(sb); 33 struct udf_sb_info *sbi = UDF_SB(sb);
34 struct udf_part_map *map; 34 struct udf_part_map *map;
35 if (partition >= sbi->s_partitions) { 35 if (partition >= sbi->s_partitions) {
36 udf_debug("block=%d, partition=%d, offset=%d: " 36 udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n",
37 "invalid partition\n", block, partition, offset); 37 block, partition, offset);
38 return 0xFFFFFFFF; 38 return 0xFFFFFFFF;
39 } 39 }
40 map = &sbi->s_partmaps[partition]; 40 map = &sbi->s_partmaps[partition];
@@ -60,8 +60,8 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
60 vdata = &map->s_type_specific.s_virtual; 60 vdata = &map->s_type_specific.s_virtual;
61 61
62 if (block > vdata->s_num_entries) { 62 if (block > vdata->s_num_entries) {
63 udf_debug("Trying to access block beyond end of VAT " 63 udf_debug("Trying to access block beyond end of VAT (%d max %d)\n",
64 "(%d max %d)\n", block, vdata->s_num_entries); 64 block, vdata->s_num_entries);
65 return 0xFFFFFFFF; 65 return 0xFFFFFFFF;
66 } 66 }
67 67
@@ -321,9 +321,14 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
321 /* We shouldn't mount such media... */ 321 /* We shouldn't mount such media... */
322 BUG_ON(!inode); 322 BUG_ON(!inode);
323 retblk = udf_try_read_meta(inode, block, partition, offset); 323 retblk = udf_try_read_meta(inode, block, partition, offset);
324 if (retblk == 0xFFFFFFFF) { 324 if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
325 udf_warning(sb, __func__, "error reading from METADATA, " 325 udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
326 "trying to read from MIRROR"); 326 if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
327 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
328 mdata->s_mirror_file_loc, map->s_partition_num);
329 mdata->s_flags |= MF_MIRROR_FE_LOADED;
330 }
331
327 inode = mdata->s_mirror_fe; 332 inode = mdata->s_mirror_fe;
328 if (!inode) 333 if (!inode)
329 return 0xFFFFFFFF; 334 return 0xFFFFFFFF;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7b27b063ff6d..e185253470df 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,8 +75,6 @@
75 75
76#define UDF_DEFAULT_BLOCKSIZE 2048 76#define UDF_DEFAULT_BLOCKSIZE 2048
77 77
78static char error_buf[1024];
79
80/* These are the "meat" - everything else is stuffing */ 78/* These are the "meat" - everything else is stuffing */
81static int udf_fill_super(struct super_block *, void *, int); 79static int udf_fill_super(struct super_block *, void *, int);
82static void udf_put_super(struct super_block *); 80static void udf_put_super(struct super_block *);
@@ -92,8 +90,6 @@ static void udf_close_lvid(struct super_block *);
92static unsigned int udf_count_free(struct super_block *); 90static unsigned int udf_count_free(struct super_block *);
93static int udf_statfs(struct dentry *, struct kstatfs *); 91static int udf_statfs(struct dentry *, struct kstatfs *);
94static int udf_show_options(struct seq_file *, struct vfsmount *); 92static int udf_show_options(struct seq_file *, struct vfsmount *);
95static void udf_error(struct super_block *sb, const char *function,
96 const char *fmt, ...);
97 93
98struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) 94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
99{ 95{
@@ -244,9 +240,8 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
244 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
245 GFP_KERNEL); 241 GFP_KERNEL);
246 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
247 udf_error(sb, __func__, 243 udf_err(sb, "Unable to allocate space for %d partition maps\n",
248 "Unable to allocate space for %d partition maps", 244 count);
249 count);
250 sbi->s_partitions = 0; 245 sbi->s_partitions = 0;
251 return -ENOMEM; 246 return -ENOMEM;
252 } 247 }
@@ -550,8 +545,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
550 uopt->dmode = option & 0777; 545 uopt->dmode = option & 0777;
551 break; 546 break;
552 default: 547 default:
553 printk(KERN_ERR "udf: bad mount option \"%s\" " 548 pr_err("bad mount option \"%s\" or missing value\n", p);
554 "or missing value\n", p);
555 return 0; 549 return 0;
556 } 550 }
557 } 551 }
@@ -645,20 +639,16 @@ static loff_t udf_check_vsd(struct super_block *sb)
645 udf_debug("ISO9660 Boot Record found\n"); 639 udf_debug("ISO9660 Boot Record found\n");
646 break; 640 break;
647 case 1: 641 case 1:
648 udf_debug("ISO9660 Primary Volume Descriptor " 642 udf_debug("ISO9660 Primary Volume Descriptor found\n");
649 "found\n");
650 break; 643 break;
651 case 2: 644 case 2:
652 udf_debug("ISO9660 Supplementary Volume " 645 udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
653 "Descriptor found\n");
654 break; 646 break;
655 case 3: 647 case 3:
656 udf_debug("ISO9660 Volume Partition Descriptor " 648 udf_debug("ISO9660 Volume Partition Descriptor found\n");
657 "found\n");
658 break; 649 break;
659 case 255: 650 case 255:
660 udf_debug("ISO9660 Volume Descriptor Set " 651 udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
661 "Terminator found\n");
662 break; 652 break;
663 default: 653 default:
664 udf_debug("ISO9660 VRS (%u) found\n", 654 udf_debug("ISO9660 VRS (%u) found\n",
@@ -809,8 +799,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
809 pvoldesc->recordingDateAndTime)) { 799 pvoldesc->recordingDateAndTime)) {
810#ifdef UDFFS_DEBUG 800#ifdef UDFFS_DEBUG
811 struct timestamp *ts = &pvoldesc->recordingDateAndTime; 801 struct timestamp *ts = &pvoldesc->recordingDateAndTime;
812 udf_debug("recording time %04u/%02u/%02u" 802 udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
813 " %02u:%02u (%x)\n",
814 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, 803 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
815 ts->minute, le16_to_cpu(ts->typeAndTimezone)); 804 ts->minute, le16_to_cpu(ts->typeAndTimezone));
816#endif 805#endif
@@ -821,7 +810,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
821 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name, 810 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
822 outstr->u_len > 31 ? 31 : outstr->u_len); 811 outstr->u_len > 31 ? 31 : outstr->u_len);
823 udf_debug("volIdent[] = '%s'\n", 812 udf_debug("volIdent[] = '%s'\n",
824 UDF_SB(sb)->s_volume_ident); 813 UDF_SB(sb)->s_volume_ident);
825 } 814 }
826 815
827 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) 816 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
@@ -837,64 +826,57 @@ out1:
837 return ret; 826 return ret;
838} 827}
839 828
829struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
830 u32 meta_file_loc, u32 partition_num)
831{
832 struct kernel_lb_addr addr;
833 struct inode *metadata_fe;
834
835 addr.logicalBlockNum = meta_file_loc;
836 addr.partitionReferenceNum = partition_num;
837
838 metadata_fe = udf_iget(sb, &addr);
839
840 if (metadata_fe == NULL)
841 udf_warn(sb, "metadata inode efe not found\n");
842 else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
843 udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
844 iput(metadata_fe);
845 metadata_fe = NULL;
846 }
847
848 return metadata_fe;
849}
850
840static int udf_load_metadata_files(struct super_block *sb, int partition) 851static int udf_load_metadata_files(struct super_block *sb, int partition)
841{ 852{
842 struct udf_sb_info *sbi = UDF_SB(sb); 853 struct udf_sb_info *sbi = UDF_SB(sb);
843 struct udf_part_map *map; 854 struct udf_part_map *map;
844 struct udf_meta_data *mdata; 855 struct udf_meta_data *mdata;
845 struct kernel_lb_addr addr; 856 struct kernel_lb_addr addr;
846 int fe_error = 0;
847 857
848 map = &sbi->s_partmaps[partition]; 858 map = &sbi->s_partmaps[partition];
849 mdata = &map->s_type_specific.s_metadata; 859 mdata = &map->s_type_specific.s_metadata;
850 860
851 /* metadata address */ 861 /* metadata address */
852 addr.logicalBlockNum = mdata->s_meta_file_loc;
853 addr.partitionReferenceNum = map->s_partition_num;
854
855 udf_debug("Metadata file location: block = %d part = %d\n", 862 udf_debug("Metadata file location: block = %d part = %d\n",
856 addr.logicalBlockNum, addr.partitionReferenceNum); 863 mdata->s_meta_file_loc, map->s_partition_num);
857 864
858 mdata->s_metadata_fe = udf_iget(sb, &addr); 865 mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb,
866 mdata->s_meta_file_loc, map->s_partition_num);
859 867
860 if (mdata->s_metadata_fe == NULL) { 868 if (mdata->s_metadata_fe == NULL) {
861 udf_warning(sb, __func__, "metadata inode efe not found, " 869 /* mirror file entry */
862 "will try mirror inode."); 870 udf_debug("Mirror metadata file location: block = %d part = %d\n",
863 fe_error = 1; 871 mdata->s_mirror_file_loc, map->s_partition_num);
864 } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type !=
865 ICBTAG_FLAG_AD_SHORT) {
866 udf_warning(sb, __func__, "metadata inode efe does not have "
867 "short allocation descriptors!");
868 fe_error = 1;
869 iput(mdata->s_metadata_fe);
870 mdata->s_metadata_fe = NULL;
871 }
872 872
873 /* mirror file entry */ 873 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
874 addr.logicalBlockNum = mdata->s_mirror_file_loc; 874 mdata->s_mirror_file_loc, map->s_partition_num);
875 addr.partitionReferenceNum = map->s_partition_num;
876
877 udf_debug("Mirror metadata file location: block = %d part = %d\n",
878 addr.logicalBlockNum, addr.partitionReferenceNum);
879 875
880 mdata->s_mirror_fe = udf_iget(sb, &addr); 876 if (mdata->s_mirror_fe == NULL) {
881 877 udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
882 if (mdata->s_mirror_fe == NULL) {
883 if (fe_error) {
884 udf_error(sb, __func__, "mirror inode efe not found "
885 "and metadata inode is missing too, exiting...");
886 goto error_exit;
887 } else
888 udf_warning(sb, __func__, "mirror inode efe not found,"
889 " but metadata inode is OK");
890 } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type !=
891 ICBTAG_FLAG_AD_SHORT) {
892 udf_warning(sb, __func__, "mirror inode efe does not have "
893 "short allocation descriptors!");
894 iput(mdata->s_mirror_fe);
895 mdata->s_mirror_fe = NULL;
896 if (fe_error)
897 goto error_exit; 878 goto error_exit;
879 }
898 } 880 }
899 881
900 /* 882 /*
@@ -907,18 +889,15 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
907 addr.partitionReferenceNum = map->s_partition_num; 889 addr.partitionReferenceNum = map->s_partition_num;
908 890
909 udf_debug("Bitmap file location: block = %d part = %d\n", 891 udf_debug("Bitmap file location: block = %d part = %d\n",
910 addr.logicalBlockNum, addr.partitionReferenceNum); 892 addr.logicalBlockNum, addr.partitionReferenceNum);
911 893
912 mdata->s_bitmap_fe = udf_iget(sb, &addr); 894 mdata->s_bitmap_fe = udf_iget(sb, &addr);
913 895
914 if (mdata->s_bitmap_fe == NULL) { 896 if (mdata->s_bitmap_fe == NULL) {
915 if (sb->s_flags & MS_RDONLY) 897 if (sb->s_flags & MS_RDONLY)
916 udf_warning(sb, __func__, "bitmap inode efe " 898 udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
917 "not found but it's ok since the disc"
918 " is mounted read-only");
919 else { 899 else {
920 udf_error(sb, __func__, "bitmap inode efe not " 900 udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
921 "found and attempted read-write mount");
922 goto error_exit; 901 goto error_exit;
923 } 902 }
924 } 903 }
@@ -971,9 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
971 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ 950 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
972 951
973 if (bitmap == NULL) { 952 if (bitmap == NULL) {
974 udf_error(sb, __func__, 953 udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
975 "Unable to allocate space for bitmap " 954 nr_groups);
976 "and %d buffer_head pointers", nr_groups);
977 return NULL; 955 return NULL;
978 } 956 }
979 957
@@ -1003,10 +981,9 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1003 if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) 981 if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
1004 map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; 982 map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
1005 983
1006 udf_debug("Partition (%d type %x) starts at physical %d, " 984 udf_debug("Partition (%d type %x) starts at physical %d, block length %d\n",
1007 "block length %d\n", p_index, 985 p_index, map->s_partition_type,
1008 map->s_partition_type, map->s_partition_root, 986 map->s_partition_root, map->s_partition_len);
1009 map->s_partition_len);
1010 987
1011 if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && 988 if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
1012 strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) 989 strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
@@ -1023,12 +1000,12 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1023 map->s_uspace.s_table = udf_iget(sb, &loc); 1000 map->s_uspace.s_table = udf_iget(sb, &loc);
1024 if (!map->s_uspace.s_table) { 1001 if (!map->s_uspace.s_table) {
1025 udf_debug("cannot load unallocSpaceTable (part %d)\n", 1002 udf_debug("cannot load unallocSpaceTable (part %d)\n",
1026 p_index); 1003 p_index);
1027 return 1; 1004 return 1;
1028 } 1005 }
1029 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; 1006 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
1030 udf_debug("unallocSpaceTable (part %d) @ %ld\n", 1007 udf_debug("unallocSpaceTable (part %d) @ %ld\n",
1031 p_index, map->s_uspace.s_table->i_ino); 1008 p_index, map->s_uspace.s_table->i_ino);
1032 } 1009 }
1033 1010
1034 if (phd->unallocSpaceBitmap.extLength) { 1011 if (phd->unallocSpaceBitmap.extLength) {
@@ -1041,8 +1018,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1041 bitmap->s_extPosition = le32_to_cpu( 1018 bitmap->s_extPosition = le32_to_cpu(
1042 phd->unallocSpaceBitmap.extPosition); 1019 phd->unallocSpaceBitmap.extPosition);
1043 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; 1020 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
1044 udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index, 1021 udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
1045 bitmap->s_extPosition); 1022 p_index, bitmap->s_extPosition);
1046 } 1023 }
1047 1024
1048 if (phd->partitionIntegrityTable.extLength) 1025 if (phd->partitionIntegrityTable.extLength)
@@ -1058,13 +1035,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1058 map->s_fspace.s_table = udf_iget(sb, &loc); 1035 map->s_fspace.s_table = udf_iget(sb, &loc);
1059 if (!map->s_fspace.s_table) { 1036 if (!map->s_fspace.s_table) {
1060 udf_debug("cannot load freedSpaceTable (part %d)\n", 1037 udf_debug("cannot load freedSpaceTable (part %d)\n",
1061 p_index); 1038 p_index);
1062 return 1; 1039 return 1;
1063 } 1040 }
1064 1041
1065 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; 1042 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
1066 udf_debug("freedSpaceTable (part %d) @ %ld\n", 1043 udf_debug("freedSpaceTable (part %d) @ %ld\n",
1067 p_index, map->s_fspace.s_table->i_ino); 1044 p_index, map->s_fspace.s_table->i_ino);
1068 } 1045 }
1069 1046
1070 if (phd->freedSpaceBitmap.extLength) { 1047 if (phd->freedSpaceBitmap.extLength) {
@@ -1077,8 +1054,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1077 bitmap->s_extPosition = le32_to_cpu( 1054 bitmap->s_extPosition = le32_to_cpu(
1078 phd->freedSpaceBitmap.extPosition); 1055 phd->freedSpaceBitmap.extPosition);
1079 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; 1056 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
1080 udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index, 1057 udf_debug("freedSpaceBitmap (part %d) @ %d\n",
1081 bitmap->s_extPosition); 1058 p_index, bitmap->s_extPosition);
1082 } 1059 }
1083 return 0; 1060 return 0;
1084} 1061}
@@ -1118,11 +1095,9 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1118 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); 1095 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
1119 if (!sbi->s_vat_inode && 1096 if (!sbi->s_vat_inode &&
1120 sbi->s_last_block != blocks - 1) { 1097 sbi->s_last_block != blocks - 1) {
1121 printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" 1098 pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n",
1122 " last recorded block (%lu), retrying with the last " 1099 (unsigned long)sbi->s_last_block,
1123 "block of the device (%lu).\n", 1100 (unsigned long)blocks - 1);
1124 (unsigned long)sbi->s_last_block,
1125 (unsigned long)blocks - 1);
1126 udf_find_vat_block(sb, p_index, type1_index, blocks - 1); 1101 udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
1127 } 1102 }
1128 if (!sbi->s_vat_inode) 1103 if (!sbi->s_vat_inode)
@@ -1220,8 +1195,8 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1220 if (map->s_partition_type == UDF_METADATA_MAP25) { 1195 if (map->s_partition_type == UDF_METADATA_MAP25) {
1221 ret = udf_load_metadata_files(sb, i); 1196 ret = udf_load_metadata_files(sb, i);
1222 if (ret) { 1197 if (ret) {
1223 printk(KERN_ERR "UDF-fs: error loading MetaData " 1198 udf_err(sb, "error loading MetaData partition map %d\n",
1224 "partition map %d\n", i); 1199 i);
1225 goto out_bh; 1200 goto out_bh;
1226 } 1201 }
1227 } else { 1202 } else {
@@ -1234,9 +1209,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1234 * overwrite blocks instead of relocating them). 1209 * overwrite blocks instead of relocating them).
1235 */ 1210 */
1236 sb->s_flags |= MS_RDONLY; 1211 sb->s_flags |= MS_RDONLY;
1237 printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only " 1212 pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
1238 "because writing to pseudooverwrite partition is "
1239 "not implemented.\n");
1240 } 1213 }
1241out_bh: 1214out_bh:
1242 /* In case loading failed, we handle cleanup in udf_fill_super */ 1215 /* In case loading failed, we handle cleanup in udf_fill_super */
@@ -1344,9 +1317,8 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1344 struct metadataPartitionMap *mdm = 1317 struct metadataPartitionMap *mdm =
1345 (struct metadataPartitionMap *) 1318 (struct metadataPartitionMap *)
1346 &(lvd->partitionMaps[offset]); 1319 &(lvd->partitionMaps[offset]);
1347 udf_debug("Parsing Logical vol part %d " 1320 udf_debug("Parsing Logical vol part %d type %d id=%s\n",
1348 "type %d id=%s\n", i, type, 1321 i, type, UDF_ID_METADATA);
1349 UDF_ID_METADATA);
1350 1322
1351 map->s_partition_type = UDF_METADATA_MAP25; 1323 map->s_partition_type = UDF_METADATA_MAP25;
1352 map->s_partition_func = udf_get_pblock_meta25; 1324 map->s_partition_func = udf_get_pblock_meta25;
@@ -1361,25 +1333,24 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1361 le32_to_cpu(mdm->allocUnitSize); 1333 le32_to_cpu(mdm->allocUnitSize);
1362 mdata->s_align_unit_size = 1334 mdata->s_align_unit_size =
1363 le16_to_cpu(mdm->alignUnitSize); 1335 le16_to_cpu(mdm->alignUnitSize);
1364 mdata->s_dup_md_flag = 1336 if (mdm->flags & 0x01)
1365 mdm->flags & 0x01; 1337 mdata->s_flags |= MF_DUPLICATE_MD;
1366 1338
1367 udf_debug("Metadata Ident suffix=0x%x\n", 1339 udf_debug("Metadata Ident suffix=0x%x\n",
1368 (le16_to_cpu( 1340 le16_to_cpu(*(__le16 *)
1369 ((__le16 *) 1341 mdm->partIdent.identSuffix));
1370 mdm->partIdent.identSuffix)[0])));
1371 udf_debug("Metadata part num=%d\n", 1342 udf_debug("Metadata part num=%d\n",
1372 le16_to_cpu(mdm->partitionNum)); 1343 le16_to_cpu(mdm->partitionNum));
1373 udf_debug("Metadata part alloc unit size=%d\n", 1344 udf_debug("Metadata part alloc unit size=%d\n",
1374 le32_to_cpu(mdm->allocUnitSize)); 1345 le32_to_cpu(mdm->allocUnitSize));
1375 udf_debug("Metadata file loc=%d\n", 1346 udf_debug("Metadata file loc=%d\n",
1376 le32_to_cpu(mdm->metadataFileLoc)); 1347 le32_to_cpu(mdm->metadataFileLoc));
1377 udf_debug("Mirror file loc=%d\n", 1348 udf_debug("Mirror file loc=%d\n",
1378 le32_to_cpu(mdm->metadataMirrorFileLoc)); 1349 le32_to_cpu(mdm->metadataMirrorFileLoc));
1379 udf_debug("Bitmap file loc=%d\n", 1350 udf_debug("Bitmap file loc=%d\n",
1380 le32_to_cpu(mdm->metadataBitmapFileLoc)); 1351 le32_to_cpu(mdm->metadataBitmapFileLoc));
1381 udf_debug("Duplicate Flag: %d %d\n", 1352 udf_debug("Flags: %d %d\n",
1382 mdata->s_dup_md_flag, mdm->flags); 1353 mdata->s_flags, mdm->flags);
1383 } else { 1354 } else {
1384 udf_debug("Unknown ident: %s\n", 1355 udf_debug("Unknown ident: %s\n",
1385 upm2->partIdent.ident); 1356 upm2->partIdent.ident);
@@ -1389,16 +1360,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1389 map->s_partition_num = le16_to_cpu(upm2->partitionNum); 1360 map->s_partition_num = le16_to_cpu(upm2->partitionNum);
1390 } 1361 }
1391 udf_debug("Partition (%d:%d) type %d on volume %d\n", 1362 udf_debug("Partition (%d:%d) type %d on volume %d\n",
1392 i, map->s_partition_num, type, 1363 i, map->s_partition_num, type, map->s_volumeseqnum);
1393 map->s_volumeseqnum);
1394 } 1364 }
1395 1365
1396 if (fileset) { 1366 if (fileset) {
1397 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]); 1367 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
1398 1368
1399 *fileset = lelb_to_cpu(la->extLocation); 1369 *fileset = lelb_to_cpu(la->extLocation);
1400 udf_debug("FileSet found in LogicalVolDesc at block=%d, " 1370 udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n",
1401 "partition=%d\n", fileset->logicalBlockNum, 1371 fileset->logicalBlockNum,
1402 fileset->partitionReferenceNum); 1372 fileset->partitionReferenceNum);
1403 } 1373 }
1404 if (lvd->integritySeqExt.extLength) 1374 if (lvd->integritySeqExt.extLength)
@@ -1478,9 +1448,9 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1478 1448
1479 bh = udf_read_tagged(sb, block, block, &ident); 1449 bh = udf_read_tagged(sb, block, block, &ident);
1480 if (!bh) { 1450 if (!bh) {
1481 printk(KERN_ERR "udf: Block %Lu of volume descriptor " 1451 udf_err(sb,
1482 "sequence is corrupted or we could not read " 1452 "Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
1483 "it.\n", (unsigned long long)block); 1453 (unsigned long long)block);
1484 return 1; 1454 return 1;
1485 } 1455 }
1486 1456
@@ -1553,7 +1523,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1553 * in a suitable order 1523 * in a suitable order
1554 */ 1524 */
1555 if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { 1525 if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
1556 printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n"); 1526 udf_err(sb, "Primary Volume Descriptor not found!\n");
1557 return 1; 1527 return 1;
1558 } 1528 }
1559 if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) 1529 if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
@@ -1740,7 +1710,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1740 1710
1741 if (!sb_set_blocksize(sb, uopt->blocksize)) { 1711 if (!sb_set_blocksize(sb, uopt->blocksize)) {
1742 if (!silent) 1712 if (!silent)
1743 printk(KERN_WARNING "UDF-fs: Bad block size\n"); 1713 udf_warn(sb, "Bad block size\n");
1744 return 0; 1714 return 0;
1745 } 1715 }
1746 sbi->s_last_block = uopt->lastblock; 1716 sbi->s_last_block = uopt->lastblock;
@@ -1749,12 +1719,11 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1749 nsr_off = udf_check_vsd(sb); 1719 nsr_off = udf_check_vsd(sb);
1750 if (!nsr_off) { 1720 if (!nsr_off) {
1751 if (!silent) 1721 if (!silent)
1752 printk(KERN_WARNING "UDF-fs: No VRS found\n"); 1722 udf_warn(sb, "No VRS found\n");
1753 return 0; 1723 return 0;
1754 } 1724 }
1755 if (nsr_off == -1) 1725 if (nsr_off == -1)
1756 udf_debug("Failed to read byte 32768. Assuming open " 1726 udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
1757 "disc. Skipping validity check\n");
1758 if (!sbi->s_last_block) 1727 if (!sbi->s_last_block)
1759 sbi->s_last_block = udf_get_last_block(sb); 1728 sbi->s_last_block = udf_get_last_block(sb);
1760 } else { 1729 } else {
@@ -1765,7 +1734,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1765 sbi->s_anchor = uopt->anchor; 1734 sbi->s_anchor = uopt->anchor;
1766 if (!udf_find_anchor(sb, fileset)) { 1735 if (!udf_find_anchor(sb, fileset)) {
1767 if (!silent) 1736 if (!silent)
1768 printk(KERN_WARNING "UDF-fs: No anchor found\n"); 1737 udf_warn(sb, "No anchor found\n");
1769 return 0; 1738 return 0;
1770 } 1739 }
1771 return 1; 1740 return 1;
@@ -1937,8 +1906,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1937 1906
1938 if (uopt.flags & (1 << UDF_FLAG_UTF8) && 1907 if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
1939 uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { 1908 uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
1940 udf_error(sb, "udf_read_super", 1909 udf_err(sb, "utf8 cannot be combined with iocharset\n");
1941 "utf8 cannot be combined with iocharset\n");
1942 goto error_out; 1910 goto error_out;
1943 } 1911 }
1944#ifdef CONFIG_UDF_NLS 1912#ifdef CONFIG_UDF_NLS
@@ -1987,15 +1955,14 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1987 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 1955 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1988 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { 1956 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
1989 if (!silent) 1957 if (!silent)
1990 printk(KERN_NOTICE 1958 pr_notice("Rescanning with blocksize %d\n",
1991 "UDF-fs: Rescanning with blocksize " 1959 UDF_DEFAULT_BLOCKSIZE);
1992 "%d\n", UDF_DEFAULT_BLOCKSIZE);
1993 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; 1960 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
1994 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 1961 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1995 } 1962 }
1996 } 1963 }
1997 if (!ret) { 1964 if (!ret) {
1998 printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); 1965 udf_warn(sb, "No partition found (1)\n");
1999 goto error_out; 1966 goto error_out;
2000 } 1967 }
2001 1968
@@ -2010,10 +1977,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2010 le16_to_cpu(lvidiu->maxUDFWriteRev); */ 1977 le16_to_cpu(lvidiu->maxUDFWriteRev); */
2011 1978
2012 if (minUDFReadRev > UDF_MAX_READ_VERSION) { 1979 if (minUDFReadRev > UDF_MAX_READ_VERSION) {
2013 printk(KERN_ERR "UDF-fs: minUDFReadRev=%x " 1980 udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
2014 "(max is %x)\n", 1981 le16_to_cpu(lvidiu->minUDFReadRev),
2015 le16_to_cpu(lvidiu->minUDFReadRev), 1982 UDF_MAX_READ_VERSION);
2016 UDF_MAX_READ_VERSION);
2017 goto error_out; 1983 goto error_out;
2018 } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) 1984 } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
2019 sb->s_flags |= MS_RDONLY; 1985 sb->s_flags |= MS_RDONLY;
@@ -2027,28 +1993,27 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2027 } 1993 }
2028 1994
2029 if (!sbi->s_partitions) { 1995 if (!sbi->s_partitions) {
2030 printk(KERN_WARNING "UDF-fs: No partition found (2)\n"); 1996 udf_warn(sb, "No partition found (2)\n");
2031 goto error_out; 1997 goto error_out;
2032 } 1998 }
2033 1999
2034 if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & 2000 if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
2035 UDF_PART_FLAG_READ_ONLY) { 2001 UDF_PART_FLAG_READ_ONLY) {
2036 printk(KERN_NOTICE "UDF-fs: Partition marked readonly; " 2002 pr_notice("Partition marked readonly; forcing readonly mount\n");
2037 "forcing readonly mount\n");
2038 sb->s_flags |= MS_RDONLY; 2003 sb->s_flags |= MS_RDONLY;
2039 } 2004 }
2040 2005
2041 if (udf_find_fileset(sb, &fileset, &rootdir)) { 2006 if (udf_find_fileset(sb, &fileset, &rootdir)) {
2042 printk(KERN_WARNING "UDF-fs: No fileset found\n"); 2007 udf_warn(sb, "No fileset found\n");
2043 goto error_out; 2008 goto error_out;
2044 } 2009 }
2045 2010
2046 if (!silent) { 2011 if (!silent) {
2047 struct timestamp ts; 2012 struct timestamp ts;
2048 udf_time_to_disk_stamp(&ts, sbi->s_record_time); 2013 udf_time_to_disk_stamp(&ts, sbi->s_record_time);
2049 udf_info("UDF: Mounting volume '%s', " 2014 udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
2050 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", 2015 sbi->s_volume_ident,
2051 sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, 2016 le16_to_cpu(ts.year), ts.month, ts.day,
2052 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); 2017 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone));
2053 } 2018 }
2054 if (!(sb->s_flags & MS_RDONLY)) 2019 if (!(sb->s_flags & MS_RDONLY))
@@ -2059,8 +2024,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2059 /* perhaps it's not extensible enough, but for now ... */ 2024 /* perhaps it's not extensible enough, but for now ... */
2060 inode = udf_iget(sb, &rootdir); 2025 inode = udf_iget(sb, &rootdir);
2061 if (!inode) { 2026 if (!inode) {
2062 printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " 2027 udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
2063 "partition=%d\n",
2064 rootdir.logicalBlockNum, rootdir.partitionReferenceNum); 2028 rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
2065 goto error_out; 2029 goto error_out;
2066 } 2030 }
@@ -2068,7 +2032,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2068 /* Allocate a dentry for the root inode */ 2032 /* Allocate a dentry for the root inode */
2069 sb->s_root = d_alloc_root(inode); 2033 sb->s_root = d_alloc_root(inode);
2070 if (!sb->s_root) { 2034 if (!sb->s_root) {
2071 printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n"); 2035 udf_err(sb, "Couldn't allocate root dentry\n");
2072 iput(inode); 2036 iput(inode);
2073 goto error_out; 2037 goto error_out;
2074 } 2038 }
@@ -2096,32 +2060,40 @@ error_out:
2096 return -EINVAL; 2060 return -EINVAL;
2097} 2061}
2098 2062
2099static void udf_error(struct super_block *sb, const char *function, 2063void _udf_err(struct super_block *sb, const char *function,
2100 const char *fmt, ...) 2064 const char *fmt, ...)
2101{ 2065{
2066 struct va_format vaf;
2102 va_list args; 2067 va_list args;
2103 2068
2104 if (!(sb->s_flags & MS_RDONLY)) { 2069 /* mark sb error */
2105 /* mark sb error */ 2070 if (!(sb->s_flags & MS_RDONLY))
2106 sb->s_dirt = 1; 2071 sb->s_dirt = 1;
2107 } 2072
2108 va_start(args, fmt); 2073 va_start(args, fmt);
2109 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2074
2075 vaf.fmt = fmt;
2076 vaf.va = &args;
2077
2078 pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf);
2079
2110 va_end(args); 2080 va_end(args);
2111 printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
2112 sb->s_id, function, error_buf);
2113} 2081}
2114 2082
2115void udf_warning(struct super_block *sb, const char *function, 2083void _udf_warn(struct super_block *sb, const char *function,
2116 const char *fmt, ...) 2084 const char *fmt, ...)
2117{ 2085{
2086 struct va_format vaf;
2118 va_list args; 2087 va_list args;
2119 2088
2120 va_start(args, fmt); 2089 va_start(args, fmt);
2121 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2090
2091 vaf.fmt = fmt;
2092 vaf.va = &args;
2093
2094 pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf);
2095
2122 va_end(args); 2096 va_end(args);
2123 printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
2124 sb->s_id, function, error_buf);
2125} 2097}
2126 2098
2127static void udf_put_super(struct super_block *sb) 2099static void udf_put_super(struct super_block *sb)
@@ -2213,11 +2185,11 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2213 bh = udf_read_ptagged(sb, &loc, 0, &ident); 2185 bh = udf_read_ptagged(sb, &loc, 0, &ident);
2214 2186
2215 if (!bh) { 2187 if (!bh) {
2216 printk(KERN_ERR "udf: udf_count_free failed\n"); 2188 udf_err(sb, "udf_count_free failed\n");
2217 goto out; 2189 goto out;
2218 } else if (ident != TAG_IDENT_SBD) { 2190 } else if (ident != TAG_IDENT_SBD) {
2219 brelse(bh); 2191 brelse(bh);
2220 printk(KERN_ERR "udf: udf_count_free failed\n"); 2192 udf_err(sb, "udf_count_free failed\n");
2221 goto out; 2193 goto out;
2222 } 2194 }
2223 2195
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8424308db4b4..4b98fee8e161 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -95,23 +95,21 @@ void udf_truncate_tail_extent(struct inode *inode)
95 lbcount += elen; 95 lbcount += elen;
96 if (lbcount > inode->i_size) { 96 if (lbcount > inode->i_size) {
97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize) 97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
98 printk(KERN_WARNING 98 udf_warn(inode->i_sb,
99 "udf_truncate_tail_extent(): Too long " 99 "Too long extent after EOF in inode %u: i_size: %lld lbcount: %lld extent %u+%u\n",
100 "extent after EOF in inode %u: i_size: " 100 (unsigned)inode->i_ino,
101 "%Ld lbcount: %Ld extent %u+%u\n", 101 (long long)inode->i_size,
102 (unsigned)inode->i_ino, 102 (long long)lbcount,
103 (long long)inode->i_size, 103 (unsigned)eloc.logicalBlockNum,
104 (long long)lbcount, 104 (unsigned)elen);
105 (unsigned)eloc.logicalBlockNum,
106 (unsigned)elen);
107 nelen = elen - (lbcount - inode->i_size); 105 nelen = elen - (lbcount - inode->i_size);
108 epos.offset -= adsize; 106 epos.offset -= adsize;
109 extent_trunc(inode, &epos, &eloc, etype, elen, nelen); 107 extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
110 epos.offset += adsize; 108 epos.offset += adsize;
111 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) 109 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
112 printk(KERN_ERR "udf_truncate_tail_extent(): " 110 udf_err(inode->i_sb,
113 "Extent after EOF in inode %u.\n", 111 "Extent after EOF in inode %u\n",
114 (unsigned)inode->i_ino); 112 (unsigned)inode->i_ino);
115 break; 113 break;
116 } 114 }
117 } 115 }
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 4858c191242b..5142a82e3276 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -54,13 +54,16 @@
54 54
55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ 55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
56 56
57#define MF_DUPLICATE_MD 0x01
58#define MF_MIRROR_FE_LOADED 0x02
59
57struct udf_meta_data { 60struct udf_meta_data {
58 __u32 s_meta_file_loc; 61 __u32 s_meta_file_loc;
59 __u32 s_mirror_file_loc; 62 __u32 s_mirror_file_loc;
60 __u32 s_bitmap_file_loc; 63 __u32 s_bitmap_file_loc;
61 __u32 s_alloc_unit_size; 64 __u32 s_alloc_unit_size;
62 __u16 s_align_unit_size; 65 __u16 s_align_unit_size;
63 __u8 s_dup_md_flag; 66 int s_flags;
64 struct inode *s_metadata_fe; 67 struct inode *s_metadata_fe;
65 struct inode *s_mirror_fe; 68 struct inode *s_mirror_fe;
66 struct inode *s_bitmap_fe; 69 struct inode *s_bitmap_fe;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index dc8a8dcc5ae1..f34e6fc0cdaa 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -1,6 +1,8 @@
1#ifndef __UDF_DECL_H 1#ifndef __UDF_DECL_H
2#define __UDF_DECL_H 2#define __UDF_DECL_H
3 3
4#define pr_fmt(fmt) "UDF-fs: " fmt
5
4#include "ecma_167.h" 6#include "ecma_167.h"
5#include "osta_udf.h" 7#include "osta_udf.h"
6 8
@@ -16,23 +18,30 @@
16#define UDF_PREALLOCATE 18#define UDF_PREALLOCATE
17#define UDF_DEFAULT_PREALLOC_BLOCKS 8 19#define UDF_DEFAULT_PREALLOC_BLOCKS 8
18 20
21extern __printf(3, 4) void _udf_err(struct super_block *sb,
22 const char *function, const char *fmt, ...);
23#define udf_err(sb, fmt, ...) \
24 _udf_err(sb, __func__, fmt, ##__VA_ARGS__)
25
26extern __printf(3, 4) void _udf_warn(struct super_block *sb,
27 const char *function, const char *fmt, ...);
28#define udf_warn(sb, fmt, ...) \
29 _udf_warn(sb, __func__, fmt, ##__VA_ARGS__)
30
31#define udf_info(fmt, ...) \
32 pr_info("INFO " fmt, ##__VA_ARGS__)
33
19#undef UDFFS_DEBUG 34#undef UDFFS_DEBUG
20 35
21#ifdef UDFFS_DEBUG 36#ifdef UDFFS_DEBUG
22#define udf_debug(f, a...) \ 37#define udf_debug(fmt, ...) \
23do { \ 38 printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt), \
24 printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \ 39 __FILE__, __LINE__, __func__, ##__VA_ARGS__)
25 __FILE__, __LINE__, __func__); \
26 printk(f, ##a); \
27} while (0)
28#else 40#else
29#define udf_debug(f, a...) /**/ 41#define udf_debug(fmt, ...) \
42 no_printk(fmt, ##__VA_ARGS__)
30#endif 43#endif
31 44
32#define udf_info(f, a...) \
33 printk(KERN_INFO "UDF-fs INFO " f, ##a);
34
35
36#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) 45#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
37#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) 46#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
38 47
@@ -112,8 +121,6 @@ struct extent_position {
112 121
113/* super.c */ 122/* super.c */
114 123
115extern __printf(3, 4) void udf_warning(struct super_block *, const char *,
116 const char *, ...);
117static inline void udf_updated_lvid(struct super_block *sb) 124static inline void udf_updated_lvid(struct super_block *sb)
118{ 125{
119 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh; 126 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh;
@@ -126,6 +133,8 @@ static inline void udf_updated_lvid(struct super_block *sb)
126 UDF_SB(sb)->s_lvid_dirty = 1; 133 UDF_SB(sb)->s_lvid_dirty = 1;
127} 134}
128extern u64 lvid_get_unique_id(struct super_block *sb); 135extern u64 lvid_get_unique_id(struct super_block *sb);
136struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
137 u32 meta_file_loc, u32 partition_num);
129 138
130/* namei.c */ 139/* namei.c */
131extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, 140extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index b8c828c4d200..1f11483eba6a 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -34,9 +34,10 @@
34 * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm 34 * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm
35 */ 35 */
36 36
37#include "udfdecl.h"
38
37#include <linux/types.h> 39#include <linux/types.h>
38#include <linux/kernel.h> 40#include <linux/kernel.h>
39#include "udfdecl.h"
40 41
41#define EPOCH_YEAR 1970 42#define EPOCH_YEAR 1970
42 43
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index d03a90b6ad69..44b815e57f94 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -114,7 +114,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
114 cmp_id = ocu_i->u_cmpID; 114 cmp_id = ocu_i->u_cmpID;
115 if (cmp_id != 8 && cmp_id != 16) { 115 if (cmp_id != 8 && cmp_id != 16) {
116 memset(utf_o, 0, sizeof(struct ustr)); 116 memset(utf_o, 0, sizeof(struct ustr));
117 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", 117 pr_err("unknown compression code (%d) stri=%s\n",
118 cmp_id, ocu_i->u_name); 118 cmp_id, ocu_i->u_name);
119 return 0; 119 return 0;
120 } 120 }
@@ -242,7 +242,7 @@ try_again:
242 if (utf_cnt) { 242 if (utf_cnt) {
243error_out: 243error_out:
244 ocu[++u_len] = '?'; 244 ocu[++u_len] = '?';
245 printk(KERN_DEBUG "udf: bad UTF-8 character\n"); 245 printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
246 } 246 }
247 247
248 ocu[length - 1] = (uint8_t)u_len + 1; 248 ocu[length - 1] = (uint8_t)u_len + 1;
@@ -267,7 +267,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
267 cmp_id = ocu_i->u_cmpID; 267 cmp_id = ocu_i->u_cmpID;
268 if (cmp_id != 8 && cmp_id != 16) { 268 if (cmp_id != 8 && cmp_id != 16) {
269 memset(utf_o, 0, sizeof(struct ustr)); 269 memset(utf_o, 0, sizeof(struct ustr));
270 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", 270 pr_err("unknown compression code (%d) stri=%s\n",
271 cmp_id, ocu_i->u_name); 271 cmp_id, ocu_i->u_name);
272 return 0; 272 return 0;
273 } 273 }
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 2eabf04af3de..78a4c70d46b5 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -341,7 +341,7 @@ cg_found:
341 341
342fail_remove_inode: 342fail_remove_inode:
343 unlock_super(sb); 343 unlock_super(sb);
344 inode->i_nlink = 0; 344 clear_nlink(inode);
345 iput(inode); 345 iput(inode);
346 UFSD("EXIT (FAILED): err %d\n", err); 346 UFSD("EXIT (FAILED): err %d\n", err);
347 return ERR_PTR(err); 347 return ERR_PTR(err);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index b4d791a83207..879b13436fa4 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -589,7 +589,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
589 * Copy data to the in-core inode. 589 * Copy data to the in-core inode.
590 */ 590 */
591 inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); 591 inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
592 inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink); 592 set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
593 if (inode->i_nlink == 0) { 593 if (inode->i_nlink == 0) {
594 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 594 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
595 return -1; 595 return -1;
@@ -637,7 +637,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
637 * Copy data to the in-core inode. 637 * Copy data to the in-core inode.
638 */ 638 */
639 inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); 639 inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
640 inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink); 640 set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
641 if (inode->i_nlink == 0) { 641 if (inode->i_nlink == 0) {
642 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 642 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
643 return -1; 643 return -1;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ba2a07b7343..23ce927973a4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1153,7 +1153,7 @@ xfs_setup_inode(
1153 hlist_add_fake(&inode->i_hash); 1153 hlist_add_fake(&inode->i_hash);
1154 1154
1155 inode->i_mode = ip->i_d.di_mode; 1155 inode->i_mode = ip->i_d.di_mode;
1156 inode->i_nlink = ip->i_d.di_nlink; 1156 set_nlink(inode, ip->i_d.di_nlink);
1157 inode->i_uid = ip->i_d.di_uid; 1157 inode->i_uid = ip->i_d.di_uid;
1158 inode->i_gid = ip->i_d.di_gid; 1158 inode->i_gid = ip->i_d.di_gid;
1159 1159