aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c4
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/adfs/inode.c2
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/inode.c8
-rw-r--r--fs/affs/namei.c6
-rw-r--r--fs/afs/fsclient.c2
-rw-r--r--fs/afs/inode.c4
-rw-r--r--fs/aio.c140
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf.c11
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/bio.c1
-rw-r--r--fs/block_dev.c15
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c87
-rw-r--r--fs/ceph/inode.c10
-rw-r--r--fs/ceph/mds_client.c10
-rw-r--r--fs/ceph/super.c4
-rw-r--r--fs/ceph/super.h23
-rw-r--r--fs/cifs/cifsencrypt.c8
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/file.c11
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c2
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smbencrypt.c63
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/compat.c7
-rw-r--r--fs/dcache.c40
-rw-r--r--fs/devpts/inode.c4
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/inode.c12
-rw-r--r--fs/efs/inode.c2
-rw-r--r--fs/eventpoll.c25
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/Kconfig2
-rw-r--r--fs/exofs/inode.c2
-rw-r--r--fs/ext2/balloc.c2
-rw-r--r--fs/ext2/ext2.h8
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/balloc.c17
-rw-r--r--fs/ext3/fsync.c10
-rw-r--r--fs/ext3/ialloc.c47
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/ioctl.c24
-rw-r--r--fs/ext3/namei.c6
-rw-r--r--fs/ext3/super.c12
-rw-r--r--fs/ext4/balloc.c345
-rw-r--r--fs/ext4/ext4.h185
-rw-r--r--fs/ext4/ext4_extents.h2
-rw-r--r--fs/ext4/ext4_jbd2.c8
-rw-r--r--fs/ext4/extents.c1168
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/ext4/fsync.c10
-rw-r--r--fs/ext4/ialloc.c206
-rw-r--r--fs/ext4/indirect.c20
-rw-r--r--fs/ext4/inode.c522
-rw-r--r--fs/ext4/ioctl.c65
-rw-r--r--fs/ext4/mballoc.c331
-rw-r--r--fs/ext4/mballoc.h11
-rw-r--r--fs/ext4/migrate.c111
-rw-r--r--fs/ext4/mmp.c10
-rw-r--r--fs/ext4/move_extent.c1
-rw-r--r--fs/ext4/namei.c29
-rw-r--r--fs/ext4/page-io.c66
-rw-r--r--fs/ext4/resize.c10
-rw-r--r--fs/ext4/super.c263
-rw-r--r--fs/ext4/xattr.c12
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/fat.h9
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fs-writeback.c84
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/hfs/btree.c20
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hostfs/hostfs_user.c1
-rw-r--r--fs/hpfs/dir.c2
-rw-r--r--fs/hpfs/hpfs_fn.h4
-rw-r--r--fs/hpfs/inode.c10
-rw-r--r--fs/hpfs/namei.c8
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/isofs/inode.c14
-rw-r--r--fs/isofs/rock.c4
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c44
-rw-r--r--fs/jbd2/recovery.c28
-rw-r--r--fs/jbd2/transaction.c68
-rw-r--r--fs/jffs2/dir.c6
-rw-r--r--fs/jffs2/fs.c6
-rw-r--r--fs/jfs/jfs_imap.c6
-rw-r--r--fs/jfs/jfs_inode.c2
-rw-r--r--fs/jfs/namei.c12
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/logfs/dir.c8
-rw-r--r--fs/logfs/inode.c3
-rw-r--r--fs/logfs/logfs.h1
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/logfs/super.c22
-rw-r--r--fs/minix/inode.c4
-rw-r--r--fs/namei.c18
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/callback_xdr.c12
-rw-r--r--fs/nfs/file.c9
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/nfs4filelayout.c7
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c872
-rw-r--r--fs/nfs/objlayout/objlayout.c209
-rw-r--r--fs/nfs/objlayout/objlayout.h48
-rw-r--r--fs/nfs/pagelist.c2
-rw-r--r--fs/nfs/pnfs.c25
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/nilfs2/nilfs.h8
-rw-r--r--fs/ntfs/debug.h15
-rw-r--r--fs/ntfs/inode.c8
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/namei.c18
-rw-r--r--fs/ocfs2/super.h14
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/partitions/ldm.c16
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/proc/base.c171
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/proc_sysctl.c48
-rw-r--r--fs/proc/task_mmu.c5
-rw-r--r--fs/pstore/inode.c40
-rw-r--r--fs/pstore/internal.h2
-rw-r--r--fs/pstore/platform.c93
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/quota/quota.c9
-rw-r--r--fs/ramfs/inode.c10
-rw-r--r--fs/read_write.c8
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/namei.c16
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/squashfs/Kconfig22
-rw-r--r--fs/squashfs/inode.c18
-rw-r--r--fs/squashfs/squashfs_fs.h7
-rw-r--r--fs/squashfs/super.c2
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c5
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c11
-rw-r--r--fs/sync.c4
-rw-r--r--fs/sysfs/dir.c14
-rw-r--r--fs/sysfs/inode.c2
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/ubifs/xattr.c4
-rw-r--r--fs/udf/balloc.c14
-rw-r--r--fs/udf/directory.c8
-rw-r--r--fs/udf/inode.c56
-rw-r--r--fs/udf/lowlevel.c2
-rw-r--r--fs/udf/misc.c19
-rw-r--r--fs/udf/namei.c20
-rw-r--r--fs/udf/partition.c19
-rw-r--r--fs/udf/super.c280
-rw-r--r--fs/udf/truncate.c22
-rw-r--r--fs/udf/udf_sb.h5
-rw-r--r--fs/udf/udfdecl.h35
-rw-r--r--fs/udf/udftime.c3
-rw-r--r--fs/udf/unicode.c6
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/ufs/ufs.h9
-rw-r--r--fs/xfs/xfs_aops.c8
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_message.h42
207 files changed, 3991 insertions, 2974 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b5a1076aaa6c..879ed8851737 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1138,7 +1138,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1138 struct v9fs_session_info *v9ses = sb->s_fs_info; 1138 struct v9fs_session_info *v9ses = sb->s_fs_info;
1139 struct v9fs_inode *v9inode = V9FS_I(inode); 1139 struct v9fs_inode *v9inode = V9FS_I(inode);
1140 1140
1141 inode->i_nlink = 1; 1141 set_nlink(inode, 1);
1142 1142
1143 inode->i_atime.tv_sec = stat->atime; 1143 inode->i_atime.tv_sec = stat->atime;
1144 inode->i_mtime.tv_sec = stat->mtime; 1144 inode->i_mtime.tv_sec = stat->mtime;
@@ -1164,7 +1164,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1164 /* HARDLINKCOUNT %u */ 1164 /* HARDLINKCOUNT %u */
1165 sscanf(ext, "%13s %u", tag_name, &i_nlink); 1165 sscanf(ext, "%13s %u", tag_name, &i_nlink);
1166 if (!strncmp(tag_name, "HARDLINKCOUNT", 13)) 1166 if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
1167 inode->i_nlink = i_nlink; 1167 set_nlink(inode, i_nlink);
1168 } 1168 }
1169 } 1169 }
1170 mode = stat->mode & S_IALLUGO; 1170 mode = stat->mode & S_IALLUGO;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index aded79fcd5cf..0b5745e21946 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -606,7 +606,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
606 inode->i_ctime.tv_nsec = stat->st_ctime_nsec; 606 inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
607 inode->i_uid = stat->st_uid; 607 inode->i_uid = stat->st_uid;
608 inode->i_gid = stat->st_gid; 608 inode->i_gid = stat->st_gid;
609 inode->i_nlink = stat->st_nlink; 609 set_nlink(inode, stat->st_nlink);
610 610
611 mode = stat->st_mode & S_IALLUGO; 611 mode = stat->st_mode & S_IALLUGO;
612 mode |= inode->i_mode & ~S_IALLUGO; 612 mode |= inode->i_mode & ~S_IALLUGO;
@@ -632,7 +632,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
632 if (stat->st_result_mask & P9_STATS_GID) 632 if (stat->st_result_mask & P9_STATS_GID)
633 inode->i_gid = stat->st_gid; 633 inode->i_gid = stat->st_gid;
634 if (stat->st_result_mask & P9_STATS_NLINK) 634 if (stat->st_result_mask & P9_STATS_NLINK)
635 inode->i_nlink = stat->st_nlink; 635 set_nlink(inode, stat->st_nlink);
636 if (stat->st_result_mask & P9_STATS_MODE) { 636 if (stat->st_result_mask & P9_STATS_MODE) {
637 inode->i_mode = stat->st_mode; 637 inode->i_mode = stat->st_mode;
638 if ((S_ISBLK(inode->i_mode)) || 638 if ((S_ISBLK(inode->i_mode)) ||
diff --git a/fs/Kconfig b/fs/Kconfig
index 9fe0b349f4cd..5f4c45d4aa10 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -109,7 +109,7 @@ source "fs/proc/Kconfig"
109source "fs/sysfs/Kconfig" 109source "fs/sysfs/Kconfig"
110 110
111config TMPFS 111config TMPFS
112 bool "Virtual memory file system support (former shm fs)" 112 bool "Tmpfs virtual memory file system support (former shm fs)"
113 depends on SHMEM 113 depends on SHMEM
114 help 114 help
115 Tmpfs is a file system which keeps all files in virtual memory. 115 Tmpfs is a file system which keeps all files in virtual memory.
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index d5250c5aae21..1dab6a174d6a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -247,7 +247,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
247 inode->i_gid = ADFS_SB(sb)->s_gid; 247 inode->i_gid = ADFS_SB(sb)->s_gid;
248 inode->i_ino = obj->file_id; 248 inode->i_ino = obj->file_id;
249 inode->i_size = obj->size; 249 inode->i_size = obj->size;
250 inode->i_nlink = 2; 250 set_nlink(inode, 2);
251 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >> 251 inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >>
252 sb->s_blocksize_bits; 252 sb->s_blocksize_bits;
253 253
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 3a4557e8325c..de37ec842340 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -215,7 +215,7 @@ affs_remove_link(struct dentry *dentry)
215 break; 215 break;
216 default: 216 default:
217 if (!AFFS_TAIL(sb, bh)->link_chain) 217 if (!AFFS_TAIL(sb, bh)->link_chain)
218 inode->i_nlink = 1; 218 set_nlink(inode, 1);
219 } 219 }
220 affs_free_block(sb, link_ino); 220 affs_free_block(sb, link_ino);
221 goto done; 221 goto done;
@@ -316,7 +316,7 @@ affs_remove_header(struct dentry *dentry)
316 if (inode->i_nlink > 1) 316 if (inode->i_nlink > 1)
317 retval = affs_remove_link(dentry); 317 retval = affs_remove_link(dentry);
318 else 318 else
319 inode->i_nlink = 0; 319 clear_nlink(inode);
320 affs_unlock_link(inode); 320 affs_unlock_link(inode);
321 inode->i_ctime = CURRENT_TIME_SEC; 321 inode->i_ctime = CURRENT_TIME_SEC;
322 mark_inode_dirty(inode); 322 mark_inode_dirty(inode);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 5d828903ac69..88a4b0b50058 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -54,7 +54,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
54 prot = be32_to_cpu(tail->protect); 54 prot = be32_to_cpu(tail->protect);
55 55
56 inode->i_size = 0; 56 inode->i_size = 0;
57 inode->i_nlink = 1; 57 set_nlink(inode, 1);
58 inode->i_mode = 0; 58 inode->i_mode = 0;
59 AFFS_I(inode)->i_extcnt = 1; 59 AFFS_I(inode)->i_extcnt = 1;
60 AFFS_I(inode)->i_ext_last = ~1; 60 AFFS_I(inode)->i_ext_last = ~1;
@@ -137,7 +137,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
137 sbi->s_hashsize + 1; 137 sbi->s_hashsize + 1;
138 } 138 }
139 if (tail->link_chain) 139 if (tail->link_chain)
140 inode->i_nlink = 2; 140 set_nlink(inode, 2);
141 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 141 inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
142 inode->i_op = &affs_file_inode_operations; 142 inode->i_op = &affs_file_inode_operations;
143 inode->i_fop = &affs_file_operations; 143 inode->i_fop = &affs_file_operations;
@@ -304,7 +304,7 @@ affs_new_inode(struct inode *dir)
304 inode->i_uid = current_fsuid(); 304 inode->i_uid = current_fsuid();
305 inode->i_gid = current_fsgid(); 305 inode->i_gid = current_fsgid();
306 inode->i_ino = block; 306 inode->i_ino = block;
307 inode->i_nlink = 1; 307 set_nlink(inode, 1);
308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 308 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
309 atomic_set(&AFFS_I(inode)->i_opencnt, 0); 309 atomic_set(&AFFS_I(inode)->i_opencnt, 0);
310 AFFS_I(inode)->i_blkcnt = 0; 310 AFFS_I(inode)->i_blkcnt = 0;
@@ -387,7 +387,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
387 AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block); 387 AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block);
388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); 388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
389 mark_buffer_dirty_inode(inode_bh, inode); 389 mark_buffer_dirty_inode(inode_bh, inode);
390 inode->i_nlink = 2; 390 set_nlink(inode, 2);
391 ihold(inode); 391 ihold(inode);
392 } 392 }
393 affs_fix_checksum(sb, bh); 393 affs_fix_checksum(sb, bh);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index e3e9efc1fdd8..780a11dc6318 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -277,7 +277,7 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
277 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops; 277 inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
278 error = affs_add_entry(dir, inode, dentry, ST_FILE); 278 error = affs_add_entry(dir, inode, dentry, ST_FILE);
279 if (error) { 279 if (error) {
280 inode->i_nlink = 0; 280 clear_nlink(inode);
281 iput(inode); 281 iput(inode);
282 return error; 282 return error;
283 } 283 }
@@ -305,7 +305,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
305 305
306 error = affs_add_entry(dir, inode, dentry, ST_USERDIR); 306 error = affs_add_entry(dir, inode, dentry, ST_USERDIR);
307 if (error) { 307 if (error) {
308 inode->i_nlink = 0; 308 clear_nlink(inode);
309 mark_inode_dirty(inode); 309 mark_inode_dirty(inode);
310 iput(inode); 310 iput(inode);
311 return error; 311 return error;
@@ -392,7 +392,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
392 return 0; 392 return 0;
393 393
394err: 394err:
395 inode->i_nlink = 0; 395 clear_nlink(inode);
396 mark_inode_dirty(inode); 396 mark_inode_dirty(inode);
397 iput(inode); 397 iput(inode);
398 return error; 398 return error;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 346e3289abd7..2f213d109c21 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -90,7 +90,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
90 vnode->vfs_inode.i_uid = status->owner; 90 vnode->vfs_inode.i_uid = status->owner;
91 vnode->vfs_inode.i_gid = status->group; 91 vnode->vfs_inode.i_gid = status->group;
92 vnode->vfs_inode.i_generation = vnode->fid.unique; 92 vnode->vfs_inode.i_generation = vnode->fid.unique;
93 vnode->vfs_inode.i_nlink = status->nlink; 93 set_nlink(&vnode->vfs_inode, status->nlink);
94 94
95 mode = vnode->vfs_inode.i_mode; 95 mode = vnode->vfs_inode.i_mode;
96 mode &= ~S_IALLUGO; 96 mode &= ~S_IALLUGO;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0fdab6e03d87..d890ae3b2ce6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -67,7 +67,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
67 fscache_attr_changed(vnode->cache); 67 fscache_attr_changed(vnode->cache);
68#endif 68#endif
69 69
70 inode->i_nlink = vnode->status.nlink; 70 set_nlink(inode, vnode->status.nlink);
71 inode->i_uid = vnode->status.owner; 71 inode->i_uid = vnode->status.owner;
72 inode->i_gid = 0; 72 inode->i_gid = 0;
73 inode->i_size = vnode->status.size; 73 inode->i_size = vnode->status.size;
@@ -174,7 +174,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
174 inode->i_size = 0; 174 inode->i_size = 0;
175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 175 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
176 inode->i_op = &afs_autocell_inode_operations; 176 inode->i_op = &afs_autocell_inode_operations;
177 inode->i_nlink = 2; 177 set_nlink(inode, 2);
178 inode->i_uid = 0; 178 inode->i_uid = 0;
179 inode->i_gid = 0; 179 inode->i_gid = 0;
180 inode->i_ctime.tv_sec = get_seconds(); 180 inode->i_ctime.tv_sec = get_seconds();
diff --git a/fs/aio.c b/fs/aio.c
index e29ec485af25..78c514cfd212 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -440,8 +440,6 @@ void exit_aio(struct mm_struct *mm)
440static struct kiocb *__aio_get_req(struct kioctx *ctx) 440static struct kiocb *__aio_get_req(struct kioctx *ctx)
441{ 441{
442 struct kiocb *req = NULL; 442 struct kiocb *req = NULL;
443 struct aio_ring *ring;
444 int okay = 0;
445 443
446 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL); 444 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
447 if (unlikely(!req)) 445 if (unlikely(!req))
@@ -459,39 +457,114 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
459 INIT_LIST_HEAD(&req->ki_run_list); 457 INIT_LIST_HEAD(&req->ki_run_list);
460 req->ki_eventfd = NULL; 458 req->ki_eventfd = NULL;
461 459
462 /* Check if the completion queue has enough free space to 460 return req;
463 * accept an event from this io. 461}
464 */ 462
463/*
464 * struct kiocb's are allocated in batches to reduce the number of
465 * times the ctx lock is acquired and released.
466 */
467#define KIOCB_BATCH_SIZE 32L
468struct kiocb_batch {
469 struct list_head head;
470 long count; /* number of requests left to allocate */
471};
472
473static void kiocb_batch_init(struct kiocb_batch *batch, long total)
474{
475 INIT_LIST_HEAD(&batch->head);
476 batch->count = total;
477}
478
479static void kiocb_batch_free(struct kiocb_batch *batch)
480{
481 struct kiocb *req, *n;
482
483 list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
484 list_del(&req->ki_batch);
485 kmem_cache_free(kiocb_cachep, req);
486 }
487}
488
489/*
490 * Allocate a batch of kiocbs. This avoids taking and dropping the
491 * context lock a lot during setup.
492 */
493static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
494{
495 unsigned short allocated, to_alloc;
496 long avail;
497 bool called_fput = false;
498 struct kiocb *req, *n;
499 struct aio_ring *ring;
500
501 to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
502 for (allocated = 0; allocated < to_alloc; allocated++) {
503 req = __aio_get_req(ctx);
504 if (!req)
505 /* allocation failed, go with what we've got */
506 break;
507 list_add(&req->ki_batch, &batch->head);
508 }
509
510 if (allocated == 0)
511 goto out;
512
513retry:
465 spin_lock_irq(&ctx->ctx_lock); 514 spin_lock_irq(&ctx->ctx_lock);
466 ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); 515 ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
467 if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) { 516
517 avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
518 BUG_ON(avail < 0);
519 if (avail == 0 && !called_fput) {
520 /*
521 * Handle a potential starvation case. It is possible that
522 * we hold the last reference on a struct file, causing us
523 * to delay the final fput to non-irq context. In this case,
524 * ctx->reqs_active is artificially high. Calling the fput
525 * routine here may free up a slot in the event completion
526 * ring, allowing this allocation to succeed.
527 */
528 kunmap_atomic(ring);
529 spin_unlock_irq(&ctx->ctx_lock);
530 aio_fput_routine(NULL);
531 called_fput = true;
532 goto retry;
533 }
534
535 if (avail < allocated) {
536 /* Trim back the number of requests. */
537 list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
538 list_del(&req->ki_batch);
539 kmem_cache_free(kiocb_cachep, req);
540 if (--allocated <= avail)
541 break;
542 }
543 }
544
545 batch->count -= allocated;
546 list_for_each_entry(req, &batch->head, ki_batch) {
468 list_add(&req->ki_list, &ctx->active_reqs); 547 list_add(&req->ki_list, &ctx->active_reqs);
469 ctx->reqs_active++; 548 ctx->reqs_active++;
470 okay = 1;
471 } 549 }
472 kunmap_atomic(ring, KM_USER0);
473 spin_unlock_irq(&ctx->ctx_lock);
474 550
475 if (!okay) { 551 kunmap_atomic(ring);
476 kmem_cache_free(kiocb_cachep, req); 552 spin_unlock_irq(&ctx->ctx_lock);
477 req = NULL;
478 }
479 553
480 return req; 554out:
555 return allocated;
481} 556}
482 557
483static inline struct kiocb *aio_get_req(struct kioctx *ctx) 558static inline struct kiocb *aio_get_req(struct kioctx *ctx,
559 struct kiocb_batch *batch)
484{ 560{
485 struct kiocb *req; 561 struct kiocb *req;
486 /* Handle a potential starvation case -- should be exceedingly rare as 562
487 * requests will be stuck on fput_head only if the aio_fput_routine is 563 if (list_empty(&batch->head))
488 * delayed and the requests were the last user of the struct file. 564 if (kiocb_batch_refill(ctx, batch) == 0)
489 */ 565 return NULL;
490 req = __aio_get_req(ctx); 566 req = list_first_entry(&batch->head, struct kiocb, ki_batch);
491 if (unlikely(NULL == req)) { 567 list_del(&req->ki_batch);
492 aio_fput_routine(NULL);
493 req = __aio_get_req(ctx);
494 }
495 return req; 568 return req;
496} 569}
497 570
@@ -1387,13 +1460,13 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
1387 ret = compat_rw_copy_check_uvector(type, 1460 ret = compat_rw_copy_check_uvector(type,
1388 (struct compat_iovec __user *)kiocb->ki_buf, 1461 (struct compat_iovec __user *)kiocb->ki_buf,
1389 kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, 1462 kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
1390 &kiocb->ki_iovec); 1463 &kiocb->ki_iovec, 1);
1391 else 1464 else
1392#endif 1465#endif
1393 ret = rw_copy_check_uvector(type, 1466 ret = rw_copy_check_uvector(type,
1394 (struct iovec __user *)kiocb->ki_buf, 1467 (struct iovec __user *)kiocb->ki_buf,
1395 kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, 1468 kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
1396 &kiocb->ki_iovec); 1469 &kiocb->ki_iovec, 1);
1397 if (ret < 0) 1470 if (ret < 0)
1398 goto out; 1471 goto out;
1399 1472
@@ -1515,7 +1588,8 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
1515} 1588}
1516 1589
1517static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1590static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1518 struct iocb *iocb, bool compat) 1591 struct iocb *iocb, struct kiocb_batch *batch,
1592 bool compat)
1519{ 1593{
1520 struct kiocb *req; 1594 struct kiocb *req;
1521 struct file *file; 1595 struct file *file;
@@ -1541,7 +1615,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1541 if (unlikely(!file)) 1615 if (unlikely(!file))
1542 return -EBADF; 1616 return -EBADF;
1543 1617
1544 req = aio_get_req(ctx); /* returns with 2 references to req */ 1618 req = aio_get_req(ctx, batch); /* returns with 2 references to req */
1545 if (unlikely(!req)) { 1619 if (unlikely(!req)) {
1546 fput(file); 1620 fput(file);
1547 return -EAGAIN; 1621 return -EAGAIN;
@@ -1621,8 +1695,9 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1621{ 1695{
1622 struct kioctx *ctx; 1696 struct kioctx *ctx;
1623 long ret = 0; 1697 long ret = 0;
1624 int i; 1698 int i = 0;
1625 struct blk_plug plug; 1699 struct blk_plug plug;
1700 struct kiocb_batch batch;
1626 1701
1627 if (unlikely(nr < 0)) 1702 if (unlikely(nr < 0))
1628 return -EINVAL; 1703 return -EINVAL;
@@ -1639,6 +1714,8 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1639 return -EINVAL; 1714 return -EINVAL;
1640 } 1715 }
1641 1716
1717 kiocb_batch_init(&batch, nr);
1718
1642 blk_start_plug(&plug); 1719 blk_start_plug(&plug);
1643 1720
1644 /* 1721 /*
@@ -1659,12 +1736,13 @@ long do_io_submit(aio_context_t ctx_id, long nr,
1659 break; 1736 break;
1660 } 1737 }
1661 1738
1662 ret = io_submit_one(ctx, user_iocb, &tmp, compat); 1739 ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
1663 if (ret) 1740 if (ret)
1664 break; 1741 break;
1665 } 1742 }
1666 blk_finish_plug(&plug); 1743 blk_finish_plug(&plug);
1667 1744
1745 kiocb_batch_free(&batch);
1668 put_ioctx(ctx); 1746 put_ioctx(ctx);
1669 return i ? i : ret; 1747 return i ? i : ret;
1670} 1748}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 180fa2425e49..8179f1ab8175 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -342,7 +342,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
342 inode->i_ino = get_next_ino(); 342 inode->i_ino = get_next_ino();
343 343
344 if (S_ISDIR(mode)) { 344 if (S_ISDIR(mode)) {
345 inode->i_nlink = 2; 345 set_nlink(inode, 2);
346 inode->i_op = &autofs4_dir_inode_operations; 346 inode->i_op = &autofs4_dir_inode_operations;
347 inode->i_fop = &autofs4_dir_operations; 347 inode->i_fop = &autofs4_dir_operations;
348 } else if (S_ISLNK(mode)) { 348 } else if (S_ISLNK(mode)) {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 720d885e8dca..8342ca67abcd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -357,7 +357,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
357 inode->i_gid = befs_sb->mount_opts.use_gid ? 357 inode->i_gid = befs_sb->mount_opts.use_gid ?
358 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid); 358 befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid);
359 359
360 inode->i_nlink = 1; 360 set_nlink(inode, 1);
361 361
362 /* 362 /*
363 * BEFS's time is 64 bits, but current VFS is 32 bits... 363 * BEFS's time is 64 bits, but current VFS is 32 bits...
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index b14cebfd9047..9cc074019479 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -199,7 +199,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
199 printf("unlinking non-existent file %s:%lu (nlink=%d)\n", 199 printf("unlinking non-existent file %s:%lu (nlink=%d)\n",
200 inode->i_sb->s_id, inode->i_ino, 200 inode->i_sb->s_id, inode->i_ino,
201 inode->i_nlink); 201 inode->i_nlink);
202 inode->i_nlink = 1; 202 set_nlink(inode, 1);
203 } 203 }
204 de->ino = 0; 204 de->ino = 0;
205 mark_buffer_dirty_inode(bh, dir); 205 mark_buffer_dirty_inode(bh, dir);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index a8e37f81d097..697af5bf70b3 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -78,7 +78,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); 78 BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
79 inode->i_uid = le32_to_cpu(di->i_uid); 79 inode->i_uid = le32_to_cpu(di->i_uid);
80 inode->i_gid = le32_to_cpu(di->i_gid); 80 inode->i_gid = le32_to_cpu(di->i_gid);
81 inode->i_nlink = le32_to_cpu(di->i_nlink); 81 set_nlink(inode, le32_to_cpu(di->i_nlink));
82 inode->i_size = BFS_FILESIZE(di); 82 inode->i_size = BFS_FILESIZE(di);
83 inode->i_blocks = BFS_FILEBLOCKS(di); 83 inode->i_blocks = BFS_FILEBLOCKS(di);
84 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); 84 inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index dd0fdfc56d38..21ac5ee4b43f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -795,7 +795,16 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
795 * might try to exec. This is because the brk will 795 * might try to exec. This is because the brk will
796 * follow the loader, and is not movable. */ 796 * follow the loader, and is not movable. */
797#if defined(CONFIG_X86) || defined(CONFIG_ARM) 797#if defined(CONFIG_X86) || defined(CONFIG_ARM)
798 load_bias = 0; 798 /* Memory randomization might have been switched off
799 * in runtime via sysctl.
800 * If that is the case, retain the original non-zero
801 * load_bias value in order to establish proper
802 * non-randomized mappings.
803 */
804 if (current->flags & PF_RANDOMIZE)
805 load_bias = 0;
806 else
807 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
799#else 808#else
800 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); 809 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
801#endif 810#endif
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index ba1a1ae4a18a..1e9edbdeda7e 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -521,7 +521,7 @@ static void kill_node(Node *e)
521 write_unlock(&entries_lock); 521 write_unlock(&entries_lock);
522 522
523 if (dentry) { 523 if (dentry) {
524 dentry->d_inode->i_nlink--; 524 drop_nlink(dentry->d_inode);
525 d_drop(dentry); 525 d_drop(dentry);
526 dput(dentry); 526 dput(dentry);
527 simple_release_fs(&bm_mnt, &entry_count); 527 simple_release_fs(&bm_mnt, &entry_count);
diff --git a/fs/bio.c b/fs/bio.c
index 9bfade8a609b..41c93c722244 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -255,7 +255,6 @@ void bio_init(struct bio *bio)
255{ 255{
256 memset(bio, 0, sizeof(*bio)); 256 memset(bio, 0, sizeof(*bio));
257 bio->bi_flags = 1 << BIO_UPTODATE; 257 bio->bi_flags = 1 << BIO_UPTODATE;
258 bio->bi_comp_cpu = -1;
259 atomic_set(&bio->bi_cnt, 1); 258 atomic_set(&bio->bi_cnt, 1);
260} 259}
261EXPORT_SYMBOL(bio_init); 260EXPORT_SYMBOL(bio_init);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95f786ec7f08..b07f1da1de4e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -971,7 +971,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
971 971
972 if (!bdev->bd_disk) 972 if (!bdev->bd_disk)
973 return; 973 return;
974 if (disk_partitionable(bdev->bd_disk)) 974 if (disk_part_scan_enabled(bdev->bd_disk))
975 bdev->bd_invalidated = 1; 975 bdev->bd_invalidated = 1;
976} 976}
977 977
@@ -1085,6 +1085,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1085static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) 1085static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1086{ 1086{
1087 struct gendisk *disk; 1087 struct gendisk *disk;
1088 struct module *owner;
1088 int ret; 1089 int ret;
1089 int partno; 1090 int partno;
1090 int perm = 0; 1091 int perm = 0;
@@ -1110,6 +1111,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1110 disk = get_gendisk(bdev->bd_dev, &partno); 1111 disk = get_gendisk(bdev->bd_dev, &partno);
1111 if (!disk) 1112 if (!disk)
1112 goto out; 1113 goto out;
1114 owner = disk->fops->owner;
1113 1115
1114 disk_block_events(disk); 1116 disk_block_events(disk);
1115 mutex_lock_nested(&bdev->bd_mutex, for_part); 1117 mutex_lock_nested(&bdev->bd_mutex, for_part);
@@ -1137,8 +1139,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1137 bdev->bd_disk = NULL; 1139 bdev->bd_disk = NULL;
1138 mutex_unlock(&bdev->bd_mutex); 1140 mutex_unlock(&bdev->bd_mutex);
1139 disk_unblock_events(disk); 1141 disk_unblock_events(disk);
1140 module_put(disk->fops->owner);
1141 put_disk(disk); 1142 put_disk(disk);
1143 module_put(owner);
1142 goto restart; 1144 goto restart;
1143 } 1145 }
1144 } 1146 }
@@ -1194,8 +1196,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1194 goto out_unlock_bdev; 1196 goto out_unlock_bdev;
1195 } 1197 }
1196 /* only one opener holds refs to the module and disk */ 1198 /* only one opener holds refs to the module and disk */
1197 module_put(disk->fops->owner);
1198 put_disk(disk); 1199 put_disk(disk);
1200 module_put(owner);
1199 } 1201 }
1200 bdev->bd_openers++; 1202 bdev->bd_openers++;
1201 if (for_part) 1203 if (for_part)
@@ -1215,8 +1217,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1215 out_unlock_bdev: 1217 out_unlock_bdev:
1216 mutex_unlock(&bdev->bd_mutex); 1218 mutex_unlock(&bdev->bd_mutex);
1217 disk_unblock_events(disk); 1219 disk_unblock_events(disk);
1218 module_put(disk->fops->owner);
1219 put_disk(disk); 1220 put_disk(disk);
1221 module_put(owner);
1220 out: 1222 out:
1221 bdput(bdev); 1223 bdput(bdev);
1222 1224
@@ -1442,14 +1444,15 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1442 if (!bdev->bd_openers) { 1444 if (!bdev->bd_openers) {
1443 struct module *owner = disk->fops->owner; 1445 struct module *owner = disk->fops->owner;
1444 1446
1445 put_disk(disk);
1446 module_put(owner);
1447 disk_put_part(bdev->bd_part); 1447 disk_put_part(bdev->bd_part);
1448 bdev->bd_part = NULL; 1448 bdev->bd_part = NULL;
1449 bdev->bd_disk = NULL; 1449 bdev->bd_disk = NULL;
1450 if (bdev != bdev->bd_contains) 1450 if (bdev != bdev->bd_contains)
1451 victim = bdev->bd_contains; 1451 victim = bdev->bd_contains;
1452 bdev->bd_contains = NULL; 1452 bdev->bd_contains = NULL;
1453
1454 put_disk(disk);
1455 module_put(owner);
1453 } 1456 }
1454 mutex_unlock(&bdev->bd_mutex); 1457 mutex_unlock(&bdev->bd_mutex);
1455 bdput(bdev); 1458 bdput(bdev);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b52c672f4c18..ae4d9cd10961 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1641,7 +1641,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1641 inode->i_gid = btrfs_stack_inode_gid(inode_item); 1641 inode->i_gid = btrfs_stack_inode_gid(inode_item);
1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item)); 1642 btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1643 inode->i_mode = btrfs_stack_inode_mode(inode_item); 1643 inode->i_mode = btrfs_stack_inode_mode(inode_item);
1644 inode->i_nlink = btrfs_stack_inode_nlink(inode_item); 1644 set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item)); 1645 inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); 1646 BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item); 1647 BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07b3ac662e19..07ea91879a91 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1705,7 +1705,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1705 sb->s_bdi = &fs_info->bdi; 1705 sb->s_bdi = &fs_info->bdi;
1706 1706
1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
1708 fs_info->btree_inode->i_nlink = 1; 1708 set_nlink(fs_info->btree_inode, 1);
1709 /* 1709 /*
1710 * we set the i_size on the btree inode to the max possible int. 1710 * we set the i_size on the btree inode to the max possible int.
1711 * the real end of the address space is determined by all of 1711 * the real end of the address space is determined by all of
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f5be06a2462f..c9ee0e18bbdc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3340,7 +3340,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3340 smp_mb(); 3340 smp_mb();
3341 nr_pages = min_t(unsigned long, nr_pages, 3341 nr_pages = min_t(unsigned long, nr_pages,
3342 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); 3342 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
3343 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3343 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
3344 WB_REASON_FS_FREE_SPACE);
3344 3345
3345 spin_lock(&space_info->lock); 3346 spin_lock(&space_info->lock);
3346 if (reserved > space_info->bytes_reserved) 3347 if (reserved > space_info->bytes_reserved)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b2d004ad66a0..75686a61bd45 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2534,7 +2534,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
2534 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2534 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2535 struct btrfs_inode_item); 2535 struct btrfs_inode_item);
2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2536 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2537 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2537 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item); 2538 inode->i_uid = btrfs_inode_uid(leaf, inode_item);
2539 inode->i_gid = btrfs_inode_gid(leaf, inode_item); 2539 inode->i_gid = btrfs_inode_gid(leaf, inode_item);
2540 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); 2540 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
@@ -6728,7 +6728,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6728 inode->i_op = &btrfs_dir_inode_operations; 6728 inode->i_op = &btrfs_dir_inode_operations;
6729 inode->i_fop = &btrfs_dir_file_operations; 6729 inode->i_fop = &btrfs_dir_file_operations;
6730 6730
6731 inode->i_nlink = 1; 6731 set_nlink(inode, 1);
6732 btrfs_i_size_write(inode, 0); 6732 btrfs_i_size_write(inode, 0);
6733 6733
6734 err = btrfs_update_inode(trans, new_root, inode); 6734 err = btrfs_update_inode(trans, new_root, inode);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 786639fca067..0618aa39740b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1030,7 +1030,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1030 } 1030 }
1031 btrfs_release_path(path); 1031 btrfs_release_path(path);
1032 if (nlink != inode->i_nlink) { 1032 if (nlink != inode->i_nlink) {
1033 inode->i_nlink = nlink; 1033 set_nlink(inode, nlink);
1034 btrfs_update_inode(trans, root, inode); 1034 btrfs_update_inode(trans, root, inode);
1035 } 1035 }
1036 BTRFS_I(inode)->index_cnt = (u64)-1; 1036 BTRFS_I(inode)->index_cnt = (u64)-1;
diff --git a/fs/buffer.c b/fs/buffer.c
index 936d6035f6e2..19d8eb7fdc81 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -213,13 +213,16 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
213 * elsewhere, don't buffer_error if we had some unmapped buffers 213 * elsewhere, don't buffer_error if we had some unmapped buffers
214 */ 214 */
215 if (all_mapped) { 215 if (all_mapped) {
216 char b[BDEVNAME_SIZE];
217
216 printk("__find_get_block_slow() failed. " 218 printk("__find_get_block_slow() failed. "
217 "block=%llu, b_blocknr=%llu\n", 219 "block=%llu, b_blocknr=%llu\n",
218 (unsigned long long)block, 220 (unsigned long long)block,
219 (unsigned long long)bh->b_blocknr); 221 (unsigned long long)bh->b_blocknr);
220 printk("b_state=0x%08lx, b_size=%zu\n", 222 printk("b_state=0x%08lx, b_size=%zu\n",
221 bh->b_state, bh->b_size); 223 bh->b_state, bh->b_size);
222 printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); 224 printk("device %s blocksize: %d\n", bdevname(bdev, b),
225 1 << bd_inode->i_blkbits);
223 } 226 }
224out_unlock: 227out_unlock:
225 spin_unlock(&bd_mapping->private_lock); 228 spin_unlock(&bd_mapping->private_lock);
@@ -285,7 +288,7 @@ static void free_more_memory(void)
285 struct zone *zone; 288 struct zone *zone;
286 int nid; 289 int nid;
287 290
288 wakeup_flusher_threads(1024); 291 wakeup_flusher_threads(1024, WB_REASON_FREE_MORE_MEM);
289 yield(); 292 yield();
290 293
291 for_each_online_node(nid) { 294 for_each_online_node(nid) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b8731bf3ef1f..0f327c6c9679 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -487,17 +487,15 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
487 ci->i_rdcache_gen++; 487 ci->i_rdcache_gen++;
488 488
489 /* 489 /*
490 * if we are newly issued FILE_SHARED, clear I_COMPLETE; we 490 * if we are newly issued FILE_SHARED, clear D_COMPLETE; we
491 * don't know what happened to this directory while we didn't 491 * don't know what happened to this directory while we didn't
492 * have the cap. 492 * have the cap.
493 */ 493 */
494 if ((issued & CEPH_CAP_FILE_SHARED) && 494 if ((issued & CEPH_CAP_FILE_SHARED) &&
495 (had & CEPH_CAP_FILE_SHARED) == 0) { 495 (had & CEPH_CAP_FILE_SHARED) == 0) {
496 ci->i_shared_gen++; 496 ci->i_shared_gen++;
497 if (S_ISDIR(ci->vfs_inode.i_mode)) { 497 if (S_ISDIR(ci->vfs_inode.i_mode))
498 dout(" marking %p NOT complete\n", &ci->vfs_inode); 498 ceph_dir_clear_complete(&ci->vfs_inode);
499 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
500 }
501 } 499 }
502} 500}
503 501
@@ -2363,7 +2361,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2363 } 2361 }
2364 2362
2365 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 2363 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
2366 inode->i_nlink = le32_to_cpu(grant->nlink); 2364 set_nlink(inode, le32_to_cpu(grant->nlink));
2367 2365
2368 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { 2366 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
2369 int len = le32_to_cpu(grant->xattr_len); 2367 int len = le32_to_cpu(grant->xattr_len);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 382abc9a6a54..2abd0dfad7f8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -108,7 +108,7 @@ static unsigned fpos_off(loff_t p)
108 * falling back to a "normal" sync readdir if any dentries in the dir 108 * falling back to a "normal" sync readdir if any dentries in the dir
109 * are dropped. 109 * are dropped.
110 * 110 *
111 * I_COMPLETE tells indicates we have all dentries in the dir. It is 111 * D_COMPLETE tells indicates we have all dentries in the dir. It is
112 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by 112 * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
113 * the MDS if/when the directory is modified). 113 * the MDS if/when the directory is modified).
114 */ 114 */
@@ -199,8 +199,8 @@ more:
199 filp->f_pos++; 199 filp->f_pos++;
200 200
201 /* make sure a dentry wasn't dropped while we didn't have parent lock */ 201 /* make sure a dentry wasn't dropped while we didn't have parent lock */
202 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { 202 if (!ceph_dir_test_complete(dir)) {
203 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); 203 dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
204 err = -EAGAIN; 204 err = -EAGAIN;
205 goto out; 205 goto out;
206 } 206 }
@@ -285,7 +285,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
285 if ((filp->f_pos == 2 || fi->dentry) && 285 if ((filp->f_pos == 2 || fi->dentry) &&
286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
287 ceph_snap(inode) != CEPH_SNAPDIR && 287 ceph_snap(inode) != CEPH_SNAPDIR &&
288 (ci->i_ceph_flags & CEPH_I_COMPLETE) && 288 ceph_dir_test_complete(inode) &&
289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
290 spin_unlock(&inode->i_lock); 290 spin_unlock(&inode->i_lock);
291 err = __dcache_readdir(filp, dirent, filldir); 291 err = __dcache_readdir(filp, dirent, filldir);
@@ -351,7 +351,7 @@ more:
351 351
352 if (!req->r_did_prepopulate) { 352 if (!req->r_did_prepopulate) {
353 dout("readdir !did_prepopulate"); 353 dout("readdir !did_prepopulate");
354 fi->dir_release_count--; /* preclude I_COMPLETE */ 354 fi->dir_release_count--; /* preclude D_COMPLETE */
355 } 355 }
356 356
357 /* note next offset and last dentry name */ 357 /* note next offset and last dentry name */
@@ -430,8 +430,7 @@ more:
430 */ 430 */
431 spin_lock(&inode->i_lock); 431 spin_lock(&inode->i_lock);
432 if (ci->i_release_count == fi->dir_release_count) { 432 if (ci->i_release_count == fi->dir_release_count) {
433 dout(" marking %p complete\n", inode); 433 ceph_dir_set_complete(inode);
434 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
435 ci->i_max_offset = filp->f_pos; 434 ci->i_max_offset = filp->f_pos;
436 } 435 }
437 spin_unlock(&inode->i_lock); 436 spin_unlock(&inode->i_lock);
@@ -614,7 +613,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
614 fsc->mount_options->snapdir_name, 613 fsc->mount_options->snapdir_name,
615 dentry->d_name.len) && 614 dentry->d_name.len) &&
616 !is_root_ceph_dentry(dir, dentry) && 615 !is_root_ceph_dentry(dir, dentry) &&
617 (ci->i_ceph_flags & CEPH_I_COMPLETE) && 616 ceph_dir_test_complete(dir) &&
618 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
619 spin_unlock(&dir->i_lock); 618 spin_unlock(&dir->i_lock);
620 dout(" dir %p complete, -ENOENT\n", dir); 619 dout(" dir %p complete, -ENOENT\n", dir);
@@ -934,7 +933,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
934 */ 933 */
935 934
936 /* d_move screws up d_subdirs order */ 935 /* d_move screws up d_subdirs order */
937 ceph_i_clear(new_dir, CEPH_I_COMPLETE); 936 ceph_dir_clear_complete(new_dir);
938 937
939 d_move(old_dentry, new_dentry); 938 d_move(old_dentry, new_dentry);
940 939
@@ -1092,7 +1091,75 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
1092 return 1; 1091 return 1;
1093} 1092}
1094 1093
1094/*
1095 * Set/clear/test dir complete flag on the dir's dentry.
1096 */
1097static struct dentry * __d_find_any_alias(struct inode *inode)
1098{
1099 struct dentry *alias;
1100
1101 if (list_empty(&inode->i_dentry))
1102 return NULL;
1103 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1104 return alias;
1105}
1106
1107void ceph_dir_set_complete(struct inode *inode)
1108{
1109 struct dentry *dentry = __d_find_any_alias(inode);
1110
1111 if (dentry && ceph_dentry(dentry)) {
1112 dout(" marking %p (%p) complete\n", inode, dentry);
1113 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1114 }
1115}
1116
1117void ceph_dir_clear_complete(struct inode *inode)
1118{
1119 struct dentry *dentry = __d_find_any_alias(inode);
1120
1121 if (dentry && ceph_dentry(dentry)) {
1122 dout(" marking %p (%p) NOT complete\n", inode, dentry);
1123 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1124 }
1125}
1126
1127bool ceph_dir_test_complete(struct inode *inode)
1128{
1129 struct dentry *dentry = __d_find_any_alias(inode);
1130
1131 if (dentry && ceph_dentry(dentry))
1132 return test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1133 return false;
1134}
1135
1136/*
1137 * When the VFS prunes a dentry from the cache, we need to clear the
1138 * complete flag on the parent directory.
1139 *
1140 * Called under dentry->d_lock.
1141 */
1142static void ceph_d_prune(struct dentry *dentry)
1143{
1144 struct ceph_dentry_info *di;
1145
1146 dout("d_release %p\n", dentry);
1147
1148 /* do we have a valid parent? */
1149 if (!dentry->d_parent || IS_ROOT(dentry))
1150 return;
1095 1151
1152 /* if we are not hashed, we don't affect D_COMPLETE */
1153 if (d_unhashed(dentry))
1154 return;
1155
1156 /*
1157 * we hold d_lock, so d_parent is stable, and d_fsdata is never
1158 * cleared until d_release
1159 */
1160 di = ceph_dentry(dentry->d_parent);
1161 clear_bit(CEPH_D_COMPLETE, &di->flags);
1162}
1096 1163
1097/* 1164/*
1098 * read() on a dir. This weird interface hack only works if mounted 1165 * read() on a dir. This weird interface hack only works if mounted
@@ -1306,6 +1373,7 @@ const struct inode_operations ceph_dir_iops = {
1306const struct dentry_operations ceph_dentry_ops = { 1373const struct dentry_operations ceph_dentry_ops = {
1307 .d_revalidate = ceph_d_revalidate, 1374 .d_revalidate = ceph_d_revalidate,
1308 .d_release = ceph_d_release, 1375 .d_release = ceph_d_release,
1376 .d_prune = ceph_d_prune,
1309}; 1377};
1310 1378
1311const struct dentry_operations ceph_snapdir_dentry_ops = { 1379const struct dentry_operations ceph_snapdir_dentry_ops = {
@@ -1315,4 +1383,5 @@ const struct dentry_operations ceph_snapdir_dentry_ops = {
1315 1383
1316const struct dentry_operations ceph_snap_dentry_ops = { 1384const struct dentry_operations ceph_snap_dentry_ops = {
1317 .d_release = ceph_d_release, 1385 .d_release = ceph_d_release,
1386 .d_prune = ceph_d_prune,
1318}; 1387};
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5dde7d51dc11..e392bfce84a3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -618,7 +618,7 @@ static int fill_inode(struct inode *inode,
618 } 618 }
619 619
620 if ((issued & CEPH_CAP_LINK_EXCL) == 0) 620 if ((issued & CEPH_CAP_LINK_EXCL) == 0)
621 inode->i_nlink = le32_to_cpu(info->nlink); 621 set_nlink(inode, le32_to_cpu(info->nlink));
622 622
623 /* be careful with mtime, atime, size */ 623 /* be careful with mtime, atime, size */
624 ceph_decode_timespec(&atime, &info->atime); 624 ceph_decode_timespec(&atime, &info->atime);
@@ -771,9 +771,9 @@ no_change:
771 ceph_snap(inode) == CEPH_NOSNAP && 771 ceph_snap(inode) == CEPH_NOSNAP &&
772 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && 772 (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
773 (issued & CEPH_CAP_FILE_EXCL) == 0 && 773 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
774 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 774 !ceph_dir_test_complete(inode)) {
775 dout(" marking %p complete (empty)\n", inode); 775 dout(" marking %p complete (empty)\n", inode);
776 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ 776 ceph_dir_set_complete(inode);
777 ci->i_max_offset = 2; 777 ci->i_max_offset = 2;
778 } 778 }
779 779
@@ -856,7 +856,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
856 di = ceph_dentry(dn); 856 di = ceph_dentry(dn);
857 857
858 spin_lock(&inode->i_lock); 858 spin_lock(&inode->i_lock);
859 if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 859 if (!ceph_dir_test_complete(inode)) {
860 spin_unlock(&inode->i_lock); 860 spin_unlock(&inode->i_lock);
861 return; 861 return;
862 } 862 }
@@ -1056,7 +1056,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1056 * d_move() puts the renamed dentry at the end of 1056 * d_move() puts the renamed dentry at the end of
1057 * d_subdirs. We need to assign it an appropriate 1057 * d_subdirs. We need to assign it an appropriate
1058 * directory offset so we can behave when holding 1058 * directory offset so we can behave when holding
1059 * I_COMPLETE. 1059 * D_COMPLETE.
1060 */ 1060 */
1061 ceph_set_dentry_offset(req->r_old_dentry); 1061 ceph_set_dentry_offset(req->r_old_dentry);
1062 dout("dn %p gets new offset %lld\n", req->r_old_dentry, 1062 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 1d72f15fe9f4..264ab701154f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -619,7 +619,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
619 * 619 *
620 * Called under mdsc->mutex. 620 * Called under mdsc->mutex.
621 */ 621 */
622struct dentry *get_nonsnap_parent(struct dentry *dentry) 622static struct dentry *get_nonsnap_parent(struct dentry *dentry)
623{ 623{
624 /* 624 /*
625 * we don't need to worry about protecting the d_parent access 625 * we don't need to worry about protecting the d_parent access
@@ -2002,7 +2002,7 @@ out:
2002} 2002}
2003 2003
2004/* 2004/*
2005 * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS 2005 * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
2006 * namespace request. 2006 * namespace request.
2007 */ 2007 */
2008void ceph_invalidate_dir_request(struct ceph_mds_request *req) 2008void ceph_invalidate_dir_request(struct ceph_mds_request *req)
@@ -2010,9 +2010,9 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
2010 struct inode *inode = req->r_locked_dir; 2010 struct inode *inode = req->r_locked_dir;
2011 struct ceph_inode_info *ci = ceph_inode(inode); 2011 struct ceph_inode_info *ci = ceph_inode(inode);
2012 2012
2013 dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode); 2013 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
2014 spin_lock(&inode->i_lock); 2014 spin_lock(&inode->i_lock);
2015 ci->i_ceph_flags &= ~CEPH_I_COMPLETE; 2015 ceph_dir_clear_complete(inode);
2016 ci->i_release_count++; 2016 ci->i_release_count++;
2017 spin_unlock(&inode->i_lock); 2017 spin_unlock(&inode->i_lock);
2018 2018
@@ -3154,7 +3154,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
3154/* 3154/*
3155 * true if all sessions are closed, or we force unmount 3155 * true if all sessions are closed, or we force unmount
3156 */ 3156 */
3157bool done_closing_sessions(struct ceph_mds_client *mdsc) 3157static bool done_closing_sessions(struct ceph_mds_client *mdsc)
3158{ 3158{
3159 int i, n = 0; 3159 int i, n = 0;
3160 3160
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 788f5ad8e66d..a90846fac759 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -426,7 +426,7 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
426/* 426/*
427 * create a new fs client 427 * create a new fs client
428 */ 428 */
429struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, 429static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
430 struct ceph_options *opt) 430 struct ceph_options *opt)
431{ 431{
432 struct ceph_fs_client *fsc; 432 struct ceph_fs_client *fsc;
@@ -502,7 +502,7 @@ fail:
502 return ERR_PTR(err); 502 return ERR_PTR(err);
503} 503}
504 504
505void destroy_fs_client(struct ceph_fs_client *fsc) 505static void destroy_fs_client(struct ceph_fs_client *fsc)
506{ 506{
507 dout("destroy_fs_client %p\n", fsc); 507 dout("destroy_fs_client %p\n", fsc);
508 508
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b01442aaf278..01bf189e08a9 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -203,6 +203,7 @@ struct ceph_inode_xattr {
203 * Ceph dentry state 203 * Ceph dentry state
204 */ 204 */
205struct ceph_dentry_info { 205struct ceph_dentry_info {
206 unsigned long flags;
206 struct ceph_mds_session *lease_session; 207 struct ceph_mds_session *lease_session;
207 u32 lease_gen, lease_shared_gen; 208 u32 lease_gen, lease_shared_gen;
208 u32 lease_seq; 209 u32 lease_seq;
@@ -213,6 +214,18 @@ struct ceph_dentry_info {
213 u64 offset; 214 u64 offset;
214}; 215};
215 216
217/*
218 * dentry flags
219 *
220 * The locking for D_COMPLETE is a bit odd:
221 * - we can clear it at almost any time (see ceph_d_prune)
222 * - it is only meaningful if:
223 * - we hold dir inode i_lock
224 * - we hold dir FILE_SHARED caps
225 * - the dentry D_COMPLETE is set
226 */
227#define CEPH_D_COMPLETE 1 /* if set, d_u.d_subdirs is complete directory */
228
216struct ceph_inode_xattrs_info { 229struct ceph_inode_xattrs_info {
217 /* 230 /*
218 * (still encoded) xattr blob. we avoid the overhead of parsing 231 * (still encoded) xattr blob. we avoid the overhead of parsing
@@ -251,7 +264,7 @@ struct ceph_inode_info {
251 struct timespec i_rctime; 264 struct timespec i_rctime;
252 u64 i_rbytes, i_rfiles, i_rsubdirs; 265 u64 i_rbytes, i_rfiles, i_rsubdirs;
253 u64 i_files, i_subdirs; 266 u64 i_files, i_subdirs;
254 u64 i_max_offset; /* largest readdir offset, set with I_COMPLETE */ 267 u64 i_max_offset; /* largest readdir offset, set with D_COMPLETE */
255 268
256 struct rb_root i_fragtree; 269 struct rb_root i_fragtree;
257 struct mutex i_fragtree_mutex; 270 struct mutex i_fragtree_mutex;
@@ -416,7 +429,6 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
416/* 429/*
417 * Ceph inode. 430 * Ceph inode.
418 */ 431 */
419#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
420#define CEPH_I_NODELAY 4 /* do not delay cap release */ 432#define CEPH_I_NODELAY 4 /* do not delay cap release */
421#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ 433#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
422#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ 434#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
@@ -474,6 +486,13 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
474} 486}
475 487
476/* 488/*
489 * set/clear directory D_COMPLETE flag
490 */
491void ceph_dir_set_complete(struct inode *inode);
492void ceph_dir_clear_complete(struct inode *inode);
493bool ceph_dir_test_complete(struct inode *inode);
494
495/*
477 * caps helpers 496 * caps helpers
478 */ 497 */
479static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci) 498static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 2cfb695d1f89..5d9b9acc5fce 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -204,7 +204,7 @@ int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
204} 204}
205 205
206/* first calculate 24 bytes ntlm response and then 16 byte session key */ 206/* first calculate 24 bytes ntlm response and then 16 byte session key */
207int setup_ntlm_response(struct cifs_ses *ses) 207int setup_ntlm_response(struct cifs_ses *ses, const struct nls_table *nls_cp)
208{ 208{
209 int rc = 0; 209 int rc = 0;
210 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE; 210 unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
@@ -221,14 +221,14 @@ int setup_ntlm_response(struct cifs_ses *ses)
221 ses->auth_key.len = temp_len; 221 ses->auth_key.len = temp_len;
222 222
223 rc = SMBNTencrypt(ses->password, ses->server->cryptkey, 223 rc = SMBNTencrypt(ses->password, ses->server->cryptkey,
224 ses->auth_key.response + CIFS_SESS_KEY_SIZE); 224 ses->auth_key.response + CIFS_SESS_KEY_SIZE, nls_cp);
225 if (rc) { 225 if (rc) {
226 cFYI(1, "%s Can't generate NTLM response, error: %d", 226 cFYI(1, "%s Can't generate NTLM response, error: %d",
227 __func__, rc); 227 __func__, rc);
228 return rc; 228 return rc;
229 } 229 }
230 230
231 rc = E_md4hash(ses->password, temp_key); 231 rc = E_md4hash(ses->password, temp_key, nls_cp);
232 if (rc) { 232 if (rc) {
233 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); 233 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
234 return rc; 234 return rc;
@@ -404,7 +404,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
404 } 404 }
405 405
406 /* calculate md4 hash of password */ 406 /* calculate md4 hash of password */
407 E_md4hash(ses->password, nt_hash); 407 E_md4hash(ses->password, nt_hash, nls_cp);
408 408
409 rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash, 409 rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
410 CIFS_NTHASH_SIZE); 410 CIFS_NTHASH_SIZE);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index d9dbaf869cd1..30ff56005d8f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125extern const struct export_operations cifs_export_ops; 125extern const struct export_operations cifs_export_ops;
126#endif /* CONFIG_CIFS_NFSD_EXPORT */ 126#endif /* CONFIG_CIFS_NFSD_EXPORT */
127 127
128#define CIFS_VERSION "1.75" 128#define CIFS_VERSION "1.76"
129#endif /* _CIFSFS_H */ 129#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ef4f631e4c01..6f4e243e0f62 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -395,8 +395,9 @@ extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
395extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov, 395extern int cifs_verify_signature(struct kvec *iov, unsigned int nr_iov,
396 struct TCP_Server_Info *server, 396 struct TCP_Server_Info *server,
397 __u32 expected_sequence_number); 397 __u32 expected_sequence_number);
398extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *); 398extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *,
399extern int setup_ntlm_response(struct cifs_ses *); 399 const struct nls_table *);
400extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
400extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); 401extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
401extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); 402extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
402extern void cifs_crypto_shash_release(struct TCP_Server_Info *); 403extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
@@ -448,7 +449,8 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
448 const unsigned char *path, 449 const unsigned char *path,
449 struct cifs_sb_info *cifs_sb, int xid); 450 struct cifs_sb_info *cifs_sb, int xid);
450extern int mdfour(unsigned char *, unsigned char *, int); 451extern int mdfour(unsigned char *, unsigned char *, int);
451extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 452extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
453 const struct nls_table *codepage);
452extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, 454extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
453 unsigned char *p24); 455 unsigned char *p24);
454 456
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7ef4e2846658..d6a972df0338 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3453,7 +3453,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses,
3453 else 3453 else
3454#endif /* CIFS_WEAK_PW_HASH */ 3454#endif /* CIFS_WEAK_PW_HASH */
3455 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, 3455 rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
3456 bcc_ptr); 3456 bcc_ptr, nls_codepage);
3457 3457
3458 bcc_ptr += CIFS_AUTH_RESP_SIZE; 3458 bcc_ptr += CIFS_AUTH_RESP_SIZE;
3459 if (ses->capabilities & CAP_UNICODE) { 3459 if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ea096ce5d4f7..c1f063cd1b0c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -778,7 +778,6 @@ try_again:
778 else { 778 else {
779 mutex_lock(&cinode->lock_mutex); 779 mutex_lock(&cinode->lock_mutex);
780 list_del_init(&lock->blist); 780 list_del_init(&lock->blist);
781 mutex_unlock(&cinode->lock_mutex);
782 } 781 }
783 } 782 }
784 783
@@ -794,6 +793,9 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
794 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 793 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
795 unsigned char saved_type = flock->fl_type; 794 unsigned char saved_type = flock->fl_type;
796 795
796 if ((flock->fl_flags & FL_POSIX) == 0)
797 return 1;
798
797 mutex_lock(&cinode->lock_mutex); 799 mutex_lock(&cinode->lock_mutex);
798 posix_test_lock(file, flock); 800 posix_test_lock(file, flock);
799 801
@@ -810,12 +812,15 @@ static int
810cifs_posix_lock_set(struct file *file, struct file_lock *flock) 812cifs_posix_lock_set(struct file *file, struct file_lock *flock)
811{ 813{
812 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode); 814 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
813 int rc; 815 int rc = 1;
816
817 if ((flock->fl_flags & FL_POSIX) == 0)
818 return rc;
814 819
815 mutex_lock(&cinode->lock_mutex); 820 mutex_lock(&cinode->lock_mutex);
816 if (!cinode->can_cache_brlcks) { 821 if (!cinode->can_cache_brlcks) {
817 mutex_unlock(&cinode->lock_mutex); 822 mutex_unlock(&cinode->lock_mutex);
818 return 1; 823 return rc;
819 } 824 }
820 rc = posix_lock_file_wait(file, flock); 825 rc = posix_lock_file_wait(file, flock);
821 mutex_unlock(&cinode->lock_mutex); 826 mutex_unlock(&cinode->lock_mutex);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2c50bd2f65d1..e851d5b8931e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -132,7 +132,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
132 inode->i_mtime = fattr->cf_mtime; 132 inode->i_mtime = fattr->cf_mtime;
133 inode->i_ctime = fattr->cf_ctime; 133 inode->i_ctime = fattr->cf_ctime;
134 inode->i_rdev = fattr->cf_rdev; 134 inode->i_rdev = fattr->cf_rdev;
135 inode->i_nlink = fattr->cf_nlink; 135 set_nlink(inode, fattr->cf_nlink);
136 inode->i_uid = fattr->cf_uid; 136 inode->i_uid = fattr->cf_uid;
137 inode->i_gid = fattr->cf_gid; 137 inode->i_gid = fattr->cf_gid;
138 138
@@ -905,7 +905,7 @@ struct inode *cifs_root_iget(struct super_block *sb)
905 if (rc && tcon->ipc) { 905 if (rc && tcon->ipc) {
906 cFYI(1, "ipc connection - fake read inode"); 906 cFYI(1, "ipc connection - fake read inode");
907 inode->i_mode |= S_IFDIR; 907 inode->i_mode |= S_IFDIR;
908 inode->i_nlink = 2; 908 set_nlink(inode, 2);
909 inode->i_op = &cifs_ipc_inode_ops; 909 inode->i_op = &cifs_ipc_inode_ops;
910 inode->i_fop = &simple_dir_operations; 910 inode->i_fop = &simple_dir_operations;
911 inode->i_uid = cifs_sb->mnt_uid; 911 inode->i_uid = cifs_sb->mnt_uid;
@@ -1367,7 +1367,7 @@ mkdir_get_info:
1367 /* setting nlink not necessary except in cases where we 1367 /* setting nlink not necessary except in cases where we
1368 * failed to get it from the server or was set bogus */ 1368 * failed to get it from the server or was set bogus */
1369 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 1369 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
1370 direntry->d_inode->i_nlink = 2; 1370 set_nlink(direntry->d_inode, 2);
1371 1371
1372 mode &= ~current_umask(); 1372 mode &= ~current_umask();
1373 /* must turn on setgid bit if parent dir has it */ 1373 /* must turn on setgid bit if parent dir has it */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 8693b5d0e180..6b0e06434391 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -433,7 +433,7 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
433 if (old_file->d_inode) { 433 if (old_file->d_inode) {
434 cifsInode = CIFS_I(old_file->d_inode); 434 cifsInode = CIFS_I(old_file->d_inode);
435 if (rc == 0) { 435 if (rc == 0) {
436 old_file->d_inode->i_nlink++; 436 inc_nlink(old_file->d_inode);
437/* BB should we make this contingent on superblock flag NOATIME? */ 437/* BB should we make this contingent on superblock flag NOATIME? */
438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/ 438/* old_file->d_inode->i_ctime = CURRENT_TIME;*/
439 /* parent dir timestamps will update from srv 439 /* parent dir timestamps will update from srv
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index c7d80e24f24e..4ec3ee9d72cc 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -683,7 +683,7 @@ ssetup_ntlmssp_authenticate:
683 cpu_to_le16(CIFS_AUTH_RESP_SIZE); 683 cpu_to_le16(CIFS_AUTH_RESP_SIZE);
684 684
685 /* calculate ntlm response and session key */ 685 /* calculate ntlm response and session key */
686 rc = setup_ntlm_response(ses); 686 rc = setup_ntlm_response(ses, nls_cp);
687 if (rc) { 687 if (rc) {
688 cERROR(1, "Error %d during NTLM authentication", rc); 688 cERROR(1, "Error %d during NTLM authentication", rc);
689 goto ssetup_exit; 689 goto ssetup_exit;
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index ac1221d969d6..7cacba12b8f1 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -199,75 +199,36 @@ SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
199 return rc; 199 return rc;
200} 200}
201 201
202/* Routines for Windows NT MD4 Hash functions. */
203static int
204_my_wcslen(__u16 *str)
205{
206 int len = 0;
207 while (*str++ != 0)
208 len++;
209 return len;
210}
211
212/*
213 * Convert a string into an NT UNICODE string.
214 * Note that regardless of processor type
215 * this must be in intel (little-endian)
216 * format.
217 */
218
219static int
220_my_mbstowcs(__u16 *dst, const unsigned char *src, int len)
221{ /* BB not a very good conversion routine - change/fix */
222 int i;
223 __u16 val;
224
225 for (i = 0; i < len; i++) {
226 val = *src;
227 SSVAL(dst, 0, val);
228 dst++;
229 src++;
230 if (val == 0)
231 break;
232 }
233 return i;
234}
235
236/* 202/*
237 * Creates the MD4 Hash of the users password in NT UNICODE. 203 * Creates the MD4 Hash of the users password in NT UNICODE.
238 */ 204 */
239 205
240int 206int
241E_md4hash(const unsigned char *passwd, unsigned char *p16) 207E_md4hash(const unsigned char *passwd, unsigned char *p16,
208 const struct nls_table *codepage)
242{ 209{
243 int rc; 210 int rc;
244 int len; 211 int len;
245 __u16 wpwd[129]; 212 __u16 wpwd[129];
246 213
247 /* Password cannot be longer than 128 characters */ 214 /* Password cannot be longer than 128 characters */
248 if (passwd) { 215 if (passwd) /* Password must be converted to NT unicode */
249 len = strlen((char *) passwd); 216 len = cifs_strtoUCS(wpwd, passwd, 128, codepage);
250 if (len > 128) 217 else {
251 len = 128;
252
253 /* Password must be converted to NT unicode */
254 _my_mbstowcs(wpwd, passwd, len);
255 } else
256 len = 0; 218 len = 0;
219 *wpwd = 0; /* Ensure string is null terminated */
220 }
257 221
258 wpwd[len] = 0; /* Ensure string is null terminated */ 222 rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__u16));
259 /* Calculate length in bytes */ 223 memset(wpwd, 0, 129 * sizeof(__u16));
260 len = _my_wcslen(wpwd) * sizeof(__u16);
261
262 rc = mdfour(p16, (unsigned char *) wpwd, len);
263 memset(wpwd, 0, 129 * 2);
264 224
265 return rc; 225 return rc;
266} 226}
267 227
268/* Does the NT MD4 hash then des encryption. */ 228/* Does the NT MD4 hash then des encryption. */
269int 229int
270SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 230SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24,
231 const struct nls_table *codepage)
271{ 232{
272 int rc; 233 int rc;
273 unsigned char p16[16], p21[21]; 234 unsigned char p16[16], p21[21];
@@ -275,7 +236,7 @@ SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
275 memset(p16, '\0', 16); 236 memset(p16, '\0', 16);
276 memset(p21, '\0', 21); 237 memset(p21, '\0', 21);
277 238
278 rc = E_md4hash(passwd, p16); 239 rc = E_md4hash(passwd, p16, codepage);
279 if (rc) { 240 if (rc) {
280 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); 241 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
281 return rc; 242 return rc;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2bdbcc11b373..854ace712685 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -104,7 +104,7 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
104 if (attr->va_gid != -1) 104 if (attr->va_gid != -1)
105 inode->i_gid = (gid_t) attr->va_gid; 105 inode->i_gid = (gid_t) attr->va_gid;
106 if (attr->va_nlink != -1) 106 if (attr->va_nlink != -1)
107 inode->i_nlink = attr->va_nlink; 107 set_nlink(inode, attr->va_nlink);
108 if (attr->va_size != -1) 108 if (attr->va_size != -1)
109 inode->i_size = attr->va_size; 109 inode->i_size = attr->va_size;
110 if (attr->va_size != -1) 110 if (attr->va_size != -1)
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 0239433f50cb..28e7e135cfab 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -340,7 +340,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
340 if (!error) { 340 if (!error) {
341 /* VFS may delete the child */ 341 /* VFS may delete the child */
342 if (de->d_inode) 342 if (de->d_inode)
343 de->d_inode->i_nlink = 0; 343 clear_nlink(de->d_inode);
344 344
345 /* fix the link count of the parent */ 345 /* fix the link count of the parent */
346 coda_dir_drop_nlink(dir); 346 coda_dir_drop_nlink(dir);
diff --git a/fs/compat.c b/fs/compat.c
index 302e761bd0aa..c98787536bb8 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -546,7 +546,7 @@ out:
546ssize_t compat_rw_copy_check_uvector(int type, 546ssize_t compat_rw_copy_check_uvector(int type,
547 const struct compat_iovec __user *uvector, unsigned long nr_segs, 547 const struct compat_iovec __user *uvector, unsigned long nr_segs,
548 unsigned long fast_segs, struct iovec *fast_pointer, 548 unsigned long fast_segs, struct iovec *fast_pointer,
549 struct iovec **ret_pointer) 549 struct iovec **ret_pointer, int check_access)
550{ 550{
551 compat_ssize_t tot_len; 551 compat_ssize_t tot_len;
552 struct iovec *iov = *ret_pointer = fast_pointer; 552 struct iovec *iov = *ret_pointer = fast_pointer;
@@ -593,7 +593,8 @@ ssize_t compat_rw_copy_check_uvector(int type,
593 } 593 }
594 if (len < 0) /* size_t not fitting in compat_ssize_t .. */ 594 if (len < 0) /* size_t not fitting in compat_ssize_t .. */
595 goto out; 595 goto out;
596 if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) { 596 if (check_access &&
597 !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
597 ret = -EFAULT; 598 ret = -EFAULT;
598 goto out; 599 goto out;
599 } 600 }
@@ -1107,7 +1108,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1107 goto out; 1108 goto out;
1108 1109
1109 tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, 1110 tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs,
1110 UIO_FASTIOV, iovstack, &iov); 1111 UIO_FASTIOV, iovstack, &iov, 1);
1111 if (tot_len == 0) { 1112 if (tot_len == 0) {
1112 ret = 0; 1113 ret = 0;
1113 goto out; 1114 goto out;
diff --git a/fs/dcache.c b/fs/dcache.c
index a88948b8bd17..274f13e2f094 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -225,7 +225,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
225} 225}
226 226
227/* 227/*
228 * dentry_lru_(add|del|move_tail) must be called with d_lock held. 228 * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
229 */ 229 */
230static void dentry_lru_add(struct dentry *dentry) 230static void dentry_lru_add(struct dentry *dentry)
231{ 231{
@@ -245,6 +245,9 @@ static void __dentry_lru_del(struct dentry *dentry)
245 dentry_stat.nr_unused--; 245 dentry_stat.nr_unused--;
246} 246}
247 247
248/*
249 * Remove a dentry with references from the LRU.
250 */
248static void dentry_lru_del(struct dentry *dentry) 251static void dentry_lru_del(struct dentry *dentry)
249{ 252{
250 if (!list_empty(&dentry->d_lru)) { 253 if (!list_empty(&dentry->d_lru)) {
@@ -254,6 +257,23 @@ static void dentry_lru_del(struct dentry *dentry)
254 } 257 }
255} 258}
256 259
260/*
261 * Remove a dentry that is unreferenced and about to be pruned
262 * (unhashed and destroyed) from the LRU, and inform the file system.
263 * This wrapper should be called _prior_ to unhashing a victim dentry.
264 */
265static void dentry_lru_prune(struct dentry *dentry)
266{
267 if (!list_empty(&dentry->d_lru)) {
268 if (dentry->d_flags & DCACHE_OP_PRUNE)
269 dentry->d_op->d_prune(dentry);
270
271 spin_lock(&dcache_lru_lock);
272 __dentry_lru_del(dentry);
273 spin_unlock(&dcache_lru_lock);
274 }
275}
276
257static void dentry_lru_move_tail(struct dentry *dentry) 277static void dentry_lru_move_tail(struct dentry *dentry)
258{ 278{
259 spin_lock(&dcache_lru_lock); 279 spin_lock(&dcache_lru_lock);
@@ -403,8 +423,12 @@ relock:
403 423
404 if (ref) 424 if (ref)
405 dentry->d_count--; 425 dentry->d_count--;
406 /* if dentry was on the d_lru list delete it from there */ 426 /*
407 dentry_lru_del(dentry); 427 * if dentry was on the d_lru list delete it from there.
428 * inform the fs via d_prune that this dentry is about to be
429 * unhashed and destroyed.
430 */
431 dentry_lru_prune(dentry);
408 /* if it was on the hash then remove it */ 432 /* if it was on the hash then remove it */
409 __d_drop(dentry); 433 __d_drop(dentry);
410 return d_kill(dentry, parent); 434 return d_kill(dentry, parent);
@@ -854,8 +878,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
854 do { 878 do {
855 struct inode *inode; 879 struct inode *inode;
856 880
857 /* detach from the system */ 881 /*
858 dentry_lru_del(dentry); 882 * remove the dentry from the lru, and inform
883 * the fs that this dentry is about to be
884 * unhashed and destroyed.
885 */
886 dentry_lru_prune(dentry);
859 __d_shrink(dentry); 887 __d_shrink(dentry);
860 888
861 if (dentry->d_count != 0) { 889 if (dentry->d_count != 0) {
@@ -1283,6 +1311,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1283 dentry->d_flags |= DCACHE_OP_REVALIDATE; 1311 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1284 if (op->d_delete) 1312 if (op->d_delete)
1285 dentry->d_flags |= DCACHE_OP_DELETE; 1313 dentry->d_flags |= DCACHE_OP_DELETE;
1314 if (op->d_prune)
1315 dentry->d_flags |= DCACHE_OP_PRUNE;
1286 1316
1287} 1317}
1288EXPORT_SYMBOL(d_set_d_op); 1318EXPORT_SYMBOL(d_set_d_op);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 2f27e578d466..d5d5297efe97 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -307,7 +307,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
308 inode->i_op = &simple_dir_inode_operations; 308 inode->i_op = &simple_dir_inode_operations;
309 inode->i_fop = &simple_dir_operations; 309 inode->i_fop = &simple_dir_operations;
310 inode->i_nlink = 2; 310 set_nlink(inode, 2);
311 311
312 s->s_root = d_alloc_root(inode); 312 s->s_root = d_alloc_root(inode);
313 if (s->s_root) 313 if (s->s_root)
@@ -549,7 +549,7 @@ void devpts_pty_kill(struct tty_struct *tty)
549 549
550 dentry = d_find_alias(inode); 550 dentry = d_find_alias(inode);
551 551
552 inode->i_nlink--; 552 drop_nlink(inode);
553 d_delete(dentry); 553 d_delete(dentry);
554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */ 554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
555 dput(dentry); /* d_find_alias above */ 555 dput(dentry); /* d_find_alias above */
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index b36c5572b3f3..54481a3b2c79 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -514,7 +514,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
514 514
515#define ecryptfs_printk(type, fmt, arg...) \ 515#define ecryptfs_printk(type, fmt, arg...) \
516 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); 516 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
517__attribute__ ((format(printf, 1, 2))) 517__printf(1, 2)
518void __ecryptfs_printk(const char *fmt, ...); 518void __ecryptfs_printk(const char *fmt, ...);
519 519
520extern const struct file_operations ecryptfs_main_fops; 520extern const struct file_operations ecryptfs_main_fops;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f8582d7218..a36d327f1521 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -474,8 +474,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
474 goto out_lock; 474 goto out_lock;
475 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 475 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
476 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); 476 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
477 old_dentry->d_inode->i_nlink = 477 set_nlink(old_dentry->d_inode,
478 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink; 478 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink);
479 i_size_write(new_dentry->d_inode, file_size_save); 479 i_size_write(new_dentry->d_inode, file_size_save);
480out_lock: 480out_lock:
481 unlock_dir(lower_dir_dentry); 481 unlock_dir(lower_dir_dentry);
@@ -499,8 +499,8 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
499 goto out_unlock; 499 goto out_unlock;
500 } 500 }
501 fsstack_copy_attr_times(dir, lower_dir_inode); 501 fsstack_copy_attr_times(dir, lower_dir_inode);
502 dentry->d_inode->i_nlink = 502 set_nlink(dentry->d_inode,
503 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink; 503 ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink);
504 dentry->d_inode->i_ctime = dir->i_ctime; 504 dentry->d_inode->i_ctime = dir->i_ctime;
505 d_drop(dentry); 505 d_drop(dentry);
506out_unlock: 506out_unlock:
@@ -565,7 +565,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
565 goto out; 565 goto out;
566 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 566 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
567 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode); 567 fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
568 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 568 set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
569out: 569out:
570 unlock_dir(lower_dir_dentry); 570 unlock_dir(lower_dir_dentry);
571 if (!dentry->d_inode) 571 if (!dentry->d_inode)
@@ -588,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
588 if (!rc && dentry->d_inode) 588 if (!rc && dentry->d_inode)
589 clear_nlink(dentry->d_inode); 589 clear_nlink(dentry->d_inode);
590 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 590 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
591 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 591 set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
592 unlock_dir(lower_dir_dentry); 592 unlock_dir(lower_dir_dentry);
593 if (!rc) 593 if (!rc)
594 d_drop(dentry); 594 d_drop(dentry);
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 9c13412e6c99..bc84f365d75c 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -96,7 +96,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
96 efs_inode = (struct efs_dinode *) (bh->b_data + offset); 96 efs_inode = (struct efs_dinode *) (bh->b_data + offset);
97 97
98 inode->i_mode = be16_to_cpu(efs_inode->di_mode); 98 inode->i_mode = be16_to_cpu(efs_inode->di_mode);
99 inode->i_nlink = be16_to_cpu(efs_inode->di_nlink); 99 set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid); 100 inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid);
101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid); 101 inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid);
102 inode->i_size = be32_to_cpu(efs_inode->di_size); 102 inode->i_size = be32_to_cpu(efs_inode->di_size);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9026fc91fe3b..828e750af23a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -70,6 +70,15 @@
70 * simultaneous inserts (A into B and B into A) from racing and 70 * simultaneous inserts (A into B and B into A) from racing and
71 * constructing a cycle without either insert observing that it is 71 * constructing a cycle without either insert observing that it is
72 * going to. 72 * going to.
73 * It is necessary to acquire multiple "ep->mtx"es at once in the
74 * case when one epoll fd is added to another. In this case, we
75 * always acquire the locks in the order of nesting (i.e. after
76 * epoll_ctl(e1, EPOLL_CTL_ADD, e2), e1->mtx will always be acquired
77 * before e2->mtx). Since we disallow cycles of epoll file
78 * descriptors, this ensures that the mutexes are well-ordered. In
79 * order to communicate this nesting to lockdep, when walking a tree
80 * of epoll file descriptors, we use the current recursion depth as
81 * the lockdep subkey.
73 * It is possible to drop the "ep->mtx" and to use the global 82 * It is possible to drop the "ep->mtx" and to use the global
74 * mutex "epmutex" (together with "ep->lock") to have it working, 83 * mutex "epmutex" (together with "ep->lock") to have it working,
75 * but having "ep->mtx" will make the interface more scalable. 84 * but having "ep->mtx" will make the interface more scalable.
@@ -464,13 +473,15 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
464 * @ep: Pointer to the epoll private data structure. 473 * @ep: Pointer to the epoll private data structure.
465 * @sproc: Pointer to the scan callback. 474 * @sproc: Pointer to the scan callback.
466 * @priv: Private opaque data passed to the @sproc callback. 475 * @priv: Private opaque data passed to the @sproc callback.
476 * @depth: The current depth of recursive f_op->poll calls.
467 * 477 *
468 * Returns: The same integer error code returned by the @sproc callback. 478 * Returns: The same integer error code returned by the @sproc callback.
469 */ 479 */
470static int ep_scan_ready_list(struct eventpoll *ep, 480static int ep_scan_ready_list(struct eventpoll *ep,
471 int (*sproc)(struct eventpoll *, 481 int (*sproc)(struct eventpoll *,
472 struct list_head *, void *), 482 struct list_head *, void *),
473 void *priv) 483 void *priv,
484 int depth)
474{ 485{
475 int error, pwake = 0; 486 int error, pwake = 0;
476 unsigned long flags; 487 unsigned long flags;
@@ -481,7 +492,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
481 * We need to lock this because we could be hit by 492 * We need to lock this because we could be hit by
482 * eventpoll_release_file() and epoll_ctl(). 493 * eventpoll_release_file() and epoll_ctl().
483 */ 494 */
484 mutex_lock(&ep->mtx); 495 mutex_lock_nested(&ep->mtx, depth);
485 496
486 /* 497 /*
487 * Steal the ready list, and re-init the original one to the 498 * Steal the ready list, and re-init the original one to the
@@ -670,7 +681,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
670 681
671static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) 682static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
672{ 683{
673 return ep_scan_ready_list(priv, ep_read_events_proc, NULL); 684 return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
674} 685}
675 686
676static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) 687static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
@@ -737,7 +748,7 @@ void eventpoll_release_file(struct file *file)
737 748
738 ep = epi->ep; 749 ep = epi->ep;
739 list_del_init(&epi->fllink); 750 list_del_init(&epi->fllink);
740 mutex_lock(&ep->mtx); 751 mutex_lock_nested(&ep->mtx, 0);
741 ep_remove(ep, epi); 752 ep_remove(ep, epi);
742 mutex_unlock(&ep->mtx); 753 mutex_unlock(&ep->mtx);
743 } 754 }
@@ -1134,7 +1145,7 @@ static int ep_send_events(struct eventpoll *ep,
1134 esed.maxevents = maxevents; 1145 esed.maxevents = maxevents;
1135 esed.events = events; 1146 esed.events = events;
1136 1147
1137 return ep_scan_ready_list(ep, ep_send_events_proc, &esed); 1148 return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
1138} 1149}
1139 1150
1140static inline struct timespec ep_set_mstimeout(long ms) 1151static inline struct timespec ep_set_mstimeout(long ms)
@@ -1267,7 +1278,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
1267 struct rb_node *rbp; 1278 struct rb_node *rbp;
1268 struct epitem *epi; 1279 struct epitem *epi;
1269 1280
1270 mutex_lock(&ep->mtx); 1281 mutex_lock_nested(&ep->mtx, call_nests + 1);
1271 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { 1282 for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1272 epi = rb_entry(rbp, struct epitem, rbn); 1283 epi = rb_entry(rbp, struct epitem, rbn);
1273 if (unlikely(is_file_epoll(epi->ffd.file))) { 1284 if (unlikely(is_file_epoll(epi->ffd.file))) {
@@ -1409,7 +1420,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1409 } 1420 }
1410 1421
1411 1422
1412 mutex_lock(&ep->mtx); 1423 mutex_lock_nested(&ep->mtx, 0);
1413 1424
1414 /* 1425 /*
1415 * Try to lookup the file inside our RB tree, Since we grabbed "mtx" 1426 * Try to lookup the file inside our RB tree, Since we grabbed "mtx"
diff --git a/fs/exec.c b/fs/exec.c
index 25dcbe5fc356..36254645b7cc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -841,10 +841,6 @@ static int exec_mmap(struct mm_struct *mm)
841 tsk->mm = mm; 841 tsk->mm = mm;
842 tsk->active_mm = mm; 842 tsk->active_mm = mm;
843 activate_mm(active_mm, mm); 843 activate_mm(active_mm, mm);
844 if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
845 atomic_dec(&old_mm->oom_disable_count);
846 atomic_inc(&tsk->mm->oom_disable_count);
847 }
848 task_unlock(tsk); 844 task_unlock(tsk);
849 arch_pick_mmap_layout(mm); 845 arch_pick_mmap_layout(mm);
850 if (old_mm) { 846 if (old_mm) {
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index fa9a286c8771..da42f32c49be 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -5,7 +5,7 @@
5# selected by any of the users. 5# selected by any of the users.
6config ORE 6config ORE
7 tristate 7 tristate
8 depends on EXOFS_FS 8 depends on EXOFS_FS || PNFS_OBJLAYOUT
9 select ASYNC_XOR 9 select ASYNC_XOR
10 default SCSI_OSD_ULD 10 default SCSI_OSD_ULD
11 11
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3e5f3a6be90a..f6dbf7768ce6 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1165,7 +1165,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1165 inode->i_mode = le16_to_cpu(fcb.i_mode); 1165 inode->i_mode = le16_to_cpu(fcb.i_mode);
1166 inode->i_uid = le32_to_cpu(fcb.i_uid); 1166 inode->i_uid = le32_to_cpu(fcb.i_uid);
1167 inode->i_gid = le32_to_cpu(fcb.i_gid); 1167 inode->i_gid = le32_to_cpu(fcb.i_gid);
1168 inode->i_nlink = le16_to_cpu(fcb.i_links_count); 1168 set_nlink(inode, le16_to_cpu(fcb.i_links_count));
1169 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime); 1169 inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
1170 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime); 1170 inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
1171 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime); 1171 inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 8f44cef1b3ef..a8cbe1bc6ad4 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -421,7 +421,7 @@ static inline int rsv_is_empty(struct ext2_reserve_window *rsv)
421void ext2_init_block_alloc_info(struct inode *inode) 421void ext2_init_block_alloc_info(struct inode *inode)
422{ 422{
423 struct ext2_inode_info *ei = EXT2_I(inode); 423 struct ext2_inode_info *ei = EXT2_I(inode);
424 struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info; 424 struct ext2_block_alloc_info *block_i;
425 struct super_block *sb = inode->i_sb; 425 struct super_block *sb = inode->i_sb;
426 426
427 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 427 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index af9fc89b1b2d..9a4e5e206d08 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -135,10 +135,10 @@ extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long);
135struct dentry *ext2_get_parent(struct dentry *child); 135struct dentry *ext2_get_parent(struct dentry *child);
136 136
137/* super.c */ 137/* super.c */
138extern void ext2_error (struct super_block *, const char *, const char *, ...) 138extern __printf(3, 4)
139 __attribute__ ((format (printf, 3, 4))); 139void ext2_error(struct super_block *, const char *, const char *, ...);
140extern void ext2_msg(struct super_block *, const char *, const char *, ...) 140extern __printf(3, 4)
141 __attribute__ ((format (printf, 3, 4))); 141void ext2_msg(struct super_block *, const char *, const char *, ...);
142extern void ext2_update_dynamic_rev (struct super_block *sb); 142extern void ext2_update_dynamic_rev (struct super_block *sb);
143extern void ext2_write_super (struct super_block *); 143extern void ext2_write_super (struct super_block *);
144 144
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ee9ed31948e1..c4e81dfb74ba 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -601,7 +601,7 @@ fail_free_drop:
601fail_drop: 601fail_drop:
602 dquot_drop(inode); 602 dquot_drop(inode);
603 inode->i_flags |= S_NOQUOTA; 603 inode->i_flags |= S_NOQUOTA;
604 inode->i_nlink = 0; 604 clear_nlink(inode);
605 unlock_new_inode(inode); 605 unlock_new_inode(inode);
606 iput(inode); 606 iput(inode);
607 return ERR_PTR(err); 607 return ERR_PTR(err);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a8a58f63f07c..91a6945af6d8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1321,7 +1321,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1321 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 1321 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
1322 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 1322 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
1323 } 1323 }
1324 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 1324 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
1325 inode->i_size = le32_to_cpu(raw_inode->i_size); 1325 inode->i_size = le32_to_cpu(raw_inode->i_size);
1326 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 1326 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
1327 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 1327 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1dd62ed35b85..bd8ac164a3bf 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -327,10 +327,10 @@ static struct inode *ext2_nfs_get_inode(struct super_block *sb,
327 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) 327 if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
328 return ERR_PTR(-ESTALE); 328 return ERR_PTR(-ESTALE);
329 329
330 /* iget isn't really right if the inode is currently unallocated!! 330 /*
331 * ext2_read_inode currently does appropriate checks, but 331 * ext2_iget isn't quite right if the inode is currently unallocated!
332 * it might be "neater" to call ext2_get_inode first and check 332 * However ext2_iget currently does appropriate checks to handle stale
333 * if the inode is valid..... 333 * inodes so everything is OK.
334 */ 334 */
335 inode = ext2_iget(sb, ino); 335 inode = ext2_iget(sb, ino);
336 if (IS_ERR(inode)) 336 if (IS_ERR(inode))
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 6386d76f44a7..a2038928f9a3 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -427,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
427void ext3_init_block_alloc_info(struct inode *inode) 427void ext3_init_block_alloc_info(struct inode *inode)
428{ 428{
429 struct ext3_inode_info *ei = EXT3_I(inode); 429 struct ext3_inode_info *ei = EXT3_I(inode);
430 struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; 430 struct ext3_block_alloc_info *block_i;
431 struct super_block *sb = inode->i_sb; 431 struct super_block *sb = inode->i_sb;
432 432
433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS); 433 block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -1440,14 +1440,14 @@ out:
1440 * 1440 *
1441 * Check if filesystem has at least 1 free block available for allocation. 1441 * Check if filesystem has at least 1 free block available for allocation.
1442 */ 1442 */
1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi) 1443static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
1444{ 1444{
1445 ext3_fsblk_t free_blocks, root_blocks; 1445 ext3_fsblk_t free_blocks, root_blocks;
1446 1446
1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 1447 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); 1448 root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && 1449 if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
1450 sbi->s_resuid != current_fsuid() && 1450 !use_reservation && sbi->s_resuid != current_fsuid() &&
1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 1451 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
1452 return 0; 1452 return 0;
1453 } 1453 }
@@ -1468,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1468 */ 1468 */
1469int ext3_should_retry_alloc(struct super_block *sb, int *retries) 1469int ext3_should_retry_alloc(struct super_block *sb, int *retries)
1470{ 1470{
1471 if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3) 1471 if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
1472 return 0; 1472 return 0;
1473 1473
1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 1474 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1546,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) 1546 if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1547 my_rsv = &block_i->rsv_window_node; 1547 my_rsv = &block_i->rsv_window_node;
1548 1548
1549 if (!ext3_has_free_blocks(sbi)) { 1549 if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
1550 *errp = -ENOSPC; 1550 *errp = -ENOSPC;
1551 goto out; 1551 goto out;
1552 } 1552 }
@@ -1924,9 +1924,10 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
1924 * reaches any used block. Then issue a TRIM command on this extent and free 1924 * reaches any used block. Then issue a TRIM command on this extent and free
1925 * the extent in the block bitmap. This is done until whole group is scanned. 1925 * the extent in the block bitmap. This is done until whole group is scanned.
1926 */ 1926 */
1927ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, 1927static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
1928 ext3_grpblk_t start, ext3_grpblk_t max, 1928 unsigned int group,
1929 ext3_grpblk_t minblocks) 1929 ext3_grpblk_t start, ext3_grpblk_t max,
1930 ext3_grpblk_t minblocks)
1930{ 1931{
1931 handle_t *handle; 1932 handle_t *handle;
1932 ext3_grpblk_t next, free_blocks, bit, freed, count = 0; 1933 ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d494c554c6e6..1860ed356323 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -61,13 +61,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
61 if (ret) 61 if (ret)
62 goto out; 62 goto out;
63 63
64 /*
65 * Taking the mutex here just to keep consistent with how fsync was
66 * called previously, however it looks like we don't need to take
67 * i_mutex at all.
68 */
69 mutex_lock(&inode->i_mutex);
70
71 J_ASSERT(ext3_journal_current_handle() == NULL); 64 J_ASSERT(ext3_journal_current_handle() == NULL);
72 65
73 /* 66 /*
@@ -85,7 +78,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
85 * safe in-journal, which is all fsync() needs to ensure. 78 * safe in-journal, which is all fsync() needs to ensure.
86 */ 79 */
87 if (ext3_should_journal_data(inode)) { 80 if (ext3_should_journal_data(inode)) {
88 mutex_unlock(&inode->i_mutex);
89 ret = ext3_force_commit(inode->i_sb); 81 ret = ext3_force_commit(inode->i_sb);
90 goto out; 82 goto out;
91 } 83 }
@@ -108,8 +100,6 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
108 */ 100 */
109 if (needs_barrier) 101 if (needs_barrier)
110 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 102 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
111
112 mutex_unlock(&inode->i_mutex);
113out: 103out:
114 trace_ext3_sync_file_exit(inode, ret); 104 trace_ext3_sync_file_exit(inode, ret);
115 return ret; 105 return ret;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index bf09cbf938cc..5c866e06e7ab 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -178,42 +178,6 @@ error_return:
178} 178}
179 179
180/* 180/*
181 * There are two policies for allocating an inode. If the new inode is
182 * a directory, then a forward search is made for a block group with both
183 * free space and a low directory-to-inode ratio; if that fails, then of
184 * the groups with above-average free space, that group with the fewest
185 * directories already is chosen.
186 *
187 * For other inodes, search forward from the parent directory\'s block
188 * group to find a free inode.
189 */
190static int find_group_dir(struct super_block *sb, struct inode *parent)
191{
192 int ngroups = EXT3_SB(sb)->s_groups_count;
193 unsigned int freei, avefreei;
194 struct ext3_group_desc *desc, *best_desc = NULL;
195 int group, best_group = -1;
196
197 freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
198 avefreei = freei / ngroups;
199
200 for (group = 0; group < ngroups; group++) {
201 desc = ext3_get_group_desc (sb, group, NULL);
202 if (!desc || !desc->bg_free_inodes_count)
203 continue;
204 if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
205 continue;
206 if (!best_desc ||
207 (le16_to_cpu(desc->bg_free_blocks_count) >
208 le16_to_cpu(best_desc->bg_free_blocks_count))) {
209 best_group = group;
210 best_desc = desc;
211 }
212 }
213 return best_group;
214}
215
216/*
217 * Orlov's allocator for directories. 181 * Orlov's allocator for directories.
218 * 182 *
219 * We always try to spread first-level directories. 183 * We always try to spread first-level directories.
@@ -436,12 +400,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
436 400
437 sbi = EXT3_SB(sb); 401 sbi = EXT3_SB(sb);
438 es = sbi->s_es; 402 es = sbi->s_es;
439 if (S_ISDIR(mode)) { 403 if (S_ISDIR(mode))
440 if (test_opt (sb, OLDALLOC)) 404 group = find_group_orlov(sb, dir);
441 group = find_group_dir(sb, dir); 405 else
442 else
443 group = find_group_orlov(sb, dir);
444 } else
445 group = find_group_other(sb, dir); 406 group = find_group_other(sb, dir);
446 407
447 err = -ENOSPC; 408 err = -ENOSPC;
@@ -621,7 +582,7 @@ fail_free_drop:
621fail_drop: 582fail_drop:
622 dquot_drop(inode); 583 dquot_drop(inode);
623 inode->i_flags |= S_NOQUOTA; 584 inode->i_flags |= S_NOQUOTA;
624 inode->i_nlink = 0; 585 clear_nlink(inode);
625 unlock_new_inode(inode); 586 unlock_new_inode(inode);
626 iput(inode); 587 iput(inode);
627 brelse(bitmap_bh); 588 brelse(bitmap_bh);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 12661e1deedd..85fe655fe3e0 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2899,7 +2899,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2899 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 2899 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
2900 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 2900 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
2901 } 2901 }
2902 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 2902 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
2903 inode->i_size = le32_to_cpu(raw_inode->i_size); 2903 inode->i_size = le32_to_cpu(raw_inode->i_size);
2904 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); 2904 inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
2905 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); 2905 inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index c7f43944f160..ba1b54e23cae 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -150,30 +150,6 @@ setversion_out:
150 mnt_drop_write(filp->f_path.mnt); 150 mnt_drop_write(filp->f_path.mnt);
151 return err; 151 return err;
152 } 152 }
153#ifdef CONFIG_JBD_DEBUG
154 case EXT3_IOC_WAIT_FOR_READONLY:
155 /*
156 * This is racy - by the time we're woken up and running,
157 * the superblock could be released. And the module could
158 * have been unloaded. So sue me.
159 *
160 * Returns 1 if it slept, else zero.
161 */
162 {
163 struct super_block *sb = inode->i_sb;
164 DECLARE_WAITQUEUE(wait, current);
165 int ret = 0;
166
167 set_current_state(TASK_INTERRUPTIBLE);
168 add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
169 if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
170 schedule();
171 ret = 1;
172 }
173 remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
174 return ret;
175 }
176#endif
177 case EXT3_IOC_GETRSVSZ: 153 case EXT3_IOC_GETRSVSZ:
178 if (test_opt(inode->i_sb, RESERVATION) 154 if (test_opt(inode->i_sb, RESERVATION)
179 && S_ISREG(inode->i_mode) 155 && S_ISREG(inode->i_mode)
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0629e09f6511..642dc6d66dfd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1821,7 +1821,7 @@ retry:
1821 de->name_len = 2; 1821 de->name_len = 2;
1822 strcpy (de->name, ".."); 1822 strcpy (de->name, "..");
1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR); 1823 ext3_set_de_type(dir->i_sb, de, S_IFDIR);
1824 inode->i_nlink = 2; 1824 set_nlink(inode, 2);
1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); 1825 BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
1826 err = ext3_journal_dirty_metadata(handle, dir_block); 1826 err = ext3_journal_dirty_metadata(handle, dir_block);
1827 if (err) 1827 if (err)
@@ -1833,7 +1833,7 @@ retry:
1833 1833
1834 if (err) { 1834 if (err) {
1835out_clear_inode: 1835out_clear_inode:
1836 inode->i_nlink = 0; 1836 clear_nlink(inode);
1837 unlock_new_inode(inode); 1837 unlock_new_inode(inode);
1838 ext3_mark_inode_dirty(handle, inode); 1838 ext3_mark_inode_dirty(handle, inode);
1839 iput (inode); 1839 iput (inode);
@@ -2170,7 +2170,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2170 ext3_warning (inode->i_sb, "ext3_unlink", 2170 ext3_warning (inode->i_sb, "ext3_unlink",
2171 "Deleting nonexistent file (%lu), %d", 2171 "Deleting nonexistent file (%lu), %d",
2172 inode->i_ino, inode->i_nlink); 2172 inode->i_ino, inode->i_nlink);
2173 inode->i_nlink = 1; 2173 set_nlink(inode, 1);
2174 } 2174 }
2175 retval = ext3_delete_entry(handle, dir, de, bh); 2175 retval = ext3_delete_entry(handle, dir, de, bh);
2176 if (retval) 2176 if (retval)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7beb69ae0015..922d289aeeb3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -652,8 +652,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
652 seq_puts(seq, ",nouid32"); 652 seq_puts(seq, ",nouid32");
653 if (test_opt(sb, DEBUG)) 653 if (test_opt(sb, DEBUG))
654 seq_puts(seq, ",debug"); 654 seq_puts(seq, ",debug");
655 if (test_opt(sb, OLDALLOC))
656 seq_puts(seq, ",oldalloc");
657#ifdef CONFIG_EXT3_FS_XATTR 655#ifdef CONFIG_EXT3_FS_XATTR
658 if (test_opt(sb, XATTR_USER)) 656 if (test_opt(sb, XATTR_USER))
659 seq_puts(seq, ",user_xattr"); 657 seq_puts(seq, ",user_xattr");
@@ -1049,10 +1047,12 @@ static int parse_options (char *options, struct super_block *sb,
1049 set_opt (sbi->s_mount_opt, DEBUG); 1047 set_opt (sbi->s_mount_opt, DEBUG);
1050 break; 1048 break;
1051 case Opt_oldalloc: 1049 case Opt_oldalloc:
1052 set_opt (sbi->s_mount_opt, OLDALLOC); 1050 ext3_msg(sb, KERN_WARNING,
1051 "Ignoring deprecated oldalloc option");
1053 break; 1052 break;
1054 case Opt_orlov: 1053 case Opt_orlov:
1055 clear_opt (sbi->s_mount_opt, OLDALLOC); 1054 ext3_msg(sb, KERN_WARNING,
1055 "Ignoring deprecated orlov option");
1056 break; 1056 break;
1057#ifdef CONFIG_EXT3_FS_XATTR 1057#ifdef CONFIG_EXT3_FS_XATTR
1058 case Opt_user_xattr: 1058 case Opt_user_xattr:
@@ -2669,13 +2669,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2669 /* 2669 /*
2670 * If we have an unprocessed orphan list hanging 2670 * If we have an unprocessed orphan list hanging
2671 * around from a previously readonly bdev mount, 2671 * around from a previously readonly bdev mount,
2672 * require a full umount/remount for now. 2672 * require a full umount & mount for now.
2673 */ 2673 */
2674 if (es->s_last_orphan) { 2674 if (es->s_last_orphan) {
2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't " 2675 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2676 "remount RDWR because of unprocessed " 2676 "remount RDWR because of unprocessed "
2677 "orphan inode list. Please " 2677 "orphan inode list. Please "
2678 "umount/remount instead."); 2678 "umount & mount instead.");
2679 err = -EINVAL; 2679 err = -EINVAL;
2680 goto restore_opts; 2680 goto restore_opts;
2681 } 2681 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f8224adf496e..f6dba4505f1c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -28,7 +28,8 @@
28 */ 28 */
29 29
30/* 30/*
31 * Calculate the block group number and offset, given a block number 31 * Calculate the block group number and offset into the block/cluster
32 * allocation bitmap, given a block number
32 */ 33 */
33void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 34void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
34 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) 35 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
@@ -37,7 +38,8 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
37 ext4_grpblk_t offset; 38 ext4_grpblk_t offset;
38 39
39 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 40 blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
40 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); 41 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
42 EXT4_SB(sb)->s_cluster_bits;
41 if (offsetp) 43 if (offsetp)
42 *offsetp = offset; 44 *offsetp = offset;
43 if (blockgrpp) 45 if (blockgrpp)
@@ -55,130 +57,169 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
55 return 0; 57 return 0;
56} 58}
57 59
58static int ext4_group_used_meta_blocks(struct super_block *sb, 60/* Return the number of clusters used for file system metadata; this
59 ext4_group_t block_group, 61 * represents the overhead needed by the file system.
60 struct ext4_group_desc *gdp) 62 */
63unsigned ext4_num_overhead_clusters(struct super_block *sb,
64 ext4_group_t block_group,
65 struct ext4_group_desc *gdp)
61{ 66{
62 ext4_fsblk_t tmp; 67 unsigned num_clusters;
68 int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
69 ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
70 ext4_fsblk_t itbl_blk;
63 struct ext4_sb_info *sbi = EXT4_SB(sb); 71 struct ext4_sb_info *sbi = EXT4_SB(sb);
64 /* block bitmap, inode bitmap, and inode table blocks */
65 int used_blocks = sbi->s_itb_per_group + 2;
66 72
67 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 73 /* This is the number of clusters used by the superblock,
68 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 74 * block group descriptors, and reserved block group
69 block_group)) 75 * descriptor blocks */
70 used_blocks--; 76 num_clusters = ext4_num_base_meta_clusters(sb, block_group);
71 77
72 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), 78 /*
73 block_group)) 79 * For the allocation bitmaps and inode table, we first need
74 used_blocks--; 80 * to check to see if the block is in the block group. If it
75 81 * is, then check to see if the cluster is already accounted
76 tmp = ext4_inode_table(sb, gdp); 82 * for in the clusters used for the base metadata cluster, or
77 for (; tmp < ext4_inode_table(sb, gdp) + 83 * if we can increment the base metadata cluster to include
78 sbi->s_itb_per_group; tmp++) { 84 * that block. Otherwise, we will have to track the cluster
79 if (!ext4_block_in_group(sb, tmp, block_group)) 85 * used for the allocation bitmap or inode table explicitly.
80 used_blocks -= 1; 86 * Normally all of these blocks are contiguous, so the special
87 * case handling shouldn't be necessary except for *very*
88 * unusual file system layouts.
89 */
90 if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
91 block_cluster = EXT4_B2C(sbi, (start -
92 ext4_block_bitmap(sb, gdp)));
93 if (block_cluster < num_clusters)
94 block_cluster = -1;
95 else if (block_cluster == num_clusters) {
96 num_clusters++;
97 block_cluster = -1;
81 } 98 }
82 } 99 }
83 return used_blocks;
84}
85 100
86/* Initializes an uninitialized block bitmap if given, and returns the 101 if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
87 * number of blocks free in the group. */ 102 inode_cluster = EXT4_B2C(sbi,
88unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 103 start - ext4_inode_bitmap(sb, gdp));
89 ext4_group_t block_group, struct ext4_group_desc *gdp) 104 if (inode_cluster < num_clusters)
90{ 105 inode_cluster = -1;
91 int bit, bit_max; 106 else if (inode_cluster == num_clusters) {
92 ext4_group_t ngroups = ext4_get_groups_count(sb); 107 num_clusters++;
93 unsigned free_blocks, group_blocks; 108 inode_cluster = -1;
94 struct ext4_sb_info *sbi = EXT4_SB(sb);
95
96 if (bh) {
97 J_ASSERT_BH(bh, buffer_locked(bh));
98
99 /* If checksum is bad mark all blocks used to prevent allocation
100 * essentially implementing a per-group read-only flag. */
101 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
102 ext4_error(sb, "Checksum bad for group %u",
103 block_group);
104 ext4_free_blks_set(sb, gdp, 0);
105 ext4_free_inodes_set(sb, gdp, 0);
106 ext4_itable_unused_set(sb, gdp, 0);
107 memset(bh->b_data, 0xff, sb->s_blocksize);
108 return 0;
109 } 109 }
110 memset(bh->b_data, 0, sb->s_blocksize);
111 } 110 }
112 111
113 /* Check for superblock and gdt backups in this group */ 112 itbl_blk = ext4_inode_table(sb, gdp);
114 bit_max = ext4_bg_has_super(sb, block_group); 113 for (i = 0; i < sbi->s_itb_per_group; i++) {
115 114 if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
116 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 115 c = EXT4_B2C(sbi, start - itbl_blk + i);
117 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * 116 if ((c < num_clusters) || (c == inode_cluster) ||
118 sbi->s_desc_per_block) { 117 (c == block_cluster) || (c == itbl_cluster))
119 if (bit_max) { 118 continue;
120 bit_max += ext4_bg_num_gdb(sb, block_group); 119 if (c == num_clusters) {
121 bit_max += 120 num_clusters++;
122 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 121 continue;
122 }
123 num_clusters++;
124 itbl_cluster = c;
123 } 125 }
124 } else { /* For META_BG_BLOCK_GROUPS */
125 bit_max += ext4_bg_num_gdb(sb, block_group);
126 } 126 }
127 127
128 if (block_group == ngroups - 1) { 128 if (block_cluster != -1)
129 num_clusters++;
130 if (inode_cluster != -1)
131 num_clusters++;
132
133 return num_clusters;
134}
135
136static unsigned int num_clusters_in_group(struct super_block *sb,
137 ext4_group_t block_group)
138{
139 unsigned int blocks;
140
141 if (block_group == ext4_get_groups_count(sb) - 1) {
129 /* 142 /*
130 * Even though mke2fs always initialize first and last group 143 * Even though mke2fs always initializes the first and
131 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 144 * last group, just in case some other tool was used,
132 * to make sure we calculate the right free blocks 145 * we need to make sure we calculate the right free
146 * blocks.
133 */ 147 */
134 group_blocks = ext4_blocks_count(sbi->s_es) - 148 blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
135 ext4_group_first_block_no(sb, ngroups - 1); 149 ext4_group_first_block_no(sb, block_group);
136 } else { 150 } else
137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 151 blocks = EXT4_BLOCKS_PER_GROUP(sb);
138 } 152 return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
153}
139 154
140 free_blocks = group_blocks - bit_max; 155/* Initializes an uninitialized block bitmap */
156void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
157 ext4_group_t block_group,
158 struct ext4_group_desc *gdp)
159{
160 unsigned int bit, bit_max;
161 struct ext4_sb_info *sbi = EXT4_SB(sb);
162 ext4_fsblk_t start, tmp;
163 int flex_bg = 0;
164
165 J_ASSERT_BH(bh, buffer_locked(bh));
166
167 /* If checksum is bad mark all blocks used to prevent allocation
168 * essentially implementing a per-group read-only flag. */
169 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
170 ext4_error(sb, "Checksum bad for group %u", block_group);
171 ext4_free_group_clusters_set(sb, gdp, 0);
172 ext4_free_inodes_set(sb, gdp, 0);
173 ext4_itable_unused_set(sb, gdp, 0);
174 memset(bh->b_data, 0xff, sb->s_blocksize);
175 return;
176 }
177 memset(bh->b_data, 0, sb->s_blocksize);
141 178
142 if (bh) { 179 bit_max = ext4_num_base_meta_clusters(sb, block_group);
143 ext4_fsblk_t start, tmp; 180 for (bit = 0; bit < bit_max; bit++)
144 int flex_bg = 0; 181 ext4_set_bit(bit, bh->b_data);
145 182
146 for (bit = 0; bit < bit_max; bit++) 183 start = ext4_group_first_block_no(sb, block_group);
147 ext4_set_bit(bit, bh->b_data);
148 184
149 start = ext4_group_first_block_no(sb, block_group); 185 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
186 flex_bg = 1;
150 187
151 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 188 /* Set bits for block and inode bitmaps, and inode table */
152 EXT4_FEATURE_INCOMPAT_FLEX_BG)) 189 tmp = ext4_block_bitmap(sb, gdp);
153 flex_bg = 1; 190 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
191 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
154 192
155 /* Set bits for block and inode bitmaps, and inode table */ 193 tmp = ext4_inode_bitmap(sb, gdp);
156 tmp = ext4_block_bitmap(sb, gdp); 194 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
157 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 195 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
158 ext4_set_bit(tmp - start, bh->b_data);
159 196
160 tmp = ext4_inode_bitmap(sb, gdp); 197 tmp = ext4_inode_table(sb, gdp);
198 for (; tmp < ext4_inode_table(sb, gdp) +
199 sbi->s_itb_per_group; tmp++) {
161 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 200 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
162 ext4_set_bit(tmp - start, bh->b_data); 201 ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
163
164 tmp = ext4_inode_table(sb, gdp);
165 for (; tmp < ext4_inode_table(sb, gdp) +
166 sbi->s_itb_per_group; tmp++) {
167 if (!flex_bg ||
168 ext4_block_in_group(sb, tmp, block_group))
169 ext4_set_bit(tmp - start, bh->b_data);
170 }
171 /*
172 * Also if the number of blocks within the group is
173 * less than the blocksize * 8 ( which is the size
174 * of bitmap ), set rest of the block bitmap to 1
175 */
176 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
177 bh->b_data);
178 } 202 }
179 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 203
204 /*
205 * Also if the number of blocks within the group is less than
206 * the blocksize * 8 ( which is the size of bitmap ), set rest
207 * of the block bitmap to 1
208 */
209 ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
210 sb->s_blocksize * 8, bh->b_data);
180} 211}
181 212
213/* Return the number of free blocks in a block group. It is used when
214 * the block bitmap is uninitialized, so we can't just count the bits
215 * in the bitmap. */
216unsigned ext4_free_clusters_after_init(struct super_block *sb,
217 ext4_group_t block_group,
218 struct ext4_group_desc *gdp)
219{
220 return num_clusters_in_group(sb, block_group) -
221 ext4_num_overhead_clusters(sb, block_group, gdp);
222}
182 223
183/* 224/*
184 * The free blocks are managed by bitmaps. A file system contains several 225 * The free blocks are managed by bitmaps. A file system contains several
@@ -362,53 +403,54 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
362} 403}
363 404
364/** 405/**
365 * ext4_has_free_blocks() 406 * ext4_has_free_clusters()
366 * @sbi: in-core super block structure. 407 * @sbi: in-core super block structure.
367 * @nblocks: number of needed blocks 408 * @nclusters: number of needed blocks
409 * @flags: flags from ext4_mb_new_blocks()
368 * 410 *
369 * Check if filesystem has nblocks free & available for allocation. 411 * Check if filesystem has nclusters free & available for allocation.
370 * On success return 1, return 0 on failure. 412 * On success return 1, return 0 on failure.
371 */ 413 */
372static int ext4_has_free_blocks(struct ext4_sb_info *sbi, 414static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
373 s64 nblocks, unsigned int flags) 415 s64 nclusters, unsigned int flags)
374{ 416{
375 s64 free_blocks, dirty_blocks, root_blocks; 417 s64 free_clusters, dirty_clusters, root_clusters;
376 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 418 struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
377 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; 419 struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
378 420
379 free_blocks = percpu_counter_read_positive(fbc); 421 free_clusters = percpu_counter_read_positive(fcc);
380 dirty_blocks = percpu_counter_read_positive(dbc); 422 dirty_clusters = percpu_counter_read_positive(dcc);
381 root_blocks = ext4_r_blocks_count(sbi->s_es); 423 root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
382 424
383 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 425 if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
384 EXT4_FREEBLOCKS_WATERMARK) { 426 EXT4_FREECLUSTERS_WATERMARK) {
385 free_blocks = percpu_counter_sum_positive(fbc); 427 free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
386 dirty_blocks = percpu_counter_sum_positive(dbc); 428 dirty_clusters = percpu_counter_sum_positive(dcc);
387 } 429 }
388 /* Check whether we have space after 430 /* Check whether we have space after accounting for current
389 * accounting for current dirty blocks & root reserved blocks. 431 * dirty clusters & root reserved clusters.
390 */ 432 */
391 if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks)) 433 if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters))
392 return 1; 434 return 1;
393 435
394 /* Hm, nope. Are (enough) root reserved blocks available? */ 436 /* Hm, nope. Are (enough) root reserved clusters available? */
395 if (sbi->s_resuid == current_fsuid() || 437 if (sbi->s_resuid == current_fsuid() ||
396 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || 438 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
397 capable(CAP_SYS_RESOURCE) || 439 capable(CAP_SYS_RESOURCE) ||
398 (flags & EXT4_MB_USE_ROOT_BLOCKS)) { 440 (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
399 441
400 if (free_blocks >= (nblocks + dirty_blocks)) 442 if (free_clusters >= (nclusters + dirty_clusters))
401 return 1; 443 return 1;
402 } 444 }
403 445
404 return 0; 446 return 0;
405} 447}
406 448
407int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 449int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
408 s64 nblocks, unsigned int flags) 450 s64 nclusters, unsigned int flags)
409{ 451{
410 if (ext4_has_free_blocks(sbi, nblocks, flags)) { 452 if (ext4_has_free_clusters(sbi, nclusters, flags)) {
411 percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); 453 percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
412 return 0; 454 return 0;
413 } else 455 } else
414 return -ENOSPC; 456 return -ENOSPC;
@@ -428,7 +470,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
428 */ 470 */
429int ext4_should_retry_alloc(struct super_block *sb, int *retries) 471int ext4_should_retry_alloc(struct super_block *sb, int *retries)
430{ 472{
431 if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) || 473 if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
432 (*retries)++ > 3 || 474 (*retries)++ > 3 ||
433 !EXT4_SB(sb)->s_journal) 475 !EXT4_SB(sb)->s_journal)
434 return 0; 476 return 0;
@@ -444,7 +486,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
444 * @handle: handle to this transaction 486 * @handle: handle to this transaction
445 * @inode: file inode 487 * @inode: file inode
446 * @goal: given target block(filesystem wide) 488 * @goal: given target block(filesystem wide)
447 * @count: pointer to total number of blocks needed 489 * @count: pointer to total number of clusters needed
448 * @errp: error code 490 * @errp: error code
449 * 491 *
450 * Return 1st allocated block number on success, *count stores total account 492 * Return 1st allocated block number on success, *count stores total account
@@ -476,18 +518,19 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
476 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 518 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
477 EXT4_I(inode)->i_allocated_meta_blocks += ar.len; 519 EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
478 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 520 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
479 dquot_alloc_block_nofail(inode, ar.len); 521 dquot_alloc_block_nofail(inode,
522 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
480 } 523 }
481 return ret; 524 return ret;
482} 525}
483 526
484/** 527/**
485 * ext4_count_free_blocks() -- count filesystem free blocks 528 * ext4_count_free_clusters() -- count filesystem free clusters
486 * @sb: superblock 529 * @sb: superblock
487 * 530 *
488 * Adds up the number of free blocks from each block group. 531 * Adds up the number of free clusters from each block group.
489 */ 532 */
490ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) 533ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
491{ 534{
492 ext4_fsblk_t desc_count; 535 ext4_fsblk_t desc_count;
493 struct ext4_group_desc *gdp; 536 struct ext4_group_desc *gdp;
@@ -508,7 +551,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
508 gdp = ext4_get_group_desc(sb, i, NULL); 551 gdp = ext4_get_group_desc(sb, i, NULL);
509 if (!gdp) 552 if (!gdp)
510 continue; 553 continue;
511 desc_count += ext4_free_blks_count(sb, gdp); 554 desc_count += ext4_free_group_clusters(sb, gdp);
512 brelse(bitmap_bh); 555 brelse(bitmap_bh);
513 bitmap_bh = ext4_read_block_bitmap(sb, i); 556 bitmap_bh = ext4_read_block_bitmap(sb, i);
514 if (bitmap_bh == NULL) 557 if (bitmap_bh == NULL)
@@ -516,12 +559,13 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
516 559
517 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 560 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
518 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 561 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
519 i, ext4_free_blks_count(sb, gdp), x); 562 i, ext4_free_group_clusters(sb, gdp), x);
520 bitmap_count += x; 563 bitmap_count += x;
521 } 564 }
522 brelse(bitmap_bh); 565 brelse(bitmap_bh);
523 printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" 566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
524 ", computed = %llu, %llu\n", ext4_free_blocks_count(es), 567 ", computed = %llu, %llu\n",
568 EXT4_B2C(sbi, ext4_free_blocks_count(es)),
525 desc_count, bitmap_count); 569 desc_count, bitmap_count);
526 return bitmap_count; 570 return bitmap_count;
527#else 571#else
@@ -530,7 +574,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
530 gdp = ext4_get_group_desc(sb, i, NULL); 574 gdp = ext4_get_group_desc(sb, i, NULL);
531 if (!gdp) 575 if (!gdp)
532 continue; 576 continue;
533 desc_count += ext4_free_blks_count(sb, gdp); 577 desc_count += ext4_free_group_clusters(sb, gdp);
534 } 578 }
535 579
536 return desc_count; 580 return desc_count;
@@ -620,6 +664,31 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
620 664
621} 665}
622 666
667/*
668 * This function returns the number of file system metadata clusters at
669 * the beginning of a block group, including the reserved gdt blocks.
670 */
671unsigned ext4_num_base_meta_clusters(struct super_block *sb,
672 ext4_group_t block_group)
673{
674 struct ext4_sb_info *sbi = EXT4_SB(sb);
675 unsigned num;
676
677 /* Check for superblock and gdt backups in this group */
678 num = ext4_bg_has_super(sb, block_group);
679
680 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
681 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
682 sbi->s_desc_per_block) {
683 if (num) {
684 num += ext4_bg_num_gdb(sb, block_group);
685 num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
686 }
687 } else { /* For META_BG_BLOCK_GROUPS */
688 num += ext4_bg_num_gdb(sb, block_group);
689 }
690 return EXT4_NUM_B2C(sbi, num);
691}
623/** 692/**
624 * ext4_inode_to_goal_block - return a hint for block allocation 693 * ext4_inode_to_goal_block - return a hint for block allocation
625 * @inode: inode for block allocation 694 * @inode: inode for block allocation
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b7d7bd0f066e..5b0e26a1272d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -144,9 +144,17 @@ struct ext4_allocation_request {
144#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) 144#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
145#define EXT4_MAP_BOUNDARY (1 << BH_Boundary) 145#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
146#define EXT4_MAP_UNINIT (1 << BH_Uninit) 146#define EXT4_MAP_UNINIT (1 << BH_Uninit)
147/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
148 * ext4_map_blocks wants to know whether or not the underlying cluster has
149 * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
150 * the requested mapping was from previously mapped (or delayed allocated)
151 * cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
152 * should never appear on buffer_head's state flags.
153 */
154#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
147#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ 155#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
148 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ 156 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
149 EXT4_MAP_UNINIT) 157 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
150 158
151struct ext4_map_blocks { 159struct ext4_map_blocks {
152 ext4_fsblk_t m_pblk; 160 ext4_fsblk_t m_pblk;
@@ -239,8 +247,11 @@ struct ext4_io_submit {
239# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) 247# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
240#endif 248#endif
241#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) 249#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
250#define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \
251 EXT4_SB(s)->s_cluster_bits)
242#ifdef __KERNEL__ 252#ifdef __KERNEL__
243# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) 253# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
254# define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits)
244#else 255#else
245# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) 256# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
246#endif 257#endif
@@ -258,6 +269,14 @@ struct ext4_io_submit {
258#endif 269#endif
259#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) 270#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
260 271
272/* Translate a block number to a cluster number */
273#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
274/* Translate a cluster number to a block number */
275#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
276/* Translate # of blks to # of clusters */
277#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
278 (sbi)->s_cluster_bits)
279
261/* 280/*
262 * Structure of a blocks group descriptor 281 * Structure of a blocks group descriptor
263 */ 282 */
@@ -289,7 +308,7 @@ struct ext4_group_desc
289 308
290struct flex_groups { 309struct flex_groups {
291 atomic_t free_inodes; 310 atomic_t free_inodes;
292 atomic_t free_blocks; 311 atomic_t free_clusters;
293 atomic_t used_dirs; 312 atomic_t used_dirs;
294}; 313};
295 314
@@ -306,6 +325,7 @@ struct flex_groups {
306#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) 325#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
307#ifdef __KERNEL__ 326#ifdef __KERNEL__
308# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) 327# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
328# define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group)
309# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) 329# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
310# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) 330# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
311# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) 331# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
@@ -358,8 +378,7 @@ struct flex_groups {
358 378
359/* Flags that should be inherited by new inodes from their parent. */ 379/* Flags that should be inherited by new inodes from their parent. */
360#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ 380#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
361 EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\ 381 EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
362 EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
363 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ 382 EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
364 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) 383 EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
365 384
@@ -520,6 +539,8 @@ struct ext4_new_group_data {
520#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020 539#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
521 /* Don't normalize allocation size (used for fallocate) */ 540 /* Don't normalize allocation size (used for fallocate) */
522#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 541#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
542 /* Request will not result in inode size update (user for fallocate) */
543#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
523 544
524/* 545/*
525 * Flags used by ext4_free_blocks 546 * Flags used by ext4_free_blocks
@@ -528,6 +549,13 @@ struct ext4_new_group_data {
528#define EXT4_FREE_BLOCKS_FORGET 0x0002 549#define EXT4_FREE_BLOCKS_FORGET 0x0002
529#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 550#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
530#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 551#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
552#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
553#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
554
555/*
556 * Flags used by ext4_discard_partial_page_buffers
557 */
558#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
531 559
532/* 560/*
533 * ioctl commands 561 * ioctl commands
@@ -538,9 +566,6 @@ struct ext4_new_group_data {
538#define EXT4_IOC_SETVERSION _IOW('f', 4, long) 566#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
539#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION 567#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
540#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION 568#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
541#ifdef CONFIG_JBD2_DEBUG
542#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
543#endif
544#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) 569#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
545#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) 570#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
546#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) 571#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
@@ -563,9 +588,6 @@ struct ext4_new_group_data {
563#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) 588#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
564#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) 589#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
565#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) 590#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
566#ifdef CONFIG_JBD2_DEBUG
567#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
568#endif
569#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION 591#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
570#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION 592#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
571#endif 593#endif
@@ -837,6 +859,7 @@ struct ext4_inode_info {
837 ext4_group_t i_last_alloc_group; 859 ext4_group_t i_last_alloc_group;
838 860
839 /* allocation reservation info for delalloc */ 861 /* allocation reservation info for delalloc */
862 /* In case of bigalloc, these refer to clusters rather than blocks */
840 unsigned int i_reserved_data_blocks; 863 unsigned int i_reserved_data_blocks;
841 unsigned int i_reserved_meta_blocks; 864 unsigned int i_reserved_meta_blocks;
842 unsigned int i_allocated_meta_blocks; 865 unsigned int i_allocated_meta_blocks;
@@ -886,7 +909,6 @@ struct ext4_inode_info {
886/* 909/*
887 * Mount flags 910 * Mount flags
888 */ 911 */
889#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
890#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 912#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
891#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 913#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
892#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 914#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -918,6 +940,9 @@ struct ext4_inode_info {
918#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ 940#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
919#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */ 941#define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
920 942
943#define EXT4_MOUNT2_EXPLICIT_DELALLOC 0x00000001 /* User explicitly
944 specified delalloc */
945
921#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 946#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
922 ~EXT4_MOUNT_##opt 947 ~EXT4_MOUNT_##opt
923#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ 948#define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \
@@ -968,9 +993,9 @@ struct ext4_super_block {
968/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ 993/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
969 __le32 s_first_data_block; /* First Data Block */ 994 __le32 s_first_data_block; /* First Data Block */
970 __le32 s_log_block_size; /* Block size */ 995 __le32 s_log_block_size; /* Block size */
971 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ 996 __le32 s_log_cluster_size; /* Allocation cluster size */
972/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ 997/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
973 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ 998 __le32 s_clusters_per_group; /* # Clusters per group */
974 __le32 s_inodes_per_group; /* # Inodes per group */ 999 __le32 s_inodes_per_group; /* # Inodes per group */
975 __le32 s_mtime; /* Mount time */ 1000 __le32 s_mtime; /* Mount time */
976/*30*/ __le32 s_wtime; /* Write time */ 1001/*30*/ __le32 s_wtime; /* Write time */
@@ -1066,7 +1091,10 @@ struct ext4_super_block {
1066 __u8 s_last_error_func[32]; /* function where the error happened */ 1091 __u8 s_last_error_func[32]; /* function where the error happened */
1067#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts) 1092#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
1068 __u8 s_mount_opts[64]; 1093 __u8 s_mount_opts[64];
1069 __le32 s_reserved[112]; /* Padding to the end of the block */ 1094 __le32 s_usr_quota_inum; /* inode for tracking user quota */
1095 __le32 s_grp_quota_inum; /* inode for tracking group quota */
1096 __le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
1097 __le32 s_reserved[109]; /* Padding to the end of the block */
1070}; 1098};
1071 1099
1072#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START) 1100#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
@@ -1086,6 +1114,7 @@ struct ext4_sb_info {
1086 unsigned long s_desc_size; /* Size of a group descriptor in bytes */ 1114 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
1087 unsigned long s_inodes_per_block;/* Number of inodes per block */ 1115 unsigned long s_inodes_per_block;/* Number of inodes per block */
1088 unsigned long s_blocks_per_group;/* Number of blocks in a group */ 1116 unsigned long s_blocks_per_group;/* Number of blocks in a group */
1117 unsigned long s_clusters_per_group; /* Number of clusters in a group */
1089 unsigned long s_inodes_per_group;/* Number of inodes in a group */ 1118 unsigned long s_inodes_per_group;/* Number of inodes in a group */
1090 unsigned long s_itb_per_group; /* Number of inode table blocks per group */ 1119 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
1091 unsigned long s_gdb_count; /* Number of group descriptor blocks */ 1120 unsigned long s_gdb_count; /* Number of group descriptor blocks */
@@ -1094,6 +1123,8 @@ struct ext4_sb_info {
1094 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */ 1123 ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
1095 unsigned long s_overhead_last; /* Last calculated overhead */ 1124 unsigned long s_overhead_last; /* Last calculated overhead */
1096 unsigned long s_blocks_last; /* Last seen block count */ 1125 unsigned long s_blocks_last; /* Last seen block count */
1126 unsigned int s_cluster_ratio; /* Number of blocks per cluster */
1127 unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
1097 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ 1128 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
1098 struct buffer_head * s_sbh; /* Buffer containing the super block */ 1129 struct buffer_head * s_sbh; /* Buffer containing the super block */
1099 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ 1130 struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
@@ -1117,10 +1148,10 @@ struct ext4_sb_info {
1117 u32 s_hash_seed[4]; 1148 u32 s_hash_seed[4];
1118 int s_def_hash_version; 1149 int s_def_hash_version;
1119 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ 1150 int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
1120 struct percpu_counter s_freeblocks_counter; 1151 struct percpu_counter s_freeclusters_counter;
1121 struct percpu_counter s_freeinodes_counter; 1152 struct percpu_counter s_freeinodes_counter;
1122 struct percpu_counter s_dirs_counter; 1153 struct percpu_counter s_dirs_counter;
1123 struct percpu_counter s_dirtyblocks_counter; 1154 struct percpu_counter s_dirtyclusters_counter;
1124 struct blockgroup_lock *s_blockgroup_lock; 1155 struct blockgroup_lock *s_blockgroup_lock;
1125 struct proc_dir_entry *s_proc; 1156 struct proc_dir_entry *s_proc;
1126 struct kobject s_kobj; 1157 struct kobject s_kobj;
@@ -1136,10 +1167,6 @@ struct ext4_sb_info {
1136 u32 s_max_batch_time; 1167 u32 s_max_batch_time;
1137 u32 s_min_batch_time; 1168 u32 s_min_batch_time;
1138 struct block_device *journal_bdev; 1169 struct block_device *journal_bdev;
1139#ifdef CONFIG_JBD2_DEBUG
1140 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
1141 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
1142#endif
1143#ifdef CONFIG_QUOTA 1170#ifdef CONFIG_QUOTA
1144 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 1171 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
1145 int s_jquota_fmt; /* Format of quota to use */ 1172 int s_jquota_fmt; /* Format of quota to use */
@@ -1248,6 +1275,15 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
1248 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); 1275 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
1249} 1276}
1250 1277
1278static inline void ext4_set_io_unwritten_flag(struct inode *inode,
1279 struct ext4_io_end *io_end)
1280{
1281 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1282 io_end->flag |= EXT4_IO_END_UNWRITTEN;
1283 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
1284 }
1285}
1286
1251/* 1287/*
1252 * Inode dynamic state flags 1288 * Inode dynamic state flags
1253 */ 1289 */
@@ -1360,6 +1396,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1360#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 1396#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
1361#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1397#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1362#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1398#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1399#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1363 1400
1364#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1401#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1365#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1402#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1402,7 +1439,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1402 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ 1439 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
1403 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ 1440 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
1404 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\ 1441 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
1405 EXT4_FEATURE_RO_COMPAT_HUGE_FILE) 1442 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
1443 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
1406 1444
1407/* 1445/*
1408 * Default values for user and/or group using reserved blocks 1446 * Default values for user and/or group using reserved blocks
@@ -1735,9 +1773,9 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1735 unsigned int flags, 1773 unsigned int flags,
1736 unsigned long *count, 1774 unsigned long *count,
1737 int *errp); 1775 int *errp);
1738extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 1776extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
1739 s64 nblocks, unsigned int flags); 1777 s64 nclusters, unsigned int flags);
1740extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1778extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
1741extern void ext4_check_blocks_bitmap(struct super_block *); 1779extern void ext4_check_blocks_bitmap(struct super_block *);
1742extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1780extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1743 ext4_group_t block_group, 1781 ext4_group_t block_group,
@@ -1745,12 +1783,18 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
1745extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); 1783extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
1746struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, 1784struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
1747 ext4_group_t block_group); 1785 ext4_group_t block_group);
1748extern unsigned ext4_init_block_bitmap(struct super_block *sb, 1786extern void ext4_init_block_bitmap(struct super_block *sb,
1749 struct buffer_head *bh, 1787 struct buffer_head *bh,
1750 ext4_group_t group, 1788 ext4_group_t group,
1751 struct ext4_group_desc *desc); 1789 struct ext4_group_desc *desc);
1752#define ext4_free_blocks_after_init(sb, group, desc) \ 1790extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1753 ext4_init_block_bitmap(sb, NULL, group, desc) 1791 ext4_group_t block_group,
1792 struct ext4_group_desc *gdp);
1793extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
1794 ext4_group_t block_group);
1795extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
1796 ext4_group_t block_group,
1797 struct ext4_group_desc *gdp);
1754ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 1798ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
1755 1799
1756/* dir.c */ 1800/* dir.c */
@@ -1776,7 +1820,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1776 1820
1777/* ialloc.c */ 1821/* ialloc.c */
1778extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, 1822extern struct inode *ext4_new_inode(handle_t *, struct inode *, int,
1779 const struct qstr *qstr, __u32 goal); 1823 const struct qstr *qstr, __u32 goal,
1824 uid_t *owner);
1780extern void ext4_free_inode(handle_t *, struct inode *); 1825extern void ext4_free_inode(handle_t *, struct inode *);
1781extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); 1826extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
1782extern unsigned long ext4_count_free_inodes(struct super_block *); 1827extern unsigned long ext4_count_free_inodes(struct super_block *);
@@ -1839,6 +1884,12 @@ extern int ext4_block_truncate_page(handle_t *handle,
1839 struct address_space *mapping, loff_t from); 1884 struct address_space *mapping, loff_t from);
1840extern int ext4_block_zero_page_range(handle_t *handle, 1885extern int ext4_block_zero_page_range(handle_t *handle,
1841 struct address_space *mapping, loff_t from, loff_t length); 1886 struct address_space *mapping, loff_t from, loff_t length);
1887extern int ext4_discard_partial_page_buffers(handle_t *handle,
1888 struct address_space *mapping, loff_t from,
1889 loff_t length, int flags);
1890extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
1891 struct inode *inode, struct page *page, loff_t from,
1892 loff_t length, int flags);
1842extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1893extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1843extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1894extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1844extern void ext4_da_update_reserve_space(struct inode *inode, 1895extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1878,40 +1929,40 @@ extern int ext4_group_extend(struct super_block *sb,
1878extern void *ext4_kvmalloc(size_t size, gfp_t flags); 1929extern void *ext4_kvmalloc(size_t size, gfp_t flags);
1879extern void *ext4_kvzalloc(size_t size, gfp_t flags); 1930extern void *ext4_kvzalloc(size_t size, gfp_t flags);
1880extern void ext4_kvfree(void *ptr); 1931extern void ext4_kvfree(void *ptr);
1881extern void __ext4_error(struct super_block *, const char *, unsigned int, 1932extern __printf(4, 5)
1882 const char *, ...) 1933void __ext4_error(struct super_block *, const char *, unsigned int,
1883 __attribute__ ((format (printf, 4, 5))); 1934 const char *, ...);
1884#define ext4_error(sb, message...) __ext4_error(sb, __func__, \ 1935#define ext4_error(sb, message...) __ext4_error(sb, __func__, \
1885 __LINE__, ## message) 1936 __LINE__, ## message)
1886extern void ext4_error_inode(struct inode *, const char *, unsigned int, 1937extern __printf(5, 6)
1887 ext4_fsblk_t, const char *, ...) 1938void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t,
1888 __attribute__ ((format (printf, 5, 6))); 1939 const char *, ...);
1889extern void ext4_error_file(struct file *, const char *, unsigned int, 1940extern __printf(5, 6)
1890 ext4_fsblk_t, const char *, ...) 1941void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t,
1891 __attribute__ ((format (printf, 5, 6))); 1942 const char *, ...);
1892extern void __ext4_std_error(struct super_block *, const char *, 1943extern void __ext4_std_error(struct super_block *, const char *,
1893 unsigned int, int); 1944 unsigned int, int);
1894extern void __ext4_abort(struct super_block *, const char *, unsigned int, 1945extern __printf(4, 5)
1895 const char *, ...) 1946void __ext4_abort(struct super_block *, const char *, unsigned int,
1896 __attribute__ ((format (printf, 4, 5))); 1947 const char *, ...);
1897#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ 1948#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
1898 __LINE__, ## message) 1949 __LINE__, ## message)
1899extern void __ext4_warning(struct super_block *, const char *, unsigned int, 1950extern __printf(4, 5)
1900 const char *, ...) 1951void __ext4_warning(struct super_block *, const char *, unsigned int,
1901 __attribute__ ((format (printf, 4, 5))); 1952 const char *, ...);
1902#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \ 1953#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
1903 __LINE__, ## message) 1954 __LINE__, ## message)
1904extern void ext4_msg(struct super_block *, const char *, const char *, ...) 1955extern __printf(3, 4)
1905 __attribute__ ((format (printf, 3, 4))); 1956void ext4_msg(struct super_block *, const char *, const char *, ...);
1906extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, 1957extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
1907 const char *, unsigned int, const char *); 1958 const char *, unsigned int, const char *);
1908#define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ 1959#define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \
1909 __LINE__, msg) 1960 __LINE__, msg)
1910extern void __ext4_grp_locked_error(const char *, unsigned int, \ 1961extern __printf(7, 8)
1911 struct super_block *, ext4_group_t, \ 1962void __ext4_grp_locked_error(const char *, unsigned int,
1912 unsigned long, ext4_fsblk_t, \ 1963 struct super_block *, ext4_group_t,
1913 const char *, ...) 1964 unsigned long, ext4_fsblk_t,
1914 __attribute__ ((format (printf, 7, 8))); 1965 const char *, ...);
1915#define ext4_grp_locked_error(sb, grp, message...) \ 1966#define ext4_grp_locked_error(sb, grp, message...) \
1916 __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) 1967 __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
1917extern void ext4_update_dynamic_rev(struct super_block *sb); 1968extern void ext4_update_dynamic_rev(struct super_block *sb);
@@ -1927,8 +1978,8 @@ extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1927 struct ext4_group_desc *bg); 1978 struct ext4_group_desc *bg);
1928extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, 1979extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1929 struct ext4_group_desc *bg); 1980 struct ext4_group_desc *bg);
1930extern __u32 ext4_free_blks_count(struct super_block *sb, 1981extern __u32 ext4_free_group_clusters(struct super_block *sb,
1931 struct ext4_group_desc *bg); 1982 struct ext4_group_desc *bg);
1932extern __u32 ext4_free_inodes_count(struct super_block *sb, 1983extern __u32 ext4_free_inodes_count(struct super_block *sb,
1933 struct ext4_group_desc *bg); 1984 struct ext4_group_desc *bg);
1934extern __u32 ext4_used_dirs_count(struct super_block *sb, 1985extern __u32 ext4_used_dirs_count(struct super_block *sb,
@@ -1941,8 +1992,9 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
1941 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1992 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1942extern void ext4_inode_table_set(struct super_block *sb, 1993extern void ext4_inode_table_set(struct super_block *sb,
1943 struct ext4_group_desc *bg, ext4_fsblk_t blk); 1994 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1944extern void ext4_free_blks_set(struct super_block *sb, 1995extern void ext4_free_group_clusters_set(struct super_block *sb,
1945 struct ext4_group_desc *bg, __u32 count); 1996 struct ext4_group_desc *bg,
1997 __u32 count);
1946extern void ext4_free_inodes_set(struct super_block *sb, 1998extern void ext4_free_inodes_set(struct super_block *sb,
1947 struct ext4_group_desc *bg, __u32 count); 1999 struct ext4_group_desc *bg, __u32 count);
1948extern void ext4_used_dirs_set(struct super_block *sb, 2000extern void ext4_used_dirs_set(struct super_block *sb,
@@ -2051,13 +2103,13 @@ do { \
2051} while (0) 2103} while (0)
2052 2104
2053#ifdef CONFIG_SMP 2105#ifdef CONFIG_SMP
2054/* Each CPU can accumulate percpu_counter_batch blocks in their local 2106/* Each CPU can accumulate percpu_counter_batch clusters in their local
2055 * counters. So we need to make sure we have free blocks more 2107 * counters. So we need to make sure we have free clusters more
2056 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times. 2108 * than percpu_counter_batch * nr_cpu_ids. Also add a window of 4 times.
2057 */ 2109 */
2058#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids)) 2110#define EXT4_FREECLUSTERS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
2059#else 2111#else
2060#define EXT4_FREEBLOCKS_WATERMARK 0 2112#define EXT4_FREECLUSTERS_WATERMARK 0
2061#endif 2113#endif
2062 2114
2063static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) 2115static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
@@ -2243,10 +2295,19 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
2243enum ext4_state_bits { 2295enum ext4_state_bits {
2244 BH_Uninit /* blocks are allocated but uninitialized on disk */ 2296 BH_Uninit /* blocks are allocated but uninitialized on disk */
2245 = BH_JBDPrivateStart, 2297 = BH_JBDPrivateStart,
2298 BH_AllocFromCluster, /* allocated blocks were part of already
2299 * allocated cluster. Note that this flag will
2300 * never, ever appear in a buffer_head's state
2301 * flag. See EXT4_MAP_FROM_CLUSTER to see where
2302 * this is used. */
2303 BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
2304 * flag is set when ext4_map_blocks is called on a
2305 * delayed allocated block to get its real mapping. */
2246}; 2306};
2247 2307
2248BUFFER_FNS(Uninit, uninit) 2308BUFFER_FNS(Uninit, uninit)
2249TAS_BUFFER_FNS(Uninit, uninit) 2309TAS_BUFFER_FNS(Uninit, uninit)
2310BUFFER_FNS(Da_Mapped, da_mapped)
2250 2311
2251/* 2312/*
2252 * Add new method to test wether block and inode bitmaps are properly 2313 * Add new method to test wether block and inode bitmaps are properly
@@ -2282,4 +2343,6 @@ extern void ext4_resize_end(struct super_block *sb);
2282 2343
2283#endif /* __KERNEL__ */ 2344#endif /* __KERNEL__ */
2284 2345
2346#include "ext4_extents.h"
2347
2285#endif /* _EXT4_H */ 2348#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 095c36f3b612..a52db3a69a30 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
290 struct ext4_ext_path *); 290 struct ext4_ext_path *);
291extern void ext4_ext_drop_refs(struct ext4_ext_path *); 291extern void ext4_ext_drop_refs(struct ext4_ext_path *);
292extern int ext4_ext_check_inode(struct inode *inode); 292extern int ext4_ext_check_inode(struct inode *inode);
293extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
294 int search_hint_reverse);
293#endif /* _EXT4_EXTENTS */ 295#endif /* _EXT4_EXTENTS */
294 296
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index f5240aa15601..aca179017582 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -109,9 +109,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
109 109
110 if (ext4_handle_valid(handle)) { 110 if (ext4_handle_valid(handle)) {
111 err = jbd2_journal_dirty_metadata(handle, bh); 111 err = jbd2_journal_dirty_metadata(handle, bh);
112 if (err) 112 if (err) {
113 ext4_journal_abort_handle(where, line, __func__, 113 /* Errors can only happen if there is a bug */
114 bh, handle, err); 114 handle->h_err = err;
115 __ext4_journal_stop(where, line, handle);
116 }
115 } else { 117 } else {
116 if (inode) 118 if (inode)
117 mark_buffer_dirty_inode(bh, inode); 119 mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57cf568a98ab..61fa9e1614af 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -42,7 +42,6 @@
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <linux/fiemap.h> 43#include <linux/fiemap.h>
44#include "ext4_jbd2.h" 44#include "ext4_jbd2.h"
45#include "ext4_extents.h"
46 45
47#include <trace/events/ext4.h> 46#include <trace/events/ext4.h>
48 47
@@ -96,13 +95,17 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
96 * - ENOMEM 95 * - ENOMEM
97 * - EIO 96 * - EIO
98 */ 97 */
99static int ext4_ext_dirty(handle_t *handle, struct inode *inode, 98#define ext4_ext_dirty(handle, inode, path) \
100 struct ext4_ext_path *path) 99 __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
100static int __ext4_ext_dirty(const char *where, unsigned int line,
101 handle_t *handle, struct inode *inode,
102 struct ext4_ext_path *path)
101{ 103{
102 int err; 104 int err;
103 if (path->p_bh) { 105 if (path->p_bh) {
104 /* path points to block */ 106 /* path points to block */
105 err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); 107 err = __ext4_handle_dirty_metadata(where, line, handle,
108 inode, path->p_bh);
106 } else { 109 } else {
107 /* path points to leaf/index in inode body */ 110 /* path points to leaf/index in inode body */
108 err = ext4_mark_inode_dirty(handle, inode); 111 err = ext4_mark_inode_dirty(handle, inode);
@@ -114,11 +117,9 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
114 struct ext4_ext_path *path, 117 struct ext4_ext_path *path,
115 ext4_lblk_t block) 118 ext4_lblk_t block)
116{ 119{
117 int depth;
118
119 if (path) { 120 if (path) {
121 int depth = path->p_depth;
120 struct ext4_extent *ex; 122 struct ext4_extent *ex;
121 depth = path->p_depth;
122 123
123 /* 124 /*
124 * Try to predict block placement assuming that we are 125 * Try to predict block placement assuming that we are
@@ -180,12 +181,10 @@ static inline int ext4_ext_space_block(struct inode *inode, int check)
180 181
181 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 182 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
182 / sizeof(struct ext4_extent); 183 / sizeof(struct ext4_extent);
183 if (!check) {
184#ifdef AGGRESSIVE_TEST 184#ifdef AGGRESSIVE_TEST
185 if (size > 6) 185 if (!check && size > 6)
186 size = 6; 186 size = 6;
187#endif 187#endif
188 }
189 return size; 188 return size;
190} 189}
191 190
@@ -195,12 +194,10 @@ static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
195 194
196 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 195 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
197 / sizeof(struct ext4_extent_idx); 196 / sizeof(struct ext4_extent_idx);
198 if (!check) {
199#ifdef AGGRESSIVE_TEST 197#ifdef AGGRESSIVE_TEST
200 if (size > 5) 198 if (!check && size > 5)
201 size = 5; 199 size = 5;
202#endif 200#endif
203 }
204 return size; 201 return size;
205} 202}
206 203
@@ -211,12 +208,10 @@ static inline int ext4_ext_space_root(struct inode *inode, int check)
211 size = sizeof(EXT4_I(inode)->i_data); 208 size = sizeof(EXT4_I(inode)->i_data);
212 size -= sizeof(struct ext4_extent_header); 209 size -= sizeof(struct ext4_extent_header);
213 size /= sizeof(struct ext4_extent); 210 size /= sizeof(struct ext4_extent);
214 if (!check) {
215#ifdef AGGRESSIVE_TEST 211#ifdef AGGRESSIVE_TEST
216 if (size > 3) 212 if (!check && size > 3)
217 size = 3; 213 size = 3;
218#endif 214#endif
219 }
220 return size; 215 return size;
221} 216}
222 217
@@ -227,12 +222,10 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
227 size = sizeof(EXT4_I(inode)->i_data); 222 size = sizeof(EXT4_I(inode)->i_data);
228 size -= sizeof(struct ext4_extent_header); 223 size -= sizeof(struct ext4_extent_header);
229 size /= sizeof(struct ext4_extent_idx); 224 size /= sizeof(struct ext4_extent_idx);
230 if (!check) {
231#ifdef AGGRESSIVE_TEST 225#ifdef AGGRESSIVE_TEST
232 if (size > 4) 226 if (!check && size > 4)
233 size = 4; 227 size = 4;
234#endif 228#endif
235 }
236 return size; 229 return size;
237} 230}
238 231
@@ -244,7 +237,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
244int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) 237int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
245{ 238{
246 struct ext4_inode_info *ei = EXT4_I(inode); 239 struct ext4_inode_info *ei = EXT4_I(inode);
247 int idxs, num = 0; 240 int idxs;
248 241
249 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 242 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
250 / sizeof(struct ext4_extent_idx)); 243 / sizeof(struct ext4_extent_idx));
@@ -259,6 +252,8 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
259 */ 252 */
260 if (ei->i_da_metadata_calc_len && 253 if (ei->i_da_metadata_calc_len &&
261 ei->i_da_metadata_calc_last_lblock+1 == lblock) { 254 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
255 int num = 0;
256
262 if ((ei->i_da_metadata_calc_len % idxs) == 0) 257 if ((ei->i_da_metadata_calc_len % idxs) == 0)
263 num++; 258 num++;
264 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) 259 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
@@ -321,8 +316,6 @@ static int ext4_valid_extent_entries(struct inode *inode,
321 struct ext4_extent_header *eh, 316 struct ext4_extent_header *eh,
322 int depth) 317 int depth)
323{ 318{
324 struct ext4_extent *ext;
325 struct ext4_extent_idx *ext_idx;
326 unsigned short entries; 319 unsigned short entries;
327 if (eh->eh_entries == 0) 320 if (eh->eh_entries == 0)
328 return 1; 321 return 1;
@@ -331,7 +324,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
331 324
332 if (depth == 0) { 325 if (depth == 0) {
333 /* leaf entries */ 326 /* leaf entries */
334 ext = EXT_FIRST_EXTENT(eh); 327 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
335 while (entries) { 328 while (entries) {
336 if (!ext4_valid_extent(inode, ext)) 329 if (!ext4_valid_extent(inode, ext))
337 return 0; 330 return 0;
@@ -339,7 +332,7 @@ static int ext4_valid_extent_entries(struct inode *inode,
339 entries--; 332 entries--;
340 } 333 }
341 } else { 334 } else {
342 ext_idx = EXT_FIRST_INDEX(eh); 335 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
343 while (entries) { 336 while (entries) {
344 if (!ext4_valid_extent_idx(inode, ext_idx)) 337 if (!ext4_valid_extent_idx(inode, ext_idx))
345 return 0; 338 return 0;
@@ -751,31 +744,30 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
751 return -EIO; 744 return -EIO;
752 } 745 }
753 746
754 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
755 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 747 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
756 /* insert after */ 748 /* insert after */
757 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 749 ext_debug("insert new index %d after: %llu\n", logical, ptr);
758 len = (len - 1) * sizeof(struct ext4_extent_idx);
759 len = len < 0 ? 0 : len;
760 ext_debug("insert new index %d after: %llu. "
761 "move %d from 0x%p to 0x%p\n",
762 logical, ptr, len,
763 (curp->p_idx + 1), (curp->p_idx + 2));
764 memmove(curp->p_idx + 2, curp->p_idx + 1, len);
765 }
766 ix = curp->p_idx + 1; 750 ix = curp->p_idx + 1;
767 } else { 751 } else {
768 /* insert before */ 752 /* insert before */
769 len = len * sizeof(struct ext4_extent_idx); 753 ext_debug("insert new index %d before: %llu\n", logical, ptr);
770 len = len < 0 ? 0 : len;
771 ext_debug("insert new index %d before: %llu. "
772 "move %d from 0x%p to 0x%p\n",
773 logical, ptr, len,
774 curp->p_idx, (curp->p_idx + 1));
775 memmove(curp->p_idx + 1, curp->p_idx, len);
776 ix = curp->p_idx; 754 ix = curp->p_idx;
777 } 755 }
778 756
757 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
758 BUG_ON(len < 0);
759 if (len > 0) {
760 ext_debug("insert new index %d: "
761 "move %d indices from 0x%p to 0x%p\n",
762 logical, len, ix, ix + 1);
763 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
764 }
765
766 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
767 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
768 return -EIO;
769 }
770
779 ix->ei_block = cpu_to_le32(logical); 771 ix->ei_block = cpu_to_le32(logical);
780 ext4_idx_store_pblock(ix, ptr); 772 ext4_idx_store_pblock(ix, ptr);
781 le16_add_cpu(&curp->p_hdr->eh_entries, 1); 773 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
@@ -1042,16 +1034,14 @@ cleanup:
1042 */ 1034 */
1043static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1035static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1044 unsigned int flags, 1036 unsigned int flags,
1045 struct ext4_ext_path *path,
1046 struct ext4_extent *newext) 1037 struct ext4_extent *newext)
1047{ 1038{
1048 struct ext4_ext_path *curp = path;
1049 struct ext4_extent_header *neh; 1039 struct ext4_extent_header *neh;
1050 struct buffer_head *bh; 1040 struct buffer_head *bh;
1051 ext4_fsblk_t newblock; 1041 ext4_fsblk_t newblock;
1052 int err = 0; 1042 int err = 0;
1053 1043
1054 newblock = ext4_ext_new_meta_block(handle, inode, path, 1044 newblock = ext4_ext_new_meta_block(handle, inode, NULL,
1055 newext, &err, flags); 1045 newext, &err, flags);
1056 if (newblock == 0) 1046 if (newblock == 0)
1057 return err; 1047 return err;
@@ -1071,7 +1061,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1071 } 1061 }
1072 1062
1073 /* move top-level index/leaf into new block */ 1063 /* move top-level index/leaf into new block */
1074 memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); 1064 memmove(bh->b_data, EXT4_I(inode)->i_data,
1065 sizeof(EXT4_I(inode)->i_data));
1075 1066
1076 /* set size of new block */ 1067 /* set size of new block */
1077 neh = ext_block_hdr(bh); 1068 neh = ext_block_hdr(bh);
@@ -1089,32 +1080,23 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1089 if (err) 1080 if (err)
1090 goto out; 1081 goto out;
1091 1082
1092 /* create index in new top-level index: num,max,pointer */ 1083 /* Update top-level index: num,max,pointer */
1093 err = ext4_ext_get_access(handle, inode, curp);
1094 if (err)
1095 goto out;
1096
1097 curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
1098 curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1099 curp->p_hdr->eh_entries = cpu_to_le16(1);
1100 curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
1101
1102 if (path[0].p_hdr->eh_depth)
1103 curp->p_idx->ei_block =
1104 EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
1105 else
1106 curp->p_idx->ei_block =
1107 EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
1108 ext4_idx_store_pblock(curp->p_idx, newblock);
1109
1110 neh = ext_inode_hdr(inode); 1084 neh = ext_inode_hdr(inode);
1085 neh->eh_entries = cpu_to_le16(1);
1086 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1087 if (neh->eh_depth == 0) {
1088 /* Root extent block becomes index block */
1089 neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1090 EXT_FIRST_INDEX(neh)->ei_block =
1091 EXT_FIRST_EXTENT(neh)->ee_block;
1092 }
1111 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 1093 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
1112 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 1094 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
1113 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1114 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1115 1097
1116 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 1098 neh->eh_depth = cpu_to_le16(neh->eh_depth + 1);
1117 err = ext4_ext_dirty(handle, inode, curp); 1099 ext4_mark_inode_dirty(handle, inode);
1118out: 1100out:
1119 brelse(bh); 1101 brelse(bh);
1120 1102
@@ -1162,8 +1144,7 @@ repeat:
1162 err = PTR_ERR(path); 1144 err = PTR_ERR(path);
1163 } else { 1145 } else {
1164 /* tree is full, time to grow in depth */ 1146 /* tree is full, time to grow in depth */
1165 err = ext4_ext_grow_indepth(handle, inode, flags, 1147 err = ext4_ext_grow_indepth(handle, inode, flags, newext);
1166 path, newext);
1167 if (err) 1148 if (err)
1168 goto out; 1149 goto out;
1169 1150
@@ -1235,9 +1216,9 @@ static int ext4_ext_search_left(struct inode *inode,
1235 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { 1216 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1236 EXT4_ERROR_INODE(inode, 1217 EXT4_ERROR_INODE(inode,
1237 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", 1218 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
1238 ix != NULL ? ix->ei_block : 0, 1219 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
1239 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? 1220 EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
1240 EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, 1221 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
1241 depth); 1222 depth);
1242 return -EIO; 1223 return -EIO;
1243 } 1224 }
@@ -1260,13 +1241,14 @@ static int ext4_ext_search_left(struct inode *inode,
1260/* 1241/*
1261 * search the closest allocated block to the right for *logical 1242 * search the closest allocated block to the right for *logical
1262 * and returns it at @logical + it's physical address at @phys 1243 * and returns it at @logical + it's physical address at @phys
1263 * if *logical is the smallest allocated block, the function 1244 * if *logical is the largest allocated block, the function
1264 * returns 0 at @phys 1245 * returns 0 at @phys
1265 * return value contains 0 (success) or error code 1246 * return value contains 0 (success) or error code
1266 */ 1247 */
1267static int ext4_ext_search_right(struct inode *inode, 1248static int ext4_ext_search_right(struct inode *inode,
1268 struct ext4_ext_path *path, 1249 struct ext4_ext_path *path,
1269 ext4_lblk_t *logical, ext4_fsblk_t *phys) 1250 ext4_lblk_t *logical, ext4_fsblk_t *phys,
1251 struct ext4_extent **ret_ex)
1270{ 1252{
1271 struct buffer_head *bh = NULL; 1253 struct buffer_head *bh = NULL;
1272 struct ext4_extent_header *eh; 1254 struct ext4_extent_header *eh;
@@ -1308,9 +1290,7 @@ static int ext4_ext_search_right(struct inode *inode,
1308 return -EIO; 1290 return -EIO;
1309 } 1291 }
1310 } 1292 }
1311 *logical = le32_to_cpu(ex->ee_block); 1293 goto found_extent;
1312 *phys = ext4_ext_pblock(ex);
1313 return 0;
1314 } 1294 }
1315 1295
1316 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { 1296 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
@@ -1323,9 +1303,7 @@ static int ext4_ext_search_right(struct inode *inode,
1323 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { 1303 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1324 /* next allocated block in this leaf */ 1304 /* next allocated block in this leaf */
1325 ex++; 1305 ex++;
1326 *logical = le32_to_cpu(ex->ee_block); 1306 goto found_extent;
1327 *phys = ext4_ext_pblock(ex);
1328 return 0;
1329 } 1307 }
1330 1308
1331 /* go up and search for index to the right */ 1309 /* go up and search for index to the right */
@@ -1368,9 +1346,12 @@ got_index:
1368 return -EIO; 1346 return -EIO;
1369 } 1347 }
1370 ex = EXT_FIRST_EXTENT(eh); 1348 ex = EXT_FIRST_EXTENT(eh);
1349found_extent:
1371 *logical = le32_to_cpu(ex->ee_block); 1350 *logical = le32_to_cpu(ex->ee_block);
1372 *phys = ext4_ext_pblock(ex); 1351 *phys = ext4_ext_pblock(ex);
1373 put_bh(bh); 1352 *ret_ex = ex;
1353 if (bh)
1354 put_bh(bh);
1374 return 0; 1355 return 0;
1375} 1356}
1376 1357
@@ -1395,7 +1376,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1395 while (depth >= 0) { 1376 while (depth >= 0) {
1396 if (depth == path->p_depth) { 1377 if (depth == path->p_depth) {
1397 /* leaf */ 1378 /* leaf */
1398 if (path[depth].p_ext != 1379 if (path[depth].p_ext &&
1380 path[depth].p_ext !=
1399 EXT_LAST_EXTENT(path[depth].p_hdr)) 1381 EXT_LAST_EXTENT(path[depth].p_hdr))
1400 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1382 return le32_to_cpu(path[depth].p_ext[1].ee_block);
1401 } else { 1383 } else {
@@ -1623,7 +1605,8 @@ static int ext4_ext_try_to_merge(struct inode *inode,
1623 * such that there will be no overlap, and then returns 1. 1605 * such that there will be no overlap, and then returns 1.
1624 * If there is no overlap found, it returns 0. 1606 * If there is no overlap found, it returns 0.
1625 */ 1607 */
1626static unsigned int ext4_ext_check_overlap(struct inode *inode, 1608static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1609 struct inode *inode,
1627 struct ext4_extent *newext, 1610 struct ext4_extent *newext,
1628 struct ext4_ext_path *path) 1611 struct ext4_ext_path *path)
1629{ 1612{
@@ -1637,6 +1620,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1637 if (!path[depth].p_ext) 1620 if (!path[depth].p_ext)
1638 goto out; 1621 goto out;
1639 b2 = le32_to_cpu(path[depth].p_ext->ee_block); 1622 b2 = le32_to_cpu(path[depth].p_ext->ee_block);
1623 b2 &= ~(sbi->s_cluster_ratio - 1);
1640 1624
1641 /* 1625 /*
1642 * get the next allocated block if the extent in the path 1626 * get the next allocated block if the extent in the path
@@ -1646,6 +1630,7 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
1646 b2 = ext4_ext_next_allocated_block(path); 1630 b2 = ext4_ext_next_allocated_block(path);
1647 if (b2 == EXT_MAX_BLOCKS) 1631 if (b2 == EXT_MAX_BLOCKS)
1648 goto out; 1632 goto out;
1633 b2 &= ~(sbi->s_cluster_ratio - 1);
1649 } 1634 }
1650 1635
1651 /* check for wrap through zero on extent logical start block*/ 1636 /* check for wrap through zero on extent logical start block*/
@@ -1697,7 +1682,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1697 /* try to insert block into found extent and return */ 1682 /* try to insert block into found extent and return */
1698 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) 1683 if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
1699 && ext4_can_extents_be_merged(inode, ex, newext)) { 1684 && ext4_can_extents_be_merged(inode, ex, newext)) {
1700 ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", 1685 ext_debug("append [%d]%d block to %u:[%d]%d (from %llu)\n",
1701 ext4_ext_is_uninitialized(newext), 1686 ext4_ext_is_uninitialized(newext),
1702 ext4_ext_get_actual_len(newext), 1687 ext4_ext_get_actual_len(newext),
1703 le32_to_cpu(ex->ee_block), 1688 le32_to_cpu(ex->ee_block),
@@ -1735,7 +1720,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1735 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) 1720 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
1736 next = ext4_ext_next_leaf_block(path); 1721 next = ext4_ext_next_leaf_block(path);
1737 if (next != EXT_MAX_BLOCKS) { 1722 if (next != EXT_MAX_BLOCKS) {
1738 ext_debug("next leaf block - %d\n", next); 1723 ext_debug("next leaf block - %u\n", next);
1739 BUG_ON(npath != NULL); 1724 BUG_ON(npath != NULL);
1740 npath = ext4_ext_find_extent(inode, next, NULL); 1725 npath = ext4_ext_find_extent(inode, next, NULL);
1741 if (IS_ERR(npath)) 1726 if (IS_ERR(npath))
@@ -1773,46 +1758,51 @@ has_space:
1773 1758
1774 if (!nearex) { 1759 if (!nearex) {
1775 /* there is no extent in this leaf, create first one */ 1760 /* there is no extent in this leaf, create first one */
1776 ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", 1761 ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
1777 le32_to_cpu(newext->ee_block), 1762 le32_to_cpu(newext->ee_block),
1778 ext4_ext_pblock(newext), 1763 ext4_ext_pblock(newext),
1779 ext4_ext_is_uninitialized(newext), 1764 ext4_ext_is_uninitialized(newext),
1780 ext4_ext_get_actual_len(newext)); 1765 ext4_ext_get_actual_len(newext));
1781 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1766 nearex = EXT_FIRST_EXTENT(eh);
1782 } else if (le32_to_cpu(newext->ee_block) 1767 } else {
1768 if (le32_to_cpu(newext->ee_block)
1783 > le32_to_cpu(nearex->ee_block)) { 1769 > le32_to_cpu(nearex->ee_block)) {
1784/* BUG_ON(newext->ee_block == nearex->ee_block); */ 1770 /* Insert after */
1785 if (nearex != EXT_LAST_EXTENT(eh)) { 1771 ext_debug("insert %u:%llu:[%d]%d before: "
1786 len = EXT_MAX_EXTENT(eh) - nearex; 1772 "nearest %p\n",
1787 len = (len - 1) * sizeof(struct ext4_extent);
1788 len = len < 0 ? 0 : len;
1789 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
1790 "move %d from 0x%p to 0x%p\n",
1791 le32_to_cpu(newext->ee_block), 1773 le32_to_cpu(newext->ee_block),
1792 ext4_ext_pblock(newext), 1774 ext4_ext_pblock(newext),
1793 ext4_ext_is_uninitialized(newext), 1775 ext4_ext_is_uninitialized(newext),
1794 ext4_ext_get_actual_len(newext), 1776 ext4_ext_get_actual_len(newext),
1795 nearex, len, nearex + 1, nearex + 2); 1777 nearex);
1796 memmove(nearex + 2, nearex + 1, len); 1778 nearex++;
1779 } else {
1780 /* Insert before */
1781 BUG_ON(newext->ee_block == nearex->ee_block);
1782 ext_debug("insert %u:%llu:[%d]%d after: "
1783 "nearest %p\n",
1784 le32_to_cpu(newext->ee_block),
1785 ext4_ext_pblock(newext),
1786 ext4_ext_is_uninitialized(newext),
1787 ext4_ext_get_actual_len(newext),
1788 nearex);
1789 }
1790 len = EXT_LAST_EXTENT(eh) - nearex + 1;
1791 if (len > 0) {
1792 ext_debug("insert %u:%llu:[%d]%d: "
1793 "move %d extents from 0x%p to 0x%p\n",
1794 le32_to_cpu(newext->ee_block),
1795 ext4_ext_pblock(newext),
1796 ext4_ext_is_uninitialized(newext),
1797 ext4_ext_get_actual_len(newext),
1798 len, nearex, nearex + 1);
1799 memmove(nearex + 1, nearex,
1800 len * sizeof(struct ext4_extent));
1797 } 1801 }
1798 path[depth].p_ext = nearex + 1;
1799 } else {
1800 BUG_ON(newext->ee_block == nearex->ee_block);
1801 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1802 len = len < 0 ? 0 : len;
1803 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1804 "move %d from 0x%p to 0x%p\n",
1805 le32_to_cpu(newext->ee_block),
1806 ext4_ext_pblock(newext),
1807 ext4_ext_is_uninitialized(newext),
1808 ext4_ext_get_actual_len(newext),
1809 nearex, len, nearex, nearex + 1);
1810 memmove(nearex + 1, nearex, len);
1811 path[depth].p_ext = nearex;
1812 } 1802 }
1813 1803
1814 le16_add_cpu(&eh->eh_entries, 1); 1804 le16_add_cpu(&eh->eh_entries, 1);
1815 nearex = path[depth].p_ext; 1805 path[depth].p_ext = nearex;
1816 nearex->ee_block = newext->ee_block; 1806 nearex->ee_block = newext->ee_block;
1817 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1807 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1818 nearex->ee_len = newext->ee_len; 1808 nearex->ee_len = newext->ee_len;
@@ -1962,6 +1952,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1962 struct ext4_ext_cache *cex; 1952 struct ext4_ext_cache *cex;
1963 BUG_ON(len == 0); 1953 BUG_ON(len == 0);
1964 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1954 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1955 trace_ext4_ext_put_in_cache(inode, block, len, start);
1965 cex = &EXT4_I(inode)->i_cached_extent; 1956 cex = &EXT4_I(inode)->i_cached_extent;
1966 cex->ec_block = block; 1957 cex->ec_block = block;
1967 cex->ec_len = len; 1958 cex->ec_len = len;
@@ -2063,6 +2054,7 @@ errout:
2063 sbi->extent_cache_misses++; 2054 sbi->extent_cache_misses++;
2064 else 2055 else
2065 sbi->extent_cache_hits++; 2056 sbi->extent_cache_hits++;
2057 trace_ext4_ext_in_cache(inode, block, ret);
2066 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 2058 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
2067 return ret; 2059 return ret;
2068} 2060}
@@ -2130,6 +2122,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
2130 if (err) 2122 if (err)
2131 return err; 2123 return err;
2132 ext_debug("index is empty, remove it, free block %llu\n", leaf); 2124 ext_debug("index is empty, remove it, free block %llu\n", leaf);
2125 trace_ext4_ext_rm_idx(inode, leaf);
2126
2133 ext4_free_blocks(handle, inode, NULL, leaf, 1, 2127 ext4_free_blocks(handle, inode, NULL, leaf, 1,
2134 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); 2128 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
2135 return err; 2129 return err;
@@ -2158,7 +2152,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
2158 * need to account for leaf block credit 2152 * need to account for leaf block credit
2159 * 2153 *
2160 * bitmaps and block group descriptor blocks 2154 * bitmaps and block group descriptor blocks
2161 * and other metadat blocks still need to be 2155 * and other metadata blocks still need to be
2162 * accounted. 2156 * accounted.
2163 */ 2157 */
2164 /* 1 bitmap, 1 block group descriptor */ 2158 /* 1 bitmap, 1 block group descriptor */
@@ -2195,14 +2189,40 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
2195} 2189}
2196 2190
2197static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 2191static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2198 struct ext4_extent *ex, 2192 struct ext4_extent *ex,
2199 ext4_lblk_t from, ext4_lblk_t to) 2193 ext4_fsblk_t *partial_cluster,
2194 ext4_lblk_t from, ext4_lblk_t to)
2200{ 2195{
2196 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2201 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2197 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2198 ext4_fsblk_t pblk;
2202 int flags = EXT4_FREE_BLOCKS_FORGET; 2199 int flags = EXT4_FREE_BLOCKS_FORGET;
2203 2200
2204 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2201 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2205 flags |= EXT4_FREE_BLOCKS_METADATA; 2202 flags |= EXT4_FREE_BLOCKS_METADATA;
2203 /*
2204 * For bigalloc file systems, we never free a partial cluster
2205 * at the beginning of the extent. Instead, we make a note
2206 * that we tried freeing the cluster, and check to see if we
2207 * need to free it on a subsequent call to ext4_remove_blocks,
2208 * or at the end of the ext4_truncate() operation.
2209 */
2210 flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
2211
2212 trace_ext4_remove_blocks(inode, ex, from, to, *partial_cluster);
2213 /*
2214 * If we have a partial cluster, and it's different from the
2215 * cluster of the last block, we need to explicitly free the
2216 * partial cluster here.
2217 */
2218 pblk = ext4_ext_pblock(ex) + ee_len - 1;
2219 if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
2220 ext4_free_blocks(handle, inode, NULL,
2221 EXT4_C2B(sbi, *partial_cluster),
2222 sbi->s_cluster_ratio, flags);
2223 *partial_cluster = 0;
2224 }
2225
2206#ifdef EXTENTS_STATS 2226#ifdef EXTENTS_STATS
2207 { 2227 {
2208 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2228 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2222,12 +2242,24 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2222 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { 2242 && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
2223 /* tail removal */ 2243 /* tail removal */
2224 ext4_lblk_t num; 2244 ext4_lblk_t num;
2225 ext4_fsblk_t start;
2226 2245
2227 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2246 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2228 start = ext4_ext_pblock(ex) + ee_len - num; 2247 pblk = ext4_ext_pblock(ex) + ee_len - num;
2229 ext_debug("free last %u blocks starting %llu\n", num, start); 2248 ext_debug("free last %u blocks starting %llu\n", num, pblk);
2230 ext4_free_blocks(handle, inode, NULL, start, num, flags); 2249 ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
2250 /*
2251 * If the block range to be freed didn't start at the
2252 * beginning of a cluster, and we removed the entire
2253 * extent, save the partial cluster here, since we
2254 * might need to delete if we determine that the
2255 * truncate operation has removed all of the blocks in
2256 * the cluster.
2257 */
2258 if (pblk & (sbi->s_cluster_ratio - 1) &&
2259 (ee_len == num))
2260 *partial_cluster = EXT4_B2C(sbi, pblk);
2261 else
2262 *partial_cluster = 0;
2231 } else if (from == le32_to_cpu(ex->ee_block) 2263 } else if (from == le32_to_cpu(ex->ee_block)
2232 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2264 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2233 /* head removal */ 2265 /* head removal */
@@ -2238,7 +2270,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2238 start = ext4_ext_pblock(ex); 2270 start = ext4_ext_pblock(ex);
2239 2271
2240 ext_debug("free first %u blocks starting %llu\n", num, start); 2272 ext_debug("free first %u blocks starting %llu\n", num, start);
2241 ext4_free_blocks(handle, inode, 0, start, num, flags); 2273 ext4_free_blocks(handle, inode, NULL, start, num, flags);
2242 2274
2243 } else { 2275 } else {
2244 printk(KERN_INFO "strange request: removal(2) " 2276 printk(KERN_INFO "strange request: removal(2) "
@@ -2262,19 +2294,19 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2262 */ 2294 */
2263static int 2295static int
2264ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 2296ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2265 struct ext4_ext_path *path, ext4_lblk_t start, 2297 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
2266 ext4_lblk_t end) 2298 ext4_lblk_t start, ext4_lblk_t end)
2267{ 2299{
2300 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2268 int err = 0, correct_index = 0; 2301 int err = 0, correct_index = 0;
2269 int depth = ext_depth(inode), credits; 2302 int depth = ext_depth(inode), credits;
2270 struct ext4_extent_header *eh; 2303 struct ext4_extent_header *eh;
2271 ext4_lblk_t a, b, block; 2304 ext4_lblk_t a, b;
2272 unsigned num; 2305 unsigned num;
2273 ext4_lblk_t ex_ee_block; 2306 ext4_lblk_t ex_ee_block;
2274 unsigned short ex_ee_len; 2307 unsigned short ex_ee_len;
2275 unsigned uninitialized = 0; 2308 unsigned uninitialized = 0;
2276 struct ext4_extent *ex; 2309 struct ext4_extent *ex;
2277 struct ext4_map_blocks map;
2278 2310
2279 /* the header must be checked already in ext4_ext_remove_space() */ 2311 /* the header must be checked already in ext4_ext_remove_space() */
2280 ext_debug("truncate since %u in leaf\n", start); 2312 ext_debug("truncate since %u in leaf\n", start);
@@ -2291,6 +2323,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2291 ex_ee_block = le32_to_cpu(ex->ee_block); 2323 ex_ee_block = le32_to_cpu(ex->ee_block);
2292 ex_ee_len = ext4_ext_get_actual_len(ex); 2324 ex_ee_len = ext4_ext_get_actual_len(ex);
2293 2325
2326 trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
2327
2294 while (ex >= EXT_FIRST_EXTENT(eh) && 2328 while (ex >= EXT_FIRST_EXTENT(eh) &&
2295 ex_ee_block + ex_ee_len > start) { 2329 ex_ee_block + ex_ee_len > start) {
2296 2330
@@ -2315,86 +2349,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2315 ex_ee_block = le32_to_cpu(ex->ee_block); 2349 ex_ee_block = le32_to_cpu(ex->ee_block);
2316 ex_ee_len = ext4_ext_get_actual_len(ex); 2350 ex_ee_len = ext4_ext_get_actual_len(ex);
2317 continue; 2351 continue;
2318 } else if (a != ex_ee_block && 2352 } else if (b != ex_ee_block + ex_ee_len - 1) {
2319 b != ex_ee_block + ex_ee_len - 1) { 2353 EXT4_ERROR_INODE(inode," bad truncate %u:%u\n",
2320 /* 2354 start, end);
2321 * If this is a truncate, then this condition should 2355 err = -EIO;
2322 * never happen because at least one of the end points 2356 goto out;
2323 * needs to be on the edge of the extent.
2324 */
2325 if (end == EXT_MAX_BLOCKS - 1) {
2326 ext_debug(" bad truncate %u:%u\n",
2327 start, end);
2328 block = 0;
2329 num = 0;
2330 err = -EIO;
2331 goto out;
2332 }
2333 /*
2334 * else this is a hole punch, so the extent needs to
2335 * be split since neither edge of the hole is on the
2336 * extent edge
2337 */
2338 else{
2339 map.m_pblk = ext4_ext_pblock(ex);
2340 map.m_lblk = ex_ee_block;
2341 map.m_len = b - ex_ee_block;
2342
2343 err = ext4_split_extent(handle,
2344 inode, path, &map, 0,
2345 EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
2346 EXT4_GET_BLOCKS_PRE_IO);
2347
2348 if (err < 0)
2349 goto out;
2350
2351 ex_ee_len = ext4_ext_get_actual_len(ex);
2352
2353 b = ex_ee_block+ex_ee_len - 1 < end ?
2354 ex_ee_block+ex_ee_len - 1 : end;
2355
2356 /* Then remove tail of this extent */
2357 block = ex_ee_block;
2358 num = a - block;
2359 }
2360 } else if (a != ex_ee_block) { 2357 } else if (a != ex_ee_block) {
2361 /* remove tail of the extent */ 2358 /* remove tail of the extent */
2362 block = ex_ee_block; 2359 num = a - ex_ee_block;
2363 num = a - block;
2364 } else if (b != ex_ee_block + ex_ee_len - 1) {
2365 /* remove head of the extent */
2366 block = b;
2367 num = ex_ee_block + ex_ee_len - b;
2368
2369 /*
2370 * If this is a truncate, this condition
2371 * should never happen
2372 */
2373 if (end == EXT_MAX_BLOCKS - 1) {
2374 ext_debug(" bad truncate %u:%u\n",
2375 start, end);
2376 err = -EIO;
2377 goto out;
2378 }
2379 } else { 2360 } else {
2380 /* remove whole extent: excellent! */ 2361 /* remove whole extent: excellent! */
2381 block = ex_ee_block;
2382 num = 0; 2362 num = 0;
2383 if (a != ex_ee_block) {
2384 ext_debug(" bad truncate %u:%u\n",
2385 start, end);
2386 err = -EIO;
2387 goto out;
2388 }
2389
2390 if (b != ex_ee_block + ex_ee_len - 1) {
2391 ext_debug(" bad truncate %u:%u\n",
2392 start, end);
2393 err = -EIO;
2394 goto out;
2395 }
2396 } 2363 }
2397
2398 /* 2364 /*
2399 * 3 for leaf, sb, and inode plus 2 (bmap and group 2365 * 3 for leaf, sb, and inode plus 2 (bmap and group
2400 * descriptor) for each block group; assume two block 2366 * descriptor) for each block group; assume two block
@@ -2416,23 +2382,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2416 if (err) 2382 if (err)
2417 goto out; 2383 goto out;
2418 2384
2419 err = ext4_remove_blocks(handle, inode, ex, a, b); 2385 err = ext4_remove_blocks(handle, inode, ex, partial_cluster,
2386 a, b);
2420 if (err) 2387 if (err)
2421 goto out; 2388 goto out;
2422 2389
2423 if (num == 0) { 2390 if (num == 0)
2424 /* this extent is removed; mark slot entirely unused */ 2391 /* this extent is removed; mark slot entirely unused */
2425 ext4_ext_store_pblock(ex, 0); 2392 ext4_ext_store_pblock(ex, 0);
2426 } else if (block != ex_ee_block) {
2427 /*
2428 * If this was a head removal, then we need to update
2429 * the physical block since it is now at a different
2430 * location
2431 */
2432 ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
2433 }
2434 2393
2435 ex->ee_block = cpu_to_le32(block);
2436 ex->ee_len = cpu_to_le16(num); 2394 ex->ee_len = cpu_to_le16(num);
2437 /* 2395 /*
2438 * Do not mark uninitialized if all the blocks in the 2396 * Do not mark uninitialized if all the blocks in the
@@ -2440,11 +2398,6 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2440 */ 2398 */
2441 if (uninitialized && num) 2399 if (uninitialized && num)
2442 ext4_ext_mark_uninitialized(ex); 2400 ext4_ext_mark_uninitialized(ex);
2443
2444 err = ext4_ext_dirty(handle, inode, path + depth);
2445 if (err)
2446 goto out;
2447
2448 /* 2401 /*
2449 * If the extent was completely released, 2402 * If the extent was completely released,
2450 * we need to remove it from the leaf 2403 * we need to remove it from the leaf
@@ -2464,9 +2417,14 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2464 sizeof(struct ext4_extent)); 2417 sizeof(struct ext4_extent));
2465 } 2418 }
2466 le16_add_cpu(&eh->eh_entries, -1); 2419 le16_add_cpu(&eh->eh_entries, -1);
2467 } 2420 } else
2421 *partial_cluster = 0;
2468 2422
2469 ext_debug("new extent: %u:%u:%llu\n", block, num, 2423 err = ext4_ext_dirty(handle, inode, path + depth);
2424 if (err)
2425 goto out;
2426
2427 ext_debug("new extent: %u:%u:%llu\n", ex_ee_block, num,
2470 ext4_ext_pblock(ex)); 2428 ext4_ext_pblock(ex));
2471 ex--; 2429 ex--;
2472 ex_ee_block = le32_to_cpu(ex->ee_block); 2430 ex_ee_block = le32_to_cpu(ex->ee_block);
@@ -2476,6 +2434,25 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2476 if (correct_index && eh->eh_entries) 2434 if (correct_index && eh->eh_entries)
2477 err = ext4_ext_correct_indexes(handle, inode, path); 2435 err = ext4_ext_correct_indexes(handle, inode, path);
2478 2436
2437 /*
2438 * If there is still a entry in the leaf node, check to see if
2439 * it references the partial cluster. This is the only place
2440 * where it could; if it doesn't, we can free the cluster.
2441 */
2442 if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
2443 (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
2444 *partial_cluster)) {
2445 int flags = EXT4_FREE_BLOCKS_FORGET;
2446
2447 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2448 flags |= EXT4_FREE_BLOCKS_METADATA;
2449
2450 ext4_free_blocks(handle, inode, NULL,
2451 EXT4_C2B(sbi, *partial_cluster),
2452 sbi->s_cluster_ratio, flags);
2453 *partial_cluster = 0;
2454 }
2455
2479 /* if this leaf is free, then we should 2456 /* if this leaf is free, then we should
2480 * remove it from index block above */ 2457 * remove it from index block above */
2481 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 2458 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
@@ -2511,6 +2488,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2511 struct super_block *sb = inode->i_sb; 2488 struct super_block *sb = inode->i_sb;
2512 int depth = ext_depth(inode); 2489 int depth = ext_depth(inode);
2513 struct ext4_ext_path *path; 2490 struct ext4_ext_path *path;
2491 ext4_fsblk_t partial_cluster = 0;
2514 handle_t *handle; 2492 handle_t *handle;
2515 int i, err; 2493 int i, err;
2516 2494
@@ -2524,6 +2502,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2524again: 2502again:
2525 ext4_ext_invalidate_cache(inode); 2503 ext4_ext_invalidate_cache(inode);
2526 2504
2505 trace_ext4_ext_remove_space(inode, start, depth);
2506
2527 /* 2507 /*
2528 * We start scanning from right side, freeing all the blocks 2508 * We start scanning from right side, freeing all the blocks
2529 * after i_size and walking into the tree depth-wise. 2509 * after i_size and walking into the tree depth-wise.
@@ -2546,7 +2526,8 @@ again:
2546 if (i == depth) { 2526 if (i == depth) {
2547 /* this is leaf block */ 2527 /* this is leaf block */
2548 err = ext4_ext_rm_leaf(handle, inode, path, 2528 err = ext4_ext_rm_leaf(handle, inode, path,
2549 start, EXT_MAX_BLOCKS - 1); 2529 &partial_cluster, start,
2530 EXT_MAX_BLOCKS - 1);
2550 /* root level has p_bh == NULL, brelse() eats this */ 2531 /* root level has p_bh == NULL, brelse() eats this */
2551 brelse(path[i].p_bh); 2532 brelse(path[i].p_bh);
2552 path[i].p_bh = NULL; 2533 path[i].p_bh = NULL;
@@ -2618,6 +2599,24 @@ again:
2618 } 2599 }
2619 } 2600 }
2620 2601
2602 trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster,
2603 path->p_hdr->eh_entries);
2604
2605 /* If we still have something in the partial cluster and we have removed
2606 * even the first extent, then we should free the blocks in the partial
2607 * cluster as well. */
2608 if (partial_cluster && path->p_hdr->eh_entries == 0) {
2609 int flags = EXT4_FREE_BLOCKS_FORGET;
2610
2611 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2612 flags |= EXT4_FREE_BLOCKS_METADATA;
2613
2614 ext4_free_blocks(handle, inode, NULL,
2615 EXT4_C2B(EXT4_SB(sb), partial_cluster),
2616 EXT4_SB(sb)->s_cluster_ratio, flags);
2617 partial_cluster = 0;
2618 }
2619
2621 /* TODO: flexible tree reduction should be here */ 2620 /* TODO: flexible tree reduction should be here */
2622 if (path->p_hdr->eh_entries == 0) { 2621 if (path->p_hdr->eh_entries == 0) {
2623 /* 2622 /*
@@ -2909,17 +2908,29 @@ out:
2909 * a> There is no split required: Entire extent should be initialized 2908 * a> There is no split required: Entire extent should be initialized
2910 * b> Splits in two extents: Write is happening at either end of the extent 2909 * b> Splits in two extents: Write is happening at either end of the extent
2911 * c> Splits in three extents: Somone is writing in middle of the extent 2910 * c> Splits in three extents: Somone is writing in middle of the extent
2911 *
2912 * Pre-conditions:
2913 * - The extent pointed to by 'path' is uninitialized.
2914 * - The extent pointed to by 'path' contains a superset
2915 * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
2916 *
2917 * Post-conditions on success:
2918 * - the returned value is the number of blocks beyond map->l_lblk
2919 * that are allocated and initialized.
2920 * It is guaranteed to be >= map->m_len.
2912 */ 2921 */
2913static int ext4_ext_convert_to_initialized(handle_t *handle, 2922static int ext4_ext_convert_to_initialized(handle_t *handle,
2914 struct inode *inode, 2923 struct inode *inode,
2915 struct ext4_map_blocks *map, 2924 struct ext4_map_blocks *map,
2916 struct ext4_ext_path *path) 2925 struct ext4_ext_path *path)
2917{ 2926{
2927 struct ext4_extent_header *eh;
2918 struct ext4_map_blocks split_map; 2928 struct ext4_map_blocks split_map;
2919 struct ext4_extent zero_ex; 2929 struct ext4_extent zero_ex;
2920 struct ext4_extent *ex; 2930 struct ext4_extent *ex;
2921 ext4_lblk_t ee_block, eof_block; 2931 ext4_lblk_t ee_block, eof_block;
2922 unsigned int allocated, ee_len, depth; 2932 unsigned int ee_len, depth;
2933 int allocated;
2923 int err = 0; 2934 int err = 0;
2924 int split_flag = 0; 2935 int split_flag = 0;
2925 2936
@@ -2933,11 +2944,93 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2933 eof_block = map->m_lblk + map->m_len; 2944 eof_block = map->m_lblk + map->m_len;
2934 2945
2935 depth = ext_depth(inode); 2946 depth = ext_depth(inode);
2947 eh = path[depth].p_hdr;
2936 ex = path[depth].p_ext; 2948 ex = path[depth].p_ext;
2937 ee_block = le32_to_cpu(ex->ee_block); 2949 ee_block = le32_to_cpu(ex->ee_block);
2938 ee_len = ext4_ext_get_actual_len(ex); 2950 ee_len = ext4_ext_get_actual_len(ex);
2939 allocated = ee_len - (map->m_lblk - ee_block); 2951 allocated = ee_len - (map->m_lblk - ee_block);
2940 2952
2953 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
2954
2955 /* Pre-conditions */
2956 BUG_ON(!ext4_ext_is_uninitialized(ex));
2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
2958 BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
2959
2960 /*
2961 * Attempt to transfer newly initialized blocks from the currently
2962 * uninitialized extent to its left neighbor. This is much cheaper
2963 * than an insertion followed by a merge as those involve costly
2964 * memmove() calls. This is the common case in steady state for
2965 * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
2966 * writes.
2967 *
2968 * Limitations of the current logic:
2969 * - L1: we only deal with writes at the start of the extent.
2970 * The approach could be extended to writes at the end
2971 * of the extent but this scenario was deemed less common.
2972 * - L2: we do not deal with writes covering the whole extent.
2973 * This would require removing the extent if the transfer
2974 * is possible.
2975 * - L3: we only attempt to merge with an extent stored in the
2976 * same extent tree node.
2977 */
2978 if ((map->m_lblk == ee_block) && /*L1*/
2979 (map->m_len < ee_len) && /*L2*/
2980 (ex > EXT_FIRST_EXTENT(eh))) { /*L3*/
2981 struct ext4_extent *prev_ex;
2982 ext4_lblk_t prev_lblk;
2983 ext4_fsblk_t prev_pblk, ee_pblk;
2984 unsigned int prev_len, write_len;
2985
2986 prev_ex = ex - 1;
2987 prev_lblk = le32_to_cpu(prev_ex->ee_block);
2988 prev_len = ext4_ext_get_actual_len(prev_ex);
2989 prev_pblk = ext4_ext_pblock(prev_ex);
2990 ee_pblk = ext4_ext_pblock(ex);
2991 write_len = map->m_len;
2992
2993 /*
2994 * A transfer of blocks from 'ex' to 'prev_ex' is allowed
2995 * upon those conditions:
2996 * - C1: prev_ex is initialized,
2997 * - C2: prev_ex is logically abutting ex,
2998 * - C3: prev_ex is physically abutting ex,
2999 * - C4: prev_ex can receive the additional blocks without
3000 * overflowing the (initialized) length limit.
3001 */
3002 if ((!ext4_ext_is_uninitialized(prev_ex)) && /*C1*/
3003 ((prev_lblk + prev_len) == ee_block) && /*C2*/
3004 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
3005 (prev_len < (EXT_INIT_MAX_LEN - write_len))) { /*C4*/
3006 err = ext4_ext_get_access(handle, inode, path + depth);
3007 if (err)
3008 goto out;
3009
3010 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3011 map, ex, prev_ex);
3012
3013 /* Shift the start of ex by 'write_len' blocks */
3014 ex->ee_block = cpu_to_le32(ee_block + write_len);
3015 ext4_ext_store_pblock(ex, ee_pblk + write_len);
3016 ex->ee_len = cpu_to_le16(ee_len - write_len);
3017 ext4_ext_mark_uninitialized(ex); /* Restore the flag */
3018
3019 /* Extend prev_ex by 'write_len' blocks */
3020 prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
3021
3022 /* Mark the block containing both extents as dirty */
3023 ext4_ext_dirty(handle, inode, path + depth);
3024
3025 /* Update path to point to the right extent */
3026 path[depth].p_ext = prev_ex;
3027
3028 /* Result: number of initialized blocks past m_lblk */
3029 allocated = write_len;
3030 goto out;
3031 }
3032 }
3033
2941 WARN_ON(map->m_lblk < ee_block); 3034 WARN_ON(map->m_lblk < ee_block);
2942 /* 3035 /*
2943 * It is safe to convert extent to initialized via explicit 3036 * It is safe to convert extent to initialized via explicit
@@ -3165,6 +3258,192 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
3165 return ext4_mark_inode_dirty(handle, inode); 3258 return ext4_mark_inode_dirty(handle, inode);
3166} 3259}
3167 3260
3261/**
3262 * ext4_find_delalloc_range: find delayed allocated block in the given range.
3263 *
3264 * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
3265 * whether there are any buffers marked for delayed allocation. It returns '1'
3266 * on the first delalloc'ed buffer head found. If no buffer head in the given
3267 * range is marked for delalloc, it returns 0.
3268 * lblk_start should always be <= lblk_end.
3269 * search_hint_reverse is to indicate that searching in reverse from lblk_end to
3270 * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
3271 * block sooner). This is useful when blocks are truncated sequentially from
3272 * lblk_start towards lblk_end.
3273 */
3274static int ext4_find_delalloc_range(struct inode *inode,
3275 ext4_lblk_t lblk_start,
3276 ext4_lblk_t lblk_end,
3277 int search_hint_reverse)
3278{
3279 struct address_space *mapping = inode->i_mapping;
3280 struct buffer_head *head, *bh = NULL;
3281 struct page *page;
3282 ext4_lblk_t i, pg_lblk;
3283 pgoff_t index;
3284
3285 /* reverse search wont work if fs block size is less than page size */
3286 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3287 search_hint_reverse = 0;
3288
3289 if (search_hint_reverse)
3290 i = lblk_end;
3291 else
3292 i = lblk_start;
3293
3294 index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
3295
3296 while ((i >= lblk_start) && (i <= lblk_end)) {
3297 page = find_get_page(mapping, index);
3298 if (!page)
3299 goto nextpage;
3300
3301 if (!page_has_buffers(page))
3302 goto nextpage;
3303
3304 head = page_buffers(page);
3305 if (!head)
3306 goto nextpage;
3307
3308 bh = head;
3309 pg_lblk = index << (PAGE_CACHE_SHIFT -
3310 inode->i_blkbits);
3311 do {
3312 if (unlikely(pg_lblk < lblk_start)) {
3313 /*
3314 * This is possible when fs block size is less
3315 * than page size and our cluster starts/ends in
3316 * middle of the page. So we need to skip the
3317 * initial few blocks till we reach the 'lblk'
3318 */
3319 pg_lblk++;
3320 continue;
3321 }
3322
3323 /* Check if the buffer is delayed allocated and that it
3324 * is not yet mapped. (when da-buffers are mapped during
3325 * their writeout, their da_mapped bit is set.)
3326 */
3327 if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
3328 page_cache_release(page);
3329 trace_ext4_find_delalloc_range(inode,
3330 lblk_start, lblk_end,
3331 search_hint_reverse,
3332 1, i);
3333 return 1;
3334 }
3335 if (search_hint_reverse)
3336 i--;
3337 else
3338 i++;
3339 } while ((i >= lblk_start) && (i <= lblk_end) &&
3340 ((bh = bh->b_this_page) != head));
3341nextpage:
3342 if (page)
3343 page_cache_release(page);
3344 /*
3345 * Move to next page. 'i' will be the first lblk in the next
3346 * page.
3347 */
3348 if (search_hint_reverse)
3349 index--;
3350 else
3351 index++;
3352 i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
3353 }
3354
3355 trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3356 search_hint_reverse, 0, 0);
3357 return 0;
3358}
3359
3360int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
3361 int search_hint_reverse)
3362{
3363 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3364 ext4_lblk_t lblk_start, lblk_end;
3365 lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
3366 lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
3367
3368 return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
3369 search_hint_reverse);
3370}
3371
3372/**
3373 * Determines how many complete clusters (out of those specified by the 'map')
3374 * are under delalloc and were reserved quota for.
3375 * This function is called when we are writing out the blocks that were
3376 * originally written with their allocation delayed, but then the space was
3377 * allocated using fallocate() before the delayed allocation could be resolved.
3378 * The cases to look for are:
3379 * ('=' indicated delayed allocated blocks
3380 * '-' indicates non-delayed allocated blocks)
3381 * (a) partial clusters towards beginning and/or end outside of allocated range
3382 * are not delalloc'ed.
3383 * Ex:
3384 * |----c---=|====c====|====c====|===-c----|
3385 * |++++++ allocated ++++++|
3386 * ==> 4 complete clusters in above example
3387 *
3388 * (b) partial cluster (outside of allocated range) towards either end is
3389 * marked for delayed allocation. In this case, we will exclude that
3390 * cluster.
3391 * Ex:
3392 * |----====c========|========c========|
3393 * |++++++ allocated ++++++|
3394 * ==> 1 complete clusters in above example
3395 *
3396 * Ex:
3397 * |================c================|
3398 * |++++++ allocated ++++++|
3399 * ==> 0 complete clusters in above example
3400 *
3401 * The ext4_da_update_reserve_space will be called only if we
3402 * determine here that there were some "entire" clusters that span
3403 * this 'allocated' range.
3404 * In the non-bigalloc case, this function will just end up returning num_blks
3405 * without ever calling ext4_find_delalloc_range.
3406 */
3407static unsigned int
3408get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3409 unsigned int num_blks)
3410{
3411 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3412 ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
3413 ext4_lblk_t lblk_from, lblk_to, c_offset;
3414 unsigned int allocated_clusters = 0;
3415
3416 alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
3417 alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
3418
3419 /* max possible clusters for this allocation */
3420 allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
3421
3422 trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
3423
3424 /* Check towards left side */
3425 c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
3426 if (c_offset) {
3427 lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
3428 lblk_to = lblk_from + c_offset - 1;
3429
3430 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
3431 allocated_clusters--;
3432 }
3433
3434 /* Now check towards right. */
3435 c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
3436 if (allocated_clusters && c_offset) {
3437 lblk_from = lblk_start + num_blks;
3438 lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
3439
3440 if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
3441 allocated_clusters--;
3442 }
3443
3444 return allocated_clusters;
3445}
3446
3168static int 3447static int
3169ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3448ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3170 struct ext4_map_blocks *map, 3449 struct ext4_map_blocks *map,
@@ -3181,6 +3460,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3181 flags, allocated); 3460 flags, allocated);
3182 ext4_ext_show_leaf(inode, path); 3461 ext4_ext_show_leaf(inode, path);
3183 3462
3463 trace_ext4_ext_handle_uninitialized_extents(inode, map, allocated,
3464 newblock);
3465
3184 /* get_block() before submit the IO, split the extent */ 3466 /* get_block() before submit the IO, split the extent */
3185 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3467 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3186 ret = ext4_split_unwritten_extents(handle, inode, map, 3468 ret = ext4_split_unwritten_extents(handle, inode, map,
@@ -3190,10 +3472,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3190 * that this IO needs to conversion to written when IO is 3472 * that this IO needs to conversion to written when IO is
3191 * completed 3473 * completed
3192 */ 3474 */
3193 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3475 if (io)
3194 io->flag = EXT4_IO_END_UNWRITTEN; 3476 ext4_set_io_unwritten_flag(inode, io);
3195 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 3477 else
3196 } else
3197 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3478 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3198 if (ext4_should_dioread_nolock(inode)) 3479 if (ext4_should_dioread_nolock(inode))
3199 map->m_flags |= EXT4_MAP_UNINIT; 3480 map->m_flags |= EXT4_MAP_UNINIT;
@@ -3234,14 +3515,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3234 3515
3235 /* buffered write, writepage time, convert*/ 3516 /* buffered write, writepage time, convert*/
3236 ret = ext4_ext_convert_to_initialized(handle, inode, map, path); 3517 ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
3237 if (ret >= 0) { 3518 if (ret >= 0)
3238 ext4_update_inode_fsync_trans(handle, inode, 1); 3519 ext4_update_inode_fsync_trans(handle, inode, 1);
3239 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3240 map->m_len);
3241 if (err < 0)
3242 goto out2;
3243 }
3244
3245out: 3520out:
3246 if (ret <= 0) { 3521 if (ret <= 0) {
3247 err = ret; 3522 err = ret;
@@ -3270,11 +3545,24 @@ out:
3270 * But fallocate would have already updated quota and block 3545 * But fallocate would have already updated quota and block
3271 * count for this offset. So cancel these reservation 3546 * count for this offset. So cancel these reservation
3272 */ 3547 */
3273 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 3548 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
3274 ext4_da_update_reserve_space(inode, allocated, 0); 3549 unsigned int reserved_clusters;
3550 reserved_clusters = get_reserved_cluster_alloc(inode,
3551 map->m_lblk, map->m_len);
3552 if (reserved_clusters)
3553 ext4_da_update_reserve_space(inode,
3554 reserved_clusters,
3555 0);
3556 }
3275 3557
3276map_out: 3558map_out:
3277 map->m_flags |= EXT4_MAP_MAPPED; 3559 map->m_flags |= EXT4_MAP_MAPPED;
3560 if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0) {
3561 err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
3562 map->m_len);
3563 if (err < 0)
3564 goto out2;
3565 }
3278out1: 3566out1:
3279 if (allocated > map->m_len) 3567 if (allocated > map->m_len)
3280 allocated = map->m_len; 3568 allocated = map->m_len;
@@ -3290,6 +3578,111 @@ out2:
3290} 3578}
3291 3579
3292/* 3580/*
3581 * get_implied_cluster_alloc - check to see if the requested
3582 * allocation (in the map structure) overlaps with a cluster already
3583 * allocated in an extent.
3584 * @sb The filesystem superblock structure
3585 * @map The requested lblk->pblk mapping
3586 * @ex The extent structure which might contain an implied
3587 * cluster allocation
3588 *
3589 * This function is called by ext4_ext_map_blocks() after we failed to
3590 * find blocks that were already in the inode's extent tree. Hence,
3591 * we know that the beginning of the requested region cannot overlap
3592 * the extent from the inode's extent tree. There are three cases we
3593 * want to catch. The first is this case:
3594 *
3595 * |--- cluster # N--|
3596 * |--- extent ---| |---- requested region ---|
3597 * |==========|
3598 *
3599 * The second case that we need to test for is this one:
3600 *
3601 * |--------- cluster # N ----------------|
3602 * |--- requested region --| |------- extent ----|
3603 * |=======================|
3604 *
3605 * The third case is when the requested region lies between two extents
3606 * within the same cluster:
3607 * |------------- cluster # N-------------|
3608 * |----- ex -----| |---- ex_right ----|
3609 * |------ requested region ------|
3610 * |================|
3611 *
3612 * In each of the above cases, we need to set the map->m_pblk and
3613 * map->m_len so it corresponds to the return the extent labelled as
3614 * "|====|" from cluster #N, since it is already in use for data in
3615 * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3616 * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3617 * as a new "allocated" block region. Otherwise, we will return 0 and
3618 * ext4_ext_map_blocks() will then allocate one or more new clusters
3619 * by calling ext4_mb_new_blocks().
3620 */
3621static int get_implied_cluster_alloc(struct super_block *sb,
3622 struct ext4_map_blocks *map,
3623 struct ext4_extent *ex,
3624 struct ext4_ext_path *path)
3625{
3626 struct ext4_sb_info *sbi = EXT4_SB(sb);
3627 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3628 ext4_lblk_t ex_cluster_start, ex_cluster_end;
3629 ext4_lblk_t rr_cluster_start, rr_cluster_end;
3630 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3631 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3632 unsigned short ee_len = ext4_ext_get_actual_len(ex);
3633
3634 /* The extent passed in that we are trying to match */
3635 ex_cluster_start = EXT4_B2C(sbi, ee_block);
3636 ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
3637
3638 /* The requested region passed into ext4_map_blocks() */
3639 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3640 rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
3641
3642 if ((rr_cluster_start == ex_cluster_end) ||
3643 (rr_cluster_start == ex_cluster_start)) {
3644 if (rr_cluster_start == ex_cluster_end)
3645 ee_start += ee_len - 1;
3646 map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
3647 c_offset;
3648 map->m_len = min(map->m_len,
3649 (unsigned) sbi->s_cluster_ratio - c_offset);
3650 /*
3651 * Check for and handle this case:
3652 *
3653 * |--------- cluster # N-------------|
3654 * |------- extent ----|
3655 * |--- requested region ---|
3656 * |===========|
3657 */
3658
3659 if (map->m_lblk < ee_block)
3660 map->m_len = min(map->m_len, ee_block - map->m_lblk);
3661
3662 /*
3663 * Check for the case where there is already another allocated
3664 * block to the right of 'ex' but before the end of the cluster.
3665 *
3666 * |------------- cluster # N-------------|
3667 * |----- ex -----| |---- ex_right ----|
3668 * |------ requested region ------|
3669 * |================|
3670 */
3671 if (map->m_lblk > ee_block) {
3672 ext4_lblk_t next = ext4_ext_next_allocated_block(path);
3673 map->m_len = min(map->m_len, next - map->m_lblk);
3674 }
3675
3676 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
3677 return 1;
3678 }
3679
3680 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
3681 return 0;
3682}
3683
3684
3685/*
3293 * Block allocation/map/preallocation routine for extents based files 3686 * Block allocation/map/preallocation routine for extents based files
3294 * 3687 *
3295 * 3688 *
@@ -3311,15 +3704,17 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3311 struct ext4_map_blocks *map, int flags) 3704 struct ext4_map_blocks *map, int flags)
3312{ 3705{
3313 struct ext4_ext_path *path = NULL; 3706 struct ext4_ext_path *path = NULL;
3314 struct ext4_extent newex, *ex; 3707 struct ext4_extent newex, *ex, *ex2;
3708 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3315 ext4_fsblk_t newblock = 0; 3709 ext4_fsblk_t newblock = 0;
3316 int err = 0, depth, ret; 3710 int free_on_err = 0, err = 0, depth, ret;
3317 unsigned int allocated = 0; 3711 unsigned int allocated = 0, offset = 0;
3712 unsigned int allocated_clusters = 0;
3318 unsigned int punched_out = 0; 3713 unsigned int punched_out = 0;
3319 unsigned int result = 0; 3714 unsigned int result = 0;
3320 struct ext4_allocation_request ar; 3715 struct ext4_allocation_request ar;
3321 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3716 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3322 struct ext4_map_blocks punch_map; 3717 ext4_lblk_t cluster_offset;
3323 3718
3324 ext_debug("blocks %u/%u requested for inode %lu\n", 3719 ext_debug("blocks %u/%u requested for inode %lu\n",
3325 map->m_lblk, map->m_len, inode->i_ino); 3720 map->m_lblk, map->m_len, inode->i_ino);
@@ -3329,6 +3724,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3329 if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) && 3724 if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
3330 ext4_ext_in_cache(inode, map->m_lblk, &newex)) { 3725 ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
3331 if (!newex.ee_start_lo && !newex.ee_start_hi) { 3726 if (!newex.ee_start_lo && !newex.ee_start_hi) {
3727 if ((sbi->s_cluster_ratio > 1) &&
3728 ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
3729 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3730
3332 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { 3731 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3333 /* 3732 /*
3334 * block isn't allocated yet and 3733 * block isn't allocated yet and
@@ -3339,6 +3738,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3339 /* we should allocate requested block */ 3738 /* we should allocate requested block */
3340 } else { 3739 } else {
3341 /* block is already allocated */ 3740 /* block is already allocated */
3741 if (sbi->s_cluster_ratio > 1)
3742 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3342 newblock = map->m_lblk 3743 newblock = map->m_lblk
3343 - le32_to_cpu(newex.ee_block) 3744 - le32_to_cpu(newex.ee_block)
3344 + ext4_ext_pblock(&newex); 3745 + ext4_ext_pblock(&newex);
@@ -3384,8 +3785,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3384 * we split out initialized portions during a write. 3785 * we split out initialized portions during a write.
3385 */ 3786 */
3386 ee_len = ext4_ext_get_actual_len(ex); 3787 ee_len = ext4_ext_get_actual_len(ex);
3788
3789 trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
3790
3387 /* if found extent covers block, simply return it */ 3791 /* if found extent covers block, simply return it */
3388 if (in_range(map->m_lblk, ee_block, ee_len)) { 3792 if (in_range(map->m_lblk, ee_block, ee_len)) {
3793 struct ext4_map_blocks punch_map;
3794 ext4_fsblk_t partial_cluster = 0;
3795
3389 newblock = map->m_lblk - ee_block + ee_start; 3796 newblock = map->m_lblk - ee_block + ee_start;
3390 /* number of remaining blocks in the extent */ 3797 /* number of remaining blocks in the extent */
3391 allocated = ee_len - (map->m_lblk - ee_block); 3798 allocated = ee_len - (map->m_lblk - ee_block);
@@ -3469,7 +3876,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3469 ext4_ext_invalidate_cache(inode); 3876 ext4_ext_invalidate_cache(inode);
3470 3877
3471 err = ext4_ext_rm_leaf(handle, inode, path, 3878 err = ext4_ext_rm_leaf(handle, inode, path,
3472 map->m_lblk, map->m_lblk + punched_out); 3879 &partial_cluster, map->m_lblk,
3880 map->m_lblk + punched_out);
3473 3881
3474 if (!err && path->p_hdr->eh_entries == 0) { 3882 if (!err && path->p_hdr->eh_entries == 0) {
3475 /* 3883 /*
@@ -3492,6 +3900,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3492 } 3900 }
3493 } 3901 }
3494 3902
3903 if ((sbi->s_cluster_ratio > 1) &&
3904 ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
3905 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3906
3495 /* 3907 /*
3496 * requested block isn't allocated yet; 3908 * requested block isn't allocated yet;
3497 * we couldn't try to create block if create flag is zero 3909 * we couldn't try to create block if create flag is zero
@@ -3504,9 +3916,25 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3504 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); 3916 ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
3505 goto out2; 3917 goto out2;
3506 } 3918 }
3919
3507 /* 3920 /*
3508 * Okay, we need to do block allocation. 3921 * Okay, we need to do block allocation.
3509 */ 3922 */
3923 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
3924 newex.ee_block = cpu_to_le32(map->m_lblk);
3925 cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3926
3927 /*
3928 * If we are doing bigalloc, check to see if the extent returned
3929 * by ext4_ext_find_extent() implies a cluster we can use.
3930 */
3931 if (cluster_offset && ex &&
3932 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
3933 ar.len = allocated = map->m_len;
3934 newblock = map->m_pblk;
3935 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3936 goto got_allocated_blocks;
3937 }
3510 3938
3511 /* find neighbour allocated blocks */ 3939 /* find neighbour allocated blocks */
3512 ar.lleft = map->m_lblk; 3940 ar.lleft = map->m_lblk;
@@ -3514,10 +3942,21 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3514 if (err) 3942 if (err)
3515 goto out2; 3943 goto out2;
3516 ar.lright = map->m_lblk; 3944 ar.lright = map->m_lblk;
3517 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); 3945 ex2 = NULL;
3946 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
3518 if (err) 3947 if (err)
3519 goto out2; 3948 goto out2;
3520 3949
3950 /* Check if the extent after searching to the right implies a
3951 * cluster we can use. */
3952 if ((sbi->s_cluster_ratio > 1) && ex2 &&
3953 get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
3954 ar.len = allocated = map->m_len;
3955 newblock = map->m_pblk;
3956 map->m_flags |= EXT4_MAP_FROM_CLUSTER;
3957 goto got_allocated_blocks;
3958 }
3959
3521 /* 3960 /*
3522 * See if request is beyond maximum number of blocks we can have in 3961 * See if request is beyond maximum number of blocks we can have in
3523 * a single extent. For an initialized extent this limit is 3962 * a single extent. For an initialized extent this limit is
@@ -3532,9 +3971,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3532 map->m_len = EXT_UNINIT_MAX_LEN; 3971 map->m_len = EXT_UNINIT_MAX_LEN;
3533 3972
3534 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 3973 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
3535 newex.ee_block = cpu_to_le32(map->m_lblk);
3536 newex.ee_len = cpu_to_le16(map->m_len); 3974 newex.ee_len = cpu_to_le16(map->m_len);
3537 err = ext4_ext_check_overlap(inode, &newex, path); 3975 err = ext4_ext_check_overlap(sbi, inode, &newex, path);
3538 if (err) 3976 if (err)
3539 allocated = ext4_ext_get_actual_len(&newex); 3977 allocated = ext4_ext_get_actual_len(&newex);
3540 else 3978 else
@@ -3544,7 +3982,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3544 ar.inode = inode; 3982 ar.inode = inode;
3545 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); 3983 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
3546 ar.logical = map->m_lblk; 3984 ar.logical = map->m_lblk;
3547 ar.len = allocated; 3985 /*
3986 * We calculate the offset from the beginning of the cluster
3987 * for the logical block number, since when we allocate a
3988 * physical cluster, the physical block should start at the
3989 * same offset from the beginning of the cluster. This is
3990 * needed so that future calls to get_implied_cluster_alloc()
3991 * work correctly.
3992 */
3993 offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
3994 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
3995 ar.goal -= offset;
3996 ar.logical -= offset;
3548 if (S_ISREG(inode->i_mode)) 3997 if (S_ISREG(inode->i_mode))
3549 ar.flags = EXT4_MB_HINT_DATA; 3998 ar.flags = EXT4_MB_HINT_DATA;
3550 else 3999 else
@@ -3557,9 +4006,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3557 goto out2; 4006 goto out2;
3558 ext_debug("allocate new block: goal %llu, found %llu/%u\n", 4007 ext_debug("allocate new block: goal %llu, found %llu/%u\n",
3559 ar.goal, newblock, allocated); 4008 ar.goal, newblock, allocated);
4009 free_on_err = 1;
4010 allocated_clusters = ar.len;
4011 ar.len = EXT4_C2B(sbi, ar.len) - offset;
4012 if (ar.len > allocated)
4013 ar.len = allocated;
3560 4014
4015got_allocated_blocks:
3561 /* try to insert new extent into found leaf and return */ 4016 /* try to insert new extent into found leaf and return */
3562 ext4_ext_store_pblock(&newex, newblock); 4017 ext4_ext_store_pblock(&newex, newblock + offset);
3563 newex.ee_len = cpu_to_le16(ar.len); 4018 newex.ee_len = cpu_to_le16(ar.len);
3564 /* Mark uninitialized */ 4019 /* Mark uninitialized */
3565 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4020 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
@@ -3572,10 +4027,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3572 * that we need to perform conversion when IO is done. 4027 * that we need to perform conversion when IO is done.
3573 */ 4028 */
3574 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4029 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3575 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 4030 if (io)
3576 io->flag = EXT4_IO_END_UNWRITTEN; 4031 ext4_set_io_unwritten_flag(inode, io);
3577 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); 4032 else
3578 } else
3579 ext4_set_inode_state(inode, 4033 ext4_set_inode_state(inode,
3580 EXT4_STATE_DIO_UNWRITTEN); 4034 EXT4_STATE_DIO_UNWRITTEN);
3581 } 4035 }
@@ -3583,11 +4037,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3583 map->m_flags |= EXT4_MAP_UNINIT; 4037 map->m_flags |= EXT4_MAP_UNINIT;
3584 } 4038 }
3585 4039
3586 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); 4040 err = 0;
4041 if ((flags & EXT4_GET_BLOCKS_KEEP_SIZE) == 0)
4042 err = check_eofblocks_fl(handle, inode, map->m_lblk,
4043 path, ar.len);
3587 if (!err) 4044 if (!err)
3588 err = ext4_ext_insert_extent(handle, inode, path, 4045 err = ext4_ext_insert_extent(handle, inode, path,
3589 &newex, flags); 4046 &newex, flags);
3590 if (err) { 4047 if (err && free_on_err) {
3591 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ? 4048 int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
3592 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0; 4049 EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
3593 /* free data blocks we just allocated */ 4050 /* free data blocks we just allocated */
@@ -3610,8 +4067,82 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3610 * Update reserved blocks/metadata blocks after successful 4067 * Update reserved blocks/metadata blocks after successful
3611 * block allocation which had been deferred till now. 4068 * block allocation which had been deferred till now.
3612 */ 4069 */
3613 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 4070 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
3614 ext4_da_update_reserve_space(inode, allocated, 1); 4071 unsigned int reserved_clusters;
4072 /*
4073 * Check how many clusters we had reserved this allocated range
4074 */
4075 reserved_clusters = get_reserved_cluster_alloc(inode,
4076 map->m_lblk, allocated);
4077 if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
4078 if (reserved_clusters) {
4079 /*
4080 * We have clusters reserved for this range.
4081 * But since we are not doing actual allocation
4082 * and are simply using blocks from previously
4083 * allocated cluster, we should release the
4084 * reservation and not claim quota.
4085 */
4086 ext4_da_update_reserve_space(inode,
4087 reserved_clusters, 0);
4088 }
4089 } else {
4090 BUG_ON(allocated_clusters < reserved_clusters);
4091 /* We will claim quota for all newly allocated blocks.*/
4092 ext4_da_update_reserve_space(inode, allocated_clusters,
4093 1);
4094 if (reserved_clusters < allocated_clusters) {
4095 struct ext4_inode_info *ei = EXT4_I(inode);
4096 int reservation = allocated_clusters -
4097 reserved_clusters;
4098 /*
4099 * It seems we claimed few clusters outside of
4100 * the range of this allocation. We should give
4101 * it back to the reservation pool. This can
4102 * happen in the following case:
4103 *
4104 * * Suppose s_cluster_ratio is 4 (i.e., each
4105 * cluster has 4 blocks. Thus, the clusters
4106 * are [0-3],[4-7],[8-11]...
4107 * * First comes delayed allocation write for
4108 * logical blocks 10 & 11. Since there were no
4109 * previous delayed allocated blocks in the
4110 * range [8-11], we would reserve 1 cluster
4111 * for this write.
4112 * * Next comes write for logical blocks 3 to 8.
4113 * In this case, we will reserve 2 clusters
4114 * (for [0-3] and [4-7]; and not for [8-11] as
4115 * that range has a delayed allocated blocks.
4116 * Thus total reserved clusters now becomes 3.
4117 * * Now, during the delayed allocation writeout
4118 * time, we will first write blocks [3-8] and
4119 * allocate 3 clusters for writing these
4120 * blocks. Also, we would claim all these
4121 * three clusters above.
4122 * * Now when we come here to writeout the
4123 * blocks [10-11], we would expect to claim
4124 * the reservation of 1 cluster we had made
4125 * (and we would claim it since there are no
4126 * more delayed allocated blocks in the range
4127 * [8-11]. But our reserved cluster count had
4128 * already gone to 0.
4129 *
4130 * Thus, at the step 4 above when we determine
4131 * that there are still some unwritten delayed
4132 * allocated blocks outside of our current
4133 * block range, we should increment the
4134 * reserved clusters count so that when the
4135 * remaining blocks finally gets written, we
4136 * could claim them.
4137 */
4138 dquot_reserve_block(inode,
4139 EXT4_C2B(sbi, reservation));
4140 spin_lock(&ei->i_block_reservation_lock);
4141 ei->i_reserved_data_blocks += reservation;
4142 spin_unlock(&ei->i_block_reservation_lock);
4143 }
4144 }
4145 }
3615 4146
3616 /* 4147 /*
3617 * Cache the extent and update transaction to commit on fdatasync only 4148 * Cache the extent and update transaction to commit on fdatasync only
@@ -3634,12 +4165,12 @@ out2:
3634 ext4_ext_drop_refs(path); 4165 ext4_ext_drop_refs(path);
3635 kfree(path); 4166 kfree(path);
3636 } 4167 }
3637 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
3638 newblock, map->m_len, err ? err : allocated);
3639
3640 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ? 4168 result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
3641 punched_out : allocated; 4169 punched_out : allocated;
3642 4170
4171 trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
4172 newblock, map->m_len, err ? err : result);
4173
3643 return err ? err : result; 4174 return err ? err : result;
3644} 4175}
3645 4176
@@ -3649,6 +4180,7 @@ void ext4_ext_truncate(struct inode *inode)
3649 struct super_block *sb = inode->i_sb; 4180 struct super_block *sb = inode->i_sb;
3650 ext4_lblk_t last_block; 4181 ext4_lblk_t last_block;
3651 handle_t *handle; 4182 handle_t *handle;
4183 loff_t page_len;
3652 int err = 0; 4184 int err = 0;
3653 4185
3654 /* 4186 /*
@@ -3665,8 +4197,16 @@ void ext4_ext_truncate(struct inode *inode)
3665 if (IS_ERR(handle)) 4197 if (IS_ERR(handle))
3666 return; 4198 return;
3667 4199
3668 if (inode->i_size & (sb->s_blocksize - 1)) 4200 if (inode->i_size % PAGE_CACHE_SIZE != 0) {
3669 ext4_block_truncate_page(handle, mapping, inode->i_size); 4201 page_len = PAGE_CACHE_SIZE -
4202 (inode->i_size & (PAGE_CACHE_SIZE - 1));
4203
4204 err = ext4_discard_partial_page_buffers(handle,
4205 mapping, inode->i_size, page_len, 0);
4206
4207 if (err)
4208 goto out_stop;
4209 }
3670 4210
3671 if (ext4_orphan_add(handle, inode)) 4211 if (ext4_orphan_add(handle, inode))
3672 goto out_stop; 4212 goto out_stop;
@@ -3760,6 +4300,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3760 int ret = 0; 4300 int ret = 0;
3761 int ret2 = 0; 4301 int ret2 = 0;
3762 int retries = 0; 4302 int retries = 0;
4303 int flags;
3763 struct ext4_map_blocks map; 4304 struct ext4_map_blocks map;
3764 unsigned int credits, blkbits = inode->i_blkbits; 4305 unsigned int credits, blkbits = inode->i_blkbits;
3765 4306
@@ -3796,6 +4337,16 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
3796 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4337 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
3797 return ret; 4338 return ret;
3798 } 4339 }
4340 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT;
4341 if (mode & FALLOC_FL_KEEP_SIZE)
4342 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4343 /*
4344 * Don't normalize the request if it can fit in one extent so
4345 * that it doesn't get unnecessarily split into multiple
4346 * extents.
4347 */
4348 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4349 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
3799retry: 4350retry:
3800 while (ret >= 0 && ret < max_blocks) { 4351 while (ret >= 0 && ret < max_blocks) {
3801 map.m_lblk = map.m_lblk + ret; 4352 map.m_lblk = map.m_lblk + ret;
@@ -3805,9 +4356,7 @@ retry:
3805 ret = PTR_ERR(handle); 4356 ret = PTR_ERR(handle);
3806 break; 4357 break;
3807 } 4358 }
3808 ret = ext4_map_blocks(handle, inode, &map, 4359 ret = ext4_map_blocks(handle, inode, &map, flags);
3809 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
3810 EXT4_GET_BLOCKS_NO_NORMALIZE);
3811 if (ret <= 0) { 4360 if (ret <= 0) {
3812#ifdef EXT4FS_DEBUG 4361#ifdef EXT4FS_DEBUG
3813 WARN_ON(ret <= 0); 4362 WARN_ON(ret <= 0);
@@ -4102,7 +4651,6 @@ found_delayed_extent:
4102 return EXT_BREAK; 4651 return EXT_BREAK;
4103 return EXT_CONTINUE; 4652 return EXT_CONTINUE;
4104} 4653}
4105
4106/* fiemap flags we can handle specified here */ 4654/* fiemap flags we can handle specified here */
4107#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 4655#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
4108 4656
@@ -4162,17 +4710,28 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4162 struct address_space *mapping = inode->i_mapping; 4710 struct address_space *mapping = inode->i_mapping;
4163 struct ext4_map_blocks map; 4711 struct ext4_map_blocks map;
4164 handle_t *handle; 4712 handle_t *handle;
4165 loff_t first_block_offset, last_block_offset, block_len; 4713 loff_t first_page, last_page, page_len;
4166 loff_t first_page, last_page, first_page_offset, last_page_offset; 4714 loff_t first_page_offset, last_page_offset;
4167 int ret, credits, blocks_released, err = 0; 4715 int ret, credits, blocks_released, err = 0;
4168 4716
4717 /* No need to punch hole beyond i_size */
4718 if (offset >= inode->i_size)
4719 return 0;
4720
4721 /*
4722 * If the hole extends beyond i_size, set the hole
4723 * to end after the page that contains i_size
4724 */
4725 if (offset + length > inode->i_size) {
4726 length = inode->i_size +
4727 PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
4728 offset;
4729 }
4730
4169 first_block = (offset + sb->s_blocksize - 1) >> 4731 first_block = (offset + sb->s_blocksize - 1) >>
4170 EXT4_BLOCK_SIZE_BITS(sb); 4732 EXT4_BLOCK_SIZE_BITS(sb);
4171 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 4733 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4172 4734
4173 first_block_offset = first_block << EXT4_BLOCK_SIZE_BITS(sb);
4174 last_block_offset = last_block << EXT4_BLOCK_SIZE_BITS(sb);
4175
4176 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 4735 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
4177 last_page = (offset + length) >> PAGE_CACHE_SHIFT; 4736 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
4178 4737
@@ -4185,11 +4744,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4185 */ 4744 */
4186 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 4745 if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
4187 err = filemap_write_and_wait_range(mapping, 4746 err = filemap_write_and_wait_range(mapping,
4188 first_page_offset == 0 ? 0 : first_page_offset-1, 4747 offset, offset + length - 1);
4189 last_page_offset);
4190 4748
4191 if (err) 4749 if (err)
4192 return err; 4750 return err;
4193 } 4751 }
4194 4752
4195 /* Now release the pages */ 4753 /* Now release the pages */
@@ -4211,24 +4769,64 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4211 goto out; 4769 goto out;
4212 4770
4213 /* 4771 /*
4214 * Now we need to zero out the un block aligned data. 4772 * Now we need to zero out the non-page-aligned data in the
4215 * If the file is smaller than a block, just 4773 * pages at the start and tail of the hole, and unmap the buffer
4216 * zero out the middle 4774 * heads for the block aligned regions of the page that were
4775 * completely zeroed.
4217 */ 4776 */
4218 if (first_block > last_block) 4777 if (first_page > last_page) {
4219 ext4_block_zero_page_range(handle, mapping, offset, length); 4778 /*
4220 else { 4779 * If the file space being truncated is contained within a page
4221 /* zero out the head of the hole before the first block */ 4780 * just zero out and unmap the middle of that page
4222 block_len = first_block_offset - offset; 4781 */
4223 if (block_len > 0) 4782 err = ext4_discard_partial_page_buffers(handle,
4224 ext4_block_zero_page_range(handle, mapping, 4783 mapping, offset, length, 0);
4225 offset, block_len); 4784
4226 4785 if (err)
4227 /* zero out the tail of the hole after the last block */ 4786 goto out;
4228 block_len = offset + length - last_block_offset; 4787 } else {
4229 if (block_len > 0) { 4788 /*
4230 ext4_block_zero_page_range(handle, mapping, 4789 * zero out and unmap the partial page that contains
4231 last_block_offset, block_len); 4790 * the start of the hole
4791 */
4792 page_len = first_page_offset - offset;
4793 if (page_len > 0) {
4794 err = ext4_discard_partial_page_buffers(handle, mapping,
4795 offset, page_len, 0);
4796 if (err)
4797 goto out;
4798 }
4799
4800 /*
4801 * zero out and unmap the partial page that contains
4802 * the end of the hole
4803 */
4804 page_len = offset + length - last_page_offset;
4805 if (page_len > 0) {
4806 err = ext4_discard_partial_page_buffers(handle, mapping,
4807 last_page_offset, page_len, 0);
4808 if (err)
4809 goto out;
4810 }
4811 }
4812
4813
4814 /*
4815 * If i_size is contained in the last page, we need to
4816 * unmap and zero the partial page after i_size
4817 */
4818 if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
4819 inode->i_size % PAGE_CACHE_SIZE != 0) {
4820
4821 page_len = PAGE_CACHE_SIZE -
4822 (inode->i_size & (PAGE_CACHE_SIZE - 1));
4823
4824 if (page_len > 0) {
4825 err = ext4_discard_partial_page_buffers(handle,
4826 mapping, inode->i_size, page_len, 0);
4827
4828 if (err)
4829 goto out;
4232 } 4830 }
4233 } 4831 }
4234 4832
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b9548f477bb8..cb70f1812a70 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -181,8 +181,8 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
181 path.dentry = mnt->mnt_root; 181 path.dentry = mnt->mnt_root;
182 cp = d_path(&path, buf, sizeof(buf)); 182 cp = d_path(&path, buf, sizeof(buf));
183 if (!IS_ERR(cp)) { 183 if (!IS_ERR(cp)) {
184 memcpy(sbi->s_es->s_last_mounted, cp, 184 strlcpy(sbi->s_es->s_last_mounted, cp,
185 sizeof(sbi->s_es->s_last_mounted)); 185 sizeof(sbi->s_es->s_last_mounted));
186 ext4_mark_super_dirty(sb); 186 ext4_mark_super_dirty(sb);
187 } 187 }
188 } 188 }
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 036f78f7a1ef..00a2cb753efd 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -75,7 +75,7 @@ static void dump_completed_IO(struct inode * inode)
75 * to written. 75 * to written.
76 * The function return the number of pending IOs on success. 76 * The function return the number of pending IOs on success.
77 */ 77 */
78extern int ext4_flush_completed_IO(struct inode *inode) 78int ext4_flush_completed_IO(struct inode *inode)
79{ 79{
80 ext4_io_end_t *io; 80 ext4_io_end_t *io;
81 struct ext4_inode_info *ei = EXT4_I(inode); 81 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -83,14 +83,12 @@ extern int ext4_flush_completed_IO(struct inode *inode)
83 int ret = 0; 83 int ret = 0;
84 int ret2 = 0; 84 int ret2 = 0;
85 85
86 if (list_empty(&ei->i_completed_io_list))
87 return ret;
88
89 dump_completed_IO(inode); 86 dump_completed_IO(inode);
90 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 87 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
91 while (!list_empty(&ei->i_completed_io_list)){ 88 while (!list_empty(&ei->i_completed_io_list)){
92 io = list_entry(ei->i_completed_io_list.next, 89 io = list_entry(ei->i_completed_io_list.next,
93 ext4_io_end_t, list); 90 ext4_io_end_t, list);
91 list_del_init(&io->list);
94 /* 92 /*
95 * Calling ext4_end_io_nolock() to convert completed 93 * Calling ext4_end_io_nolock() to convert completed
96 * IO to written. 94 * IO to written.
@@ -107,11 +105,9 @@ extern int ext4_flush_completed_IO(struct inode *inode)
107 */ 105 */
108 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 106 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
109 ret = ext4_end_io_nolock(io); 107 ret = ext4_end_io_nolock(io);
110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
111 if (ret < 0) 108 if (ret < 0)
112 ret2 = ret; 109 ret2 = ret;
113 else 110 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
114 list_del_init(&io->list);
115 } 111 }
116 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 112 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
117 return (ret2 < 0) ? ret2 : 0; 113 return (ret2 < 0) ? ret2 : 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 9c63f273b550..00beb4f9cc4f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
78 * allocation, essentially implementing a per-group read-only flag. */ 78 * allocation, essentially implementing a per-group read-only flag. */
79 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 79 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
80 ext4_error(sb, "Checksum bad for group %u", block_group); 80 ext4_error(sb, "Checksum bad for group %u", block_group);
81 ext4_free_blks_set(sb, gdp, 0); 81 ext4_free_group_clusters_set(sb, gdp, 0);
82 ext4_free_inodes_set(sb, gdp, 0); 82 ext4_free_inodes_set(sb, gdp, 0);
83 ext4_itable_unused_set(sb, gdp, 0); 83 ext4_itable_unused_set(sb, gdp, 0);
84 memset(bh->b_data, 0xff, sb->s_blocksize); 84 memset(bh->b_data, 0xff, sb->s_blocksize);
@@ -293,121 +293,9 @@ error_return:
293 ext4_std_error(sb, fatal); 293 ext4_std_error(sb, fatal);
294} 294}
295 295
296/*
297 * There are two policies for allocating an inode. If the new inode is
298 * a directory, then a forward search is made for a block group with both
299 * free space and a low directory-to-inode ratio; if that fails, then of
300 * the groups with above-average free space, that group with the fewest
301 * directories already is chosen.
302 *
303 * For other inodes, search forward from the parent directory\'s block
304 * group to find a free inode.
305 */
306static int find_group_dir(struct super_block *sb, struct inode *parent,
307 ext4_group_t *best_group)
308{
309 ext4_group_t ngroups = ext4_get_groups_count(sb);
310 unsigned int freei, avefreei;
311 struct ext4_group_desc *desc, *best_desc = NULL;
312 ext4_group_t group;
313 int ret = -1;
314
315 freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
316 avefreei = freei / ngroups;
317
318 for (group = 0; group < ngroups; group++) {
319 desc = ext4_get_group_desc(sb, group, NULL);
320 if (!desc || !ext4_free_inodes_count(sb, desc))
321 continue;
322 if (ext4_free_inodes_count(sb, desc) < avefreei)
323 continue;
324 if (!best_desc ||
325 (ext4_free_blks_count(sb, desc) >
326 ext4_free_blks_count(sb, best_desc))) {
327 *best_group = group;
328 best_desc = desc;
329 ret = 0;
330 }
331 }
332 return ret;
333}
334
335#define free_block_ratio 10
336
337static int find_group_flex(struct super_block *sb, struct inode *parent,
338 ext4_group_t *best_group)
339{
340 struct ext4_sb_info *sbi = EXT4_SB(sb);
341 struct ext4_group_desc *desc;
342 struct flex_groups *flex_group = sbi->s_flex_groups;
343 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
344 ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
345 ext4_group_t ngroups = ext4_get_groups_count(sb);
346 int flex_size = ext4_flex_bg_size(sbi);
347 ext4_group_t best_flex = parent_fbg_group;
348 int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
349 int flexbg_free_blocks;
350 int flex_freeb_ratio;
351 ext4_group_t n_fbg_groups;
352 ext4_group_t i;
353
354 n_fbg_groups = (ngroups + flex_size - 1) >>
355 sbi->s_log_groups_per_flex;
356
357find_close_to_parent:
358 flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
359 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
360 if (atomic_read(&flex_group[best_flex].free_inodes) &&
361 flex_freeb_ratio > free_block_ratio)
362 goto found_flexbg;
363
364 if (best_flex && best_flex == parent_fbg_group) {
365 best_flex--;
366 goto find_close_to_parent;
367 }
368
369 for (i = 0; i < n_fbg_groups; i++) {
370 if (i == parent_fbg_group || i == parent_fbg_group - 1)
371 continue;
372
373 flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
374 flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
375
376 if (flex_freeb_ratio > free_block_ratio &&
377 (atomic_read(&flex_group[i].free_inodes))) {
378 best_flex = i;
379 goto found_flexbg;
380 }
381
382 if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
383 ((atomic_read(&flex_group[i].free_blocks) >
384 atomic_read(&flex_group[best_flex].free_blocks)) &&
385 atomic_read(&flex_group[i].free_inodes)))
386 best_flex = i;
387 }
388
389 if (!atomic_read(&flex_group[best_flex].free_inodes) ||
390 !atomic_read(&flex_group[best_flex].free_blocks))
391 return -1;
392
393found_flexbg:
394 for (i = best_flex * flex_size; i < ngroups &&
395 i < (best_flex + 1) * flex_size; i++) {
396 desc = ext4_get_group_desc(sb, i, NULL);
397 if (ext4_free_inodes_count(sb, desc)) {
398 *best_group = i;
399 goto out;
400 }
401 }
402
403 return -1;
404out:
405 return 0;
406}
407
408struct orlov_stats { 296struct orlov_stats {
409 __u32 free_inodes; 297 __u32 free_inodes;
410 __u32 free_blocks; 298 __u32 free_clusters;
411 __u32 used_dirs; 299 __u32 used_dirs;
412}; 300};
413 301
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
424 312
425 if (flex_size > 1) { 313 if (flex_size > 1) {
426 stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 314 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
427 stats->free_blocks = atomic_read(&flex_group[g].free_blocks); 315 stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
428 stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 316 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
429 return; 317 return;
430 } 318 }
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
432 desc = ext4_get_group_desc(sb, g, NULL); 320 desc = ext4_get_group_desc(sb, g, NULL);
433 if (desc) { 321 if (desc) {
434 stats->free_inodes = ext4_free_inodes_count(sb, desc); 322 stats->free_inodes = ext4_free_inodes_count(sb, desc);
435 stats->free_blocks = ext4_free_blks_count(sb, desc); 323 stats->free_clusters = ext4_free_group_clusters(sb, desc);
436 stats->used_dirs = ext4_used_dirs_count(sb, desc); 324 stats->used_dirs = ext4_used_dirs_count(sb, desc);
437 } else { 325 } else {
438 stats->free_inodes = 0; 326 stats->free_inodes = 0;
439 stats->free_blocks = 0; 327 stats->free_clusters = 0;
440 stats->used_dirs = 0; 328 stats->used_dirs = 0;
441 } 329 }
442} 330}
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
471 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
472 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
473 unsigned int freei, avefreei; 361 unsigned int freei, avefreei;
474 ext4_fsblk_t freeb, avefreeb; 362 ext4_fsblk_t freeb, avefreec;
475 unsigned int ndirs; 363 unsigned int ndirs;
476 int max_dirs, min_inodes; 364 int max_dirs, min_inodes;
477 ext4_grpblk_t min_blocks; 365 ext4_grpblk_t min_clusters;
478 ext4_group_t i, grp, g, ngroups; 366 ext4_group_t i, grp, g, ngroups;
479 struct ext4_group_desc *desc; 367 struct ext4_group_desc *desc;
480 struct orlov_stats stats; 368 struct orlov_stats stats;
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
490 378
491 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); 379 freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
492 avefreei = freei / ngroups; 380 avefreei = freei / ngroups;
493 freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 381 freeb = EXT4_C2B(sbi,
494 avefreeb = freeb; 382 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
495 do_div(avefreeb, ngroups); 383 avefreec = freeb;
384 do_div(avefreec, ngroups);
496 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); 385 ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
497 386
498 if (S_ISDIR(mode) && 387 if (S_ISDIR(mode) &&
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
518 continue; 407 continue;
519 if (stats.free_inodes < avefreei) 408 if (stats.free_inodes < avefreei)
520 continue; 409 continue;
521 if (stats.free_blocks < avefreeb) 410 if (stats.free_clusters < avefreec)
522 continue; 411 continue;
523 grp = g; 412 grp = g;
524 ret = 0; 413 ret = 0;
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
556 min_inodes = avefreei - inodes_per_group*flex_size / 4; 445 min_inodes = avefreei - inodes_per_group*flex_size / 4;
557 if (min_inodes < 1) 446 if (min_inodes < 1)
558 min_inodes = 1; 447 min_inodes = 1;
559 min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; 448 min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
560 449
561 /* 450 /*
562 * Start looking in the flex group where we last allocated an 451 * Start looking in the flex group where we last allocated an
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
575 continue; 464 continue;
576 if (stats.free_inodes < min_inodes) 465 if (stats.free_inodes < min_inodes)
577 continue; 466 continue;
578 if (stats.free_blocks < min_blocks) 467 if (stats.free_clusters < min_clusters)
579 continue; 468 continue;
580 goto found_flex_bg; 469 goto found_flex_bg;
581 } 470 }
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
659 *group = parent_group; 548 *group = parent_group;
660 desc = ext4_get_group_desc(sb, *group, NULL); 549 desc = ext4_get_group_desc(sb, *group, NULL);
661 if (desc && ext4_free_inodes_count(sb, desc) && 550 if (desc && ext4_free_inodes_count(sb, desc) &&
662 ext4_free_blks_count(sb, desc)) 551 ext4_free_group_clusters(sb, desc))
663 return 0; 552 return 0;
664 553
665 /* 554 /*
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
683 *group -= ngroups; 572 *group -= ngroups;
684 desc = ext4_get_group_desc(sb, *group, NULL); 573 desc = ext4_get_group_desc(sb, *group, NULL);
685 if (desc && ext4_free_inodes_count(sb, desc) && 574 if (desc && ext4_free_inodes_count(sb, desc) &&
686 ext4_free_blks_count(sb, desc)) 575 ext4_free_group_clusters(sb, desc))
687 return 0; 576 return 0;
688 } 577 }
689 578
@@ -802,7 +691,7 @@ err_ret:
802 * group to find a free inode. 691 * group to find a free inode.
803 */ 692 */
804struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, 693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
805 const struct qstr *qstr, __u32 goal) 694 const struct qstr *qstr, __u32 goal, uid_t *owner)
806{ 695{
807 struct super_block *sb; 696 struct super_block *sb;
808 struct buffer_head *inode_bitmap_bh = NULL; 697 struct buffer_head *inode_bitmap_bh = NULL;
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
816 int ret2, err = 0; 705 int ret2, err = 0;
817 struct inode *ret; 706 struct inode *ret;
818 ext4_group_t i; 707 ext4_group_t i;
819 int free = 0;
820 static int once = 1;
821 ext4_group_t flex_group; 708 ext4_group_t flex_group;
822 709
823 /* Cannot create files in a deleted directory */ 710 /* Cannot create files in a deleted directory */
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
843 goto got_group; 730 goto got_group;
844 } 731 }
845 732
846 if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { 733 if (S_ISDIR(mode))
847 ret2 = find_group_flex(sb, dir, &group); 734 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
848 if (ret2 == -1) { 735 else
849 ret2 = find_group_other(sb, dir, &group, mode);
850 if (ret2 == 0 && once) {
851 once = 0;
852 printk(KERN_NOTICE "ext4: find_group_flex "
853 "failed, fallback succeeded dir %lu\n",
854 dir->i_ino);
855 }
856 }
857 goto got_group;
858 }
859
860 if (S_ISDIR(mode)) {
861 if (test_opt(sb, OLDALLOC))
862 ret2 = find_group_dir(sb, dir, &group);
863 else
864 ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
865 } else
866 ret2 = find_group_other(sb, dir, &group, mode); 736 ret2 = find_group_other(sb, dir, &group, mode);
867 737
868got_group: 738got_group:
@@ -950,26 +820,21 @@ got:
950 goto fail; 820 goto fail;
951 } 821 }
952 822
953 free = 0; 823 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
954 ext4_lock_group(sb, group); 824 err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
825 brelse(block_bitmap_bh);
826
955 /* recheck and clear flag under lock if we still need to */ 827 /* recheck and clear flag under lock if we still need to */
828 ext4_lock_group(sb, group);
956 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 829 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
957 free = ext4_free_blocks_after_init(sb, group, gdp);
958 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 830 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
959 ext4_free_blks_set(sb, gdp, free); 831 ext4_free_group_clusters_set(sb, gdp,
832 ext4_free_clusters_after_init(sb, group, gdp));
960 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, 833 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
961 gdp); 834 gdp);
962 } 835 }
963 ext4_unlock_group(sb, group); 836 ext4_unlock_group(sb, group);
964 837
965 /* Don't need to dirty bitmap block if we didn't change it */
966 if (free) {
967 BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
968 err = ext4_handle_dirty_metadata(handle,
969 NULL, block_bitmap_bh);
970 }
971
972 brelse(block_bitmap_bh);
973 if (err) 838 if (err)
974 goto fail; 839 goto fail;
975 } 840 }
@@ -987,8 +852,11 @@ got:
987 flex_group = ext4_flex_group(sbi, group); 852 flex_group = ext4_flex_group(sbi, group);
988 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); 853 atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
989 } 854 }
990 855 if (owner) {
991 if (test_opt(sb, GRPID)) { 856 inode->i_mode = mode;
857 inode->i_uid = owner[0];
858 inode->i_gid = owner[1];
859 } else if (test_opt(sb, GRPID)) {
992 inode->i_mode = mode; 860 inode->i_mode = mode;
993 inode->i_uid = current_fsuid(); 861 inode->i_uid = current_fsuid();
994 inode->i_gid = dir->i_gid; 862 inode->i_gid = dir->i_gid;
@@ -1005,11 +873,7 @@ got:
1005 ei->i_dir_start_lookup = 0; 873 ei->i_dir_start_lookup = 0;
1006 ei->i_disksize = 0; 874 ei->i_disksize = 0;
1007 875
1008 /* 876 /* Don't inherit extent flag from directory, amongst others. */
1009 * Don't inherit extent flag from directory, amongst others. We set
1010 * extent flag on newly created directory and file only if -o extent
1011 * mount option is specified
1012 */
1013 ei->i_flags = 877 ei->i_flags =
1014 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); 878 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
1015 ei->i_file_acl = 0; 879 ei->i_file_acl = 0;
@@ -1084,7 +948,7 @@ fail_free_drop:
1084fail_drop: 948fail_drop:
1085 dquot_drop(inode); 949 dquot_drop(inode);
1086 inode->i_flags |= S_NOQUOTA; 950 inode->i_flags |= S_NOQUOTA;
1087 inode->i_nlink = 0; 951 clear_nlink(inode);
1088 unlock_new_inode(inode); 952 unlock_new_inode(inode);
1089 iput(inode); 953 iput(inode);
1090 brelse(inode_bitmap_bh); 954 brelse(inode_bitmap_bh);
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
1235 * inode allocation from the current group, so we take alloc_sem lock, to 1099 * inode allocation from the current group, so we take alloc_sem lock, to
1236 * block ext4_claim_inode until we are finished. 1100 * block ext4_claim_inode until we are finished.
1237 */ 1101 */
1238extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, 1102int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1239 int barrier) 1103 int barrier)
1240{ 1104{
1241 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 1105 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 0962642119c0..3cfc73fbca8e 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -699,6 +699,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
699 /* 699 /*
700 * Okay, we need to do block allocation. 700 * Okay, we need to do block allocation.
701 */ 701 */
702 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
703 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
704 EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
705 "non-extent mapped inodes with bigalloc");
706 return -ENOSPC;
707 }
708
702 goal = ext4_find_goal(inode, map->m_lblk, partial); 709 goal = ext4_find_goal(inode, map->m_lblk, partial);
703 710
704 /* the number of blocks need to allocate for [d,t]indirect blocks */ 711 /* the number of blocks need to allocate for [d,t]indirect blocks */
@@ -1343,7 +1350,9 @@ void ext4_ind_truncate(struct inode *inode)
1343 __le32 nr = 0; 1350 __le32 nr = 0;
1344 int n = 0; 1351 int n = 0;
1345 ext4_lblk_t last_block, max_block; 1352 ext4_lblk_t last_block, max_block;
1353 loff_t page_len;
1346 unsigned blocksize = inode->i_sb->s_blocksize; 1354 unsigned blocksize = inode->i_sb->s_blocksize;
1355 int err;
1347 1356
1348 handle = start_transaction(inode); 1357 handle = start_transaction(inode);
1349 if (IS_ERR(handle)) 1358 if (IS_ERR(handle))
@@ -1354,9 +1363,16 @@ void ext4_ind_truncate(struct inode *inode)
1354 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) 1363 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
1355 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 1364 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
1356 1365
1357 if (inode->i_size & (blocksize - 1)) 1366 if (inode->i_size % PAGE_CACHE_SIZE != 0) {
1358 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 1367 page_len = PAGE_CACHE_SIZE -
1368 (inode->i_size & (PAGE_CACHE_SIZE - 1));
1369
1370 err = ext4_discard_partial_page_buffers(handle,
1371 mapping, inode->i_size, page_len, 0);
1372
1373 if (err)
1359 goto out_stop; 1374 goto out_stop;
1375 }
1360 1376
1361 if (last_block != max_block) { 1377 if (last_block != max_block) {
1362 n = ext4_block_to_path(inode, last_block, offsets, NULL); 1378 n = ext4_block_to_path(inode, last_block, offsets, NULL);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 986e2388f031..240f6e2dc7ee 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@
42#include "ext4_jbd2.h" 42#include "ext4_jbd2.h"
43#include "xattr.h" 43#include "xattr.h"
44#include "acl.h" 44#include "acl.h"
45#include "ext4_extents.h"
46#include "truncate.h" 45#include "truncate.h"
47 46
48#include <trace/events/ext4.h> 47#include <trace/events/ext4.h>
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
268 struct ext4_inode_info *ei = EXT4_I(inode); 267 struct ext4_inode_info *ei = EXT4_I(inode);
269 268
270 spin_lock(&ei->i_block_reservation_lock); 269 spin_lock(&ei->i_block_reservation_lock);
271 trace_ext4_da_update_reserve_space(inode, used); 270 trace_ext4_da_update_reserve_space(inode, used, quota_claim);
272 if (unlikely(used > ei->i_reserved_data_blocks)) { 271 if (unlikely(used > ei->i_reserved_data_blocks)) {
273 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " 272 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
274 "with only %d reserved data blocks\n", 273 "with only %d reserved data blocks\n",
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
281 /* Update per-inode reservations */ 280 /* Update per-inode reservations */
282 ei->i_reserved_data_blocks -= used; 281 ei->i_reserved_data_blocks -= used;
283 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; 282 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
284 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 283 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
285 used + ei->i_allocated_meta_blocks); 284 used + ei->i_allocated_meta_blocks);
286 ei->i_allocated_meta_blocks = 0; 285 ei->i_allocated_meta_blocks = 0;
287 286
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
291 * only when we have written all of the delayed 290 * only when we have written all of the delayed
292 * allocation blocks. 291 * allocation blocks.
293 */ 292 */
294 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 293 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
295 ei->i_reserved_meta_blocks); 294 ei->i_reserved_meta_blocks);
296 ei->i_reserved_meta_blocks = 0; 295 ei->i_reserved_meta_blocks = 0;
297 ei->i_da_metadata_calc_len = 0; 296 ei->i_da_metadata_calc_len = 0;
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
300 299
301 /* Update quota subsystem for data blocks */ 300 /* Update quota subsystem for data blocks */
302 if (quota_claim) 301 if (quota_claim)
303 dquot_claim_block(inode, used); 302 dquot_claim_block(inode, EXT4_C2B(sbi, used));
304 else { 303 else {
305 /* 304 /*
306 * We did fallocate with an offset that is already delayed 305 * We did fallocate with an offset that is already delayed
307 * allocated. So on delayed allocated writeback we should 306 * allocated. So on delayed allocated writeback we should
308 * not re-claim the quota for fallocated blocks. 307 * not re-claim the quota for fallocated blocks.
309 */ 308 */
310 dquot_release_reservation_block(inode, used); 309 dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
311 } 310 }
312 311
313 /* 312 /*
@@ -399,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
399} 398}
400 399
401/* 400/*
401 * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
402 */
403static void set_buffers_da_mapped(struct inode *inode,
404 struct ext4_map_blocks *map)
405{
406 struct address_space *mapping = inode->i_mapping;
407 struct pagevec pvec;
408 int i, nr_pages;
409 pgoff_t index, end;
410
411 index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
412 end = (map->m_lblk + map->m_len - 1) >>
413 (PAGE_CACHE_SHIFT - inode->i_blkbits);
414
415 pagevec_init(&pvec, 0);
416 while (index <= end) {
417 nr_pages = pagevec_lookup(&pvec, mapping, index,
418 min(end - index + 1,
419 (pgoff_t)PAGEVEC_SIZE));
420 if (nr_pages == 0)
421 break;
422 for (i = 0; i < nr_pages; i++) {
423 struct page *page = pvec.pages[i];
424 struct buffer_head *bh, *head;
425
426 if (unlikely(page->mapping != mapping) ||
427 !PageDirty(page))
428 break;
429
430 if (page_has_buffers(page)) {
431 bh = head = page_buffers(page);
432 do {
433 set_buffer_da_mapped(bh);
434 bh = bh->b_this_page;
435 } while (bh != head);
436 }
437 index++;
438 }
439 pagevec_release(&pvec);
440 }
441}
442
443/*
402 * The ext4_map_blocks() function tries to look up the requested blocks, 444 * The ext4_map_blocks() function tries to look up the requested blocks,
403 * and returns if the blocks are already mapped. 445 * and returns if the blocks are already mapped.
404 * 446 *
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
416 * the buffer head is mapped. 458 * the buffer head is mapped.
417 * 459 *
418 * It returns 0 if plain look up failed (blocks have not been allocated), in 460 * It returns 0 if plain look up failed (blocks have not been allocated), in
419 * that casem, buffer head is unmapped 461 * that case, buffer head is unmapped
420 * 462 *
421 * It returns the error in case of allocation failure. 463 * It returns the error in case of allocation failure.
422 */ 464 */
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
435 */ 477 */
436 down_read((&EXT4_I(inode)->i_data_sem)); 478 down_read((&EXT4_I(inode)->i_data_sem));
437 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 479 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
438 retval = ext4_ext_map_blocks(handle, inode, map, 0); 480 retval = ext4_ext_map_blocks(handle, inode, map, flags &
481 EXT4_GET_BLOCKS_KEEP_SIZE);
439 } else { 482 } else {
440 retval = ext4_ind_map_blocks(handle, inode, map, 0); 483 retval = ext4_ind_map_blocks(handle, inode, map, flags &
484 EXT4_GET_BLOCKS_KEEP_SIZE);
441 } 485 }
442 up_read((&EXT4_I(inode)->i_data_sem)); 486 up_read((&EXT4_I(inode)->i_data_sem));
443 487
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
455 * Returns if the blocks have already allocated 499 * Returns if the blocks have already allocated
456 * 500 *
457 * Note that if blocks have been preallocated 501 * Note that if blocks have been preallocated
458 * ext4_ext_get_block() returns th create = 0 502 * ext4_ext_get_block() returns the create = 0
459 * with buffer head unmapped. 503 * with buffer head unmapped.
460 */ 504 */
461 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 505 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
517 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 561 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
518 ext4_da_update_reserve_space(inode, retval, 1); 562 ext4_da_update_reserve_space(inode, retval, 1);
519 } 563 }
520 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) 564 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
521 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); 565 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
522 566
567 /* If we have successfully mapped the delayed allocated blocks,
568 * set the BH_Da_Mapped bit on them. Its important to do this
569 * under the protection of i_data_sem.
570 */
571 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
572 set_buffers_da_mapped(inode, map);
573 }
574
523 up_write((&EXT4_I(inode)->i_data_sem)); 575 up_write((&EXT4_I(inode)->i_data_sem));
524 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 576 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
525 int ret = check_block_validity(inode, map); 577 int ret = check_block_validity(inode, map);
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file,
909 ext4_orphan_add(handle, inode); 961 ext4_orphan_add(handle, inode);
910 if (ret2 < 0) 962 if (ret2 < 0)
911 ret = ret2; 963 ret = ret2;
964 } else {
965 unlock_page(page);
966 page_cache_release(page);
912 } 967 }
968
913 ret2 = ext4_journal_stop(handle); 969 ret2 = ext4_journal_stop(handle);
914 if (!ret) 970 if (!ret)
915 ret = ret2; 971 ret = ret2;
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file,
1037} 1093}
1038 1094
1039/* 1095/*
1040 * Reserve a single block located at lblock 1096 * Reserve a single cluster located at lblock
1041 */ 1097 */
1042static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1098static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1043{ 1099{
1044 int retries = 0; 1100 int retries = 0;
1045 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1101 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1046 struct ext4_inode_info *ei = EXT4_I(inode); 1102 struct ext4_inode_info *ei = EXT4_I(inode);
1047 unsigned long md_needed; 1103 unsigned int md_needed;
1048 int ret; 1104 int ret;
1049 1105
1050 /* 1106 /*
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
1054 */ 1110 */
1055repeat: 1111repeat:
1056 spin_lock(&ei->i_block_reservation_lock); 1112 spin_lock(&ei->i_block_reservation_lock);
1057 md_needed = ext4_calc_metadata_amount(inode, lblock); 1113 md_needed = EXT4_NUM_B2C(sbi,
1114 ext4_calc_metadata_amount(inode, lblock));
1058 trace_ext4_da_reserve_space(inode, md_needed); 1115 trace_ext4_da_reserve_space(inode, md_needed);
1059 spin_unlock(&ei->i_block_reservation_lock); 1116 spin_unlock(&ei->i_block_reservation_lock);
1060 1117
@@ -1063,15 +1120,15 @@ repeat:
1063 * us from metadata over-estimation, though we may go over by 1120 * us from metadata over-estimation, though we may go over by
1064 * a small amount in the end. Here we just reserve for data. 1121 * a small amount in the end. Here we just reserve for data.
1065 */ 1122 */
1066 ret = dquot_reserve_block(inode, 1); 1123 ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
1067 if (ret) 1124 if (ret)
1068 return ret; 1125 return ret;
1069 /* 1126 /*
1070 * We do still charge estimated metadata to the sb though; 1127 * We do still charge estimated metadata to the sb though;
1071 * we cannot afford to run out of free blocks. 1128 * we cannot afford to run out of free blocks.
1072 */ 1129 */
1073 if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { 1130 if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
1074 dquot_release_reservation_block(inode, 1); 1131 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
1075 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1132 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1076 yield(); 1133 yield();
1077 goto repeat; 1134 goto repeat;
@@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1118 * We can release all of the reserved metadata blocks 1175 * We can release all of the reserved metadata blocks
1119 * only when we have written all of the delayed 1176 * only when we have written all of the delayed
1120 * allocation blocks. 1177 * allocation blocks.
1178 * Note that in case of bigalloc, i_reserved_meta_blocks,
1179 * i_reserved_data_blocks, etc. refer to number of clusters.
1121 */ 1180 */
1122 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 1181 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
1123 ei->i_reserved_meta_blocks); 1182 ei->i_reserved_meta_blocks);
1124 ei->i_reserved_meta_blocks = 0; 1183 ei->i_reserved_meta_blocks = 0;
1125 ei->i_da_metadata_calc_len = 0; 1184 ei->i_da_metadata_calc_len = 0;
1126 } 1185 }
1127 1186
1128 /* update fs dirty data blocks counter */ 1187 /* update fs dirty data blocks counter */
1129 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); 1188 percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
1130 1189
1131 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1190 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1132 1191
1133 dquot_release_reservation_block(inode, to_free); 1192 dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
1134} 1193}
1135 1194
1136static void ext4_da_page_release_reservation(struct page *page, 1195static void ext4_da_page_release_reservation(struct page *page,
@@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page,
1139 int to_release = 0; 1198 int to_release = 0;
1140 struct buffer_head *head, *bh; 1199 struct buffer_head *head, *bh;
1141 unsigned int curr_off = 0; 1200 unsigned int curr_off = 0;
1201 struct inode *inode = page->mapping->host;
1202 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1203 int num_clusters;
1142 1204
1143 head = page_buffers(page); 1205 head = page_buffers(page);
1144 bh = head; 1206 bh = head;
@@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page,
1148 if ((offset <= curr_off) && (buffer_delay(bh))) { 1210 if ((offset <= curr_off) && (buffer_delay(bh))) {
1149 to_release++; 1211 to_release++;
1150 clear_buffer_delay(bh); 1212 clear_buffer_delay(bh);
1213 clear_buffer_da_mapped(bh);
1151 } 1214 }
1152 curr_off = next_off; 1215 curr_off = next_off;
1153 } while ((bh = bh->b_this_page) != head); 1216 } while ((bh = bh->b_this_page) != head);
1154 ext4_da_release_space(page->mapping->host, to_release); 1217
1218 /* If we have released all the blocks belonging to a cluster, then we
1219 * need to release the reserved space for that cluster. */
1220 num_clusters = EXT4_NUM_B2C(sbi, to_release);
1221 while (num_clusters > 0) {
1222 ext4_fsblk_t lblk;
1223 lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
1224 ((num_clusters - 1) << sbi->s_cluster_bits);
1225 if (sbi->s_cluster_ratio == 1 ||
1226 !ext4_find_delalloc_cluster(inode, lblk, 1))
1227 ext4_da_release_space(inode, 1);
1228
1229 num_clusters--;
1230 }
1155} 1231}
1156 1232
1157/* 1233/*
@@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1253 clear_buffer_delay(bh); 1329 clear_buffer_delay(bh);
1254 bh->b_blocknr = pblock; 1330 bh->b_blocknr = pblock;
1255 } 1331 }
1332 if (buffer_da_mapped(bh))
1333 clear_buffer_da_mapped(bh);
1256 if (buffer_unwritten(bh) || 1334 if (buffer_unwritten(bh) ||
1257 buffer_mapped(bh)) 1335 buffer_mapped(bh))
1258 BUG_ON(bh->b_blocknr != pblock); 1336 BUG_ON(bh->b_blocknr != pblock);
@@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode)
1346{ 1424{
1347 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1425 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1348 printk(KERN_CRIT "Total free blocks count %lld\n", 1426 printk(KERN_CRIT "Total free blocks count %lld\n",
1349 ext4_count_free_blocks(inode->i_sb)); 1427 EXT4_C2B(EXT4_SB(inode->i_sb),
1428 ext4_count_free_clusters(inode->i_sb)));
1350 printk(KERN_CRIT "Free/Dirty block details\n"); 1429 printk(KERN_CRIT "Free/Dirty block details\n");
1351 printk(KERN_CRIT "free_blocks=%lld\n", 1430 printk(KERN_CRIT "free_blocks=%lld\n",
1352 (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); 1431 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1432 percpu_counter_sum(&sbi->s_freeclusters_counter)));
1353 printk(KERN_CRIT "dirty_blocks=%lld\n", 1433 printk(KERN_CRIT "dirty_blocks=%lld\n",
1354 (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 1434 (long long) EXT4_C2B(EXT4_SB(inode->i_sb),
1435 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
1355 printk(KERN_CRIT "Block reservation details\n"); 1436 printk(KERN_CRIT "Block reservation details\n");
1356 printk(KERN_CRIT "i_reserved_data_blocks=%u\n", 1437 printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
1357 EXT4_I(inode)->i_reserved_data_blocks); 1438 EXT4_I(inode)->i_reserved_data_blocks);
@@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1430 if (err == -EAGAIN) 1511 if (err == -EAGAIN)
1431 goto submit_io; 1512 goto submit_io;
1432 1513
1433 if (err == -ENOSPC && 1514 if (err == -ENOSPC && ext4_count_free_clusters(sb)) {
1434 ext4_count_free_blocks(sb)) {
1435 mpd->retval = err; 1515 mpd->retval = err;
1436 goto submit_io; 1516 goto submit_io;
1437 } 1517 }
@@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
1471 1551
1472 for (i = 0; i < map.m_len; i++) 1552 for (i = 0; i < map.m_len; i++)
1473 unmap_underlying_metadata(bdev, map.m_pblk + i); 1553 unmap_underlying_metadata(bdev, map.m_pblk + i);
1474 }
1475 1554
1476 if (ext4_should_order_data(mpd->inode)) { 1555 if (ext4_should_order_data(mpd->inode)) {
1477 err = ext4_jbd2_file_inode(handle, mpd->inode); 1556 err = ext4_jbd2_file_inode(handle, mpd->inode);
1478 if (err) 1557 if (err) {
1479 /* This only happens if the journal is aborted */ 1558 /* Only if the journal is aborted */
1480 return; 1559 mpd->retval = err;
1560 goto submit_io;
1561 }
1562 }
1481 } 1563 }
1482 1564
1483 /* 1565 /*
@@ -1584,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
1584} 1666}
1585 1667
1586/* 1668/*
1669 * This function is grabs code from the very beginning of
1670 * ext4_map_blocks, but assumes that the caller is from delayed write
1671 * time. This function looks up the requested blocks and sets the
1672 * buffer delay bit under the protection of i_data_sem.
1673 */
1674static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1675 struct ext4_map_blocks *map,
1676 struct buffer_head *bh)
1677{
1678 int retval;
1679 sector_t invalid_block = ~((sector_t) 0xffff);
1680
1681 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1682 invalid_block = ~0;
1683
1684 map->m_flags = 0;
1685 ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u,"
1686 "logical block %lu\n", inode->i_ino, map->m_len,
1687 (unsigned long) map->m_lblk);
1688 /*
1689 * Try to see if we can get the block without requesting a new
1690 * file system block.
1691 */
1692 down_read((&EXT4_I(inode)->i_data_sem));
1693 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
1694 retval = ext4_ext_map_blocks(NULL, inode, map, 0);
1695 else
1696 retval = ext4_ind_map_blocks(NULL, inode, map, 0);
1697
1698 if (retval == 0) {
1699 /*
1700 * XXX: __block_prepare_write() unmaps passed block,
1701 * is it OK?
1702 */
1703 /* If the block was allocated from previously allocated cluster,
1704 * then we dont need to reserve it again. */
1705 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1706 retval = ext4_da_reserve_space(inode, iblock);
1707 if (retval)
1708 /* not enough space to reserve */
1709 goto out_unlock;
1710 }
1711
1712 /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served
1713 * and it should not appear on the bh->b_state.
1714 */
1715 map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
1716
1717 map_bh(bh, inode->i_sb, invalid_block);
1718 set_buffer_new(bh);
1719 set_buffer_delay(bh);
1720 }
1721
1722out_unlock:
1723 up_read((&EXT4_I(inode)->i_data_sem));
1724
1725 return retval;
1726}
1727
1728/*
1587 * This is a special get_blocks_t callback which is used by 1729 * This is a special get_blocks_t callback which is used by
1588 * ext4_da_write_begin(). It will either return mapped block or 1730 * ext4_da_write_begin(). It will either return mapped block or
1589 * reserve space for a single block. 1731 * reserve space for a single block.
@@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1600{ 1742{
1601 struct ext4_map_blocks map; 1743 struct ext4_map_blocks map;
1602 int ret = 0; 1744 int ret = 0;
1603 sector_t invalid_block = ~((sector_t) 0xffff);
1604
1605 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1606 invalid_block = ~0;
1607 1745
1608 BUG_ON(create == 0); 1746 BUG_ON(create == 0);
1609 BUG_ON(bh->b_size != inode->i_sb->s_blocksize); 1747 BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
@@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1616 * preallocated blocks are unmapped but should treated 1754 * preallocated blocks are unmapped but should treated
1617 * the same as allocated blocks. 1755 * the same as allocated blocks.
1618 */ 1756 */
1619 ret = ext4_map_blocks(NULL, inode, &map, 0); 1757 ret = ext4_da_map_blocks(inode, iblock, &map, bh);
1620 if (ret < 0) 1758 if (ret <= 0)
1621 return ret; 1759 return ret;
1622 if (ret == 0) {
1623 if (buffer_delay(bh))
1624 return 0; /* Not sure this could or should happen */
1625 /*
1626 * XXX: __block_write_begin() unmaps passed block, is it OK?
1627 */
1628 ret = ext4_da_reserve_space(inode, iblock);
1629 if (ret)
1630 /* not enough space to reserve */
1631 return ret;
1632
1633 map_bh(bh, inode->i_sb, invalid_block);
1634 set_buffer_new(bh);
1635 set_buffer_delay(bh);
1636 return 0;
1637 }
1638 1760
1639 map_bh(bh, inode->i_sb, map.m_pblk); 1761 map_bh(bh, inode->i_sb, map.m_pblk);
1640 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 1762 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
@@ -1811,8 +1933,12 @@ static int ext4_writepage(struct page *page,
1811 * We don't want to do block allocation, so redirty 1933 * We don't want to do block allocation, so redirty
1812 * the page and return. We may reach here when we do 1934 * the page and return. We may reach here when we do
1813 * a journal commit via journal_submit_inode_data_buffers. 1935 * a journal commit via journal_submit_inode_data_buffers.
1814 * We can also reach here via shrink_page_list 1936 * We can also reach here via shrink_page_list but it
1937 * should never be for direct reclaim so warn if that
1938 * happens
1815 */ 1939 */
1940 WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
1941 PF_MEMALLOC);
1816 goto redirty_page; 1942 goto redirty_page;
1817 } 1943 }
1818 if (commit_write) 1944 if (commit_write)
@@ -2046,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2046 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2172 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2047 pgoff_t done_index = 0; 2173 pgoff_t done_index = 0;
2048 pgoff_t end; 2174 pgoff_t end;
2175 struct blk_plug plug;
2049 2176
2050 trace_ext4_da_writepages(inode, wbc); 2177 trace_ext4_da_writepages(inode, wbc);
2051 2178
@@ -2124,6 +2251,7 @@ retry:
2124 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2251 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2125 tag_pages_for_writeback(mapping, index, end); 2252 tag_pages_for_writeback(mapping, index, end);
2126 2253
2254 blk_start_plug(&plug);
2127 while (!ret && wbc->nr_to_write > 0) { 2255 while (!ret && wbc->nr_to_write > 0) {
2128 2256
2129 /* 2257 /*
@@ -2174,11 +2302,12 @@ retry:
2174 ret = 0; 2302 ret = 0;
2175 } else if (ret == MPAGE_DA_EXTENT_TAIL) { 2303 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2176 /* 2304 /*
2177 * got one extent now try with 2305 * Got one extent now try with rest of the pages.
2178 * rest of the pages 2306 * If mpd.retval is set -EIO, journal is aborted.
2307 * So we don't need to write any more.
2179 */ 2308 */
2180 pages_written += mpd.pages_written; 2309 pages_written += mpd.pages_written;
2181 ret = 0; 2310 ret = mpd.retval;
2182 io_done = 1; 2311 io_done = 1;
2183 } else if (wbc->nr_to_write) 2312 } else if (wbc->nr_to_write)
2184 /* 2313 /*
@@ -2188,6 +2317,7 @@ retry:
2188 */ 2317 */
2189 break; 2318 break;
2190 } 2319 }
2320 blk_finish_plug(&plug);
2191 if (!io_done && !cycled) { 2321 if (!io_done && !cycled) {
2192 cycled = 1; 2322 cycled = 1;
2193 index = 0; 2323 index = 0;
@@ -2226,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb)
2226 * Delalloc need an accurate free block accounting. So switch 2356 * Delalloc need an accurate free block accounting. So switch
2227 * to non delalloc when we are near to error range. 2357 * to non delalloc when we are near to error range.
2228 */ 2358 */
2229 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 2359 free_blocks = EXT4_C2B(sbi,
2230 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); 2360 percpu_counter_read_positive(&sbi->s_freeclusters_counter));
2361 dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
2231 if (2 * free_blocks < 3 * dirty_blocks || 2362 if (2 * free_blocks < 3 * dirty_blocks ||
2232 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 2363 free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
2233 /* 2364 /*
2234 * free block count is less than 150% of dirty blocks 2365 * free block count is less than 150% of dirty blocks
2235 * or free blocks is less than watermark 2366 * or free blocks is less than watermark
@@ -2241,7 +2372,7 @@ static int ext4_nonda_switch(struct super_block *sb)
2241 * start pushing delalloc when 1/2 of free blocks are dirty. 2372 * start pushing delalloc when 1/2 of free blocks are dirty.
2242 */ 2373 */
2243 if (free_blocks < 2 * dirty_blocks) 2374 if (free_blocks < 2 * dirty_blocks)
2244 writeback_inodes_sb_if_idle(sb); 2375 writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
2245 2376
2246 return 0; 2377 return 0;
2247} 2378}
@@ -2255,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2255 pgoff_t index; 2386 pgoff_t index;
2256 struct inode *inode = mapping->host; 2387 struct inode *inode = mapping->host;
2257 handle_t *handle; 2388 handle_t *handle;
2389 loff_t page_len;
2258 2390
2259 index = pos >> PAGE_CACHE_SHIFT; 2391 index = pos >> PAGE_CACHE_SHIFT;
2260 2392
@@ -2301,6 +2433,13 @@ retry:
2301 */ 2433 */
2302 if (pos + len > inode->i_size) 2434 if (pos + len > inode->i_size)
2303 ext4_truncate_failed_write(inode); 2435 ext4_truncate_failed_write(inode);
2436 } else {
2437 page_len = pos & (PAGE_CACHE_SIZE - 1);
2438 if (page_len > 0) {
2439 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2440 inode, page, pos - page_len, page_len,
2441 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2442 }
2304 } 2443 }
2305 2444
2306 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2445 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2343,6 +2482,7 @@ static int ext4_da_write_end(struct file *file,
2343 loff_t new_i_size; 2482 loff_t new_i_size;
2344 unsigned long start, end; 2483 unsigned long start, end;
2345 int write_mode = (int)(unsigned long)fsdata; 2484 int write_mode = (int)(unsigned long)fsdata;
2485 loff_t page_len;
2346 2486
2347 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2487 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2348 if (ext4_should_order_data(inode)) { 2488 if (ext4_should_order_data(inode)) {
@@ -2391,6 +2531,16 @@ static int ext4_da_write_end(struct file *file,
2391 } 2531 }
2392 ret2 = generic_write_end(file, mapping, pos, len, copied, 2532 ret2 = generic_write_end(file, mapping, pos, len, copied,
2393 page, fsdata); 2533 page, fsdata);
2534
2535 page_len = PAGE_CACHE_SIZE -
2536 ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
2537
2538 if (page_len > 0) {
2539 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2540 inode, page, pos + copied - 1, page_len,
2541 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2542 }
2543
2394 copied = ret2; 2544 copied = ret2;
2395 if (ret2 < 0) 2545 if (ret2 < 0)
2396 ret = ret2; 2546 ret = ret2;
@@ -2685,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
2685 * but being more careful is always safe for the future change. 2835 * but being more careful is always safe for the future change.
2686 */ 2836 */
2687 inode = io_end->inode; 2837 inode = io_end->inode;
2688 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2838 ext4_set_io_unwritten_flag(inode, io_end);
2689 io_end->flag |= EXT4_IO_END_UNWRITTEN;
2690 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
2691 }
2692 2839
2693 /* Add the io_end to per-inode completed io list*/ 2840 /* Add the io_end to per-inode completed io list*/
2694 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 2841 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -2854,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
2854 struct inode *inode = file->f_mapping->host; 3001 struct inode *inode = file->f_mapping->host;
2855 ssize_t ret; 3002 ssize_t ret;
2856 3003
3004 /*
3005 * If we are doing data journalling we don't support O_DIRECT
3006 */
3007 if (ext4_should_journal_data(inode))
3008 return 0;
3009
2857 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); 3010 trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
2858 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3011 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
2859 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); 3012 ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -2923,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = {
2923 .bmap = ext4_bmap, 3076 .bmap = ext4_bmap,
2924 .invalidatepage = ext4_invalidatepage, 3077 .invalidatepage = ext4_invalidatepage,
2925 .releasepage = ext4_releasepage, 3078 .releasepage = ext4_releasepage,
3079 .direct_IO = ext4_direct_IO,
2926 .is_partially_uptodate = block_is_partially_uptodate, 3080 .is_partially_uptodate = block_is_partially_uptodate,
2927 .error_remove_page = generic_error_remove_page, 3081 .error_remove_page = generic_error_remove_page,
2928}; 3082};
@@ -2959,6 +3113,227 @@ void ext4_set_aops(struct inode *inode)
2959 inode->i_mapping->a_ops = &ext4_journalled_aops; 3113 inode->i_mapping->a_ops = &ext4_journalled_aops;
2960} 3114}
2961 3115
3116
3117/*
3118 * ext4_discard_partial_page_buffers()
3119 * Wrapper function for ext4_discard_partial_page_buffers_no_lock.
3120 * This function finds and locks the page containing the offset
3121 * "from" and passes it to ext4_discard_partial_page_buffers_no_lock.
3122 * Calling functions that already have the page locked should call
3123 * ext4_discard_partial_page_buffers_no_lock directly.
3124 */
3125int ext4_discard_partial_page_buffers(handle_t *handle,
3126 struct address_space *mapping, loff_t from,
3127 loff_t length, int flags)
3128{
3129 struct inode *inode = mapping->host;
3130 struct page *page;
3131 int err = 0;
3132
3133 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3134 mapping_gfp_mask(mapping) & ~__GFP_FS);
3135 if (!page)
3136 return -ENOMEM;
3137
3138 err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page,
3139 from, length, flags);
3140
3141 unlock_page(page);
3142 page_cache_release(page);
3143 return err;
3144}
3145
3146/*
3147 * ext4_discard_partial_page_buffers_no_lock()
3148 * Zeros a page range of length 'length' starting from offset 'from'.
3149 * Buffer heads that correspond to the block aligned regions of the
3150 * zeroed range will be unmapped. Unblock aligned regions
3151 * will have the corresponding buffer head mapped if needed so that
3152 * that region of the page can be updated with the partial zero out.
3153 *
3154 * This function assumes that the page has already been locked. The
3155 * The range to be discarded must be contained with in the given page.
3156 * If the specified range exceeds the end of the page it will be shortened
3157 * to the end of the page that corresponds to 'from'. This function is
3158 * appropriate for updating a page and it buffer heads to be unmapped and
3159 * zeroed for blocks that have been either released, or are going to be
3160 * released.
3161 *
3162 * handle: The journal handle
3163 * inode: The files inode
3164 * page: A locked page that contains the offset "from"
3165 * from: The starting byte offset (from the begining of the file)
3166 * to begin discarding
3167 * len: The length of bytes to discard
3168 * flags: Optional flags that may be used:
3169 *
3170 * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED
3171 * Only zero the regions of the page whose buffer heads
3172 * have already been unmapped. This flag is appropriate
3173 * for updateing the contents of a page whose blocks may
3174 * have already been released, and we only want to zero
3175 * out the regions that correspond to those released blocks.
3176 *
3177 * Returns zero on sucess or negative on failure.
3178 */
3179int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3180 struct inode *inode, struct page *page, loff_t from,
3181 loff_t length, int flags)
3182{
3183 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3184 unsigned int offset = from & (PAGE_CACHE_SIZE-1);
3185 unsigned int blocksize, max, pos;
3186 ext4_lblk_t iblock;
3187 struct buffer_head *bh;
3188 int err = 0;
3189
3190 blocksize = inode->i_sb->s_blocksize;
3191 max = PAGE_CACHE_SIZE - offset;
3192
3193 if (index != page->index)
3194 return -EINVAL;
3195
3196 /*
3197 * correct length if it does not fall between
3198 * 'from' and the end of the page
3199 */
3200 if (length > max || length < 0)
3201 length = max;
3202
3203 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3204
3205 if (!page_has_buffers(page)) {
3206 /*
3207 * If the range to be discarded covers a partial block
3208 * we need to get the page buffers. This is because
3209 * partial blocks cannot be released and the page needs
3210 * to be updated with the contents of the block before
3211 * we write the zeros on top of it.
3212 */
3213 if ((from & (blocksize - 1)) ||
3214 ((from + length) & (blocksize - 1))) {
3215 create_empty_buffers(page, blocksize, 0);
3216 } else {
3217 /*
3218 * If there are no partial blocks,
3219 * there is nothing to update,
3220 * so we can return now
3221 */
3222 return 0;
3223 }
3224 }
3225
3226 /* Find the buffer that contains "offset" */
3227 bh = page_buffers(page);
3228 pos = blocksize;
3229 while (offset >= pos) {
3230 bh = bh->b_this_page;
3231 iblock++;
3232 pos += blocksize;
3233 }
3234
3235 pos = offset;
3236 while (pos < offset + length) {
3237 unsigned int end_of_block, range_to_discard;
3238
3239 err = 0;
3240
3241 /* The length of space left to zero and unmap */
3242 range_to_discard = offset + length - pos;
3243
3244 /* The length of space until the end of the block */
3245 end_of_block = blocksize - (pos & (blocksize-1));
3246
3247 /*
3248 * Do not unmap or zero past end of block
3249 * for this buffer head
3250 */
3251 if (range_to_discard > end_of_block)
3252 range_to_discard = end_of_block;
3253
3254
3255 /*
3256 * Skip this buffer head if we are only zeroing unampped
3257 * regions of the page
3258 */
3259 if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED &&
3260 buffer_mapped(bh))
3261 goto next;
3262
3263 /* If the range is block aligned, unmap */
3264 if (range_to_discard == blocksize) {
3265 clear_buffer_dirty(bh);
3266 bh->b_bdev = NULL;
3267 clear_buffer_mapped(bh);
3268 clear_buffer_req(bh);
3269 clear_buffer_new(bh);
3270 clear_buffer_delay(bh);
3271 clear_buffer_unwritten(bh);
3272 clear_buffer_uptodate(bh);
3273 zero_user(page, pos, range_to_discard);
3274 BUFFER_TRACE(bh, "Buffer discarded");
3275 goto next;
3276 }
3277
3278 /*
3279 * If this block is not completely contained in the range
3280 * to be discarded, then it is not going to be released. Because
3281 * we need to keep this block, we need to make sure this part
3282 * of the page is uptodate before we modify it by writeing
3283 * partial zeros on it.
3284 */
3285 if (!buffer_mapped(bh)) {
3286 /*
3287 * Buffer head must be mapped before we can read
3288 * from the block
3289 */
3290 BUFFER_TRACE(bh, "unmapped");
3291 ext4_get_block(inode, iblock, bh, 0);
3292 /* unmapped? It's a hole - nothing to do */
3293 if (!buffer_mapped(bh)) {
3294 BUFFER_TRACE(bh, "still unmapped");
3295 goto next;
3296 }
3297 }
3298
3299 /* Ok, it's mapped. Make sure it's up-to-date */
3300 if (PageUptodate(page))
3301 set_buffer_uptodate(bh);
3302
3303 if (!buffer_uptodate(bh)) {
3304 err = -EIO;
3305 ll_rw_block(READ, 1, &bh);
3306 wait_on_buffer(bh);
3307 /* Uhhuh. Read error. Complain and punt.*/
3308 if (!buffer_uptodate(bh))
3309 goto next;
3310 }
3311
3312 if (ext4_should_journal_data(inode)) {
3313 BUFFER_TRACE(bh, "get write access");
3314 err = ext4_journal_get_write_access(handle, bh);
3315 if (err)
3316 goto next;
3317 }
3318
3319 zero_user(page, pos, range_to_discard);
3320
3321 err = 0;
3322 if (ext4_should_journal_data(inode)) {
3323 err = ext4_handle_dirty_metadata(handle, inode, bh);
3324 } else
3325 mark_buffer_dirty(bh);
3326
3327 BUFFER_TRACE(bh, "Partial buffer zeroed");
3328next:
3329 bh = bh->b_this_page;
3330 iblock++;
3331 pos += range_to_discard;
3332 }
3333
3334 return err;
3335}
3336
2962/* 3337/*
2963 * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 3338 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
2964 * up to the end of the block which corresponds to `from'. 3339 * up to the end of the block which corresponds to `from'.
@@ -3001,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle,
3001 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, 3376 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3002 mapping_gfp_mask(mapping) & ~__GFP_FS); 3377 mapping_gfp_mask(mapping) & ~__GFP_FS);
3003 if (!page) 3378 if (!page)
3004 return -EINVAL; 3379 return -ENOMEM;
3005 3380
3006 blocksize = inode->i_sb->s_blocksize; 3381 blocksize = inode->i_sb->s_blocksize;
3007 max = blocksize - (offset & (blocksize - 1)); 3382 max = blocksize - (offset & (blocksize - 1));
@@ -3070,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle,
3070 err = 0; 3445 err = 0;
3071 if (ext4_should_journal_data(inode)) { 3446 if (ext4_should_journal_data(inode)) {
3072 err = ext4_handle_dirty_metadata(handle, inode, bh); 3447 err = ext4_handle_dirty_metadata(handle, inode, bh);
3073 } else { 3448 } else
3074 if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode)
3075 err = ext4_jbd2_file_inode(handle, inode);
3076 mark_buffer_dirty(bh); 3449 mark_buffer_dirty(bh);
3077 }
3078 3450
3079unlock: 3451unlock:
3080 unlock_page(page); 3452 unlock_page(page);
@@ -3115,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3115 return -ENOTSUPP; 3487 return -ENOTSUPP;
3116 } 3488 }
3117 3489
3490 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
3491 /* TODO: Add support for bigalloc file systems */
3492 return -ENOTSUPP;
3493 }
3494
3118 return ext4_ext_punch_hole(file, offset, length); 3495 return ext4_ext_punch_hole(file, offset, length);
3119} 3496}
3120 3497
@@ -3414,7 +3791,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3414 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 3791 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
3415 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 3792 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
3416 } 3793 }
3417 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 3794 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
3418 3795
3419 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 3796 ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
3420 ei->i_dir_start_lookup = 0; 3797 ei->i_dir_start_lookup = 0;
@@ -4416,6 +4793,7 @@ retry_alloc:
4416 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { 4793 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
4417 unlock_page(page); 4794 unlock_page(page);
4418 ret = VM_FAULT_SIGBUS; 4795 ret = VM_FAULT_SIGBUS;
4796 ext4_journal_stop(handle);
4419 goto out; 4797 goto out;
4420 } 4798 }
4421 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 4799 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f18bfe37aff8..a56796814d6a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -21,6 +21,7 @@
21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22{ 22{
23 struct inode *inode = filp->f_dentry->d_inode; 23 struct inode *inode = filp->f_dentry->d_inode;
24 struct super_block *sb = inode->i_sb;
24 struct ext4_inode_info *ei = EXT4_I(inode); 25 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 26 unsigned int flags;
26 27
@@ -173,33 +174,8 @@ setversion_out:
173 mnt_drop_write(filp->f_path.mnt); 174 mnt_drop_write(filp->f_path.mnt);
174 return err; 175 return err;
175 } 176 }
176#ifdef CONFIG_JBD2_DEBUG
177 case EXT4_IOC_WAIT_FOR_READONLY:
178 /*
179 * This is racy - by the time we're woken up and running,
180 * the superblock could be released. And the module could
181 * have been unloaded. So sue me.
182 *
183 * Returns 1 if it slept, else zero.
184 */
185 {
186 struct super_block *sb = inode->i_sb;
187 DECLARE_WAITQUEUE(wait, current);
188 int ret = 0;
189
190 set_current_state(TASK_INTERRUPTIBLE);
191 add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
192 if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
193 schedule();
194 ret = 1;
195 }
196 remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
197 return ret;
198 }
199#endif
200 case EXT4_IOC_GROUP_EXTEND: { 177 case EXT4_IOC_GROUP_EXTEND: {
201 ext4_fsblk_t n_blocks_count; 178 ext4_fsblk_t n_blocks_count;
202 struct super_block *sb = inode->i_sb;
203 int err, err2=0; 179 int err, err2=0;
204 180
205 err = ext4_resize_begin(sb); 181 err = ext4_resize_begin(sb);
@@ -209,6 +185,13 @@ setversion_out:
209 if (get_user(n_blocks_count, (__u32 __user *)arg)) 185 if (get_user(n_blocks_count, (__u32 __user *)arg))
210 return -EFAULT; 186 return -EFAULT;
211 187
188 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
189 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
190 ext4_msg(sb, KERN_ERR,
191 "Online resizing not supported with bigalloc");
192 return -EOPNOTSUPP;
193 }
194
212 err = mnt_want_write(filp->f_path.mnt); 195 err = mnt_want_write(filp->f_path.mnt);
213 if (err) 196 if (err)
214 return err; 197 return err;
@@ -250,6 +233,13 @@ setversion_out:
250 goto mext_out; 233 goto mext_out;
251 } 234 }
252 235
236 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
237 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
238 ext4_msg(sb, KERN_ERR,
239 "Online defrag not supported with bigalloc");
240 return -EOPNOTSUPP;
241 }
242
253 err = mnt_want_write(filp->f_path.mnt); 243 err = mnt_want_write(filp->f_path.mnt);
254 if (err) 244 if (err)
255 goto mext_out; 245 goto mext_out;
@@ -270,7 +260,6 @@ mext_out:
270 260
271 case EXT4_IOC_GROUP_ADD: { 261 case EXT4_IOC_GROUP_ADD: {
272 struct ext4_new_group_data input; 262 struct ext4_new_group_data input;
273 struct super_block *sb = inode->i_sb;
274 int err, err2=0; 263 int err, err2=0;
275 264
276 err = ext4_resize_begin(sb); 265 err = ext4_resize_begin(sb);
@@ -281,6 +270,13 @@ mext_out:
281 sizeof(input))) 270 sizeof(input)))
282 return -EFAULT; 271 return -EFAULT;
283 272
273 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
274 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
275 ext4_msg(sb, KERN_ERR,
276 "Online resizing not supported with bigalloc");
277 return -EOPNOTSUPP;
278 }
279
284 err = mnt_want_write(filp->f_path.mnt); 280 err = mnt_want_write(filp->f_path.mnt);
285 if (err) 281 if (err)
286 return err; 282 return err;
@@ -337,7 +333,6 @@ mext_out:
337 333
338 case FITRIM: 334 case FITRIM:
339 { 335 {
340 struct super_block *sb = inode->i_sb;
341 struct request_queue *q = bdev_get_queue(sb->s_bdev); 336 struct request_queue *q = bdev_get_queue(sb->s_bdev);
342 struct fstrim_range range; 337 struct fstrim_range range;
343 int ret = 0; 338 int ret = 0;
@@ -348,7 +343,14 @@ mext_out:
348 if (!blk_queue_discard(q)) 343 if (!blk_queue_discard(q))
349 return -EOPNOTSUPP; 344 return -EOPNOTSUPP;
350 345
351 if (copy_from_user(&range, (struct fstrim_range *)arg, 346 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
347 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
348 ext4_msg(sb, KERN_ERR,
349 "FITRIM not supported with bigalloc");
350 return -EOPNOTSUPP;
351 }
352
353 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
352 sizeof(range))) 354 sizeof(range)))
353 return -EFAULT; 355 return -EFAULT;
354 356
@@ -358,7 +360,7 @@ mext_out:
358 if (ret < 0) 360 if (ret < 0)
359 return ret; 361 return ret;
360 362
361 if (copy_to_user((struct fstrim_range *)arg, &range, 363 if (copy_to_user((struct fstrim_range __user *)arg, &range,
362 sizeof(range))) 364 sizeof(range)))
363 return -EFAULT; 365 return -EFAULT;
364 366
@@ -396,11 +398,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
396 case EXT4_IOC32_SETVERSION_OLD: 398 case EXT4_IOC32_SETVERSION_OLD:
397 cmd = EXT4_IOC_SETVERSION_OLD; 399 cmd = EXT4_IOC_SETVERSION_OLD;
398 break; 400 break;
399#ifdef CONFIG_JBD2_DEBUG
400 case EXT4_IOC32_WAIT_FOR_READONLY:
401 cmd = EXT4_IOC_WAIT_FOR_READONLY;
402 break;
403#endif
404 case EXT4_IOC32_GETRSVSZ: 401 case EXT4_IOC32_GETRSVSZ:
405 cmd = EXT4_IOC_GETRSVSZ; 402 cmd = EXT4_IOC_GETRSVSZ;
406 break; 403 break;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 17a5a57c415a..e2d8be8f28bf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -70,8 +70,8 @@
70 * 70 *
71 * pa_lstart -> the logical start block for this prealloc space 71 * pa_lstart -> the logical start block for this prealloc space
72 * pa_pstart -> the physical start block for this prealloc space 72 * pa_pstart -> the physical start block for this prealloc space
73 * pa_len -> length for this prealloc space 73 * pa_len -> length for this prealloc space (in clusters)
74 * pa_free -> free space available in this prealloc space 74 * pa_free -> free space available in this prealloc space (in clusters)
75 * 75 *
76 * The inode preallocation space is used looking at the _logical_ start 76 * The inode preallocation space is used looking at the _logical_ start
77 * block. If only the logical file block falls within the range of prealloc 77 * block. If only the logical file block falls within the range of prealloc
@@ -126,7 +126,8 @@
126 * list. In case of inode preallocation we follow a list of heuristics 126 * list. In case of inode preallocation we follow a list of heuristics
127 * based on file size. This can be found in ext4_mb_normalize_request. If 127 * based on file size. This can be found in ext4_mb_normalize_request. If
128 * we are doing a group prealloc we try to normalize the request to 128 * we are doing a group prealloc we try to normalize the request to
129 * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is 129 * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
130 * dependent on the cluster size; for non-bigalloc file systems, it is
130 * 512 blocks. This can be tuned via 131 * 512 blocks. This can be tuned via
131 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in 132 * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
132 * terms of number of blocks. If we have mounted the file system with -O 133 * terms of number of blocks. If we have mounted the file system with -O
@@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
459 ext4_fsblk_t blocknr; 460 ext4_fsblk_t blocknr;
460 461
461 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 462 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
462 blocknr += first + i; 463 blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
463 ext4_grp_locked_error(sb, e4b->bd_group, 464 ext4_grp_locked_error(sb, e4b->bd_group,
464 inode ? inode->i_ino : 0, 465 inode ? inode->i_ino : 0,
465 blocknr, 466 blocknr,
@@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
580 continue; 581 continue;
581 } 582 }
582 583
583 /* both bits in buddy2 must be 0 */ 584 /* both bits in buddy2 must be 1 */
584 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2)); 585 MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
585 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2)); 586 MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
586 587
@@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
653 ext4_grpblk_t chunk; 654 ext4_grpblk_t chunk;
654 unsigned short border; 655 unsigned short border;
655 656
656 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); 657 BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
657 658
658 border = 2 << sb->s_blocksize_bits; 659 border = 2 << sb->s_blocksize_bits;
659 660
@@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
705 void *buddy, void *bitmap, ext4_group_t group) 706 void *buddy, void *bitmap, ext4_group_t group)
706{ 707{
707 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 708 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
708 ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); 709 ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
709 ext4_grpblk_t i = 0; 710 ext4_grpblk_t i = 0;
710 ext4_grpblk_t first; 711 ext4_grpblk_t first;
711 ext4_grpblk_t len; 712 ext4_grpblk_t len;
@@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
734 735
735 if (free != grp->bb_free) { 736 if (free != grp->bb_free) {
736 ext4_grp_locked_error(sb, group, 0, 0, 737 ext4_grp_locked_error(sb, group, 0, 0,
737 "%u blocks in bitmap, %u in gd", 738 "%u clusters in bitmap, %u in gd",
738 free, grp->bb_free); 739 free, grp->bb_free);
739 /* 740 /*
740 * If we intent to continue, we consider group descritor 741 * If we intent to continue, we consider group descritor
@@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1339 ext4_fsblk_t blocknr; 1340 ext4_fsblk_t blocknr;
1340 1341
1341 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1342 blocknr += block; 1343 blocknr += EXT4_C2B(EXT4_SB(sb), block);
1343 ext4_grp_locked_error(sb, e4b->bd_group, 1344 ext4_grp_locked_error(sb, e4b->bd_group,
1344 inode ? inode->i_ino : 0, 1345 inode ? inode->i_ino : 0,
1345 blocknr, 1346 blocknr,
@@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1390{ 1391{
1391 int next = block; 1392 int next = block;
1392 int max; 1393 int max;
1393 int ord;
1394 void *buddy; 1394 void *buddy;
1395 1395
1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); 1396 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
1433 break; 1433 break;
1434 1434
1435 ord = mb_find_order_for_block(e4b, next); 1435 order = mb_find_order_for_block(e4b, next);
1436 1436
1437 order = ord;
1438 block = next >> order; 1437 block = next >> order;
1439 ex->fe_len += 1 << order; 1438 ex->fe_len += 1 << order;
1440 } 1439 }
@@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1624 struct ext4_free_extent *gex = &ac->ac_g_ex; 1623 struct ext4_free_extent *gex = &ac->ac_g_ex;
1625 1624
1626 BUG_ON(ex->fe_len <= 0); 1625 BUG_ON(ex->fe_len <= 0);
1627 BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1626 BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1628 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 1627 BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
1629 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); 1628 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1630 1629
1631 ac->ac_found++; 1630 ac->ac_found++;
@@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1823 1822
1824 while (free && ac->ac_status == AC_STATUS_CONTINUE) { 1823 while (free && ac->ac_status == AC_STATUS_CONTINUE) {
1825 i = mb_find_next_zero_bit(bitmap, 1824 i = mb_find_next_zero_bit(bitmap,
1826 EXT4_BLOCKS_PER_GROUP(sb), i); 1825 EXT4_CLUSTERS_PER_GROUP(sb), i);
1827 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { 1826 if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
1828 /* 1827 /*
1829 * IF we have corrupt bitmap, we won't find any 1828 * IF we have corrupt bitmap, we won't find any
1830 * free blocks even though group info says we 1829 * free blocks even though group info says we
1831 * we have free blocks 1830 * we have free blocks
1832 */ 1831 */
1833 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1832 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1834 "%d free blocks as per " 1833 "%d free clusters as per "
1835 "group info. But bitmap says 0", 1834 "group info. But bitmap says 0",
1836 free); 1835 free);
1837 break; 1836 break;
@@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1841 BUG_ON(ex.fe_len <= 0); 1840 BUG_ON(ex.fe_len <= 0);
1842 if (free < ex.fe_len) { 1841 if (free < ex.fe_len) {
1843 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1842 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
1844 "%d free blocks as per " 1843 "%d free clusters as per "
1845 "group info. But got %d blocks", 1844 "group info. But got %d blocks",
1846 free, ex.fe_len); 1845 free, ex.fe_len);
1847 /* 1846 /*
@@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1887 do_div(a, sbi->s_stripe); 1886 do_div(a, sbi->s_stripe);
1888 i = (a * sbi->s_stripe) - first_group_block; 1887 i = (a * sbi->s_stripe) - first_group_block;
1889 1888
1890 while (i < EXT4_BLOCKS_PER_GROUP(sb)) { 1889 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1891 if (!mb_test_bit(i, bitmap)) { 1890 if (!mb_test_bit(i, bitmap)) {
1892 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); 1891 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
1893 if (max >= sbi->s_stripe) { 1892 if (max >= sbi->s_stripe) {
@@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2252 */ 2251 */
2253 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2252 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2254 meta_group_info[i]->bb_free = 2253 meta_group_info[i]->bb_free =
2255 ext4_free_blocks_after_init(sb, group, desc); 2254 ext4_free_clusters_after_init(sb, group, desc);
2256 } else { 2255 } else {
2257 meta_group_info[i]->bb_free = 2256 meta_group_info[i]->bb_free =
2258 ext4_free_blks_count(sb, desc); 2257 ext4_free_group_clusters(sb, desc);
2259 } 2258 }
2260 2259
2261 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2260 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2473 sbi->s_mb_stats = MB_DEFAULT_STATS; 2472 sbi->s_mb_stats = MB_DEFAULT_STATS;
2474 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 2473 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2475 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 2474 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2476 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2475 /*
2476 * The default group preallocation is 512, which for 4k block
2477 * sizes translates to 2 megabytes. However for bigalloc file
2478 * systems, this is probably too big (i.e, if the cluster size
2479 * is 1 megabyte, then group preallocation size becomes half a
2480 * gigabyte!). As a default, we will keep a two megabyte
2481 * group pralloc size for cluster sizes up to 64k, and after
2482 * that, we will force a minimum group preallocation size of
2483 * 32 clusters. This translates to 8 megs when the cluster
2484 * size is 256k, and 32 megs when the cluster size is 1 meg,
2485 * which seems reasonable as a default.
2486 */
2487 sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
2488 sbi->s_cluster_bits, 32);
2477 /* 2489 /*
2478 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc 2490 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
2479 * to the lowest multiple of s_stripe which is bigger than 2491 * to the lowest multiple of s_stripe which is bigger than
@@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2490 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2502 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2491 if (sbi->s_locality_groups == NULL) { 2503 if (sbi->s_locality_groups == NULL) {
2492 ret = -ENOMEM; 2504 ret = -ENOMEM;
2493 goto out; 2505 goto out_free_groupinfo_slab;
2494 } 2506 }
2495 for_each_possible_cpu(i) { 2507 for_each_possible_cpu(i) {
2496 struct ext4_locality_group *lg; 2508 struct ext4_locality_group *lg;
@@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2503 2515
2504 /* init file for buddy data */ 2516 /* init file for buddy data */
2505 ret = ext4_mb_init_backend(sb); 2517 ret = ext4_mb_init_backend(sb);
2506 if (ret != 0) { 2518 if (ret != 0)
2507 goto out; 2519 goto out_free_locality_groups;
2508 }
2509 2520
2510 if (sbi->s_proc) 2521 if (sbi->s_proc)
2511 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2513 2524
2514 if (sbi->s_journal) 2525 if (sbi->s_journal)
2515 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2526 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2527
2528 return 0;
2529
2530out_free_locality_groups:
2531 free_percpu(sbi->s_locality_groups);
2532 sbi->s_locality_groups = NULL;
2533out_free_groupinfo_slab:
2534 ext4_groupinfo_destroy_slabs();
2516out: 2535out:
2517 if (ret) { 2536 kfree(sbi->s_mb_offsets);
2518 kfree(sbi->s_mb_offsets); 2537 sbi->s_mb_offsets = NULL;
2519 kfree(sbi->s_mb_maxs); 2538 kfree(sbi->s_mb_maxs);
2520 } 2539 sbi->s_mb_maxs = NULL;
2521 return ret; 2540 return ret;
2522} 2541}
2523 2542
@@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb)
2602} 2621}
2603 2622
2604static inline int ext4_issue_discard(struct super_block *sb, 2623static inline int ext4_issue_discard(struct super_block *sb,
2605 ext4_group_t block_group, ext4_grpblk_t block, int count) 2624 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2606{ 2625{
2607 ext4_fsblk_t discard_block; 2626 ext4_fsblk_t discard_block;
2608 2627
2609 discard_block = block + ext4_group_first_block_no(sb, block_group); 2628 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2629 ext4_group_first_block_no(sb, block_group));
2630 count = EXT4_C2B(EXT4_SB(sb), count);
2610 trace_ext4_discard_blocks(sb, 2631 trace_ext4_discard_blocks(sb,
2611 (unsigned long long) discard_block, count); 2632 (unsigned long long) discard_block, count);
2612 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2633 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2633 2654
2634 if (test_opt(sb, DISCARD)) 2655 if (test_opt(sb, DISCARD))
2635 ext4_issue_discard(sb, entry->group, 2656 ext4_issue_discard(sb, entry->group,
2636 entry->start_blk, entry->count); 2657 entry->start_cluster, entry->count);
2637 2658
2638 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2659 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2639 /* we expect to find existing buddy because it's pinned */ 2660 /* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2646 ext4_lock_group(sb, entry->group); 2667 ext4_lock_group(sb, entry->group);
2647 /* Take it out of per group rb tree */ 2668 /* Take it out of per group rb tree */
2648 rb_erase(&entry->node, &(db->bb_free_root)); 2669 rb_erase(&entry->node, &(db->bb_free_root));
2649 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); 2670 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
2650 2671
2651 /* 2672 /*
2652 * Clear the trimmed flag for the group so that the next 2673 * Clear the trimmed flag for the group so that the next
@@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void)
2752 */ 2773 */
2753static noinline_for_stack int 2774static noinline_for_stack int
2754ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2775ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2755 handle_t *handle, unsigned int reserv_blks) 2776 handle_t *handle, unsigned int reserv_clstrs)
2756{ 2777{
2757 struct buffer_head *bitmap_bh = NULL; 2778 struct buffer_head *bitmap_bh = NULL;
2758 struct ext4_group_desc *gdp; 2779 struct ext4_group_desc *gdp;
@@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2783 goto out_err; 2804 goto out_err;
2784 2805
2785 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, 2806 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2786 ext4_free_blks_count(sb, gdp)); 2807 ext4_free_group_clusters(sb, gdp));
2787 2808
2788 err = ext4_journal_get_write_access(handle, gdp_bh); 2809 err = ext4_journal_get_write_access(handle, gdp_bh);
2789 if (err) 2810 if (err)
@@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2791 2812
2792 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); 2813 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2793 2814
2794 len = ac->ac_b_ex.fe_len; 2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2795 if (!ext4_data_block_valid(sbi, block, len)) { 2816 if (!ext4_data_block_valid(sbi, block, len)) {
2796 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2797 "fs metadata\n", block, block+len); 2818 "fs metadata\n", block, block+len);
@@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2823 ac->ac_b_ex.fe_len); 2844 ac->ac_b_ex.fe_len);
2824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 2845 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 2846 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2826 ext4_free_blks_set(sb, gdp, 2847 ext4_free_group_clusters_set(sb, gdp,
2827 ext4_free_blocks_after_init(sb, 2848 ext4_free_clusters_after_init(sb,
2828 ac->ac_b_ex.fe_group, gdp)); 2849 ac->ac_b_ex.fe_group, gdp));
2829 } 2850 }
2830 len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; 2851 len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
2831 ext4_free_blks_set(sb, gdp, len); 2852 ext4_free_group_clusters_set(sb, gdp, len);
2832 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2853 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2833 2854
2834 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 2855 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
2835 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2856 percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
2836 /* 2857 /*
2837 * Now reduce the dirty block count also. Should not go negative 2858 * Now reduce the dirty block count also. Should not go negative
2838 */ 2859 */
2839 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2860 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2840 /* release all the reserved blocks if non delalloc */ 2861 /* release all the reserved blocks if non delalloc */
2841 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2862 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
2863 reserv_clstrs);
2842 2864
2843 if (sbi->s_log_groups_per_flex) { 2865 if (sbi->s_log_groups_per_flex) {
2844 ext4_group_t flex_group = ext4_flex_group(sbi, 2866 ext4_group_t flex_group = ext4_flex_group(sbi,
2845 ac->ac_b_ex.fe_group); 2867 ac->ac_b_ex.fe_group);
2846 atomic_sub(ac->ac_b_ex.fe_len, 2868 atomic_sub(ac->ac_b_ex.fe_len,
2847 &sbi->s_flex_groups[flex_group].free_blocks); 2869 &sbi->s_flex_groups[flex_group].free_clusters);
2848 } 2870 }
2849 2871
2850 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2872 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -2886,6 +2908,7 @@ static noinline_for_stack void
2886ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2908ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2887 struct ext4_allocation_request *ar) 2909 struct ext4_allocation_request *ar)
2888{ 2910{
2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2889 int bsbits, max; 2912 int bsbits, max;
2890 ext4_lblk_t end; 2913 ext4_lblk_t end;
2891 loff_t size, orig_size, start_off; 2914 loff_t size, orig_size, start_off;
@@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2916 2939
2917 /* first, let's learn actual file size 2940 /* first, let's learn actual file size
2918 * given current request is allocated */ 2941 * given current request is allocated */
2919 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 2942 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
2920 size = size << bsbits; 2943 size = size << bsbits;
2921 if (size < i_size_read(ac->ac_inode)) 2944 if (size < i_size_read(ac->ac_inode))
2922 size = i_size_read(ac->ac_inode); 2945 size = i_size_read(ac->ac_inode);
@@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2988 continue; 3011 continue;
2989 } 3012 }
2990 3013
2991 pa_end = pa->pa_lstart + pa->pa_len; 3014 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3015 pa->pa_len);
2992 3016
2993 /* PA must not overlap original request */ 3017 /* PA must not overlap original request */
2994 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || 3018 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3018 rcu_read_lock(); 3042 rcu_read_lock();
3019 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { 3043 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3020 ext4_lblk_t pa_end; 3044 ext4_lblk_t pa_end;
3045
3021 spin_lock(&pa->pa_lock); 3046 spin_lock(&pa->pa_lock);
3022 if (pa->pa_deleted == 0) { 3047 if (pa->pa_deleted == 0) {
3023 pa_end = pa->pa_lstart + pa->pa_len; 3048 pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
3049 pa->pa_len);
3024 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart)); 3050 BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
3025 } 3051 }
3026 spin_unlock(&pa->pa_lock); 3052 spin_unlock(&pa->pa_lock);
@@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3036 } 3062 }
3037 BUG_ON(start + size <= ac->ac_o_ex.fe_logical && 3063 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3038 start > ac->ac_o_ex.fe_logical); 3064 start > ac->ac_o_ex.fe_logical);
3039 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 3065 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
3040 3066
3041 /* now prepare goal request */ 3067 /* now prepare goal request */
3042 3068
3043 /* XXX: is it better to align blocks WRT to logical 3069 /* XXX: is it better to align blocks WRT to logical
3044 * placement or satisfy big request as is */ 3070 * placement or satisfy big request as is */
3045 ac->ac_g_ex.fe_logical = start; 3071 ac->ac_g_ex.fe_logical = start;
3046 ac->ac_g_ex.fe_len = size; 3072 ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
3047 3073
3048 /* define goal start in order to merge */ 3074 /* define goal start in order to merge */
3049 if (ar->pright && (ar->lright == (start + size))) { 3075 if (ar->pright && (ar->lright == (start + size))) {
@@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3112static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3138static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3113 struct ext4_prealloc_space *pa) 3139 struct ext4_prealloc_space *pa)
3114{ 3140{
3141 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3115 ext4_fsblk_t start; 3142 ext4_fsblk_t start;
3116 ext4_fsblk_t end; 3143 ext4_fsblk_t end;
3117 int len; 3144 int len;
3118 3145
3119 /* found preallocated blocks, use them */ 3146 /* found preallocated blocks, use them */
3120 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); 3147 start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
3121 end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len); 3148 end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
3122 len = end - start; 3149 start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
3150 len = EXT4_NUM_B2C(sbi, end - start);
3123 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group, 3151 ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
3124 &ac->ac_b_ex.fe_start); 3152 &ac->ac_b_ex.fe_start);
3125 ac->ac_b_ex.fe_len = len; 3153 ac->ac_b_ex.fe_len = len;
@@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3127 ac->ac_pa = pa; 3155 ac->ac_pa = pa;
3128 3156
3129 BUG_ON(start < pa->pa_pstart); 3157 BUG_ON(start < pa->pa_pstart);
3130 BUG_ON(start + len > pa->pa_pstart + pa->pa_len); 3158 BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
3131 BUG_ON(pa->pa_free < len); 3159 BUG_ON(pa->pa_free < len);
3132 pa->pa_free -= len; 3160 pa->pa_free -= len;
3133 3161
@@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
3193static noinline_for_stack int 3221static noinline_for_stack int
3194ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3222ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3195{ 3223{
3224 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
3196 int order, i; 3225 int order, i;
3197 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3226 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3198 struct ext4_locality_group *lg; 3227 struct ext4_locality_group *lg;
@@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3210 /* all fields in this condition don't change, 3239 /* all fields in this condition don't change,
3211 * so we can skip locking for them */ 3240 * so we can skip locking for them */
3212 if (ac->ac_o_ex.fe_logical < pa->pa_lstart || 3241 if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
3213 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) 3242 ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
3243 EXT4_C2B(sbi, pa->pa_len)))
3214 continue; 3244 continue;
3215 3245
3216 /* non-extent files can't have physical blocks past 2^32 */ 3246 /* non-extent files can't have physical blocks past 2^32 */
3217 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && 3247 if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
3218 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) 3248 (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
3249 EXT4_MAX_BLOCK_FILE_PHYS))
3219 continue; 3250 continue;
3220 3251
3221 /* found preallocated blocks, use them */ 3252 /* found preallocated blocks, use them */
@@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3291 3322
3292 while (n) { 3323 while (n) {
3293 entry = rb_entry(n, struct ext4_free_data, node); 3324 entry = rb_entry(n, struct ext4_free_data, node);
3294 ext4_set_bits(bitmap, entry->start_blk, entry->count); 3325 ext4_set_bits(bitmap, entry->start_cluster, entry->count);
3295 n = rb_next(n); 3326 n = rb_next(n);
3296 } 3327 }
3297 return; 3328 return;
@@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3312 ext4_group_t groupnr; 3343 ext4_group_t groupnr;
3313 ext4_grpblk_t start; 3344 ext4_grpblk_t start;
3314 int preallocated = 0; 3345 int preallocated = 0;
3315 int count = 0;
3316 int len; 3346 int len;
3317 3347
3318 /* all form of preallocation discards first load group, 3348 /* all form of preallocation discards first load group,
@@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3335 BUG_ON(groupnr != group); 3365 BUG_ON(groupnr != group);
3336 ext4_set_bits(bitmap, start, len); 3366 ext4_set_bits(bitmap, start, len);
3337 preallocated += len; 3367 preallocated += len;
3338 count++;
3339 } 3368 }
3340 mb_debug(1, "prellocated %u for group %u\n", preallocated, group); 3369 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3341} 3370}
@@ -3412,6 +3441,7 @@ static noinline_for_stack int
3412ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3441ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3413{ 3442{
3414 struct super_block *sb = ac->ac_sb; 3443 struct super_block *sb = ac->ac_sb;
3444 struct ext4_sb_info *sbi = EXT4_SB(sb);
3415 struct ext4_prealloc_space *pa; 3445 struct ext4_prealloc_space *pa;
3416 struct ext4_group_info *grp; 3446 struct ext4_group_info *grp;
3417 struct ext4_inode_info *ei; 3447 struct ext4_inode_info *ei;
@@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3443 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; 3473 winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
3444 3474
3445 /* also, we should cover whole original request */ 3475 /* also, we should cover whole original request */
3446 wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len; 3476 wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
3447 3477
3448 /* the smallest one defines real window */ 3478 /* the smallest one defines real window */
3449 win = min(winl, wins); 3479 win = min(winl, wins);
3450 3480
3451 offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len; 3481 offs = ac->ac_o_ex.fe_logical %
3482 EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
3452 if (offs && offs < win) 3483 if (offs && offs < win)
3453 win = offs; 3484 win = offs;
3454 3485
3455 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win; 3486 ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
3487 EXT4_B2C(sbi, win);
3456 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); 3488 BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
3457 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); 3489 BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
3458 } 3490 }
@@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3477 trace_ext4_mb_new_inode_pa(ac, pa); 3509 trace_ext4_mb_new_inode_pa(ac, pa);
3478 3510
3479 ext4_mb_use_inode_pa(ac, pa); 3511 ext4_mb_use_inode_pa(ac, pa);
3480 atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); 3512 atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
3481 3513
3482 ei = EXT4_I(ac->ac_inode); 3514 ei = EXT4_I(ac->ac_inode);
3483 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); 3515 grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3592 3624
3593 BUG_ON(pa->pa_deleted == 0); 3625 BUG_ON(pa->pa_deleted == 0);
3594 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3626 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3595 grp_blk_start = pa->pa_pstart - bit; 3627 grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
3596 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3628 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3597 end = bit + pa->pa_len; 3629 end = bit + pa->pa_len;
3598 3630
@@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3607 free += next - bit; 3639 free += next - bit;
3608 3640
3609 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit); 3641 trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
3610 trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit, 3642 trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
3643 EXT4_C2B(sbi, bit)),
3611 next - bit); 3644 next - bit);
3612 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3645 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3613 bit = next + 1; 3646 bit = next + 1;
@@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3690 } 3723 }
3691 3724
3692 if (needed == 0) 3725 if (needed == 0)
3693 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3726 needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
3694 3727
3695 INIT_LIST_HEAD(&list); 3728 INIT_LIST_HEAD(&list);
3696repeat: 3729repeat:
@@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3958 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) 3991 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3959 return; 3992 return;
3960 3993
3961 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 3994 size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
3962 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) 3995 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
3963 >> bsbits; 3996 >> bsbits;
3964 3997
@@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3969 return; 4002 return;
3970 } 4003 }
3971 4004
4005 if (sbi->s_mb_group_prealloc <= 0) {
4006 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4007 return;
4008 }
4009
3972 /* don't use group allocation for large files */ 4010 /* don't use group allocation for large files */
3973 size = max(size, isize); 4011 size = max(size, isize);
3974 if (size > sbi->s_mb_stream_request) { 4012 if (size > sbi->s_mb_stream_request) {
@@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4007 len = ar->len; 4045 len = ar->len;
4008 4046
4009 /* just a dirty hack to filter too big requests */ 4047 /* just a dirty hack to filter too big requests */
4010 if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10) 4048 if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
4011 len = EXT4_BLOCKS_PER_GROUP(sb) - 10; 4049 len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
4012 4050
4013 /* start searching from the goal */ 4051 /* start searching from the goal */
4014 goal = ar->goal; 4052 goal = ar->goal;
@@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4019 4057
4020 /* set up allocation goals */ 4058 /* set up allocation goals */
4021 memset(ac, 0, sizeof(struct ext4_allocation_context)); 4059 memset(ac, 0, sizeof(struct ext4_allocation_context));
4022 ac->ac_b_ex.fe_logical = ar->logical; 4060 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4023 ac->ac_status = AC_STATUS_CONTINUE; 4061 ac->ac_status = AC_STATUS_CONTINUE;
4024 ac->ac_sb = sb; 4062 ac->ac_sb = sb;
4025 ac->ac_inode = ar->inode; 4063 ac->ac_inode = ar->inode;
4026 ac->ac_o_ex.fe_logical = ar->logical; 4064 ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
4027 ac->ac_o_ex.fe_group = group; 4065 ac->ac_o_ex.fe_group = group;
4028 ac->ac_o_ex.fe_start = block; 4066 ac->ac_o_ex.fe_start = block;
4029 ac->ac_o_ex.fe_len = len; 4067 ac->ac_o_ex.fe_len = len;
4030 ac->ac_g_ex.fe_logical = ar->logical; 4068 ac->ac_g_ex = ac->ac_o_ex;
4031 ac->ac_g_ex.fe_group = group;
4032 ac->ac_g_ex.fe_start = block;
4033 ac->ac_g_ex.fe_len = len;
4034 ac->ac_flags = ar->flags; 4069 ac->ac_flags = ar->flags;
4035 4070
4036 /* we have to define context: we'll we work with a file or 4071 /* we have to define context: we'll we work with a file or
@@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4182 */ 4217 */
4183static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4218static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4184{ 4219{
4220 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
4185 struct ext4_prealloc_space *pa = ac->ac_pa; 4221 struct ext4_prealloc_space *pa = ac->ac_pa;
4186 if (pa) { 4222 if (pa) {
4187 if (pa->pa_type == MB_GROUP_PA) { 4223 if (pa->pa_type == MB_GROUP_PA) {
4188 /* see comment in ext4_mb_use_group_pa() */ 4224 /* see comment in ext4_mb_use_group_pa() */
4189 spin_lock(&pa->pa_lock); 4225 spin_lock(&pa->pa_lock);
4190 pa->pa_pstart += ac->ac_b_ex.fe_len; 4226 pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4191 pa->pa_lstart += ac->ac_b_ex.fe_len; 4227 pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
4192 pa->pa_free -= ac->ac_b_ex.fe_len; 4228 pa->pa_free -= ac->ac_b_ex.fe_len;
4193 pa->pa_len -= ac->ac_b_ex.fe_len; 4229 pa->pa_len -= ac->ac_b_ex.fe_len;
4194 spin_unlock(&pa->pa_lock); 4230 spin_unlock(&pa->pa_lock);
@@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4249 struct super_block *sb; 4285 struct super_block *sb;
4250 ext4_fsblk_t block = 0; 4286 ext4_fsblk_t block = 0;
4251 unsigned int inquota = 0; 4287 unsigned int inquota = 0;
4252 unsigned int reserv_blks = 0; 4288 unsigned int reserv_clstrs = 0;
4253 4289
4254 sb = ar->inode->i_sb; 4290 sb = ar->inode->i_sb;
4255 sbi = EXT4_SB(sb); 4291 sbi = EXT4_SB(sb);
4256 4292
4257 trace_ext4_request_blocks(ar); 4293 trace_ext4_request_blocks(ar);
4258 4294
4295 /* Allow to use superuser reservation for quota file */
4296 if (IS_NOQUOTA(ar->inode))
4297 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4298
4259 /* 4299 /*
4260 * For delayed allocation, we could skip the ENOSPC and 4300 * For delayed allocation, we could skip the ENOSPC and
4261 * EDQUOT check, as blocks and quotas have been already 4301 * EDQUOT check, as blocks and quotas have been already
@@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4269 * and verify allocation doesn't exceed the quota limits. 4309 * and verify allocation doesn't exceed the quota limits.
4270 */ 4310 */
4271 while (ar->len && 4311 while (ar->len &&
4272 ext4_claim_free_blocks(sbi, ar->len, ar->flags)) { 4312 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4273 4313
4274 /* let others to free the space */ 4314 /* let others to free the space */
4275 yield(); 4315 yield();
@@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4279 *errp = -ENOSPC; 4319 *errp = -ENOSPC;
4280 return 0; 4320 return 0;
4281 } 4321 }
4282 reserv_blks = ar->len; 4322 reserv_clstrs = ar->len;
4283 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) { 4323 if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
4284 dquot_alloc_block_nofail(ar->inode, ar->len); 4324 dquot_alloc_block_nofail(ar->inode,
4325 EXT4_C2B(sbi, ar->len));
4285 } else { 4326 } else {
4286 while (ar->len && 4327 while (ar->len &&
4287 dquot_alloc_block(ar->inode, ar->len)) { 4328 dquot_alloc_block(ar->inode,
4329 EXT4_C2B(sbi, ar->len))) {
4288 4330
4289 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4331 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4290 ar->len--; 4332 ar->len--;
@@ -4328,7 +4370,7 @@ repeat:
4328 ext4_mb_new_preallocation(ac); 4370 ext4_mb_new_preallocation(ac);
4329 } 4371 }
4330 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4372 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4331 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); 4373 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
4332 if (*errp == -EAGAIN) { 4374 if (*errp == -EAGAIN) {
4333 /* 4375 /*
4334 * drop the reference that we took 4376 * drop the reference that we took
@@ -4364,13 +4406,13 @@ out:
4364 if (ac) 4406 if (ac)
4365 kmem_cache_free(ext4_ac_cachep, ac); 4407 kmem_cache_free(ext4_ac_cachep, ac);
4366 if (inquota && ar->len < inquota) 4408 if (inquota && ar->len < inquota)
4367 dquot_free_block(ar->inode, inquota - ar->len); 4409 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4368 if (!ar->len) { 4410 if (!ar->len) {
4369 if (!ext4_test_inode_state(ar->inode, 4411 if (!ext4_test_inode_state(ar->inode,
4370 EXT4_STATE_DELALLOC_RESERVED)) 4412 EXT4_STATE_DELALLOC_RESERVED))
4371 /* release all the reserved blocks if non delalloc */ 4413 /* release all the reserved blocks if non delalloc */
4372 percpu_counter_sub(&sbi->s_dirtyblocks_counter, 4414 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4373 reserv_blks); 4415 reserv_clstrs);
4374 } 4416 }
4375 4417
4376 trace_ext4_allocate_blocks(ar, (unsigned long long)block); 4418 trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1,
4388{ 4430{
4389 if ((entry1->t_tid == entry2->t_tid) && 4431 if ((entry1->t_tid == entry2->t_tid) &&
4390 (entry1->group == entry2->group) && 4432 (entry1->group == entry2->group) &&
4391 ((entry1->start_blk + entry1->count) == entry2->start_blk)) 4433 ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
4392 return 1; 4434 return 1;
4393 return 0; 4435 return 0;
4394} 4436}
@@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4398 struct ext4_free_data *new_entry) 4440 struct ext4_free_data *new_entry)
4399{ 4441{
4400 ext4_group_t group = e4b->bd_group; 4442 ext4_group_t group = e4b->bd_group;
4401 ext4_grpblk_t block; 4443 ext4_grpblk_t cluster;
4402 struct ext4_free_data *entry; 4444 struct ext4_free_data *entry;
4403 struct ext4_group_info *db = e4b->bd_info; 4445 struct ext4_group_info *db = e4b->bd_info;
4404 struct super_block *sb = e4b->bd_sb; 4446 struct super_block *sb = e4b->bd_sb;
@@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4411 BUG_ON(e4b->bd_buddy_page == NULL); 4453 BUG_ON(e4b->bd_buddy_page == NULL);
4412 4454
4413 new_node = &new_entry->node; 4455 new_node = &new_entry->node;
4414 block = new_entry->start_blk; 4456 cluster = new_entry->start_cluster;
4415 4457
4416 if (!*n) { 4458 if (!*n) {
4417 /* first free block exent. We need to 4459 /* first free block exent. We need to
@@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4425 while (*n) { 4467 while (*n) {
4426 parent = *n; 4468 parent = *n;
4427 entry = rb_entry(parent, struct ext4_free_data, node); 4469 entry = rb_entry(parent, struct ext4_free_data, node);
4428 if (block < entry->start_blk) 4470 if (cluster < entry->start_cluster)
4429 n = &(*n)->rb_left; 4471 n = &(*n)->rb_left;
4430 else if (block >= (entry->start_blk + entry->count)) 4472 else if (cluster >= (entry->start_cluster + entry->count))
4431 n = &(*n)->rb_right; 4473 n = &(*n)->rb_right;
4432 else { 4474 else {
4433 ext4_grp_locked_error(sb, group, 0, 4475 ext4_grp_locked_error(sb, group, 0,
4434 ext4_group_first_block_no(sb, group) + block, 4476 ext4_group_first_block_no(sb, group) +
4477 EXT4_C2B(sbi, cluster),
4435 "Block already on to-be-freed list"); 4478 "Block already on to-be-freed list");
4436 return 0; 4479 return 0;
4437 } 4480 }
@@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4445 if (node) { 4488 if (node) {
4446 entry = rb_entry(node, struct ext4_free_data, node); 4489 entry = rb_entry(node, struct ext4_free_data, node);
4447 if (can_merge(entry, new_entry)) { 4490 if (can_merge(entry, new_entry)) {
4448 new_entry->start_blk = entry->start_blk; 4491 new_entry->start_cluster = entry->start_cluster;
4449 new_entry->count += entry->count; 4492 new_entry->count += entry->count;
4450 rb_erase(node, &(db->bb_free_root)); 4493 rb_erase(node, &(db->bb_free_root));
4451 spin_lock(&sbi->s_md_lock); 4494 spin_lock(&sbi->s_md_lock);
@@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4496 ext4_group_t block_group; 4539 ext4_group_t block_group;
4497 struct ext4_sb_info *sbi; 4540 struct ext4_sb_info *sbi;
4498 struct ext4_buddy e4b; 4541 struct ext4_buddy e4b;
4542 unsigned int count_clusters;
4499 int err = 0; 4543 int err = 0;
4500 int ret; 4544 int ret;
4501 4545
@@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4544 if (!ext4_should_writeback_data(inode)) 4588 if (!ext4_should_writeback_data(inode))
4545 flags |= EXT4_FREE_BLOCKS_METADATA; 4589 flags |= EXT4_FREE_BLOCKS_METADATA;
4546 4590
4591 /*
4592 * If the extent to be freed does not begin on a cluster
4593 * boundary, we need to deal with partial clusters at the
4594 * beginning and end of the extent. Normally we will free
4595 * blocks at the beginning or the end unless we are explicitly
4596 * requested to avoid doing so.
4597 */
4598 overflow = block & (sbi->s_cluster_ratio - 1);
4599 if (overflow) {
4600 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4601 overflow = sbi->s_cluster_ratio - overflow;
4602 block += overflow;
4603 if (count > overflow)
4604 count -= overflow;
4605 else
4606 return;
4607 } else {
4608 block -= overflow;
4609 count += overflow;
4610 }
4611 }
4612 overflow = count & (sbi->s_cluster_ratio - 1);
4613 if (overflow) {
4614 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4615 if (count > overflow)
4616 count -= overflow;
4617 else
4618 return;
4619 } else
4620 count += sbi->s_cluster_ratio - overflow;
4621 }
4622
4547do_more: 4623do_more:
4548 overflow = 0; 4624 overflow = 0;
4549 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4625 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4552,10 +4628,12 @@ do_more:
4552 * Check to see if we are freeing blocks across a group 4628 * Check to see if we are freeing blocks across a group
4553 * boundary. 4629 * boundary.
4554 */ 4630 */
4555 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { 4631 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4556 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 4632 overflow = EXT4_C2B(sbi, bit) + count -
4633 EXT4_BLOCKS_PER_GROUP(sb);
4557 count -= overflow; 4634 count -= overflow;
4558 } 4635 }
4636 count_clusters = EXT4_B2C(sbi, count);
4559 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4637 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4560 if (!bitmap_bh) { 4638 if (!bitmap_bh) {
4561 err = -EIO; 4639 err = -EIO;
@@ -4570,9 +4648,9 @@ do_more:
4570 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4648 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4571 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4649 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4572 in_range(block, ext4_inode_table(sb, gdp), 4650 in_range(block, ext4_inode_table(sb, gdp),
4573 EXT4_SB(sb)->s_itb_per_group) || 4651 EXT4_SB(sb)->s_itb_per_group) ||
4574 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4652 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4575 EXT4_SB(sb)->s_itb_per_group)) { 4653 EXT4_SB(sb)->s_itb_per_group)) {
4576 4654
4577 ext4_error(sb, "Freeing blocks in system zone - " 4655 ext4_error(sb, "Freeing blocks in system zone - "
4578 "Block = %llu, count = %lu", block, count); 4656 "Block = %llu, count = %lu", block, count);
@@ -4597,11 +4675,11 @@ do_more:
4597#ifdef AGGRESSIVE_CHECK 4675#ifdef AGGRESSIVE_CHECK
4598 { 4676 {
4599 int i; 4677 int i;
4600 for (i = 0; i < count; i++) 4678 for (i = 0; i < count_clusters; i++)
4601 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4679 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4602 } 4680 }
4603#endif 4681#endif
4604 trace_ext4_mballoc_free(sb, inode, block_group, bit, count); 4682 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4605 4683
4606 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4684 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4607 if (err) 4685 if (err)
@@ -4618,13 +4696,13 @@ do_more:
4618 err = -ENOMEM; 4696 err = -ENOMEM;
4619 goto error_return; 4697 goto error_return;
4620 } 4698 }
4621 new_entry->start_blk = bit; 4699 new_entry->start_cluster = bit;
4622 new_entry->group = block_group; 4700 new_entry->group = block_group;
4623 new_entry->count = count; 4701 new_entry->count = count_clusters;
4624 new_entry->t_tid = handle->h_transaction->t_tid; 4702 new_entry->t_tid = handle->h_transaction->t_tid;
4625 4703
4626 ext4_lock_group(sb, block_group); 4704 ext4_lock_group(sb, block_group);
4627 mb_clear_bits(bitmap_bh->b_data, bit, count); 4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4628 ext4_mb_free_metadata(handle, &e4b, new_entry); 4706 ext4_mb_free_metadata(handle, &e4b, new_entry);
4629 } else { 4707 } else {
4630 /* need to update group_info->bb_free and bitmap 4708 /* need to update group_info->bb_free and bitmap
@@ -4632,25 +4710,29 @@ do_more:
4632 * them with group lock_held 4710 * them with group lock_held
4633 */ 4711 */
4634 ext4_lock_group(sb, block_group); 4712 ext4_lock_group(sb, block_group);
4635 mb_clear_bits(bitmap_bh->b_data, bit, count); 4713 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4636 mb_free_blocks(inode, &e4b, bit, count); 4714 mb_free_blocks(inode, &e4b, bit, count_clusters);
4637 } 4715 }
4638 4716
4639 ret = ext4_free_blks_count(sb, gdp) + count; 4717 ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
4640 ext4_free_blks_set(sb, gdp, ret); 4718 ext4_free_group_clusters_set(sb, gdp, ret);
4641 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4719 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4642 ext4_unlock_group(sb, block_group); 4720 ext4_unlock_group(sb, block_group);
4643 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4721 percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
4644 4722
4645 if (sbi->s_log_groups_per_flex) { 4723 if (sbi->s_log_groups_per_flex) {
4646 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4724 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4647 atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); 4725 atomic_add(count_clusters,
4726 &sbi->s_flex_groups[flex_group].free_clusters);
4648 } 4727 }
4649 4728
4650 ext4_mb_unload_buddy(&e4b); 4729 ext4_mb_unload_buddy(&e4b);
4651 4730
4652 freed += count; 4731 freed += count;
4653 4732
4733 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4734 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4735
4654 /* We dirtied the bitmap block */ 4736 /* We dirtied the bitmap block */
4655 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4737 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4656 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4738 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4669,8 +4751,6 @@ do_more:
4669 } 4751 }
4670 ext4_mark_super_dirty(sb); 4752 ext4_mark_super_dirty(sb);
4671error_return: 4753error_return:
4672 if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4673 dquot_free_block(inode, freed);
4674 brelse(bitmap_bh); 4754 brelse(bitmap_bh);
4675 ext4_std_error(sb, err); 4755 ext4_std_error(sb, err);
4676 return; 4756 return;
@@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4778 ext4_lock_group(sb, block_group); 4858 ext4_lock_group(sb, block_group);
4779 mb_clear_bits(bitmap_bh->b_data, bit, count); 4859 mb_clear_bits(bitmap_bh->b_data, bit, count);
4780 mb_free_blocks(NULL, &e4b, bit, count); 4860 mb_free_blocks(NULL, &e4b, bit, count);
4781 blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); 4861 blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
4782 ext4_free_blks_set(sb, desc, blk_free_count); 4862 ext4_free_group_clusters_set(sb, desc, blk_free_count);
4783 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 4863 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
4784 ext4_unlock_group(sb, block_group); 4864 ext4_unlock_group(sb, block_group);
4785 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); 4865 percpu_counter_add(&sbi->s_freeclusters_counter,
4866 EXT4_B2C(sbi, blocks_freed));
4786 4867
4787 if (sbi->s_log_groups_per_flex) { 4868 if (sbi->s_log_groups_per_flex) {
4788 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4869 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4789 atomic_add(blocks_freed, 4870 atomic_add(EXT4_B2C(sbi, blocks_freed),
4790 &sbi->s_flex_groups[flex_group].free_blocks); 4871 &sbi->s_flex_groups[flex_group].free_clusters);
4791 } 4872 }
4792 4873
4793 ext4_mb_unload_buddy(&e4b); 4874 ext4_mb_unload_buddy(&e4b);
@@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4948 struct ext4_group_info *grp; 5029 struct ext4_group_info *grp;
4949 ext4_group_t first_group, last_group; 5030 ext4_group_t first_group, last_group;
4950 ext4_group_t group, ngroups = ext4_get_groups_count(sb); 5031 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
4951 ext4_grpblk_t cnt = 0, first_block, last_block; 5032 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
4952 uint64_t start, len, minlen, trimmed = 0; 5033 uint64_t start, len, minlen, trimmed = 0;
4953 ext4_fsblk_t first_data_blk = 5034 ext4_fsblk_t first_data_blk =
4954 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 5035 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4958 len = range->len >> sb->s_blocksize_bits; 5039 len = range->len >> sb->s_blocksize_bits;
4959 minlen = range->minlen >> sb->s_blocksize_bits; 5040 minlen = range->minlen >> sb->s_blocksize_bits;
4960 5041
4961 if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb))) 5042 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
4962 return -EINVAL; 5043 return -EINVAL;
4963 if (start + len <= first_data_blk) 5044 if (start + len <= first_data_blk)
4964 goto out; 5045 goto out;
@@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4969 5050
4970 /* Determine first and last group to examine based on start and len */ 5051 /* Determine first and last group to examine based on start and len */
4971 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 5052 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
4972 &first_group, &first_block); 5053 &first_group, &first_cluster);
4973 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 5054 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
4974 &last_group, &last_block); 5055 &last_group, &last_cluster);
4975 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group; 5056 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
4976 last_block = EXT4_BLOCKS_PER_GROUP(sb); 5057 last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
4977 5058
4978 if (first_group > last_group) 5059 if (first_group > last_group)
4979 return -EINVAL; 5060 return -EINVAL;
@@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4993 * change it for the last group in which case start + 5074 * change it for the last group in which case start +
4994 * len < EXT4_BLOCKS_PER_GROUP(sb). 5075 * len < EXT4_BLOCKS_PER_GROUP(sb).
4995 */ 5076 */
4996 if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) 5077 if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
4997 last_block = first_block + len; 5078 last_cluster = first_cluster + len;
4998 len -= last_block - first_block; 5079 len -= last_cluster - first_cluster;
4999 5080
5000 if (grp->bb_free >= minlen) { 5081 if (grp->bb_free >= minlen) {
5001 cnt = ext4_trim_all_free(sb, group, first_block, 5082 cnt = ext4_trim_all_free(sb, group, first_cluster,
5002 last_block, minlen); 5083 last_cluster, minlen);
5003 if (cnt < 0) { 5084 if (cnt < 0) {
5004 ret = cnt; 5085 ret = cnt;
5005 break; 5086 break;
5006 } 5087 }
5007 } 5088 }
5008 trimmed += cnt; 5089 trimmed += cnt;
5009 first_block = 0; 5090 first_cluster = 0;
5010 } 5091 }
5011 range->len = trimmed * sb->s_blocksize; 5092 range->len = trimmed * sb->s_blocksize;
5012 5093
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 9d4a636b546c..47705f3285e3 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -106,7 +106,7 @@ struct ext4_free_data {
106 ext4_group_t group; 106 ext4_group_t group;
107 107
108 /* free block extent */ 108 /* free block extent */
109 ext4_grpblk_t start_blk; 109 ext4_grpblk_t start_cluster;
110 ext4_grpblk_t count; 110 ext4_grpblk_t count;
111 111
112 /* transaction which freed this extent */ 112 /* transaction which freed this extent */
@@ -139,9 +139,9 @@ enum {
139 139
140struct ext4_free_extent { 140struct ext4_free_extent {
141 ext4_lblk_t fe_logical; 141 ext4_lblk_t fe_logical;
142 ext4_grpblk_t fe_start; 142 ext4_grpblk_t fe_start; /* In cluster units */
143 ext4_group_t fe_group; 143 ext4_group_t fe_group;
144 ext4_grpblk_t fe_len; 144 ext4_grpblk_t fe_len; /* In cluster units */
145}; 145};
146 146
147/* 147/*
@@ -175,7 +175,7 @@ struct ext4_allocation_context {
175 /* the best found extent */ 175 /* the best found extent */
176 struct ext4_free_extent ac_b_ex; 176 struct ext4_free_extent ac_b_ex;
177 177
178 /* copy of the bext found extent taken before preallocation efforts */ 178 /* copy of the best found extent taken before preallocation efforts */
179 struct ext4_free_extent ac_f_ex; 179 struct ext4_free_extent ac_f_ex;
180 180
181 /* number of iterations done. we have to track to limit searching */ 181 /* number of iterations done. we have to track to limit searching */
@@ -216,6 +216,7 @@ struct ext4_buddy {
216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 216static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
217 struct ext4_free_extent *fex) 217 struct ext4_free_extent *fex)
218{ 218{
219 return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; 219 return ext4_group_first_block_no(sb, fex->fe_group) +
220 (fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
220} 221}
221#endif 222#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b57b98fb44d1..16ac228dbec6 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -15,19 +15,18 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include "ext4_jbd2.h" 17#include "ext4_jbd2.h"
18#include "ext4_extents.h"
19 18
20/* 19/*
21 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
22 * represented by a single extent 21 * represented by a single extent
23 */ 22 */
24struct list_blocks_struct { 23struct migrate_struct {
25 ext4_lblk_t first_block, last_block; 24 ext4_lblk_t first_block, last_block, curr_block;
26 ext4_fsblk_t first_pblock, last_pblock; 25 ext4_fsblk_t first_pblock, last_pblock;
27}; 26};
28 27
29static int finish_range(handle_t *handle, struct inode *inode, 28static int finish_range(handle_t *handle, struct inode *inode,
30 struct list_blocks_struct *lb) 29 struct migrate_struct *lb)
31 30
32{ 31{
33 int retval = 0, needed; 32 int retval = 0, needed;
@@ -87,8 +86,7 @@ err_out:
87} 86}
88 87
89static int update_extent_range(handle_t *handle, struct inode *inode, 88static int update_extent_range(handle_t *handle, struct inode *inode,
90 ext4_fsblk_t pblock, ext4_lblk_t blk_num, 89 ext4_fsblk_t pblock, struct migrate_struct *lb)
91 struct list_blocks_struct *lb)
92{ 90{
93 int retval; 91 int retval;
94 /* 92 /*
@@ -96,9 +94,10 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
96 */ 94 */
97 if (lb->first_pblock && 95 if (lb->first_pblock &&
98 (lb->last_pblock+1 == pblock) && 96 (lb->last_pblock+1 == pblock) &&
99 (lb->last_block+1 == blk_num)) { 97 (lb->last_block+1 == lb->curr_block)) {
100 lb->last_pblock = pblock; 98 lb->last_pblock = pblock;
101 lb->last_block = blk_num; 99 lb->last_block = lb->curr_block;
100 lb->curr_block++;
102 return 0; 101 return 0;
103 } 102 }
104 /* 103 /*
@@ -106,64 +105,49 @@ static int update_extent_range(handle_t *handle, struct inode *inode,
106 */ 105 */
107 retval = finish_range(handle, inode, lb); 106 retval = finish_range(handle, inode, lb);
108 lb->first_pblock = lb->last_pblock = pblock; 107 lb->first_pblock = lb->last_pblock = pblock;
109 lb->first_block = lb->last_block = blk_num; 108 lb->first_block = lb->last_block = lb->curr_block;
110 109 lb->curr_block++;
111 return retval; 110 return retval;
112} 111}
113 112
114static int update_ind_extent_range(handle_t *handle, struct inode *inode, 113static int update_ind_extent_range(handle_t *handle, struct inode *inode,
115 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 114 ext4_fsblk_t pblock,
116 struct list_blocks_struct *lb) 115 struct migrate_struct *lb)
117{ 116{
118 struct buffer_head *bh; 117 struct buffer_head *bh;
119 __le32 *i_data; 118 __le32 *i_data;
120 int i, retval = 0; 119 int i, retval = 0;
121 ext4_lblk_t blk_count = *blk_nump;
122 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 120 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
123 121
124 if (!pblock) {
125 /* Only update the file block number */
126 *blk_nump += max_entries;
127 return 0;
128 }
129
130 bh = sb_bread(inode->i_sb, pblock); 122 bh = sb_bread(inode->i_sb, pblock);
131 if (!bh) 123 if (!bh)
132 return -EIO; 124 return -EIO;
133 125
134 i_data = (__le32 *)bh->b_data; 126 i_data = (__le32 *)bh->b_data;
135 for (i = 0; i < max_entries; i++, blk_count++) { 127 for (i = 0; i < max_entries; i++) {
136 if (i_data[i]) { 128 if (i_data[i]) {
137 retval = update_extent_range(handle, inode, 129 retval = update_extent_range(handle, inode,
138 le32_to_cpu(i_data[i]), 130 le32_to_cpu(i_data[i]), lb);
139 blk_count, lb);
140 if (retval) 131 if (retval)
141 break; 132 break;
133 } else {
134 lb->curr_block++;
142 } 135 }
143 } 136 }
144
145 /* Update the file block number */
146 *blk_nump = blk_count;
147 put_bh(bh); 137 put_bh(bh);
148 return retval; 138 return retval;
149 139
150} 140}
151 141
152static int update_dind_extent_range(handle_t *handle, struct inode *inode, 142static int update_dind_extent_range(handle_t *handle, struct inode *inode,
153 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 143 ext4_fsblk_t pblock,
154 struct list_blocks_struct *lb) 144 struct migrate_struct *lb)
155{ 145{
156 struct buffer_head *bh; 146 struct buffer_head *bh;
157 __le32 *i_data; 147 __le32 *i_data;
158 int i, retval = 0; 148 int i, retval = 0;
159 ext4_lblk_t blk_count = *blk_nump;
160 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 149 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
161 150
162 if (!pblock) {
163 /* Only update the file block number */
164 *blk_nump += max_entries * max_entries;
165 return 0;
166 }
167 bh = sb_bread(inode->i_sb, pblock); 151 bh = sb_bread(inode->i_sb, pblock);
168 if (!bh) 152 if (!bh)
169 return -EIO; 153 return -EIO;
@@ -172,38 +156,28 @@ static int update_dind_extent_range(handle_t *handle, struct inode *inode,
172 for (i = 0; i < max_entries; i++) { 156 for (i = 0; i < max_entries; i++) {
173 if (i_data[i]) { 157 if (i_data[i]) {
174 retval = update_ind_extent_range(handle, inode, 158 retval = update_ind_extent_range(handle, inode,
175 le32_to_cpu(i_data[i]), 159 le32_to_cpu(i_data[i]), lb);
176 &blk_count, lb);
177 if (retval) 160 if (retval)
178 break; 161 break;
179 } else { 162 } else {
180 /* Only update the file block number */ 163 /* Only update the file block number */
181 blk_count += max_entries; 164 lb->curr_block += max_entries;
182 } 165 }
183 } 166 }
184
185 /* Update the file block number */
186 *blk_nump = blk_count;
187 put_bh(bh); 167 put_bh(bh);
188 return retval; 168 return retval;
189 169
190} 170}
191 171
192static int update_tind_extent_range(handle_t *handle, struct inode *inode, 172static int update_tind_extent_range(handle_t *handle, struct inode *inode,
193 ext4_fsblk_t pblock, ext4_lblk_t *blk_nump, 173 ext4_fsblk_t pblock,
194 struct list_blocks_struct *lb) 174 struct migrate_struct *lb)
195{ 175{
196 struct buffer_head *bh; 176 struct buffer_head *bh;
197 __le32 *i_data; 177 __le32 *i_data;
198 int i, retval = 0; 178 int i, retval = 0;
199 ext4_lblk_t blk_count = *blk_nump;
200 unsigned long max_entries = inode->i_sb->s_blocksize >> 2; 179 unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
201 180
202 if (!pblock) {
203 /* Only update the file block number */
204 *blk_nump += max_entries * max_entries * max_entries;
205 return 0;
206 }
207 bh = sb_bread(inode->i_sb, pblock); 181 bh = sb_bread(inode->i_sb, pblock);
208 if (!bh) 182 if (!bh)
209 return -EIO; 183 return -EIO;
@@ -212,16 +186,14 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
212 for (i = 0; i < max_entries; i++) { 186 for (i = 0; i < max_entries; i++) {
213 if (i_data[i]) { 187 if (i_data[i]) {
214 retval = update_dind_extent_range(handle, inode, 188 retval = update_dind_extent_range(handle, inode,
215 le32_to_cpu(i_data[i]), 189 le32_to_cpu(i_data[i]), lb);
216 &blk_count, lb);
217 if (retval) 190 if (retval)
218 break; 191 break;
219 } else 192 } else {
220 /* Only update the file block number */ 193 /* Only update the file block number */
221 blk_count += max_entries * max_entries; 194 lb->curr_block += max_entries * max_entries;
195 }
222 } 196 }
223 /* Update the file block number */
224 *blk_nump = blk_count;
225 put_bh(bh); 197 put_bh(bh);
226 return retval; 198 return retval;
227 199
@@ -462,12 +434,12 @@ int ext4_ext_migrate(struct inode *inode)
462 handle_t *handle; 434 handle_t *handle;
463 int retval = 0, i; 435 int retval = 0, i;
464 __le32 *i_data; 436 __le32 *i_data;
465 ext4_lblk_t blk_count = 0;
466 struct ext4_inode_info *ei; 437 struct ext4_inode_info *ei;
467 struct inode *tmp_inode = NULL; 438 struct inode *tmp_inode = NULL;
468 struct list_blocks_struct lb; 439 struct migrate_struct lb;
469 unsigned long max_entries; 440 unsigned long max_entries;
470 __u32 goal; 441 __u32 goal;
442 uid_t owner[2];
471 443
472 /* 444 /*
473 * If the filesystem does not support extents, or the inode 445 * If the filesystem does not support extents, or the inode
@@ -495,10 +467,12 @@ int ext4_ext_migrate(struct inode *inode)
495 } 467 }
496 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * 468 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
497 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; 469 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
470 owner[0] = inode->i_uid;
471 owner[1] = inode->i_gid;
498 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, 472 tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
499 S_IFREG, NULL, goal); 473 S_IFREG, NULL, goal, owner);
500 if (IS_ERR(tmp_inode)) { 474 if (IS_ERR(tmp_inode)) {
501 retval = -ENOMEM; 475 retval = PTR_ERR(inode);
502 ext4_journal_stop(handle); 476 ext4_journal_stop(handle);
503 return retval; 477 return retval;
504 } 478 }
@@ -507,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
507 * Set the i_nlink to zero so it will be deleted later 481 * Set the i_nlink to zero so it will be deleted later
508 * when we drop inode reference. 482 * when we drop inode reference.
509 */ 483 */
510 tmp_inode->i_nlink = 0; 484 clear_nlink(tmp_inode);
511 485
512 ext4_ext_tree_init(handle, tmp_inode); 486 ext4_ext_tree_init(handle, tmp_inode);
513 ext4_orphan_add(handle, tmp_inode); 487 ext4_orphan_add(handle, tmp_inode);
@@ -551,35 +525,32 @@ int ext4_ext_migrate(struct inode *inode)
551 525
552 /* 32 bit block address 4 bytes */ 526 /* 32 bit block address 4 bytes */
553 max_entries = inode->i_sb->s_blocksize >> 2; 527 max_entries = inode->i_sb->s_blocksize >> 2;
554 for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) { 528 for (i = 0; i < EXT4_NDIR_BLOCKS; i++) {
555 if (i_data[i]) { 529 if (i_data[i]) {
556 retval = update_extent_range(handle, tmp_inode, 530 retval = update_extent_range(handle, tmp_inode,
557 le32_to_cpu(i_data[i]), 531 le32_to_cpu(i_data[i]), &lb);
558 blk_count, &lb);
559 if (retval) 532 if (retval)
560 goto err_out; 533 goto err_out;
561 } 534 } else
535 lb.curr_block++;
562 } 536 }
563 if (i_data[EXT4_IND_BLOCK]) { 537 if (i_data[EXT4_IND_BLOCK]) {
564 retval = update_ind_extent_range(handle, tmp_inode, 538 retval = update_ind_extent_range(handle, tmp_inode,
565 le32_to_cpu(i_data[EXT4_IND_BLOCK]), 539 le32_to_cpu(i_data[EXT4_IND_BLOCK]), &lb);
566 &blk_count, &lb);
567 if (retval) 540 if (retval)
568 goto err_out; 541 goto err_out;
569 } else 542 } else
570 blk_count += max_entries; 543 lb.curr_block += max_entries;
571 if (i_data[EXT4_DIND_BLOCK]) { 544 if (i_data[EXT4_DIND_BLOCK]) {
572 retval = update_dind_extent_range(handle, tmp_inode, 545 retval = update_dind_extent_range(handle, tmp_inode,
573 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), 546 le32_to_cpu(i_data[EXT4_DIND_BLOCK]), &lb);
574 &blk_count, &lb);
575 if (retval) 547 if (retval)
576 goto err_out; 548 goto err_out;
577 } else 549 } else
578 blk_count += max_entries * max_entries; 550 lb.curr_block += max_entries * max_entries;
579 if (i_data[EXT4_TIND_BLOCK]) { 551 if (i_data[EXT4_TIND_BLOCK]) {
580 retval = update_tind_extent_range(handle, tmp_inode, 552 retval = update_tind_extent_range(handle, tmp_inode,
581 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), 553 le32_to_cpu(i_data[EXT4_TIND_BLOCK]), &lb);
582 &blk_count, &lb);
583 if (retval) 554 if (retval)
584 goto err_out; 555 goto err_out;
585 } 556 }
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 9bdef3f537c5..7ea4ba4eff2a 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -109,7 +109,7 @@ static int kmmpd(void *data)
109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
110 bdevname(bh->b_bdev, mmp->mmp_bdevname); 110 bdevname(bh->b_bdev, mmp->mmp_bdevname);
111 111
112 memcpy(mmp->mmp_nodename, init_utsname()->sysname, 112 memcpy(mmp->mmp_nodename, init_utsname()->nodename,
113 sizeof(mmp->mmp_nodename)); 113 sizeof(mmp->mmp_nodename));
114 114
115 while (!kthread_should_stop()) { 115 while (!kthread_should_stop()) {
@@ -125,8 +125,9 @@ static int kmmpd(void *data)
125 * Don't spew too many error messages. Print one every 125 * Don't spew too many error messages. Print one every
126 * (s_mmp_update_interval * 60) seconds. 126 * (s_mmp_update_interval * 60) seconds.
127 */ 127 */
128 if (retval && (failed_writes % 60) == 0) { 128 if (retval) {
129 ext4_error(sb, "Error writing to MMP block"); 129 if ((failed_writes % 60) == 0)
130 ext4_error(sb, "Error writing to MMP block");
130 failed_writes++; 131 failed_writes++;
131 } 132 }
132 133
@@ -295,7 +296,8 @@ skip:
295 /* 296 /*
296 * write a new random sequence number. 297 * write a new random sequence number.
297 */ 298 */
298 mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); 299 seq = mmp_new_seq();
300 mmp->mmp_seq = cpu_to_le32(seq);
299 301
300 retval = write_mmp_block(bh); 302 retval = write_mmp_block(bh);
301 if (retval) 303 if (retval)
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f57455a1b1b2..c5826c623e7a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -17,7 +17,6 @@
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
20#include "ext4_extents.h"
21#include "ext4.h" 20#include "ext4.h"
22 21
23/** 22/**
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924faeb6c8..aa4c782c9dd7 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1586,7 +1586,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1586 dxtrace(dx_show_index("node", frames[1].entries)); 1586 dxtrace(dx_show_index("node", frames[1].entries));
1587 dxtrace(dx_show_index("node", 1587 dxtrace(dx_show_index("node",
1588 ((struct dx_node *) bh2->b_data)->entries)); 1588 ((struct dx_node *) bh2->b_data)->entries));
1589 err = ext4_handle_dirty_metadata(handle, inode, bh2); 1589 err = ext4_handle_dirty_metadata(handle, dir, bh2);
1590 if (err) 1590 if (err)
1591 goto journal_error; 1591 goto journal_error;
1592 brelse (bh2); 1592 brelse (bh2);
@@ -1612,7 +1612,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1612 if (err) 1612 if (err)
1613 goto journal_error; 1613 goto journal_error;
1614 } 1614 }
1615 err = ext4_handle_dirty_metadata(handle, inode, frames[0].bh); 1615 err = ext4_handle_dirty_metadata(handle, dir, frames[0].bh);
1616 if (err) { 1616 if (err) {
1617 ext4_std_error(inode->i_sb, err); 1617 ext4_std_error(inode->i_sb, err);
1618 goto cleanup; 1618 goto cleanup;
@@ -1694,7 +1694,7 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
1694 if (is_dx(inode) && inode->i_nlink > 1) { 1694 if (is_dx(inode) && inode->i_nlink > 1) {
1695 /* limit is 16-bit i_links_count */ 1695 /* limit is 16-bit i_links_count */
1696 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { 1696 if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
1697 inode->i_nlink = 1; 1697 set_nlink(inode, 1);
1698 EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, 1698 EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
1699 EXT4_FEATURE_RO_COMPAT_DIR_NLINK); 1699 EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
1700 } 1700 }
@@ -1707,9 +1707,8 @@ static void ext4_inc_count(handle_t *handle, struct inode *inode)
1707 */ 1707 */
1708static void ext4_dec_count(handle_t *handle, struct inode *inode) 1708static void ext4_dec_count(handle_t *handle, struct inode *inode)
1709{ 1709{
1710 drop_nlink(inode); 1710 if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
1711 if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) 1711 drop_nlink(inode);
1712 inc_nlink(inode);
1713} 1712}
1714 1713
1715 1714
@@ -1756,7 +1755,7 @@ retry:
1756 if (IS_DIRSYNC(dir)) 1755 if (IS_DIRSYNC(dir))
1757 ext4_handle_sync(handle); 1756 ext4_handle_sync(handle);
1758 1757
1759 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); 1758 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
1760 err = PTR_ERR(inode); 1759 err = PTR_ERR(inode);
1761 if (!IS_ERR(inode)) { 1760 if (!IS_ERR(inode)) {
1762 inode->i_op = &ext4_file_inode_operations; 1761 inode->i_op = &ext4_file_inode_operations;
@@ -1792,7 +1791,7 @@ retry:
1792 if (IS_DIRSYNC(dir)) 1791 if (IS_DIRSYNC(dir))
1793 ext4_handle_sync(handle); 1792 ext4_handle_sync(handle);
1794 1793
1795 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0); 1794 inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
1796 err = PTR_ERR(inode); 1795 err = PTR_ERR(inode);
1797 if (!IS_ERR(inode)) { 1796 if (!IS_ERR(inode)) {
1798 init_special_inode(inode, inode->i_mode, rdev); 1797 init_special_inode(inode, inode->i_mode, rdev);
@@ -1832,7 +1831,7 @@ retry:
1832 ext4_handle_sync(handle); 1831 ext4_handle_sync(handle);
1833 1832
1834 inode = ext4_new_inode(handle, dir, S_IFDIR | mode, 1833 inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
1835 &dentry->d_name, 0); 1834 &dentry->d_name, 0, NULL);
1836 err = PTR_ERR(inode); 1835 err = PTR_ERR(inode);
1837 if (IS_ERR(inode)) 1836 if (IS_ERR(inode))
1838 goto out_stop; 1837 goto out_stop;
@@ -1861,9 +1860,9 @@ retry:
1861 de->name_len = 2; 1860 de->name_len = 2;
1862 strcpy(de->name, ".."); 1861 strcpy(de->name, "..");
1863 ext4_set_de_type(dir->i_sb, de, S_IFDIR); 1862 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
1864 inode->i_nlink = 2; 1863 set_nlink(inode, 2);
1865 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); 1864 BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
1866 err = ext4_handle_dirty_metadata(handle, dir, dir_block); 1865 err = ext4_handle_dirty_metadata(handle, inode, dir_block);
1867 if (err) 1866 if (err)
1868 goto out_clear_inode; 1867 goto out_clear_inode;
1869 err = ext4_mark_inode_dirty(handle, inode); 1868 err = ext4_mark_inode_dirty(handle, inode);
@@ -2214,7 +2213,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2214 ext4_warning(inode->i_sb, 2213 ext4_warning(inode->i_sb,
2215 "Deleting nonexistent file (%lu), %d", 2214 "Deleting nonexistent file (%lu), %d",
2216 inode->i_ino, inode->i_nlink); 2215 inode->i_ino, inode->i_nlink);
2217 inode->i_nlink = 1; 2216 set_nlink(inode, 1);
2218 } 2217 }
2219 retval = ext4_delete_entry(handle, dir, de, bh); 2218 retval = ext4_delete_entry(handle, dir, de, bh);
2220 if (retval) 2219 if (retval)
@@ -2279,7 +2278,7 @@ retry:
2279 ext4_handle_sync(handle); 2278 ext4_handle_sync(handle);
2280 2279
2281 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, 2280 inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2282 &dentry->d_name, 0); 2281 &dentry->d_name, 0, NULL);
2283 err = PTR_ERR(inode); 2282 err = PTR_ERR(inode);
2284 if (IS_ERR(inode)) 2283 if (IS_ERR(inode))
2285 goto out_stop; 2284 goto out_stop;
@@ -2530,7 +2529,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2530 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = 2529 PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2531 cpu_to_le32(new_dir->i_ino); 2530 cpu_to_le32(new_dir->i_ino);
2532 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); 2531 BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2533 retval = ext4_handle_dirty_metadata(handle, old_dir, dir_bh); 2532 retval = ext4_handle_dirty_metadata(handle, old_inode, dir_bh);
2534 if (retval) { 2533 if (retval) {
2535 ext4_std_error(old_dir->i_sb, retval); 2534 ext4_std_error(old_dir->i_sb, retval);
2536 goto end_rename; 2535 goto end_rename;
@@ -2539,7 +2538,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2539 if (new_inode) { 2538 if (new_inode) {
2540 /* checked empty_dir above, can't have another parent, 2539 /* checked empty_dir above, can't have another parent,
2541 * ext4_dec_count() won't work for many-linked dirs */ 2540 * ext4_dec_count() won't work for many-linked dirs */
2542 new_inode->i_nlink = 0; 2541 clear_nlink(new_inode);
2543 } else { 2542 } else {
2544 ext4_inc_count(handle, new_dir); 2543 ext4_inc_count(handle, new_dir);
2545 ext4_update_dx_flag(new_dir); 2544 ext4_update_dx_flag(new_dir);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 92f38ee13f8a..7ce1d0b19c94 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -70,7 +70,6 @@ static void put_io_page(struct ext4_io_page *io_page)
70void ext4_free_io_end(ext4_io_end_t *io) 70void ext4_free_io_end(ext4_io_end_t *io)
71{ 71{
72 int i; 72 int i;
73 wait_queue_head_t *wq;
74 73
75 BUG_ON(!io); 74 BUG_ON(!io);
76 if (io->page) 75 if (io->page)
@@ -78,56 +77,43 @@ void ext4_free_io_end(ext4_io_end_t *io)
78 for (i = 0; i < io->num_io_pages; i++) 77 for (i = 0; i < io->num_io_pages; i++)
79 put_io_page(io->pages[i]); 78 put_io_page(io->pages[i]);
80 io->num_io_pages = 0; 79 io->num_io_pages = 0;
81 wq = ext4_ioend_wq(io->inode); 80 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
82 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && 81 wake_up_all(ext4_ioend_wq(io->inode));
83 waitqueue_active(wq))
84 wake_up_all(wq);
85 kmem_cache_free(io_end_cachep, io); 82 kmem_cache_free(io_end_cachep, io);
86} 83}
87 84
88/* 85/*
89 * check a range of space and convert unwritten extents to written. 86 * check a range of space and convert unwritten extents to written.
87 *
88 * Called with inode->i_mutex; we depend on this when we manipulate
89 * io->flag, since we could otherwise race with ext4_flush_completed_IO()
90 */ 90 */
91int ext4_end_io_nolock(ext4_io_end_t *io) 91int ext4_end_io_nolock(ext4_io_end_t *io)
92{ 92{
93 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
94 loff_t offset = io->offset; 94 loff_t offset = io->offset;
95 ssize_t size = io->size; 95 ssize_t size = io->size;
96 wait_queue_head_t *wq;
97 int ret = 0; 96 int ret = 0;
98 97
99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 98 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
100 "list->prev 0x%p\n", 99 "list->prev 0x%p\n",
101 io, inode->i_ino, io->list.next, io->list.prev); 100 io, inode->i_ino, io->list.next, io->list.prev);
102 101
103 if (list_empty(&io->list))
104 return ret;
105
106 if (!(io->flag & EXT4_IO_END_UNWRITTEN))
107 return ret;
108
109 ret = ext4_convert_unwritten_extents(inode, offset, size); 102 ret = ext4_convert_unwritten_extents(inode, offset, size);
110 if (ret < 0) { 103 if (ret < 0) {
111 printk(KERN_EMERG "%s: failed to convert unwritten " 104 ext4_msg(inode->i_sb, KERN_EMERG,
112 "extents to written extents, error is %d " 105 "failed to convert unwritten extents to written "
113 "io is still on inode %lu aio dio list\n", 106 "extents -- potential data loss! "
114 __func__, ret, inode->i_ino); 107 "(inode %lu, offset %llu, size %zd, error %d)",
115 return ret; 108 inode->i_ino, offset, size, ret);
116 } 109 }
117 110
118 if (io->iocb) 111 if (io->iocb)
119 aio_complete(io->iocb, io->result, 0); 112 aio_complete(io->iocb, io->result, 0);
120 /* clear the DIO AIO unwritten flag */
121 if (io->flag & EXT4_IO_END_UNWRITTEN) {
122 io->flag &= ~EXT4_IO_END_UNWRITTEN;
123 /* Wake up anyone waiting on unwritten extent conversion */
124 wq = ext4_ioend_wq(io->inode);
125 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
126 waitqueue_active(wq)) {
127 wake_up_all(wq);
128 }
129 }
130 113
114 /* Wake up anyone waiting on unwritten extent conversion */
115 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
116 wake_up_all(ext4_ioend_wq(io->inode));
131 return ret; 117 return ret;
132} 118}
133 119
@@ -140,9 +126,15 @@ static void ext4_end_io_work(struct work_struct *work)
140 struct inode *inode = io->inode; 126 struct inode *inode = io->inode;
141 struct ext4_inode_info *ei = EXT4_I(inode); 127 struct ext4_inode_info *ei = EXT4_I(inode);
142 unsigned long flags; 128 unsigned long flags;
143 int ret; 129
130 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
131 if (list_empty(&io->list)) {
132 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
133 goto free;
134 }
144 135
145 if (!mutex_trylock(&inode->i_mutex)) { 136 if (!mutex_trylock(&inode->i_mutex)) {
137 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
146 /* 138 /*
147 * Requeue the work instead of waiting so that the work 139 * Requeue the work instead of waiting so that the work
148 * items queued after this can be processed. 140 * items queued after this can be processed.
@@ -159,17 +151,11 @@ static void ext4_end_io_work(struct work_struct *work)
159 io->flag |= EXT4_IO_END_QUEUED; 151 io->flag |= EXT4_IO_END_QUEUED;
160 return; 152 return;
161 } 153 }
162 ret = ext4_end_io_nolock(io); 154 list_del_init(&io->list);
163 if (ret < 0) {
164 mutex_unlock(&inode->i_mutex);
165 return;
166 }
167
168 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
169 if (!list_empty(&io->list))
170 list_del_init(&io->list);
171 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 155 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
156 (void) ext4_end_io_nolock(io);
172 mutex_unlock(&inode->i_mutex); 157 mutex_unlock(&inode->i_mutex);
158free:
173 ext4_free_io_end(io); 159 ext4_free_io_end(io);
174} 160}
175 161
@@ -350,10 +336,8 @@ submit_and_retry:
350 if ((io_end->num_io_pages >= MAX_IO_PAGES) && 336 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
351 (io_end->pages[io_end->num_io_pages-1] != io_page)) 337 (io_end->pages[io_end->num_io_pages-1] != io_page))
352 goto submit_and_retry; 338 goto submit_and_retry;
353 if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 339 if (buffer_uninit(bh))
354 io_end->flag |= EXT4_IO_END_UNWRITTEN; 340 ext4_set_io_unwritten_flag(inode, io_end);
355 atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
356 }
357 io->io_end->size += bh->b_size; 341 io->io_end->size += bh->b_size;
358 io->io_next_block++; 342 io->io_next_block++;
359 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 343 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 707d3f16f7ce..996780ab4f4e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -875,7 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
878 ext4_free_blks_set(sb, gdp, input->free_blocks_count); 878 ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); 880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
@@ -937,8 +937,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
937 input->reserved_blocks); 937 input->reserved_blocks);
938 938
939 /* Update the free space counts */ 939 /* Update the free space counts */
940 percpu_counter_add(&sbi->s_freeblocks_counter, 940 percpu_counter_add(&sbi->s_freeclusters_counter,
941 input->free_blocks_count); 941 EXT4_B2C(sbi, input->free_blocks_count));
942 percpu_counter_add(&sbi->s_freeinodes_counter, 942 percpu_counter_add(&sbi->s_freeinodes_counter,
943 EXT4_INODES_PER_GROUP(sb)); 943 EXT4_INODES_PER_GROUP(sb));
944 944
@@ -946,8 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
946 sbi->s_log_groups_per_flex) { 946 sbi->s_log_groups_per_flex) {
947 ext4_group_t flex_group; 947 ext4_group_t flex_group;
948 flex_group = ext4_flex_group(sbi, input->group); 948 flex_group = ext4_flex_group(sbi, input->group);
949 atomic_add(input->free_blocks_count, 949 atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
950 &sbi->s_flex_groups[flex_group].free_blocks); 950 &sbi->s_flex_groups[flex_group].free_clusters);
951 atomic_add(EXT4_INODES_PER_GROUP(sb), 951 atomic_add(EXT4_INODES_PER_GROUP(sb),
952 &sbi->s_flex_groups[flex_group].free_inodes); 952 &sbi->s_flex_groups[flex_group].free_inodes);
953 } 953 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 44d0c8db2239..9953d80145ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -45,6 +45,7 @@
45#include <linux/freezer.h> 45#include <linux/freezer.h>
46 46
47#include "ext4.h" 47#include "ext4.h"
48#include "ext4_extents.h"
48#include "ext4_jbd2.h" 49#include "ext4_jbd2.h"
49#include "xattr.h" 50#include "xattr.h"
50#include "acl.h" 51#include "acl.h"
@@ -163,8 +164,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,
163 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); 164 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
164} 165}
165 166
166__u32 ext4_free_blks_count(struct super_block *sb, 167__u32 ext4_free_group_clusters(struct super_block *sb,
167 struct ext4_group_desc *bg) 168 struct ext4_group_desc *bg)
168{ 169{
169 return le16_to_cpu(bg->bg_free_blocks_count_lo) | 170 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
170 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? 171 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
@@ -219,8 +220,8 @@ void ext4_inode_table_set(struct super_block *sb,
219 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); 220 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
220} 221}
221 222
222void ext4_free_blks_set(struct super_block *sb, 223void ext4_free_group_clusters_set(struct super_block *sb,
223 struct ext4_group_desc *bg, __u32 count) 224 struct ext4_group_desc *bg, __u32 count)
224{ 225{
225 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count); 226 bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
226 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) 227 if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
@@ -414,6 +415,22 @@ static void save_error_info(struct super_block *sb, const char *func,
414 ext4_commit_super(sb, 1); 415 ext4_commit_super(sb, 1);
415} 416}
416 417
418/*
419 * The del_gendisk() function uninitializes the disk-specific data
420 * structures, including the bdi structure, without telling anyone
421 * else. Once this happens, any attempt to call mark_buffer_dirty()
422 * (for example, by ext4_commit_super), will cause a kernel OOPS.
423 * This is a kludge to prevent these oops until we can put in a proper
424 * hook in del_gendisk() to inform the VFS and file system layers.
425 */
426static int block_device_ejected(struct super_block *sb)
427{
428 struct inode *bd_inode = sb->s_bdev->bd_inode;
429 struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
430
431 return bdi->dev == NULL;
432}
433
417 434
418/* Deal with the reporting of failure conditions on a filesystem such as 435/* Deal with the reporting of failure conditions on a filesystem such as
419 * inconsistencies detected or read IO failures. 436 * inconsistencies detected or read IO failures.
@@ -821,10 +838,10 @@ static void ext4_put_super(struct super_block *sb)
821 brelse(sbi->s_group_desc[i]); 838 brelse(sbi->s_group_desc[i]);
822 ext4_kvfree(sbi->s_group_desc); 839 ext4_kvfree(sbi->s_group_desc);
823 ext4_kvfree(sbi->s_flex_groups); 840 ext4_kvfree(sbi->s_flex_groups);
824 percpu_counter_destroy(&sbi->s_freeblocks_counter); 841 percpu_counter_destroy(&sbi->s_freeclusters_counter);
825 percpu_counter_destroy(&sbi->s_freeinodes_counter); 842 percpu_counter_destroy(&sbi->s_freeinodes_counter);
826 percpu_counter_destroy(&sbi->s_dirs_counter); 843 percpu_counter_destroy(&sbi->s_dirs_counter);
827 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 844 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
828 brelse(sbi->s_sbh); 845 brelse(sbi->s_sbh);
829#ifdef CONFIG_QUOTA 846#ifdef CONFIG_QUOTA
830 for (i = 0; i < MAXQUOTAS; i++) 847 for (i = 0; i < MAXQUOTAS; i++)
@@ -1057,8 +1074,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1057 seq_puts(seq, ",nouid32"); 1074 seq_puts(seq, ",nouid32");
1058 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) 1075 if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
1059 seq_puts(seq, ",debug"); 1076 seq_puts(seq, ",debug");
1060 if (test_opt(sb, OLDALLOC))
1061 seq_puts(seq, ",oldalloc");
1062#ifdef CONFIG_EXT4_FS_XATTR 1077#ifdef CONFIG_EXT4_FS_XATTR
1063 if (test_opt(sb, XATTR_USER)) 1078 if (test_opt(sb, XATTR_USER))
1064 seq_puts(seq, ",user_xattr"); 1079 seq_puts(seq, ",user_xattr");
@@ -1567,10 +1582,12 @@ static int parse_options(char *options, struct super_block *sb,
1567 set_opt(sb, DEBUG); 1582 set_opt(sb, DEBUG);
1568 break; 1583 break;
1569 case Opt_oldalloc: 1584 case Opt_oldalloc:
1570 set_opt(sb, OLDALLOC); 1585 ext4_msg(sb, KERN_WARNING,
1586 "Ignoring deprecated oldalloc option");
1571 break; 1587 break;
1572 case Opt_orlov: 1588 case Opt_orlov:
1573 clear_opt(sb, OLDALLOC); 1589 ext4_msg(sb, KERN_WARNING,
1590 "Ignoring deprecated orlov option");
1574 break; 1591 break;
1575#ifdef CONFIG_EXT4_FS_XATTR 1592#ifdef CONFIG_EXT4_FS_XATTR
1576 case Opt_user_xattr: 1593 case Opt_user_xattr:
@@ -1801,6 +1818,7 @@ set_qf_format:
1801 break; 1818 break;
1802 case Opt_nodelalloc: 1819 case Opt_nodelalloc:
1803 clear_opt(sb, DELALLOC); 1820 clear_opt(sb, DELALLOC);
1821 clear_opt2(sb, EXPLICIT_DELALLOC);
1804 break; 1822 break;
1805 case Opt_mblk_io_submit: 1823 case Opt_mblk_io_submit:
1806 set_opt(sb, MBLK_IO_SUBMIT); 1824 set_opt(sb, MBLK_IO_SUBMIT);
@@ -1817,6 +1835,7 @@ set_qf_format:
1817 break; 1835 break;
1818 case Opt_delalloc: 1836 case Opt_delalloc:
1819 set_opt(sb, DELALLOC); 1837 set_opt(sb, DELALLOC);
1838 set_opt2(sb, EXPLICIT_DELALLOC);
1820 break; 1839 break;
1821 case Opt_block_validity: 1840 case Opt_block_validity:
1822 set_opt(sb, BLOCK_VALIDITY); 1841 set_opt(sb, BLOCK_VALIDITY);
@@ -1935,7 +1954,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1935 res = MS_RDONLY; 1954 res = MS_RDONLY;
1936 } 1955 }
1937 if (read_only) 1956 if (read_only)
1938 return res; 1957 goto done;
1939 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1958 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1940 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1959 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1941 "running e2fsck is recommended"); 1960 "running e2fsck is recommended");
@@ -1966,6 +1985,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1966 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1985 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1967 1986
1968 ext4_commit_super(sb, 1); 1987 ext4_commit_super(sb, 1);
1988done:
1969 if (test_opt(sb, DEBUG)) 1989 if (test_opt(sb, DEBUG))
1970 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " 1990 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1971 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n", 1991 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
@@ -2015,8 +2035,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
2015 flex_group = ext4_flex_group(sbi, i); 2035 flex_group = ext4_flex_group(sbi, i);
2016 atomic_add(ext4_free_inodes_count(sb, gdp), 2036 atomic_add(ext4_free_inodes_count(sb, gdp),
2017 &sbi->s_flex_groups[flex_group].free_inodes); 2037 &sbi->s_flex_groups[flex_group].free_inodes);
2018 atomic_add(ext4_free_blks_count(sb, gdp), 2038 atomic_add(ext4_free_group_clusters(sb, gdp),
2019 &sbi->s_flex_groups[flex_group].free_blocks); 2039 &sbi->s_flex_groups[flex_group].free_clusters);
2020 atomic_add(ext4_used_dirs_count(sb, gdp), 2040 atomic_add(ext4_used_dirs_count(sb, gdp),
2021 &sbi->s_flex_groups[flex_group].used_dirs); 2041 &sbi->s_flex_groups[flex_group].used_dirs);
2022 } 2042 }
@@ -2134,7 +2154,8 @@ static int ext4_check_descriptors(struct super_block *sb,
2134 if (NULL != first_not_zeroed) 2154 if (NULL != first_not_zeroed)
2135 *first_not_zeroed = grp; 2155 *first_not_zeroed = grp;
2136 2156
2137 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 2157 ext4_free_blocks_count_set(sbi->s_es,
2158 EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
2138 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); 2159 sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
2139 return 1; 2160 return 1;
2140} 2161}
@@ -2454,7 +2475,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
2454 char *buf) 2475 char *buf)
2455{ 2476{
2456 return snprintf(buf, PAGE_SIZE, "%llu\n", 2477 return snprintf(buf, PAGE_SIZE, "%llu\n",
2457 (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); 2478 (s64) EXT4_C2B(sbi,
2479 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
2458} 2480}
2459 2481
2460static ssize_t session_write_kbytes_show(struct ext4_attr *a, 2482static ssize_t session_write_kbytes_show(struct ext4_attr *a,
@@ -2682,6 +2704,13 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
2682 return 0; 2704 return 0;
2683 } 2705 }
2684 } 2706 }
2707 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
2708 !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2709 ext4_msg(sb, KERN_ERR,
2710 "Can't support bigalloc feature without "
2711 "extents feature\n");
2712 return 0;
2713 }
2685 return 1; 2714 return 1;
2686} 2715}
2687 2716
@@ -3087,10 +3116,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3087 char *cp; 3116 char *cp;
3088 const char *descr; 3117 const char *descr;
3089 int ret = -ENOMEM; 3118 int ret = -ENOMEM;
3090 int blocksize; 3119 int blocksize, clustersize;
3091 unsigned int db_count; 3120 unsigned int db_count;
3092 unsigned int i; 3121 unsigned int i;
3093 int needs_recovery, has_huge_files; 3122 int needs_recovery, has_huge_files, has_bigalloc;
3094 __u64 blocks_count; 3123 __u64 blocks_count;
3095 int err; 3124 int err;
3096 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; 3125 unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -3224,6 +3253,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3224 &journal_ioprio, NULL, 0)) 3253 &journal_ioprio, NULL, 0))
3225 goto failed_mount; 3254 goto failed_mount;
3226 3255
3256 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3257 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
3258 "with data=journal disables delayed "
3259 "allocation and O_DIRECT support!\n");
3260 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
3261 ext4_msg(sb, KERN_ERR, "can't mount with "
3262 "both data=journal and delalloc");
3263 goto failed_mount;
3264 }
3265 if (test_opt(sb, DIOREAD_NOLOCK)) {
3266 ext4_msg(sb, KERN_ERR, "can't mount with "
3267 "both data=journal and delalloc");
3268 goto failed_mount;
3269 }
3270 if (test_opt(sb, DELALLOC))
3271 clear_opt(sb, DELALLOC);
3272 }
3273
3274 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3275 if (test_opt(sb, DIOREAD_NOLOCK)) {
3276 if (blocksize < PAGE_SIZE) {
3277 ext4_msg(sb, KERN_ERR, "can't mount with "
3278 "dioread_nolock if block size != PAGE_SIZE");
3279 goto failed_mount;
3280 }
3281 }
3282
3227 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 3283 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3228 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); 3284 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
3229 3285
@@ -3265,8 +3321,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3265 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) 3321 if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
3266 goto failed_mount; 3322 goto failed_mount;
3267 3323
3268 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
3269
3270 if (blocksize < EXT4_MIN_BLOCK_SIZE || 3324 if (blocksize < EXT4_MIN_BLOCK_SIZE ||
3271 blocksize > EXT4_MAX_BLOCK_SIZE) { 3325 blocksize > EXT4_MAX_BLOCK_SIZE) {
3272 ext4_msg(sb, KERN_ERR, 3326 ext4_msg(sb, KERN_ERR,
@@ -3369,12 +3423,53 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3369 sb->s_dirt = 1; 3423 sb->s_dirt = 1;
3370 } 3424 }
3371 3425
3372 if (sbi->s_blocks_per_group > blocksize * 8) { 3426 /* Handle clustersize */
3373 ext4_msg(sb, KERN_ERR, 3427 clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
3374 "#blocks per group too big: %lu", 3428 has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3375 sbi->s_blocks_per_group); 3429 EXT4_FEATURE_RO_COMPAT_BIGALLOC);
3376 goto failed_mount; 3430 if (has_bigalloc) {
3431 if (clustersize < blocksize) {
3432 ext4_msg(sb, KERN_ERR,
3433 "cluster size (%d) smaller than "
3434 "block size (%d)", clustersize, blocksize);
3435 goto failed_mount;
3436 }
3437 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
3438 le32_to_cpu(es->s_log_block_size);
3439 sbi->s_clusters_per_group =
3440 le32_to_cpu(es->s_clusters_per_group);
3441 if (sbi->s_clusters_per_group > blocksize * 8) {
3442 ext4_msg(sb, KERN_ERR,
3443 "#clusters per group too big: %lu",
3444 sbi->s_clusters_per_group);
3445 goto failed_mount;
3446 }
3447 if (sbi->s_blocks_per_group !=
3448 (sbi->s_clusters_per_group * (clustersize / blocksize))) {
3449 ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
3450 "clusters per group (%lu) inconsistent",
3451 sbi->s_blocks_per_group,
3452 sbi->s_clusters_per_group);
3453 goto failed_mount;
3454 }
3455 } else {
3456 if (clustersize != blocksize) {
3457 ext4_warning(sb, "fragment/cluster size (%d) != "
3458 "block size (%d)", clustersize,
3459 blocksize);
3460 clustersize = blocksize;
3461 }
3462 if (sbi->s_blocks_per_group > blocksize * 8) {
3463 ext4_msg(sb, KERN_ERR,
3464 "#blocks per group too big: %lu",
3465 sbi->s_blocks_per_group);
3466 goto failed_mount;
3467 }
3468 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
3469 sbi->s_cluster_bits = 0;
3377 } 3470 }
3471 sbi->s_cluster_ratio = clustersize / blocksize;
3472
3378 if (sbi->s_inodes_per_group > blocksize * 8) { 3473 if (sbi->s_inodes_per_group > blocksize * 8) {
3379 ext4_msg(sb, KERN_ERR, 3474 ext4_msg(sb, KERN_ERR,
3380 "#inodes per group too big: %lu", 3475 "#inodes per group too big: %lu",
@@ -3446,10 +3541,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3446 goto failed_mount; 3541 goto failed_mount;
3447 } 3542 }
3448 3543
3449#ifdef CONFIG_PROC_FS
3450 if (ext4_proc_root) 3544 if (ext4_proc_root)
3451 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 3545 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
3452#endif
3453 3546
3454 bgl_lock_init(sbi->s_blockgroup_lock); 3547 bgl_lock_init(sbi->s_blockgroup_lock);
3455 3548
@@ -3483,8 +3576,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3483 sbi->s_err_report.function = print_daily_error_info; 3576 sbi->s_err_report.function = print_daily_error_info;
3484 sbi->s_err_report.data = (unsigned long) sb; 3577 sbi->s_err_report.data = (unsigned long) sb;
3485 3578
3486 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3579 err = percpu_counter_init(&sbi->s_freeclusters_counter,
3487 ext4_count_free_blocks(sb)); 3580 ext4_count_free_clusters(sb));
3488 if (!err) { 3581 if (!err) {
3489 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3582 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3490 ext4_count_free_inodes(sb)); 3583 ext4_count_free_inodes(sb));
@@ -3494,7 +3587,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3494 ext4_count_dirs(sb)); 3587 ext4_count_dirs(sb));
3495 } 3588 }
3496 if (!err) { 3589 if (!err) {
3497 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3590 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
3498 } 3591 }
3499 if (err) { 3592 if (err) {
3500 ext4_msg(sb, KERN_ERR, "insufficient memory"); 3593 ext4_msg(sb, KERN_ERR, "insufficient memory");
@@ -3609,13 +3702,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3609 * The journal may have updated the bg summary counts, so we 3702 * The journal may have updated the bg summary counts, so we
3610 * need to update the global counters. 3703 * need to update the global counters.
3611 */ 3704 */
3612 percpu_counter_set(&sbi->s_freeblocks_counter, 3705 percpu_counter_set(&sbi->s_freeclusters_counter,
3613 ext4_count_free_blocks(sb)); 3706 ext4_count_free_clusters(sb));
3614 percpu_counter_set(&sbi->s_freeinodes_counter, 3707 percpu_counter_set(&sbi->s_freeinodes_counter,
3615 ext4_count_free_inodes(sb)); 3708 ext4_count_free_inodes(sb));
3616 percpu_counter_set(&sbi->s_dirs_counter, 3709 percpu_counter_set(&sbi->s_dirs_counter,
3617 ext4_count_dirs(sb)); 3710 ext4_count_dirs(sb));
3618 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3711 percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
3619 3712
3620no_journal: 3713no_journal:
3621 /* 3714 /*
@@ -3679,25 +3772,6 @@ no_journal:
3679 "available"); 3772 "available");
3680 } 3773 }
3681 3774
3682 if (test_opt(sb, DELALLOC) &&
3683 (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
3684 ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - "
3685 "requested data journaling mode");
3686 clear_opt(sb, DELALLOC);
3687 }
3688 if (test_opt(sb, DIOREAD_NOLOCK)) {
3689 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
3690 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3691 "option - requested data journaling mode");
3692 clear_opt(sb, DIOREAD_NOLOCK);
3693 }
3694 if (sb->s_blocksize < PAGE_SIZE) {
3695 ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
3696 "option - block size is too small");
3697 clear_opt(sb, DIOREAD_NOLOCK);
3698 }
3699 }
3700
3701 err = ext4_setup_system_zone(sb); 3775 err = ext4_setup_system_zone(sb);
3702 if (err) { 3776 if (err) {
3703 ext4_msg(sb, KERN_ERR, "failed to initialize system " 3777 ext4_msg(sb, KERN_ERR, "failed to initialize system "
@@ -3710,22 +3784,19 @@ no_journal:
3710 if (err) { 3784 if (err) {
3711 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", 3785 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3712 err); 3786 err);
3713 goto failed_mount4; 3787 goto failed_mount5;
3714 } 3788 }
3715 3789
3716 err = ext4_register_li_request(sb, first_not_zeroed); 3790 err = ext4_register_li_request(sb, first_not_zeroed);
3717 if (err) 3791 if (err)
3718 goto failed_mount4; 3792 goto failed_mount6;
3719 3793
3720 sbi->s_kobj.kset = ext4_kset; 3794 sbi->s_kobj.kset = ext4_kset;
3721 init_completion(&sbi->s_kobj_unregister); 3795 init_completion(&sbi->s_kobj_unregister);
3722 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, 3796 err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
3723 "%s", sb->s_id); 3797 "%s", sb->s_id);
3724 if (err) { 3798 if (err)
3725 ext4_mb_release(sb); 3799 goto failed_mount7;
3726 ext4_ext_release(sb);
3727 goto failed_mount4;
3728 };
3729 3800
3730 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 3801 EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
3731 ext4_orphan_cleanup(sb, es); 3802 ext4_orphan_cleanup(sb, es);
@@ -3759,13 +3830,19 @@ cantfind_ext4:
3759 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); 3830 ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
3760 goto failed_mount; 3831 goto failed_mount;
3761 3832
3833failed_mount7:
3834 ext4_unregister_li_request(sb);
3835failed_mount6:
3836 ext4_ext_release(sb);
3837failed_mount5:
3838 ext4_mb_release(sb);
3839 ext4_release_system_zone(sb);
3762failed_mount4: 3840failed_mount4:
3763 iput(root); 3841 iput(root);
3764 sb->s_root = NULL; 3842 sb->s_root = NULL;
3765 ext4_msg(sb, KERN_ERR, "mount failed"); 3843 ext4_msg(sb, KERN_ERR, "mount failed");
3766 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); 3844 destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
3767failed_mount_wq: 3845failed_mount_wq:
3768 ext4_release_system_zone(sb);
3769 if (sbi->s_journal) { 3846 if (sbi->s_journal) {
3770 jbd2_journal_destroy(sbi->s_journal); 3847 jbd2_journal_destroy(sbi->s_journal);
3771 sbi->s_journal = NULL; 3848 sbi->s_journal = NULL;
@@ -3774,10 +3851,10 @@ failed_mount3:
3774 del_timer(&sbi->s_err_report); 3851 del_timer(&sbi->s_err_report);
3775 if (sbi->s_flex_groups) 3852 if (sbi->s_flex_groups)
3776 ext4_kvfree(sbi->s_flex_groups); 3853 ext4_kvfree(sbi->s_flex_groups);
3777 percpu_counter_destroy(&sbi->s_freeblocks_counter); 3854 percpu_counter_destroy(&sbi->s_freeclusters_counter);
3778 percpu_counter_destroy(&sbi->s_freeinodes_counter); 3855 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3779 percpu_counter_destroy(&sbi->s_dirs_counter); 3856 percpu_counter_destroy(&sbi->s_dirs_counter);
3780 percpu_counter_destroy(&sbi->s_dirtyblocks_counter); 3857 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
3781 if (sbi->s_mmp_tsk) 3858 if (sbi->s_mmp_tsk)
3782 kthread_stop(sbi->s_mmp_tsk); 3859 kthread_stop(sbi->s_mmp_tsk);
3783failed_mount2: 3860failed_mount2:
@@ -4064,7 +4141,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4064 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 4141 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
4065 int error = 0; 4142 int error = 0;
4066 4143
4067 if (!sbh) 4144 if (!sbh || block_device_ejected(sb))
4068 return error; 4145 return error;
4069 if (buffer_write_io_error(sbh)) { 4146 if (buffer_write_io_error(sbh)) {
4070 /* 4147 /*
@@ -4100,8 +4177,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
4100 else 4177 else
4101 es->s_kbytes_written = 4178 es->s_kbytes_written =
4102 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 4179 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
4103 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 4180 ext4_free_blocks_count_set(es,
4104 &EXT4_SB(sb)->s_freeblocks_counter)); 4181 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
4182 &EXT4_SB(sb)->s_freeclusters_counter)));
4105 es->s_free_inodes_count = 4183 es->s_free_inodes_count =
4106 cpu_to_le32(percpu_counter_sum_positive( 4184 cpu_to_le32(percpu_counter_sum_positive(
4107 &EXT4_SB(sb)->s_freeinodes_counter)); 4185 &EXT4_SB(sb)->s_freeinodes_counter));
@@ -4506,16 +4584,34 @@ restore_opts:
4506 return err; 4584 return err;
4507} 4585}
4508 4586
4587/*
4588 * Note: calculating the overhead so we can be compatible with
4589 * historical BSD practice is quite difficult in the face of
4590 * clusters/bigalloc. This is because multiple metadata blocks from
4591 * different block group can end up in the same allocation cluster.
4592 * Calculating the exact overhead in the face of clustered allocation
4593 * requires either O(all block bitmaps) in memory or O(number of block
4594 * groups**2) in time. We will still calculate the superblock for
4595 * older file systems --- and if we come across with a bigalloc file
4596 * system with zero in s_overhead_clusters the estimate will be close to
4597 * correct especially for very large cluster sizes --- but for newer
4598 * file systems, it's better to calculate this figure once at mkfs
4599 * time, and store it in the superblock. If the superblock value is
4600 * present (even for non-bigalloc file systems), we will use it.
4601 */
4509static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) 4602static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4510{ 4603{
4511 struct super_block *sb = dentry->d_sb; 4604 struct super_block *sb = dentry->d_sb;
4512 struct ext4_sb_info *sbi = EXT4_SB(sb); 4605 struct ext4_sb_info *sbi = EXT4_SB(sb);
4513 struct ext4_super_block *es = sbi->s_es; 4606 struct ext4_super_block *es = sbi->s_es;
4607 struct ext4_group_desc *gdp;
4514 u64 fsid; 4608 u64 fsid;
4515 s64 bfree; 4609 s64 bfree;
4516 4610
4517 if (test_opt(sb, MINIX_DF)) { 4611 if (test_opt(sb, MINIX_DF)) {
4518 sbi->s_overhead_last = 0; 4612 sbi->s_overhead_last = 0;
4613 } else if (es->s_overhead_clusters) {
4614 sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
4519 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { 4615 } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
4520 ext4_group_t i, ngroups = ext4_get_groups_count(sb); 4616 ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4521 ext4_fsblk_t overhead = 0; 4617 ext4_fsblk_t overhead = 0;
@@ -4530,24 +4626,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4530 * All of the blocks before first_data_block are 4626 * All of the blocks before first_data_block are
4531 * overhead 4627 * overhead
4532 */ 4628 */
4533 overhead = le32_to_cpu(es->s_first_data_block); 4629 overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4534 4630
4535 /* 4631 /*
4536 * Add the overhead attributed to the superblock and 4632 * Add the overhead found in each block group
4537 * block group descriptors. If the sparse superblocks
4538 * feature is turned on, then not all groups have this.
4539 */ 4633 */
4540 for (i = 0; i < ngroups; i++) { 4634 for (i = 0; i < ngroups; i++) {
4541 overhead += ext4_bg_has_super(sb, i) + 4635 gdp = ext4_get_group_desc(sb, i, NULL);
4542 ext4_bg_num_gdb(sb, i); 4636 overhead += ext4_num_overhead_clusters(sb, i, gdp);
4543 cond_resched(); 4637 cond_resched();
4544 } 4638 }
4545
4546 /*
4547 * Every block group has an inode bitmap, a block
4548 * bitmap, and an inode table.
4549 */
4550 overhead += ngroups * (2 + sbi->s_itb_per_group);
4551 sbi->s_overhead_last = overhead; 4639 sbi->s_overhead_last = overhead;
4552 smp_wmb(); 4640 smp_wmb();
4553 sbi->s_blocks_last = ext4_blocks_count(es); 4641 sbi->s_blocks_last = ext4_blocks_count(es);
@@ -4555,11 +4643,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
4555 4643
4556 buf->f_type = EXT4_SUPER_MAGIC; 4644 buf->f_type = EXT4_SUPER_MAGIC;
4557 buf->f_bsize = sb->s_blocksize; 4645 buf->f_bsize = sb->s_blocksize;
4558 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 4646 buf->f_blocks = (ext4_blocks_count(es) -
4559 bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 4647 EXT4_C2B(sbi, sbi->s_overhead_last));
4560 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 4648 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
4649 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
4561 /* prevent underflow in case that few free space is available */ 4650 /* prevent underflow in case that few free space is available */
4562 buf->f_bfree = max_t(s64, bfree, 0); 4651 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
4563 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 4652 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
4564 if (buf->f_bfree < ext4_r_blocks_count(es)) 4653 if (buf->f_bfree < ext4_r_blocks_count(es))
4565 buf->f_bavail = 0; 4654 buf->f_bavail = 0;
@@ -4980,13 +5069,11 @@ static int __init ext4_init_fs(void)
4980 return err; 5069 return err;
4981 err = ext4_init_system_zone(); 5070 err = ext4_init_system_zone();
4982 if (err) 5071 if (err)
4983 goto out7; 5072 goto out6;
4984 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 5073 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
4985 if (!ext4_kset) 5074 if (!ext4_kset)
4986 goto out6;
4987 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4988 if (!ext4_proc_root)
4989 goto out5; 5075 goto out5;
5076 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
4990 5077
4991 err = ext4_init_feat_adverts(); 5078 err = ext4_init_feat_adverts();
4992 if (err) 5079 if (err)
@@ -5022,12 +5109,12 @@ out2:
5022out3: 5109out3:
5023 ext4_exit_feat_adverts(); 5110 ext4_exit_feat_adverts();
5024out4: 5111out4:
5025 remove_proc_entry("fs/ext4", NULL); 5112 if (ext4_proc_root)
5026out5: 5113 remove_proc_entry("fs/ext4", NULL);
5027 kset_unregister(ext4_kset); 5114 kset_unregister(ext4_kset);
5028out6: 5115out5:
5029 ext4_exit_system_zone(); 5116 ext4_exit_system_zone();
5030out7: 5117out6:
5031 ext4_exit_pageio(); 5118 ext4_exit_pageio();
5032 return err; 5119 return err;
5033} 5120}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc97250..93a00d89a220 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -820,8 +820,14 @@ inserted:
820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 820 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 821 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
822 822
823 /*
824 * take i_data_sem because we will test
825 * i_delalloc_reserved_flag in ext4_mb_new_blocks
826 */
827 down_read((&EXT4_I(inode)->i_data_sem));
823 block = ext4_new_meta_blocks(handle, inode, goal, 0, 828 block = ext4_new_meta_blocks(handle, inode, goal, 0,
824 NULL, &error); 829 NULL, &error);
830 up_read((&EXT4_I(inode)->i_data_sem));
825 if (error) 831 if (error)
826 goto cleanup; 832 goto cleanup;
827 833
@@ -985,11 +991,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
985 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); 991 no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
986 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); 992 ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
987 993
988 error = ext4_get_inode_loc(inode, &is.iloc); 994 error = ext4_reserve_inode_write(handle, inode, &is.iloc);
989 if (error)
990 goto cleanup;
991
992 error = ext4_journal_get_write_access(handle, is.iloc.bh);
993 if (error) 995 if (error)
994 goto cleanup; 996 goto cleanup;
995 997
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 5efbd5d7701a..aca191bd5f8f 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -156,8 +156,8 @@ static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
156 } else { 156 } else {
157 if (uni_xlate == 1) { 157 if (uni_xlate == 1) {
158 *op++ = ':'; 158 *op++ = ':';
159 op = pack_hex_byte(op, ec >> 8); 159 op = hex_byte_pack(op, ec >> 8);
160 op = pack_hex_byte(op, ec); 160 op = hex_byte_pack(op, ec);
161 len -= 5; 161 len -= 5;
162 } else { 162 } else {
163 *op++ = '?'; 163 *op++ = '?';
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index a5d3853822e0..1510a4d51990 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -326,15 +326,14 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent,
326extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 326extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
327 struct inode *i2); 327 struct inode *i2);
328/* fat/misc.c */ 328/* fat/misc.c */
329extern void 329extern __printf(3, 4) __cold
330__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) 330void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...);
331 __attribute__ ((format (printf, 3, 4))) __cold;
332#define fat_fs_error(sb, fmt, args...) \ 331#define fat_fs_error(sb, fmt, args...) \
333 __fat_fs_error(sb, 1, fmt , ## args) 332 __fat_fs_error(sb, 1, fmt , ## args)
334#define fat_fs_error_ratelimit(sb, fmt, args...) \ 333#define fat_fs_error_ratelimit(sb, fmt, args...) \
335 __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args) 334 __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
336void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...) 335__printf(3, 4) __cold
337 __attribute__ ((format (printf, 3, 4))) __cold; 336void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...);
338extern int fat_clusters_flush(struct super_block *sb); 337extern int fat_clusters_flush(struct super_block *sb);
339extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 338extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
340extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, 339extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1726d7303047..808cac7edcfb 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -379,7 +379,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
379 return error; 379 return error;
380 MSDOS_I(inode)->mmu_private = inode->i_size; 380 MSDOS_I(inode)->mmu_private = inode->i_size;
381 381
382 inode->i_nlink = fat_subdirs(inode); 382 set_nlink(inode, fat_subdirs(inode));
383 } else { /* not a directory */ 383 } else { /* not a directory */
384 inode->i_generation |= 1; 384 inode->i_generation |= 1;
385 inode->i_mode = fat_make_mode(sbi, de->attr, 385 inode->i_mode = fat_make_mode(sbi, de->attr,
@@ -1233,7 +1233,7 @@ static int fat_read_root(struct inode *inode)
1233 fat_save_attrs(inode, ATTR_DIR); 1233 fat_save_attrs(inode, ATTR_DIR);
1234 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; 1234 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
1235 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1235 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1236 inode->i_nlink = fat_subdirs(inode)+2; 1236 set_nlink(inode, fat_subdirs(inode)+2);
1237 1237
1238 return 0; 1238 return 0;
1239} 1239}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 66e83b845455..216b419f30e2 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -387,7 +387,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode)
387 /* the directory was completed, just return a error */ 387 /* the directory was completed, just return a error */
388 goto out; 388 goto out;
389 } 389 }
390 inode->i_nlink = 2; 390 set_nlink(inode, 2);
391 inode->i_mtime = inode->i_atime = inode->i_ctime = ts; 391 inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
392 /* timestamp is already written, so mark_inode_dirty() is unneeded. */ 392 /* timestamp is already written, so mark_inode_dirty() is unneeded. */
393 393
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index bb3f29c3557b..a87a65663c25 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -900,7 +900,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode)
900 goto out; 900 goto out;
901 } 901 }
902 inode->i_version++; 902 inode->i_version++;
903 inode->i_nlink = 2; 903 set_nlink(inode, 2);
904 inode->i_mtime = inode->i_atime = inode->i_ctime = ts; 904 inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
905 /* timestamp is already written, so mark_inode_dirty() is unneeded. */ 905 /* timestamp is already written, so mark_inode_dirty() is unneeded. */
906 906
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 1a4311437a8b..7b2af5abe2fa 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -227,7 +227,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
227 ip->i_uid = (uid_t)vip->vii_uid; 227 ip->i_uid = (uid_t)vip->vii_uid;
228 ip->i_gid = (gid_t)vip->vii_gid; 228 ip->i_gid = (gid_t)vip->vii_gid;
229 229
230 ip->i_nlink = vip->vii_nlink; 230 set_nlink(ip, vip->vii_nlink);
231 ip->i_size = vip->vii_size; 231 ip->i_size = vip->vii_size;
232 232
233 ip->i_atime.tv_sec = vip->vii_atime; 233 ip->i_atime.tv_sec = vip->vii_atime;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 04cf3b91e501..73c3992b2bb4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,11 +41,23 @@ struct wb_writeback_work {
41 unsigned int for_kupdate:1; 41 unsigned int for_kupdate:1;
42 unsigned int range_cyclic:1; 42 unsigned int range_cyclic:1;
43 unsigned int for_background:1; 43 unsigned int for_background:1;
44 enum wb_reason reason; /* why was writeback initiated? */
44 45
45 struct list_head list; /* pending work list */ 46 struct list_head list; /* pending work list */
46 struct completion *done; /* set if the caller waits */ 47 struct completion *done; /* set if the caller waits */
47}; 48};
48 49
50const char *wb_reason_name[] = {
51 [WB_REASON_BACKGROUND] = "background",
52 [WB_REASON_TRY_TO_FREE_PAGES] = "try_to_free_pages",
53 [WB_REASON_SYNC] = "sync",
54 [WB_REASON_PERIODIC] = "periodic",
55 [WB_REASON_LAPTOP_TIMER] = "laptop_timer",
56 [WB_REASON_FREE_MORE_MEM] = "free_more_memory",
57 [WB_REASON_FS_FREE_SPACE] = "fs_free_space",
58 [WB_REASON_FORKER_THREAD] = "forker_thread"
59};
60
49/* 61/*
50 * Include the creation of the trace points after defining the 62 * Include the creation of the trace points after defining the
51 * wb_writeback_work structure so that the definition remains local to this 63 * wb_writeback_work structure so that the definition remains local to this
@@ -115,7 +127,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
115 127
116static void 128static void
117__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, 129__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
118 bool range_cyclic) 130 bool range_cyclic, enum wb_reason reason)
119{ 131{
120 struct wb_writeback_work *work; 132 struct wb_writeback_work *work;
121 133
@@ -135,6 +147,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
135 work->sync_mode = WB_SYNC_NONE; 147 work->sync_mode = WB_SYNC_NONE;
136 work->nr_pages = nr_pages; 148 work->nr_pages = nr_pages;
137 work->range_cyclic = range_cyclic; 149 work->range_cyclic = range_cyclic;
150 work->reason = reason;
138 151
139 bdi_queue_work(bdi, work); 152 bdi_queue_work(bdi, work);
140} 153}
@@ -150,9 +163,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
150 * completion. Caller need not hold sb s_umount semaphore. 163 * completion. Caller need not hold sb s_umount semaphore.
151 * 164 *
152 */ 165 */
153void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) 166void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
167 enum wb_reason reason)
154{ 168{
155 __bdi_start_writeback(bdi, nr_pages, true); 169 __bdi_start_writeback(bdi, nr_pages, true, reason);
156} 170}
157 171
158/** 172/**
@@ -251,7 +265,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
251 */ 265 */
252static int move_expired_inodes(struct list_head *delaying_queue, 266static int move_expired_inodes(struct list_head *delaying_queue,
253 struct list_head *dispatch_queue, 267 struct list_head *dispatch_queue,
254 unsigned long *older_than_this) 268 struct wb_writeback_work *work)
255{ 269{
256 LIST_HEAD(tmp); 270 LIST_HEAD(tmp);
257 struct list_head *pos, *node; 271 struct list_head *pos, *node;
@@ -262,8 +276,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
262 276
263 while (!list_empty(delaying_queue)) { 277 while (!list_empty(delaying_queue)) {
264 inode = wb_inode(delaying_queue->prev); 278 inode = wb_inode(delaying_queue->prev);
265 if (older_than_this && 279 if (work->older_than_this &&
266 inode_dirtied_after(inode, *older_than_this)) 280 inode_dirtied_after(inode, *work->older_than_this))
267 break; 281 break;
268 if (sb && sb != inode->i_sb) 282 if (sb && sb != inode->i_sb)
269 do_sb_sort = 1; 283 do_sb_sort = 1;
@@ -302,13 +316,13 @@ out:
302 * | 316 * |
303 * +--> dequeue for IO 317 * +--> dequeue for IO
304 */ 318 */
305static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) 319static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
306{ 320{
307 int moved; 321 int moved;
308 assert_spin_locked(&wb->list_lock); 322 assert_spin_locked(&wb->list_lock);
309 list_splice_init(&wb->b_more_io, &wb->b_io); 323 list_splice_init(&wb->b_more_io, &wb->b_io);
310 moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); 324 moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
311 trace_writeback_queue_io(wb, older_than_this, moved); 325 trace_writeback_queue_io(wb, work, moved);
312} 326}
313 327
314static int write_inode(struct inode *inode, struct writeback_control *wbc) 328static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -641,31 +655,40 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
641 return wrote; 655 return wrote;
642} 656}
643 657
644long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages) 658long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
659 enum wb_reason reason)
645{ 660{
646 struct wb_writeback_work work = { 661 struct wb_writeback_work work = {
647 .nr_pages = nr_pages, 662 .nr_pages = nr_pages,
648 .sync_mode = WB_SYNC_NONE, 663 .sync_mode = WB_SYNC_NONE,
649 .range_cyclic = 1, 664 .range_cyclic = 1,
665 .reason = reason,
650 }; 666 };
651 667
652 spin_lock(&wb->list_lock); 668 spin_lock(&wb->list_lock);
653 if (list_empty(&wb->b_io)) 669 if (list_empty(&wb->b_io))
654 queue_io(wb, NULL); 670 queue_io(wb, &work);
655 __writeback_inodes_wb(wb, &work); 671 __writeback_inodes_wb(wb, &work);
656 spin_unlock(&wb->list_lock); 672 spin_unlock(&wb->list_lock);
657 673
658 return nr_pages - work.nr_pages; 674 return nr_pages - work.nr_pages;
659} 675}
660 676
661static inline bool over_bground_thresh(void) 677static bool over_bground_thresh(struct backing_dev_info *bdi)
662{ 678{
663 unsigned long background_thresh, dirty_thresh; 679 unsigned long background_thresh, dirty_thresh;
664 680
665 global_dirty_limits(&background_thresh, &dirty_thresh); 681 global_dirty_limits(&background_thresh, &dirty_thresh);
666 682
667 return (global_page_state(NR_FILE_DIRTY) + 683 if (global_page_state(NR_FILE_DIRTY) +
668 global_page_state(NR_UNSTABLE_NFS) > background_thresh); 684 global_page_state(NR_UNSTABLE_NFS) > background_thresh)
685 return true;
686
687 if (bdi_stat(bdi, BDI_RECLAIMABLE) >
688 bdi_dirty_limit(bdi, background_thresh))
689 return true;
690
691 return false;
669} 692}
670 693
671/* 694/*
@@ -675,7 +698,7 @@ static inline bool over_bground_thresh(void)
675static void wb_update_bandwidth(struct bdi_writeback *wb, 698static void wb_update_bandwidth(struct bdi_writeback *wb,
676 unsigned long start_time) 699 unsigned long start_time)
677{ 700{
678 __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time); 701 __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
679} 702}
680 703
681/* 704/*
@@ -727,7 +750,7 @@ static long wb_writeback(struct bdi_writeback *wb,
727 * For background writeout, stop when we are below the 750 * For background writeout, stop when we are below the
728 * background dirty threshold 751 * background dirty threshold
729 */ 752 */
730 if (work->for_background && !over_bground_thresh()) 753 if (work->for_background && !over_bground_thresh(wb->bdi))
731 break; 754 break;
732 755
733 if (work->for_kupdate) { 756 if (work->for_kupdate) {
@@ -738,7 +761,7 @@ static long wb_writeback(struct bdi_writeback *wb,
738 761
739 trace_writeback_start(wb->bdi, work); 762 trace_writeback_start(wb->bdi, work);
740 if (list_empty(&wb->b_io)) 763 if (list_empty(&wb->b_io))
741 queue_io(wb, work->older_than_this); 764 queue_io(wb, work);
742 if (work->sb) 765 if (work->sb)
743 progress = writeback_sb_inodes(work->sb, wb, work); 766 progress = writeback_sb_inodes(work->sb, wb, work);
744 else 767 else
@@ -811,13 +834,14 @@ static unsigned long get_nr_dirty_pages(void)
811 834
812static long wb_check_background_flush(struct bdi_writeback *wb) 835static long wb_check_background_flush(struct bdi_writeback *wb)
813{ 836{
814 if (over_bground_thresh()) { 837 if (over_bground_thresh(wb->bdi)) {
815 838
816 struct wb_writeback_work work = { 839 struct wb_writeback_work work = {
817 .nr_pages = LONG_MAX, 840 .nr_pages = LONG_MAX,
818 .sync_mode = WB_SYNC_NONE, 841 .sync_mode = WB_SYNC_NONE,
819 .for_background = 1, 842 .for_background = 1,
820 .range_cyclic = 1, 843 .range_cyclic = 1,
844 .reason = WB_REASON_BACKGROUND,
821 }; 845 };
822 846
823 return wb_writeback(wb, &work); 847 return wb_writeback(wb, &work);
@@ -851,6 +875,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
851 .sync_mode = WB_SYNC_NONE, 875 .sync_mode = WB_SYNC_NONE,
852 .for_kupdate = 1, 876 .for_kupdate = 1,
853 .range_cyclic = 1, 877 .range_cyclic = 1,
878 .reason = WB_REASON_PERIODIC,
854 }; 879 };
855 880
856 return wb_writeback(wb, &work); 881 return wb_writeback(wb, &work);
@@ -969,7 +994,7 @@ int bdi_writeback_thread(void *data)
969 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back 994 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
970 * the whole world. 995 * the whole world.
971 */ 996 */
972void wakeup_flusher_threads(long nr_pages) 997void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
973{ 998{
974 struct backing_dev_info *bdi; 999 struct backing_dev_info *bdi;
975 1000
@@ -982,7 +1007,7 @@ void wakeup_flusher_threads(long nr_pages)
982 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { 1007 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
983 if (!bdi_has_dirty_io(bdi)) 1008 if (!bdi_has_dirty_io(bdi))
984 continue; 1009 continue;
985 __bdi_start_writeback(bdi, nr_pages, false); 1010 __bdi_start_writeback(bdi, nr_pages, false, reason);
986 } 1011 }
987 rcu_read_unlock(); 1012 rcu_read_unlock();
988} 1013}
@@ -1203,7 +1228,9 @@ static void wait_sb_inodes(struct super_block *sb)
1203 * on how many (if any) will be written, and this function does not wait 1228 * on how many (if any) will be written, and this function does not wait
1204 * for IO completion of submitted IO. 1229 * for IO completion of submitted IO.
1205 */ 1230 */
1206void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) 1231void writeback_inodes_sb_nr(struct super_block *sb,
1232 unsigned long nr,
1233 enum wb_reason reason)
1207{ 1234{
1208 DECLARE_COMPLETION_ONSTACK(done); 1235 DECLARE_COMPLETION_ONSTACK(done);
1209 struct wb_writeback_work work = { 1236 struct wb_writeback_work work = {
@@ -1212,6 +1239,7 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
1212 .tagged_writepages = 1, 1239 .tagged_writepages = 1,
1213 .done = &done, 1240 .done = &done,
1214 .nr_pages = nr, 1241 .nr_pages = nr,
1242 .reason = reason,
1215 }; 1243 };
1216 1244
1217 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1245 WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1228,9 +1256,9 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
1228 * on how many (if any) will be written, and this function does not wait 1256 * on how many (if any) will be written, and this function does not wait
1229 * for IO completion of submitted IO. 1257 * for IO completion of submitted IO.
1230 */ 1258 */
1231void writeback_inodes_sb(struct super_block *sb) 1259void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
1232{ 1260{
1233 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages()); 1261 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
1234} 1262}
1235EXPORT_SYMBOL(writeback_inodes_sb); 1263EXPORT_SYMBOL(writeback_inodes_sb);
1236 1264
@@ -1241,11 +1269,11 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1241 * Invoke writeback_inodes_sb if no writeback is currently underway. 1269 * Invoke writeback_inodes_sb if no writeback is currently underway.
1242 * Returns 1 if writeback was started, 0 if not. 1270 * Returns 1 if writeback was started, 0 if not.
1243 */ 1271 */
1244int writeback_inodes_sb_if_idle(struct super_block *sb) 1272int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
1245{ 1273{
1246 if (!writeback_in_progress(sb->s_bdi)) { 1274 if (!writeback_in_progress(sb->s_bdi)) {
1247 down_read(&sb->s_umount); 1275 down_read(&sb->s_umount);
1248 writeback_inodes_sb(sb); 1276 writeback_inodes_sb(sb, reason);
1249 up_read(&sb->s_umount); 1277 up_read(&sb->s_umount);
1250 return 1; 1278 return 1;
1251 } else 1279 } else
@@ -1262,11 +1290,12 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1262 * Returns 1 if writeback was started, 0 if not. 1290 * Returns 1 if writeback was started, 0 if not.
1263 */ 1291 */
1264int writeback_inodes_sb_nr_if_idle(struct super_block *sb, 1292int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
1265 unsigned long nr) 1293 unsigned long nr,
1294 enum wb_reason reason)
1266{ 1295{
1267 if (!writeback_in_progress(sb->s_bdi)) { 1296 if (!writeback_in_progress(sb->s_bdi)) {
1268 down_read(&sb->s_umount); 1297 down_read(&sb->s_umount);
1269 writeback_inodes_sb_nr(sb, nr); 1298 writeback_inodes_sb_nr(sb, nr, reason);
1270 up_read(&sb->s_umount); 1299 up_read(&sb->s_umount);
1271 return 1; 1300 return 1;
1272 } else 1301 } else
@@ -1290,6 +1319,7 @@ void sync_inodes_sb(struct super_block *sb)
1290 .nr_pages = LONG_MAX, 1319 .nr_pages = LONG_MAX,
1291 .range_cyclic = 0, 1320 .range_cyclic = 0,
1292 .done = &done, 1321 .done = &done,
1322 .reason = WB_REASON_SYNC,
1293 }; 1323 };
1294 1324
1295 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1325 WARN_ON(!rwsem_is_locked(&sb->s_umount));
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 85542a7daf40..42593c587d48 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -231,7 +231,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
231 if (iop) 231 if (iop)
232 inode->i_op = iop; 232 inode->i_op = iop;
233 inode->i_fop = fop; 233 inode->i_fop = fop;
234 inode->i_nlink = nlink; 234 set_nlink(inode, nlink);
235 inode->i_private = fc; 235 inode->i_private = fc;
236 d_add(dentry, inode); 236 d_add(dentry, inode);
237 return dentry; 237 return dentry;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index add96f6ffda5..3e6d72756479 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -151,7 +151,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
151 151
152 inode->i_ino = attr->ino; 152 inode->i_ino = attr->ino;
153 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 153 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
154 inode->i_nlink = attr->nlink; 154 set_nlink(inode, attr->nlink);
155 inode->i_uid = attr->uid; 155 inode->i_uid = attr->uid;
156 inode->i_gid = attr->gid; 156 inode->i_gid = attr->gid;
157 inode->i_blocks = attr->blocks; 157 inode->i_blocks = attr->blocks;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 66707118af25..2553b858a72e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -201,7 +201,7 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
201void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 201void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
202void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 202void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
203 203
204__attribute__ ((format(printf, 2, 3))) 204__printf(2, 3)
205void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 205void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
206 206
207/** 207/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 78418b4fa857..1656df7aacd2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -299,7 +299,7 @@ static void gfs2_set_nlink(struct inode *inode, u32 nlink)
299 if (nlink == 0) 299 if (nlink == 0)
300 clear_nlink(inode); 300 clear_nlink(inode);
301 else 301 else
302 inode->i_nlink = nlink; 302 set_nlink(inode, nlink);
303 } 303 }
304} 304}
305 305
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 3ebc437736fe..1cbdeea1db44 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -46,11 +46,26 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
46 case HFS_EXT_CNID: 46 case HFS_EXT_CNID:
47 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, 47 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
48 mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz)); 48 mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz));
49 if (HFS_I(tree->inode)->alloc_blocks >
50 HFS_I(tree->inode)->first_blocks) {
51 printk(KERN_ERR "hfs: invalid btree extent records\n");
52 unlock_new_inode(tree->inode);
53 goto free_inode;
54 }
55
49 tree->inode->i_mapping->a_ops = &hfs_btree_aops; 56 tree->inode->i_mapping->a_ops = &hfs_btree_aops;
50 break; 57 break;
51 case HFS_CAT_CNID: 58 case HFS_CAT_CNID:
52 hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize, 59 hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize,
53 mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz)); 60 mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz));
61
62 if (!HFS_I(tree->inode)->first_blocks) {
63 printk(KERN_ERR "hfs: invalid btree extent records "
64 "(0 size).\n");
65 unlock_new_inode(tree->inode);
66 goto free_inode;
67 }
68
54 tree->inode->i_mapping->a_ops = &hfs_btree_aops; 69 tree->inode->i_mapping->a_ops = &hfs_btree_aops;
55 break; 70 break;
56 default: 71 default:
@@ -59,11 +74,6 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
59 } 74 }
60 unlock_new_inode(tree->inode); 75 unlock_new_inode(tree->inode);
61 76
62 if (!HFS_I(tree->inode)->first_blocks) {
63 printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n");
64 goto free_inode;
65 }
66
67 mapping = tree->inode->i_mapping; 77 mapping = tree->inode->i_mapping;
68 page = read_mapping_page(mapping, 0, NULL); 78 page = read_mapping_page(mapping, 0, NULL);
69 if (IS_ERR(page)) 79 if (IS_ERR(page))
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index b4d70b13be92..bce4eef91a06 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -198,7 +198,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
198 198
199 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 199 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
200 if (res) { 200 if (res) {
201 inode->i_nlink = 0; 201 clear_nlink(inode);
202 hfs_delete_inode(inode); 202 hfs_delete_inode(inode);
203 iput(inode); 203 iput(inode);
204 return res; 204 return res;
@@ -227,7 +227,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
227 227
228 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode); 228 res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
229 if (res) { 229 if (res) {
230 inode->i_nlink = 0; 230 clear_nlink(inode);
231 hfs_delete_inode(inode); 231 hfs_delete_inode(inode);
232 iput(inode); 232 iput(inode);
233 return res; 233 return res;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 96a1b625fc74..a1a9fdcd2a00 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -183,7 +183,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
183 inode->i_mode = mode; 183 inode->i_mode = mode;
184 inode->i_uid = current_fsuid(); 184 inode->i_uid = current_fsuid();
185 inode->i_gid = current_fsgid(); 185 inode->i_gid = current_fsgid();
186 inode->i_nlink = 1; 186 set_nlink(inode, 1);
187 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 187 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
188 HFS_I(inode)->flags = 0; 188 HFS_I(inode)->flags = 0;
189 HFS_I(inode)->rsrc_inode = NULL; 189 HFS_I(inode)->rsrc_inode = NULL;
@@ -313,7 +313,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
313 /* Initialize the inode */ 313 /* Initialize the inode */
314 inode->i_uid = hsb->s_uid; 314 inode->i_uid = hsb->s_uid;
315 inode->i_gid = hsb->s_gid; 315 inode->i_gid = hsb->s_gid;
316 inode->i_nlink = 1; 316 set_nlink(inode, 1);
317 317
318 if (idata->key) 318 if (idata->key)
319 HFS_I(inode)->cat_key = *idata->key; 319 HFS_I(inode)->cat_key = *idata->key;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 25b2443a004c..4536cd3f15ae 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -415,7 +415,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
415 goto out; 415 goto out;
416 416
417out_err: 417out_err:
418 inode->i_nlink = 0; 418 clear_nlink(inode);
419 hfsplus_delete_inode(inode); 419 hfsplus_delete_inode(inode);
420 iput(inode); 420 iput(inode);
421out: 421out:
@@ -440,7 +440,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
440 440
441 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); 441 res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode);
442 if (res) { 442 if (res) {
443 inode->i_nlink = 0; 443 clear_nlink(inode);
444 hfsplus_delete_inode(inode); 444 hfsplus_delete_inode(inode);
445 iput(inode); 445 iput(inode);
446 goto out; 446 goto out;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 4cc1e3a36ec7..40e1413be4cf 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -391,7 +391,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
391 inode->i_mode = mode; 391 inode->i_mode = mode;
392 inode->i_uid = current_fsuid(); 392 inode->i_uid = current_fsuid();
393 inode->i_gid = current_fsgid(); 393 inode->i_gid = current_fsgid();
394 inode->i_nlink = 1; 394 set_nlink(inode, 1);
395 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 395 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
396 396
397 hip = HFSPLUS_I(inode); 397 hip = HFSPLUS_I(inode);
@@ -512,7 +512,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
512 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, 512 hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
513 sizeof(struct hfsplus_cat_folder)); 513 sizeof(struct hfsplus_cat_folder));
514 hfsplus_get_perms(inode, &folder->permissions, 1); 514 hfsplus_get_perms(inode, &folder->permissions, 1);
515 inode->i_nlink = 1; 515 set_nlink(inode, 1);
516 inode->i_size = 2 + be32_to_cpu(folder->valence); 516 inode->i_size = 2 + be32_to_cpu(folder->valence);
517 inode->i_atime = hfsp_mt2ut(folder->access_date); 517 inode->i_atime = hfsp_mt2ut(folder->access_date);
518 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); 518 inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
@@ -532,11 +532,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
532 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? 532 hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ?
533 &file->rsrc_fork : &file->data_fork); 533 &file->rsrc_fork : &file->data_fork);
534 hfsplus_get_perms(inode, &file->permissions, 0); 534 hfsplus_get_perms(inode, &file->permissions, 0);
535 inode->i_nlink = 1; 535 set_nlink(inode, 1);
536 if (S_ISREG(inode->i_mode)) { 536 if (S_ISREG(inode->i_mode)) {
537 if (file->permissions.dev) 537 if (file->permissions.dev)
538 inode->i_nlink = 538 set_nlink(inode,
539 be32_to_cpu(file->permissions.dev); 539 be32_to_cpu(file->permissions.dev));
540 inode->i_op = &hfsplus_file_inode_operations; 540 inode->i_op = &hfsplus_file_inode_operations;
541 inode->i_fop = &hfsplus_file_operations; 541 inode->i_fop = &hfsplus_file_operations;
542 inode->i_mapping->a_ops = &hfsplus_aops; 542 inode->i_mapping->a_ops = &hfsplus_aops;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 0d22afdd4611..2f72da5ae686 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -541,7 +541,7 @@ static int read_name(struct inode *ino, char *name)
541 541
542 ino->i_ino = st.ino; 542 ino->i_ino = st.ino;
543 ino->i_mode = st.mode; 543 ino->i_mode = st.mode;
544 ino->i_nlink = st.nlink; 544 set_nlink(ino, st.nlink);
545 ino->i_uid = st.uid; 545 ino->i_uid = st.uid;
546 ino->i_gid = st.gid; 546 ino->i_gid = st.gid;
547 ino->i_atime = st.atime; 547 ino->i_atime = st.atime;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index d51a98384bc0..dd7bc38a3825 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -16,7 +16,6 @@
16#include <sys/vfs.h> 16#include <sys/vfs.h>
17#include "hostfs.h" 17#include "hostfs.h"
18#include "os.h" 18#include "os.h"
19#include "user.h"
20#include <utime.h> 19#include <utime.h>
21 20
22static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p) 21static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 96a8ed91cedd..2fa0089a02a8 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -247,7 +247,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
247 result->i_mode &= ~0111; 247 result->i_mode &= ~0111;
248 result->i_op = &hpfs_file_iops; 248 result->i_op = &hpfs_file_iops;
249 result->i_fop = &hpfs_file_ops; 249 result->i_fop = &hpfs_file_ops;
250 result->i_nlink = 1; 250 set_nlink(result, 1);
251 } 251 }
252 unlock_new_inode(result); 252 unlock_new_inode(result);
253 } 253 }
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 331b5e234ef3..de946170ebb1 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -311,8 +311,8 @@ static inline struct hpfs_sb_info *hpfs_sb(struct super_block *sb)
311 311
312/* super.c */ 312/* super.c */
313 313
314void hpfs_error(struct super_block *, const char *, ...) 314__printf(2, 3)
315 __attribute__((format (printf, 2, 3))); 315void hpfs_error(struct super_block *, const char *, ...);
316int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); 316int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
317unsigned hpfs_count_one_bitmap(struct super_block *, secno); 317unsigned hpfs_count_one_bitmap(struct super_block *, secno);
318 318
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 338cd8368451..3b2cec29972b 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -53,7 +53,7 @@ void hpfs_read_inode(struct inode *i)
53 i->i_mode &= ~0111; 53 i->i_mode &= ~0111;
54 i->i_op = &hpfs_file_iops; 54 i->i_op = &hpfs_file_iops;
55 i->i_fop = &hpfs_file_ops; 55 i->i_fop = &hpfs_file_ops;
56 i->i_nlink = 0;*/ 56 clear_nlink(i);*/
57 make_bad_inode(i); 57 make_bad_inode(i);
58 return; 58 return;
59 } 59 }
@@ -77,7 +77,7 @@ void hpfs_read_inode(struct inode *i)
77 i->i_mode = S_IFLNK | 0777; 77 i->i_mode = S_IFLNK | 0777;
78 i->i_op = &page_symlink_inode_operations; 78 i->i_op = &page_symlink_inode_operations;
79 i->i_data.a_ops = &hpfs_symlink_aops; 79 i->i_data.a_ops = &hpfs_symlink_aops;
80 i->i_nlink = 1; 80 set_nlink(i, 1);
81 i->i_size = ea_size; 81 i->i_size = ea_size;
82 i->i_blocks = 1; 82 i->i_blocks = 1;
83 brelse(bh); 83 brelse(bh);
@@ -101,7 +101,7 @@ void hpfs_read_inode(struct inode *i)
101 } 101 }
102 if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { 102 if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) {
103 brelse(bh); 103 brelse(bh);
104 i->i_nlink = 1; 104 set_nlink(i, 1);
105 i->i_size = 0; 105 i->i_size = 0;
106 i->i_blocks = 1; 106 i->i_blocks = 1;
107 init_special_inode(i, mode, 107 init_special_inode(i, mode,
@@ -125,13 +125,13 @@ void hpfs_read_inode(struct inode *i)
125 hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL); 125 hpfs_count_dnodes(i->i_sb, hpfs_inode->i_dno, &n_dnodes, &n_subdirs, NULL);
126 i->i_blocks = 4 * n_dnodes; 126 i->i_blocks = 4 * n_dnodes;
127 i->i_size = 2048 * n_dnodes; 127 i->i_size = 2048 * n_dnodes;
128 i->i_nlink = 2 + n_subdirs; 128 set_nlink(i, 2 + n_subdirs);
129 } else { 129 } else {
130 i->i_mode |= S_IFREG; 130 i->i_mode |= S_IFREG;
131 if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111; 131 if (!hpfs_inode->i_ea_mode) i->i_mode &= ~0111;
132 i->i_op = &hpfs_file_iops; 132 i->i_op = &hpfs_file_iops;
133 i->i_fop = &hpfs_file_ops; 133 i->i_fop = &hpfs_file_ops;
134 i->i_nlink = 1; 134 set_nlink(i, 1);
135 i->i_size = le32_to_cpu(fnode->file_size); 135 i->i_size = le32_to_cpu(fnode->file_size);
136 i->i_blocks = ((i->i_size + 511) >> 9) + 1; 136 i->i_blocks = ((i->i_size + 511) >> 9) + 1;
137 i->i_data.a_ops = &hpfs_aops; 137 i->i_data.a_ops = &hpfs_aops;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 2df69e2f07cf..ea91fcb0ef9b 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -56,7 +56,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
56 result->i_fop = &hpfs_dir_ops; 56 result->i_fop = &hpfs_dir_ops;
57 result->i_blocks = 4; 57 result->i_blocks = 4;
58 result->i_size = 2048; 58 result->i_size = 2048;
59 result->i_nlink = 2; 59 set_nlink(result, 2);
60 if (dee.read_only) 60 if (dee.read_only)
61 result->i_mode &= ~0222; 61 result->i_mode &= ~0222;
62 62
@@ -150,7 +150,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
150 result->i_mode &= ~0111; 150 result->i_mode &= ~0111;
151 result->i_op = &hpfs_file_iops; 151 result->i_op = &hpfs_file_iops;
152 result->i_fop = &hpfs_file_ops; 152 result->i_fop = &hpfs_file_ops;
153 result->i_nlink = 1; 153 set_nlink(result, 1);
154 hpfs_i(result)->i_parent_dir = dir->i_ino; 154 hpfs_i(result)->i_parent_dir = dir->i_ino;
155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)); 155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
156 result->i_ctime.tv_nsec = 0; 156 result->i_ctime.tv_nsec = 0;
@@ -242,7 +242,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
242 hpfs_i(result)->i_ea_size = 0; 242 hpfs_i(result)->i_ea_size = 0;
243 result->i_uid = current_fsuid(); 243 result->i_uid = current_fsuid();
244 result->i_gid = current_fsgid(); 244 result->i_gid = current_fsgid();
245 result->i_nlink = 1; 245 set_nlink(result, 1);
246 result->i_size = 0; 246 result->i_size = 0;
247 result->i_blocks = 1; 247 result->i_blocks = 1;
248 init_special_inode(result, mode, rdev); 248 init_special_inode(result, mode, rdev);
@@ -318,7 +318,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
318 result->i_uid = current_fsuid(); 318 result->i_uid = current_fsuid();
319 result->i_gid = current_fsgid(); 319 result->i_gid = current_fsgid();
320 result->i_blocks = 1; 320 result->i_blocks = 1;
321 result->i_nlink = 1; 321 set_nlink(result, 1);
322 result->i_size = strlen(symlink); 322 result->i_size = strlen(symlink);
323 result->i_op = &page_symlink_inode_operations; 323 result->i_op = &page_symlink_inode_operations;
324 result->i_data.a_ops = &hpfs_symlink_aops; 324 result->i_data.a_ops = &hpfs_symlink_aops;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 970ea987b3f6..f590b1160c6c 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -702,7 +702,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
702 inode->i_ctime = proc_ino->i_ctime; 702 inode->i_ctime = proc_ino->i_ctime;
703 inode->i_ino = proc_ino->i_ino; 703 inode->i_ino = proc_ino->i_ino;
704 inode->i_mode = proc_ino->i_mode; 704 inode->i_mode = proc_ino->i_mode;
705 inode->i_nlink = proc_ino->i_nlink; 705 set_nlink(inode, proc_ino->i_nlink);
706 inode->i_size = proc_ino->i_size; 706 inode->i_size = proc_ino->i_size;
707 inode->i_blocks = proc_ino->i_blocks; 707 inode->i_blocks = proc_ino->i_blocks;
708 708
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ec889538e5a6..0be5a78598d0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -970,7 +970,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
970 970
971 d_instantiate(path.dentry, inode); 971 d_instantiate(path.dentry, inode);
972 inode->i_size = size; 972 inode->i_size = size;
973 inode->i_nlink = 0; 973 clear_nlink(inode);
974 974
975 error = -ENFILE; 975 error = -ENFILE;
976 file = alloc_file(&path, FMODE_WRITE | FMODE_READ, 976 file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
diff --git a/fs/inode.c b/fs/inode.c
index ecbb68dc7e2a..ee4e66b998f4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,7 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
142 atomic_set(&inode->i_count, 1); 142 atomic_set(&inode->i_count, 1);
143 inode->i_op = &empty_iops; 143 inode->i_op = &empty_iops;
144 inode->i_fop = &empty_fops; 144 inode->i_fop = &empty_fops;
145 inode->i_nlink = 1; 145 inode->__i_nlink = 1;
146 inode->i_opflags = 0; 146 inode->i_opflags = 0;
147 inode->i_uid = 0; 147 inode->i_uid = 0;
148 inode->i_gid = 0; 148 inode->i_gid = 0;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a5d03672d04e..f950059525fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -20,6 +20,7 @@
20#include <linux/statfs.h> 20#include <linux/statfs.h>
21#include <linux/cdrom.h> 21#include <linux/cdrom.h>
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/mpage.h>
23 24
24#include "isofs.h" 25#include "isofs.h"
25#include "zisofs.h" 26#include "zisofs.h"
@@ -1148,7 +1149,13 @@ struct buffer_head *isofs_bread(struct inode *inode, sector_t block)
1148 1149
1149static int isofs_readpage(struct file *file, struct page *page) 1150static int isofs_readpage(struct file *file, struct page *page)
1150{ 1151{
1151 return block_read_full_page(page,isofs_get_block); 1152 return mpage_readpage(page, isofs_get_block);
1153}
1154
1155static int isofs_readpages(struct file *file, struct address_space *mapping,
1156 struct list_head *pages, unsigned nr_pages)
1157{
1158 return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
1152} 1159}
1153 1160
1154static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) 1161static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
@@ -1158,6 +1165,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
1158 1165
1159static const struct address_space_operations isofs_aops = { 1166static const struct address_space_operations isofs_aops = {
1160 .readpage = isofs_readpage, 1167 .readpage = isofs_readpage,
1168 .readpages = isofs_readpages,
1161 .bmap = _isofs_bmap 1169 .bmap = _isofs_bmap
1162}; 1170};
1163 1171
@@ -1319,7 +1327,7 @@ static int isofs_read_inode(struct inode *inode)
1319 inode->i_mode = S_IFDIR | sbi->s_dmode; 1327 inode->i_mode = S_IFDIR | sbi->s_dmode;
1320 else 1328 else
1321 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1329 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1322 inode->i_nlink = 1; /* 1330 set_nlink(inode, 1); /*
1323 * Set to 1. We know there are 2, but 1331 * Set to 1. We know there are 2, but
1324 * the find utility tries to optimize 1332 * the find utility tries to optimize
1325 * if it is 2, and it screws up. It is 1333 * if it is 2, and it screws up. It is
@@ -1337,7 +1345,7 @@ static int isofs_read_inode(struct inode *inode)
1337 */ 1345 */
1338 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO; 1346 inode->i_mode = S_IFREG | S_IRUGO | S_IXUGO;
1339 } 1347 }
1340 inode->i_nlink = 1; 1348 set_nlink(inode, 1);
1341 } 1349 }
1342 inode->i_uid = sbi->s_uid; 1350 inode->i_uid = sbi->s_uid;
1343 inode->i_gid = sbi->s_gid; 1351 inode->i_gid = sbi->s_gid;
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 1fbc7de88f50..70e79d0c756a 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -363,7 +363,7 @@ repeat:
363 break; 363 break;
364 case SIG('P', 'X'): 364 case SIG('P', 'X'):
365 inode->i_mode = isonum_733(rr->u.PX.mode); 365 inode->i_mode = isonum_733(rr->u.PX.mode);
366 inode->i_nlink = isonum_733(rr->u.PX.n_links); 366 set_nlink(inode, isonum_733(rr->u.PX.n_links));
367 inode->i_uid = isonum_733(rr->u.PX.uid); 367 inode->i_uid = isonum_733(rr->u.PX.uid);
368 inode->i_gid = isonum_733(rr->u.PX.gid); 368 inode->i_gid = isonum_733(rr->u.PX.gid);
369 break; 369 break;
@@ -496,7 +496,7 @@ repeat:
496 goto out; 496 goto out;
497 } 497 }
498 inode->i_mode = reloc->i_mode; 498 inode->i_mode = reloc->i_mode;
499 inode->i_nlink = reloc->i_nlink; 499 set_nlink(inode, reloc->i_nlink);
500 inode->i_uid = reloc->i_uid; 500 inode->i_uid = reloc->i_uid;
501 inode->i_gid = reloc->i_gid; 501 inode->i_gid = reloc->i_gid;
502 inode->i_rdev = reloc->i_rdev; 502 inode->i_rdev = reloc->i_rdev;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 9fe061fb8779..fea8dd661d2b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1135,6 +1135,14 @@ static int journal_get_superblock(journal_t *journal)
1135 goto out; 1135 goto out;
1136 } 1136 }
1137 1137
1138 if (be32_to_cpu(sb->s_first) == 0 ||
1139 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1140 printk(KERN_WARNING
1141 "JBD: Invalid start block of journal: %u\n",
1142 be32_to_cpu(sb->s_first));
1143 goto out;
1144 }
1145
1138 return 0; 1146 return 0;
1139 1147
1140out: 1148out:
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979821a4..68d704db787f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -352,7 +352,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
352 J_ASSERT(commit_transaction->t_state == T_RUNNING); 352 J_ASSERT(commit_transaction->t_state == T_RUNNING);
353 353
354 trace_jbd2_start_commit(journal, commit_transaction); 354 trace_jbd2_start_commit(journal, commit_transaction);
355 jbd_debug(1, "JBD: starting commit of transaction %d\n", 355 jbd_debug(1, "JBD2: starting commit of transaction %d\n",
356 commit_transaction->t_tid); 356 commit_transaction->t_tid);
357 357
358 write_lock(&journal->j_state_lock); 358 write_lock(&journal->j_state_lock);
@@ -427,7 +427,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
427 __jbd2_journal_clean_checkpoint_list(journal); 427 __jbd2_journal_clean_checkpoint_list(journal);
428 spin_unlock(&journal->j_list_lock); 428 spin_unlock(&journal->j_list_lock);
429 429
430 jbd_debug (3, "JBD: commit phase 1\n"); 430 jbd_debug(3, "JBD2: commit phase 1\n");
431 431
432 /* 432 /*
433 * Switch to a new revoke table. 433 * Switch to a new revoke table.
@@ -447,7 +447,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
447 wake_up(&journal->j_wait_transaction_locked); 447 wake_up(&journal->j_wait_transaction_locked);
448 write_unlock(&journal->j_state_lock); 448 write_unlock(&journal->j_state_lock);
449 449
450 jbd_debug (3, "JBD: commit phase 2\n"); 450 jbd_debug(3, "JBD2: commit phase 2\n");
451 451
452 /* 452 /*
453 * Now start flushing things to disk, in the order they appear 453 * Now start flushing things to disk, in the order they appear
@@ -462,7 +462,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
462 WRITE_SYNC); 462 WRITE_SYNC);
463 blk_finish_plug(&plug); 463 blk_finish_plug(&plug);
464 464
465 jbd_debug(3, "JBD: commit phase 2\n"); 465 jbd_debug(3, "JBD2: commit phase 2\n");
466 466
467 /* 467 /*
468 * Way to go: we have now written out all of the data for a 468 * Way to go: we have now written out all of the data for a
@@ -522,7 +522,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
522 522
523 J_ASSERT (bufs == 0); 523 J_ASSERT (bufs == 0);
524 524
525 jbd_debug(4, "JBD: get descriptor\n"); 525 jbd_debug(4, "JBD2: get descriptor\n");
526 526
527 descriptor = jbd2_journal_get_descriptor_buffer(journal); 527 descriptor = jbd2_journal_get_descriptor_buffer(journal);
528 if (!descriptor) { 528 if (!descriptor) {
@@ -531,7 +531,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
531 } 531 }
532 532
533 bh = jh2bh(descriptor); 533 bh = jh2bh(descriptor);
534 jbd_debug(4, "JBD: got buffer %llu (%p)\n", 534 jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
535 (unsigned long long)bh->b_blocknr, bh->b_data); 535 (unsigned long long)bh->b_blocknr, bh->b_data);
536 header = (journal_header_t *)&bh->b_data[0]; 536 header = (journal_header_t *)&bh->b_data[0];
537 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 537 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
@@ -625,7 +625,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
625 commit_transaction->t_buffers == NULL || 625 commit_transaction->t_buffers == NULL ||
626 space_left < tag_bytes + 16) { 626 space_left < tag_bytes + 16) {
627 627
628 jbd_debug(4, "JBD: Submit %d IOs\n", bufs); 628 jbd_debug(4, "JBD2: Submit %d IOs\n", bufs);
629 629
630 /* Write an end-of-descriptor marker before 630 /* Write an end-of-descriptor marker before
631 submitting the IOs. "tag" still points to 631 submitting the IOs. "tag" still points to
@@ -707,7 +707,7 @@ start_journal_io:
707 so we incur less scheduling load. 707 so we incur less scheduling load.
708 */ 708 */
709 709
710 jbd_debug(3, "JBD: commit phase 3\n"); 710 jbd_debug(3, "JBD2: commit phase 3\n");
711 711
712 /* 712 /*
713 * akpm: these are BJ_IO, and j_list_lock is not needed. 713 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -771,7 +771,7 @@ wait_for_iobuf:
771 771
772 J_ASSERT (commit_transaction->t_shadow_list == NULL); 772 J_ASSERT (commit_transaction->t_shadow_list == NULL);
773 773
774 jbd_debug(3, "JBD: commit phase 4\n"); 774 jbd_debug(3, "JBD2: commit phase 4\n");
775 775
776 /* Here we wait for the revoke record and descriptor record buffers */ 776 /* Here we wait for the revoke record and descriptor record buffers */
777 wait_for_ctlbuf: 777 wait_for_ctlbuf:
@@ -801,7 +801,7 @@ wait_for_iobuf:
801 if (err) 801 if (err)
802 jbd2_journal_abort(journal, err); 802 jbd2_journal_abort(journal, err);
803 803
804 jbd_debug(3, "JBD: commit phase 5\n"); 804 jbd_debug(3, "JBD2: commit phase 5\n");
805 write_lock(&journal->j_state_lock); 805 write_lock(&journal->j_state_lock);
806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH); 806 J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
807 commit_transaction->t_state = T_COMMIT_JFLUSH; 807 commit_transaction->t_state = T_COMMIT_JFLUSH;
@@ -830,7 +830,7 @@ wait_for_iobuf:
830 transaction can be removed from any checkpoint list it was on 830 transaction can be removed from any checkpoint list it was on
831 before. */ 831 before. */
832 832
833 jbd_debug(3, "JBD: commit phase 6\n"); 833 jbd_debug(3, "JBD2: commit phase 6\n");
834 834
835 J_ASSERT(list_empty(&commit_transaction->t_inode_list)); 835 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
836 J_ASSERT(commit_transaction->t_buffers == NULL); 836 J_ASSERT(commit_transaction->t_buffers == NULL);
@@ -964,7 +964,7 @@ restart_loop:
964 964
965 /* Done with this transaction! */ 965 /* Done with this transaction! */
966 966
967 jbd_debug(3, "JBD: commit phase 7\n"); 967 jbd_debug(3, "JBD2: commit phase 7\n");
968 968
969 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH); 969 J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
970 970
@@ -1039,7 +1039,7 @@ restart_loop:
1039 journal->j_commit_callback(journal, commit_transaction); 1039 journal->j_commit_callback(journal, commit_transaction);
1040 1040
1041 trace_jbd2_end_commit(journal, commit_transaction); 1041 trace_jbd2_end_commit(journal, commit_transaction);
1042 jbd_debug(1, "JBD: commit %d complete, head %d\n", 1042 jbd_debug(1, "JBD2: commit %d complete, head %d\n",
1043 journal->j_commit_sequence, journal->j_tail_sequence); 1043 journal->j_commit_sequence, journal->j_tail_sequence);
1044 if (to_free) 1044 if (to_free)
1045 kfree(commit_transaction); 1045 kfree(commit_transaction);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f24df13adc4e..0fa0123151d3 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -491,7 +491,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
491 */ 491 */
492 492
493 journal->j_commit_request = target; 493 journal->j_commit_request = target;
494 jbd_debug(1, "JBD: requesting commit %d/%d\n", 494 jbd_debug(1, "JBD2: requesting commit %d/%d\n",
495 journal->j_commit_request, 495 journal->j_commit_request,
496 journal->j_commit_sequence); 496 journal->j_commit_sequence);
497 wake_up(&journal->j_wait_commit); 497 wake_up(&journal->j_wait_commit);
@@ -500,7 +500,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
500 /* This should never happen, but if it does, preserve 500 /* This should never happen, but if it does, preserve
501 the evidence before kjournald goes into a loop and 501 the evidence before kjournald goes into a loop and
502 increments j_commit_sequence beyond all recognition. */ 502 increments j_commit_sequence beyond all recognition. */
503 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", 503 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
504 journal->j_commit_request, 504 journal->j_commit_request,
505 journal->j_commit_sequence, 505 journal->j_commit_sequence,
506 target, journal->j_running_transaction ? 506 target, journal->j_running_transaction ?
@@ -645,7 +645,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
645 } 645 }
646#endif 646#endif
647 while (tid_gt(tid, journal->j_commit_sequence)) { 647 while (tid_gt(tid, journal->j_commit_sequence)) {
648 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 648 jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n",
649 tid, journal->j_commit_sequence); 649 tid, journal->j_commit_sequence);
650 wake_up(&journal->j_wait_commit); 650 wake_up(&journal->j_wait_commit);
651 read_unlock(&journal->j_state_lock); 651 read_unlock(&journal->j_state_lock);
@@ -1093,7 +1093,7 @@ static int journal_reset(journal_t *journal)
1093 first = be32_to_cpu(sb->s_first); 1093 first = be32_to_cpu(sb->s_first);
1094 last = be32_to_cpu(sb->s_maxlen); 1094 last = be32_to_cpu(sb->s_maxlen);
1095 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 1095 if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
1096 printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", 1096 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
1097 first, last); 1097 first, last);
1098 journal_fail_superblock(journal); 1098 journal_fail_superblock(journal);
1099 return -EINVAL; 1099 return -EINVAL;
@@ -1139,7 +1139,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1139 */ 1139 */
1140 if (sb->s_start == 0 && journal->j_tail_sequence == 1140 if (sb->s_start == 0 && journal->j_tail_sequence ==
1141 journal->j_transaction_sequence) { 1141 journal->j_transaction_sequence) {
1142 jbd_debug(1,"JBD: Skipping superblock update on recovered sb " 1142 jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
1143 "(start %ld, seq %d, errno %d)\n", 1143 "(start %ld, seq %d, errno %d)\n",
1144 journal->j_tail, journal->j_tail_sequence, 1144 journal->j_tail, journal->j_tail_sequence,
1145 journal->j_errno); 1145 journal->j_errno);
@@ -1163,7 +1163,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
1163 } 1163 }
1164 1164
1165 read_lock(&journal->j_state_lock); 1165 read_lock(&journal->j_state_lock);
1166 jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", 1166 jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
1167 journal->j_tail, journal->j_tail_sequence, journal->j_errno); 1167 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
1168 1168
1169 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1169 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
@@ -1216,8 +1216,8 @@ static int journal_get_superblock(journal_t *journal)
1216 ll_rw_block(READ, 1, &bh); 1216 ll_rw_block(READ, 1, &bh);
1217 wait_on_buffer(bh); 1217 wait_on_buffer(bh);
1218 if (!buffer_uptodate(bh)) { 1218 if (!buffer_uptodate(bh)) {
1219 printk (KERN_ERR 1219 printk(KERN_ERR
1220 "JBD: IO error reading journal superblock\n"); 1220 "JBD2: IO error reading journal superblock\n");
1221 goto out; 1221 goto out;
1222 } 1222 }
1223 } 1223 }
@@ -1228,7 +1228,7 @@ static int journal_get_superblock(journal_t *journal)
1228 1228
1229 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 1229 if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
1230 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 1230 sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
1231 printk(KERN_WARNING "JBD: no valid journal superblock found\n"); 1231 printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
1232 goto out; 1232 goto out;
1233 } 1233 }
1234 1234
@@ -1240,14 +1240,22 @@ static int journal_get_superblock(journal_t *journal)
1240 journal->j_format_version = 2; 1240 journal->j_format_version = 2;
1241 break; 1241 break;
1242 default: 1242 default:
1243 printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); 1243 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
1244 goto out; 1244 goto out;
1245 } 1245 }
1246 1246
1247 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) 1247 if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
1248 journal->j_maxlen = be32_to_cpu(sb->s_maxlen); 1248 journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
1249 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { 1249 else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
1250 printk (KERN_WARNING "JBD: journal file too short\n"); 1250 printk(KERN_WARNING "JBD2: journal file too short\n");
1251 goto out;
1252 }
1253
1254 if (be32_to_cpu(sb->s_first) == 0 ||
1255 be32_to_cpu(sb->s_first) >= journal->j_maxlen) {
1256 printk(KERN_WARNING
1257 "JBD2: Invalid start block of journal: %u\n",
1258 be32_to_cpu(sb->s_first));
1251 goto out; 1259 goto out;
1252 } 1260 }
1253 1261
@@ -1310,8 +1318,8 @@ int jbd2_journal_load(journal_t *journal)
1310 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 1318 ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
1311 (sb->s_feature_incompat & 1319 (sb->s_feature_incompat &
1312 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 1320 ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
1313 printk (KERN_WARNING 1321 printk(KERN_WARNING
1314 "JBD: Unrecognised features on journal\n"); 1322 "JBD2: Unrecognised features on journal\n");
1315 return -EINVAL; 1323 return -EINVAL;
1316 } 1324 }
1317 } 1325 }
@@ -1346,7 +1354,7 @@ int jbd2_journal_load(journal_t *journal)
1346 return 0; 1354 return 0;
1347 1355
1348recovery_error: 1356recovery_error:
1349 printk (KERN_WARNING "JBD: recovery failed\n"); 1357 printk(KERN_WARNING "JBD2: recovery failed\n");
1350 return -EIO; 1358 return -EIO;
1351} 1359}
1352 1360
@@ -1577,7 +1585,7 @@ static int journal_convert_superblock_v1(journal_t *journal,
1577 struct buffer_head *bh; 1585 struct buffer_head *bh;
1578 1586
1579 printk(KERN_WARNING 1587 printk(KERN_WARNING
1580 "JBD: Converting superblock from version 1 to 2.\n"); 1588 "JBD2: Converting superblock from version 1 to 2.\n");
1581 1589
1582 /* Pre-initialise new fields to zero */ 1590 /* Pre-initialise new fields to zero */
1583 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); 1591 offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
@@ -1694,7 +1702,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
1694 if (!journal->j_tail) 1702 if (!journal->j_tail)
1695 goto no_recovery; 1703 goto no_recovery;
1696 1704
1697 printk (KERN_WARNING "JBD: %s recovery information on journal\n", 1705 printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
1698 write ? "Clearing" : "Ignoring"); 1706 write ? "Clearing" : "Ignoring");
1699 1707
1700 err = jbd2_journal_skip_recovery(journal); 1708 err = jbd2_journal_skip_recovery(journal);
@@ -2020,7 +2028,7 @@ static int journal_init_jbd2_journal_head_cache(void)
2020 retval = 0; 2028 retval = 0;
2021 if (!jbd2_journal_head_cache) { 2029 if (!jbd2_journal_head_cache) {
2022 retval = -ENOMEM; 2030 retval = -ENOMEM;
2023 printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); 2031 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
2024 } 2032 }
2025 return retval; 2033 return retval;
2026} 2034}
@@ -2383,7 +2391,7 @@ static void __exit journal_exit(void)
2383#ifdef CONFIG_JBD2_DEBUG 2391#ifdef CONFIG_JBD2_DEBUG
2384 int n = atomic_read(&nr_journal_heads); 2392 int n = atomic_read(&nr_journal_heads);
2385 if (n) 2393 if (n)
2386 printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); 2394 printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
2387#endif 2395#endif
2388 jbd2_remove_debugfs_entry(); 2396 jbd2_remove_debugfs_entry();
2389 jbd2_remove_jbd_stats_proc_entry(); 2397 jbd2_remove_jbd_stats_proc_entry();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 1cad869494f0..da6d7baf1390 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -89,7 +89,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
89 err = jbd2_journal_bmap(journal, next, &blocknr); 89 err = jbd2_journal_bmap(journal, next, &blocknr);
90 90
91 if (err) { 91 if (err) {
92 printk (KERN_ERR "JBD: bad block at offset %u\n", 92 printk(KERN_ERR "JBD2: bad block at offset %u\n",
93 next); 93 next);
94 goto failed; 94 goto failed;
95 } 95 }
@@ -138,14 +138,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
138 *bhp = NULL; 138 *bhp = NULL;
139 139
140 if (offset >= journal->j_maxlen) { 140 if (offset >= journal->j_maxlen) {
141 printk(KERN_ERR "JBD: corrupted journal superblock\n"); 141 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
142 return -EIO; 142 return -EIO;
143 } 143 }
144 144
145 err = jbd2_journal_bmap(journal, offset, &blocknr); 145 err = jbd2_journal_bmap(journal, offset, &blocknr);
146 146
147 if (err) { 147 if (err) {
148 printk (KERN_ERR "JBD: bad block at offset %u\n", 148 printk(KERN_ERR "JBD2: bad block at offset %u\n",
149 offset); 149 offset);
150 return err; 150 return err;
151 } 151 }
@@ -163,7 +163,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
163 } 163 }
164 164
165 if (!buffer_uptodate(bh)) { 165 if (!buffer_uptodate(bh)) {
166 printk (KERN_ERR "JBD: Failed to read block at offset %u\n", 166 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
167 offset); 167 offset);
168 brelse(bh); 168 brelse(bh);
169 return -EIO; 169 return -EIO;
@@ -251,10 +251,10 @@ int jbd2_journal_recover(journal_t *journal)
251 if (!err) 251 if (!err)
252 err = do_one_pass(journal, &info, PASS_REPLAY); 252 err = do_one_pass(journal, &info, PASS_REPLAY);
253 253
254 jbd_debug(1, "JBD: recovery, exit status %d, " 254 jbd_debug(1, "JBD2: recovery, exit status %d, "
255 "recovered transactions %u to %u\n", 255 "recovered transactions %u to %u\n",
256 err, info.start_transaction, info.end_transaction); 256 err, info.start_transaction, info.end_transaction);
257 jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", 257 jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 258 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
259 259
260 /* Restart the log at the next transaction ID, thus invalidating 260 /* Restart the log at the next transaction ID, thus invalidating
@@ -293,14 +293,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
293 err = do_one_pass(journal, &info, PASS_SCAN); 293 err = do_one_pass(journal, &info, PASS_SCAN);
294 294
295 if (err) { 295 if (err) {
296 printk(KERN_ERR "JBD: error %d scanning journal\n", err); 296 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
297 ++journal->j_transaction_sequence; 297 ++journal->j_transaction_sequence;
298 } else { 298 } else {
299#ifdef CONFIG_JBD2_DEBUG 299#ifdef CONFIG_JBD2_DEBUG
300 int dropped = info.end_transaction - 300 int dropped = info.end_transaction -
301 be32_to_cpu(journal->j_superblock->s_sequence); 301 be32_to_cpu(journal->j_superblock->s_sequence);
302 jbd_debug(1, 302 jbd_debug(1,
303 "JBD: ignoring %d transaction%s from the journal.\n", 303 "JBD2: ignoring %d transaction%s from the journal.\n",
304 dropped, (dropped == 1) ? "" : "s"); 304 dropped, (dropped == 1) ? "" : "s");
305#endif 305#endif
306 journal->j_transaction_sequence = ++info.end_transaction; 306 journal->j_transaction_sequence = ++info.end_transaction;
@@ -338,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
338 wrap(journal, *next_log_block); 338 wrap(journal, *next_log_block);
339 err = jread(&obh, journal, io_block); 339 err = jread(&obh, journal, io_block);
340 if (err) { 340 if (err) {
341 printk(KERN_ERR "JBD: IO error %d recovering block " 341 printk(KERN_ERR "JBD2: IO error %d recovering block "
342 "%lu in log\n", err, io_block); 342 "%lu in log\n", err, io_block);
343 return 1; 343 return 1;
344 } else { 344 } else {
@@ -411,7 +411,7 @@ static int do_one_pass(journal_t *journal,
411 * either the next descriptor block or the final commit 411 * either the next descriptor block or the final commit
412 * record. */ 412 * record. */
413 413
414 jbd_debug(3, "JBD: checking block %ld\n", next_log_block); 414 jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
415 err = jread(&bh, journal, next_log_block); 415 err = jread(&bh, journal, next_log_block);
416 if (err) 416 if (err)
417 goto failed; 417 goto failed;
@@ -491,8 +491,8 @@ static int do_one_pass(journal_t *journal,
491 /* Recover what we can, but 491 /* Recover what we can, but
492 * report failure at the end. */ 492 * report failure at the end. */
493 success = err; 493 success = err;
494 printk (KERN_ERR 494 printk(KERN_ERR
495 "JBD: IO error %d recovering " 495 "JBD2: IO error %d recovering "
496 "block %ld in log\n", 496 "block %ld in log\n",
497 err, io_block); 497 err, io_block);
498 } else { 498 } else {
@@ -520,7 +520,7 @@ static int do_one_pass(journal_t *journal,
520 journal->j_blocksize); 520 journal->j_blocksize);
521 if (nbh == NULL) { 521 if (nbh == NULL) {
522 printk(KERN_ERR 522 printk(KERN_ERR
523 "JBD: Out of memory " 523 "JBD2: Out of memory "
524 "during recovery.\n"); 524 "during recovery.\n");
525 err = -ENOMEM; 525 err = -ENOMEM;
526 brelse(bh); 526 brelse(bh);
@@ -689,7 +689,7 @@ static int do_one_pass(journal_t *journal,
689 /* It's really bad news if different passes end up at 689 /* It's really bad news if different passes end up at
690 * different places (but possible due to IO errors). */ 690 * different places (but possible due to IO errors). */
691 if (info->end_transaction != next_commit_ID) { 691 if (info->end_transaction != next_commit_ID) {
692 printk (KERN_ERR "JBD: recovery pass %d ended at " 692 printk(KERN_ERR "JBD2: recovery pass %d ended at "
693 "transaction %u, expected %u\n", 693 "transaction %u, expected %u\n",
694 pass, next_commit_ID, info->end_transaction); 694 pass, next_commit_ID, info->end_transaction);
695 if (!success) 695 if (!success)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d7109414cdd..a0e41a4c080e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -27,6 +27,7 @@
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h> 28#include <linux/hrtimer.h>
29#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
30#include <linux/bug.h>
30#include <linux/module.h> 31#include <linux/module.h>
31 32
32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 33static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@@ -115,7 +116,7 @@ static inline void update_t_max_wait(transaction_t *transaction,
115 */ 116 */
116 117
117static int start_this_handle(journal_t *journal, handle_t *handle, 118static int start_this_handle(journal_t *journal, handle_t *handle,
118 int gfp_mask) 119 gfp_t gfp_mask)
119{ 120{
120 transaction_t *transaction, *new_transaction = NULL; 121 transaction_t *transaction, *new_transaction = NULL;
121 tid_t tid; 122 tid_t tid;
@@ -124,7 +125,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
124 unsigned long ts = jiffies; 125 unsigned long ts = jiffies;
125 126
126 if (nblocks > journal->j_max_transaction_buffers) { 127 if (nblocks > journal->j_max_transaction_buffers) {
127 printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", 128 printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
128 current->comm, nblocks, 129 current->comm, nblocks,
129 journal->j_max_transaction_buffers); 130 journal->j_max_transaction_buffers);
130 return -ENOSPC; 131 return -ENOSPC;
@@ -320,7 +321,7 @@ static handle_t *new_handle(int nblocks)
320 * Return a pointer to a newly allocated handle, or an ERR_PTR() value 321 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
321 * on failure. 322 * on failure.
322 */ 323 */
323handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask) 324handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask)
324{ 325{
325 handle_t *handle = journal_current_handle(); 326 handle_t *handle = journal_current_handle();
326 int err; 327 int err;
@@ -443,7 +444,7 @@ out:
443 * transaction capabable of guaranteeing the requested number of 444 * transaction capabable of guaranteeing the requested number of
444 * credits. 445 * credits.
445 */ 446 */
446int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) 447int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
447{ 448{
448 transaction_t *transaction = handle->h_transaction; 449 transaction_t *transaction = handle->h_transaction;
449 journal_t *journal = transaction->t_journal; 450 journal_t *journal = transaction->t_journal;
@@ -563,7 +564,7 @@ static void warn_dirty_buffer(struct buffer_head *bh)
563 char b[BDEVNAME_SIZE]; 564 char b[BDEVNAME_SIZE];
564 565
565 printk(KERN_WARNING 566 printk(KERN_WARNING
566 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " 567 "JBD2: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
567 "There's a risk of filesystem corruption in case of system " 568 "There's a risk of filesystem corruption in case of system "
568 "crash.\n", 569 "crash.\n",
569 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); 570 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
@@ -1049,6 +1050,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
1049 * mark dirty metadata which needs to be journaled as part of the current 1050 * mark dirty metadata which needs to be journaled as part of the current
1050 * transaction. 1051 * transaction.
1051 * 1052 *
1053 * The buffer must have previously had jbd2_journal_get_write_access()
1054 * called so that it has a valid journal_head attached to the buffer
1055 * head.
1056 *
1052 * The buffer is placed on the transaction's metadata list and is marked 1057 * The buffer is placed on the transaction's metadata list and is marked
1053 * as belonging to the transaction. 1058 * as belonging to the transaction.
1054 * 1059 *
@@ -1065,11 +1070,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1065 transaction_t *transaction = handle->h_transaction; 1070 transaction_t *transaction = handle->h_transaction;
1066 journal_t *journal = transaction->t_journal; 1071 journal_t *journal = transaction->t_journal;
1067 struct journal_head *jh = bh2jh(bh); 1072 struct journal_head *jh = bh2jh(bh);
1073 int ret = 0;
1068 1074
1069 jbd_debug(5, "journal_head %p\n", jh); 1075 jbd_debug(5, "journal_head %p\n", jh);
1070 JBUFFER_TRACE(jh, "entry"); 1076 JBUFFER_TRACE(jh, "entry");
1071 if (is_handle_aborted(handle)) 1077 if (is_handle_aborted(handle))
1072 goto out; 1078 goto out;
1079 if (!buffer_jbd(bh)) {
1080 ret = -EUCLEAN;
1081 goto out;
1082 }
1073 1083
1074 jbd_lock_bh_state(bh); 1084 jbd_lock_bh_state(bh);
1075 1085
@@ -1093,8 +1103,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1093 */ 1103 */
1094 if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { 1104 if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
1095 JBUFFER_TRACE(jh, "fastpath"); 1105 JBUFFER_TRACE(jh, "fastpath");
1096 J_ASSERT_JH(jh, jh->b_transaction == 1106 if (unlikely(jh->b_transaction !=
1097 journal->j_running_transaction); 1107 journal->j_running_transaction)) {
1108 printk(KERN_EMERG "JBD: %s: "
1109 "jh->b_transaction (%llu, %p, %u) != "
1110 "journal->j_running_transaction (%p, %u)",
1111 journal->j_devname,
1112 (unsigned long long) bh->b_blocknr,
1113 jh->b_transaction,
1114 jh->b_transaction ? jh->b_transaction->t_tid : 0,
1115 journal->j_running_transaction,
1116 journal->j_running_transaction ?
1117 journal->j_running_transaction->t_tid : 0);
1118 ret = -EINVAL;
1119 }
1098 goto out_unlock_bh; 1120 goto out_unlock_bh;
1099 } 1121 }
1100 1122
@@ -1108,9 +1130,32 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1108 */ 1130 */
1109 if (jh->b_transaction != transaction) { 1131 if (jh->b_transaction != transaction) {
1110 JBUFFER_TRACE(jh, "already on other transaction"); 1132 JBUFFER_TRACE(jh, "already on other transaction");
1111 J_ASSERT_JH(jh, jh->b_transaction == 1133 if (unlikely(jh->b_transaction !=
1112 journal->j_committing_transaction); 1134 journal->j_committing_transaction)) {
1113 J_ASSERT_JH(jh, jh->b_next_transaction == transaction); 1135 printk(KERN_EMERG "JBD: %s: "
1136 "jh->b_transaction (%llu, %p, %u) != "
1137 "journal->j_committing_transaction (%p, %u)",
1138 journal->j_devname,
1139 (unsigned long long) bh->b_blocknr,
1140 jh->b_transaction,
1141 jh->b_transaction ? jh->b_transaction->t_tid : 0,
1142 journal->j_committing_transaction,
1143 journal->j_committing_transaction ?
1144 journal->j_committing_transaction->t_tid : 0);
1145 ret = -EINVAL;
1146 }
1147 if (unlikely(jh->b_next_transaction != transaction)) {
1148 printk(KERN_EMERG "JBD: %s: "
1149 "jh->b_next_transaction (%llu, %p, %u) != "
1150 "transaction (%p, %u)",
1151 journal->j_devname,
1152 (unsigned long long) bh->b_blocknr,
1153 jh->b_next_transaction,
1154 jh->b_next_transaction ?
1155 jh->b_next_transaction->t_tid : 0,
1156 transaction, transaction->t_tid);
1157 ret = -EINVAL;
1158 }
1114 /* And this case is illegal: we can't reuse another 1159 /* And this case is illegal: we can't reuse another
1115 * transaction's data buffer, ever. */ 1160 * transaction's data buffer, ever. */
1116 goto out_unlock_bh; 1161 goto out_unlock_bh;
@@ -1127,7 +1172,8 @@ out_unlock_bh:
1127 jbd_unlock_bh_state(bh); 1172 jbd_unlock_bh_state(bh);
1128out: 1173out:
1129 JBUFFER_TRACE(jh, "exit"); 1174 JBUFFER_TRACE(jh, "exit");
1130 return 0; 1175 WARN_ON(ret); /* All errors are bugs, so dump the stack */
1176 return ret;
1131} 1177}
1132 1178
1133/* 1179/*
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 9659b7c00468..be6169bd8acd 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -245,7 +245,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
245 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 245 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
246 dentry->d_name.len, dead_f, now); 246 dentry->d_name.len, dead_f, now);
247 if (dead_f->inocache) 247 if (dead_f->inocache)
248 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink; 248 set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
249 if (!ret) 249 if (!ret)
250 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 250 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
251 return ret; 251 return ret;
@@ -278,7 +278,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
278 278
279 if (!ret) { 279 if (!ret) {
280 mutex_lock(&f->sem); 280 mutex_lock(&f->sem);
281 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink; 281 set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
282 mutex_unlock(&f->sem); 282 mutex_unlock(&f->sem);
283 d_instantiate(dentry, old_dentry->d_inode); 283 d_instantiate(dentry, old_dentry->d_inode);
284 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 284 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -497,7 +497,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
497 f = JFFS2_INODE_INFO(inode); 497 f = JFFS2_INODE_INFO(inode);
498 498
499 /* Directories get nlink 2 at start */ 499 /* Directories get nlink 2 at start */
500 inode->i_nlink = 2; 500 set_nlink(inode, 2);
501 /* but ic->pino_nlink is the parent ino# */ 501 /* but ic->pino_nlink is the parent ino# */
502 f->inocache->pino_nlink = dir_i->i_ino; 502 f->inocache->pino_nlink = dir_i->i_ino;
503 503
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bbcb9755dd2b..7286e44ac665 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -278,7 +278,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
278 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 278 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
279 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 279 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
280 280
281 inode->i_nlink = f->inocache->pino_nlink; 281 set_nlink(inode, f->inocache->pino_nlink);
282 282
283 inode->i_blocks = (inode->i_size + 511) >> 9; 283 inode->i_blocks = (inode->i_size + 511) >> 9;
284 284
@@ -291,7 +291,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
291 case S_IFDIR: 291 case S_IFDIR:
292 { 292 {
293 struct jffs2_full_dirent *fd; 293 struct jffs2_full_dirent *fd;
294 inode->i_nlink = 2; /* parent and '.' */ 294 set_nlink(inode, 2); /* parent and '.' */
295 295
296 for (fd=f->dents; fd; fd = fd->next) { 296 for (fd=f->dents; fd; fd = fd->next) {
297 if (fd->type == DT_DIR && fd->ino) 297 if (fd->type == DT_DIR && fd->ino)
@@ -453,7 +453,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
453 iput(inode); 453 iput(inode);
454 return ERR_PTR(ret); 454 return ERR_PTR(ret);
455 } 455 }
456 inode->i_nlink = 1; 456 set_nlink(inode, 1);
457 inode->i_ino = je32_to_cpu(ri->ino); 457 inode->i_ino = je32_to_cpu(ri->ino);
458 inode->i_mode = jemode_to_cpu(ri->mode); 458 inode->i_mode = jemode_to_cpu(ri->mode);
459 inode->i_gid = je16_to_cpu(ri->gid); 459 inode->i_gid = je16_to_cpu(ri->gid);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index b78b2f978f04..1b6f15f191b3 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -457,7 +457,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
457 /* read the page of fixed disk inode (AIT) in raw mode */ 457 /* read the page of fixed disk inode (AIT) in raw mode */
458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
459 if (mp == NULL) { 459 if (mp == NULL) {
460 ip->i_nlink = 1; /* Don't want iput() deleting it */ 460 set_nlink(ip, 1); /* Don't want iput() deleting it */
461 iput(ip); 461 iput(ip);
462 return (NULL); 462 return (NULL);
463 } 463 }
@@ -469,7 +469,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
469 /* copy on-disk inode to in-memory inode */ 469 /* copy on-disk inode to in-memory inode */
470 if ((copy_from_dinode(dp, ip)) != 0) { 470 if ((copy_from_dinode(dp, ip)) != 0) {
471 /* handle bad return by returning NULL for ip */ 471 /* handle bad return by returning NULL for ip */
472 ip->i_nlink = 1; /* Don't want iput() deleting it */ 472 set_nlink(ip, 1); /* Don't want iput() deleting it */
473 iput(ip); 473 iput(ip);
474 /* release the page */ 474 /* release the page */
475 release_metapage(mp); 475 release_metapage(mp);
@@ -3076,7 +3076,7 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3076 ip->i_mode |= 0001; 3076 ip->i_mode |= 0001;
3077 } 3077 }
3078 } 3078 }
3079 ip->i_nlink = le32_to_cpu(dip->di_nlink); 3079 set_nlink(ip, le32_to_cpu(dip->di_nlink));
3080 3080
3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid); 3081 jfs_ip->saved_uid = le32_to_cpu(dip->di_uid);
3082 if (sbi->uid == -1) 3082 if (sbi->uid == -1)
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 2686531e235a..c1a3e603279c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -157,7 +157,7 @@ fail_drop:
157 dquot_drop(inode); 157 dquot_drop(inode);
158 inode->i_flags |= S_NOQUOTA; 158 inode->i_flags |= S_NOQUOTA;
159fail_unlock: 159fail_unlock:
160 inode->i_nlink = 0; 160 clear_nlink(inode);
161 unlock_new_inode(inode); 161 unlock_new_inode(inode);
162fail_put: 162fail_put:
163 iput(inode); 163 iput(inode);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index e17545e15664..a112ad96e474 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -172,7 +172,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
172 mutex_unlock(&JFS_IP(dip)->commit_mutex); 172 mutex_unlock(&JFS_IP(dip)->commit_mutex);
173 if (rc) { 173 if (rc) {
174 free_ea_wmap(ip); 174 free_ea_wmap(ip);
175 ip->i_nlink = 0; 175 clear_nlink(ip);
176 unlock_new_inode(ip); 176 unlock_new_inode(ip);
177 iput(ip); 177 iput(ip);
178 } else { 178 } else {
@@ -292,7 +292,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
292 goto out3; 292 goto out3;
293 } 293 }
294 294
295 ip->i_nlink = 2; /* for '.' */ 295 set_nlink(ip, 2); /* for '.' */
296 ip->i_op = &jfs_dir_inode_operations; 296 ip->i_op = &jfs_dir_inode_operations;
297 ip->i_fop = &jfs_dir_operations; 297 ip->i_fop = &jfs_dir_operations;
298 298
@@ -311,7 +311,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
311 mutex_unlock(&JFS_IP(dip)->commit_mutex); 311 mutex_unlock(&JFS_IP(dip)->commit_mutex);
312 if (rc) { 312 if (rc) {
313 free_ea_wmap(ip); 313 free_ea_wmap(ip);
314 ip->i_nlink = 0; 314 clear_nlink(ip);
315 unlock_new_inode(ip); 315 unlock_new_inode(ip);
316 iput(ip); 316 iput(ip);
317 } else { 317 } else {
@@ -844,7 +844,7 @@ static int jfs_link(struct dentry *old_dentry,
844 rc = txCommit(tid, 2, &iplist[0], 0); 844 rc = txCommit(tid, 2, &iplist[0], 0);
845 845
846 if (rc) { 846 if (rc) {
847 ip->i_nlink--; /* never instantiated */ 847 drop_nlink(ip); /* never instantiated */
848 iput(ip); 848 iput(ip);
849 } else 849 } else
850 d_instantiate(dentry, ip); 850 d_instantiate(dentry, ip);
@@ -1048,7 +1048,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1048 mutex_unlock(&JFS_IP(dip)->commit_mutex); 1048 mutex_unlock(&JFS_IP(dip)->commit_mutex);
1049 if (rc) { 1049 if (rc) {
1050 free_ea_wmap(ip); 1050 free_ea_wmap(ip);
1051 ip->i_nlink = 0; 1051 clear_nlink(ip);
1052 unlock_new_inode(ip); 1052 unlock_new_inode(ip);
1053 iput(ip); 1053 iput(ip);
1054 } else { 1054 } else {
@@ -1433,7 +1433,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1433 mutex_unlock(&JFS_IP(dir)->commit_mutex); 1433 mutex_unlock(&JFS_IP(dir)->commit_mutex);
1434 if (rc) { 1434 if (rc) {
1435 free_ea_wmap(ip); 1435 free_ea_wmap(ip);
1436 ip->i_nlink = 0; 1436 clear_nlink(ip);
1437 unlock_new_inode(ip); 1437 unlock_new_inode(ip);
1438 iput(ip); 1438 iput(ip);
1439 } else { 1439 } else {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 06c8a67cbe76..a44eff076c17 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -485,7 +485,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
485 goto out_unload; 485 goto out_unload;
486 } 486 }
487 inode->i_ino = 0; 487 inode->i_ino = 0;
488 inode->i_nlink = 1;
489 inode->i_size = sb->s_bdev->bd_inode->i_size; 488 inode->i_size = sb->s_bdev->bd_inode->i_size;
490 inode->i_mapping->a_ops = &jfs_metapage_aops; 489 inode->i_mapping->a_ops = &jfs_metapage_aops;
491 insert_inode_hash(inode); 490 insert_inode_hash(inode);
diff --git a/fs/libfs.c b/fs/libfs.c
index c18e9a1235b6..f6d411eef1e7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -490,7 +490,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
490 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 490 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
491 inode->i_op = &simple_dir_inode_operations; 491 inode->i_op = &simple_dir_inode_operations;
492 inode->i_fop = &simple_dir_operations; 492 inode->i_fop = &simple_dir_operations;
493 inode->i_nlink = 2; 493 set_nlink(inode, 2);
494 root = d_alloc_root(inode); 494 root = d_alloc_root(inode);
495 if (!root) { 495 if (!root) {
496 iput(inode); 496 iput(inode);
@@ -510,8 +510,10 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
510 if (!dentry) 510 if (!dentry)
511 goto out; 511 goto out;
512 inode = new_inode(s); 512 inode = new_inode(s);
513 if (!inode) 513 if (!inode) {
514 dput(dentry);
514 goto out; 515 goto out;
516 }
515 inode->i_mode = S_IFREG | files->mode; 517 inode->i_mode = S_IFREG | files->mode;
516 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 518 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
517 inode->i_fop = files->ops; 519 inode->i_fop = files->ops;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b3ff3d894165..b7d7f67cee5a 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -197,7 +197,7 @@ static int logfs_remove_inode(struct inode *inode)
197{ 197{
198 int ret; 198 int ret;
199 199
200 inode->i_nlink--; 200 drop_nlink(inode);
201 ret = write_inode(inode); 201 ret = write_inode(inode);
202 LOGFS_BUG_ON(ret, inode->i_sb); 202 LOGFS_BUG_ON(ret, inode->i_sb);
203 return ret; 203 return ret;
@@ -433,7 +433,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
433 433
434 ta = kzalloc(sizeof(*ta), GFP_KERNEL); 434 ta = kzalloc(sizeof(*ta), GFP_KERNEL);
435 if (!ta) { 435 if (!ta) {
436 inode->i_nlink--; 436 drop_nlink(inode);
437 iput(inode); 437 iput(inode);
438 return -ENOMEM; 438 return -ENOMEM;
439 } 439 }
@@ -456,7 +456,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
456 abort_transaction(inode, ta); 456 abort_transaction(inode, ta);
457 li->li_flags |= LOGFS_IF_STILLBORN; 457 li->li_flags |= LOGFS_IF_STILLBORN;
458 /* FIXME: truncate symlink */ 458 /* FIXME: truncate symlink */
459 inode->i_nlink--; 459 drop_nlink(inode);
460 iput(inode); 460 iput(inode);
461 goto out; 461 goto out;
462 } 462 }
@@ -563,7 +563,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
563 563
564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 564 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
565 ihold(inode); 565 ihold(inode);
566 inode->i_nlink++; 566 inc_nlink(inode);
567 mark_inode_dirty_sync(inode); 567 mark_inode_dirty_sync(inode);
568 568
569 return __logfs_create(dir, dentry, inode, NULL, 0); 569 return __logfs_create(dir, dentry, inode, NULL, 0);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index edfea7a3a747..7e441ad5f792 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -93,7 +93,7 @@ static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
93 /* inode->i_nlink == 0 can be true when called from 93 /* inode->i_nlink == 0 can be true when called from
94 * block validator */ 94 * block validator */
95 /* set i_nlink to 0 to prevent caching */ 95 /* set i_nlink to 0 to prevent caching */
96 inode->i_nlink = 0; 96 clear_nlink(inode);
97 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE; 97 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
98 iget_failed(inode); 98 iget_failed(inode);
99 if (!err) 99 if (!err)
@@ -199,7 +199,6 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
199 inode->i_blocks = 0; 199 inode->i_blocks = 0;
200 inode->i_ctime = CURRENT_TIME; 200 inode->i_ctime = CURRENT_TIME;
201 inode->i_mtime = CURRENT_TIME; 201 inode->i_mtime = CURRENT_TIME;
202 inode->i_nlink = 1;
203 li->li_refcount = 1; 202 li->li_refcount = 1;
204 INIT_LIST_HEAD(&li->li_freeing_list); 203 INIT_LIST_HEAD(&li->li_freeing_list);
205 204
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index f22d108bfa5d..398ecff6e548 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -618,7 +618,6 @@ static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs,
618struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index); 618struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index);
619void emergency_read_end(struct page *page); 619void emergency_read_end(struct page *page);
620void logfs_crash_dump(struct super_block *sb); 620void logfs_crash_dump(struct super_block *sb);
621void *memchr_inv(const void *s, int c, size_t n);
622int logfs_statfs(struct dentry *dentry, struct kstatfs *stats); 621int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
623int logfs_check_ds(struct logfs_disk_super *ds); 622int logfs_check_ds(struct logfs_disk_super *ds);
624int logfs_write_sb(struct super_block *sb); 623int logfs_write_sb(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index d8d09380c7de..2ac4217b7901 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -126,7 +126,7 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
126 inode->i_atime = be64_to_timespec(di->di_atime); 126 inode->i_atime = be64_to_timespec(di->di_atime);
127 inode->i_ctime = be64_to_timespec(di->di_ctime); 127 inode->i_ctime = be64_to_timespec(di->di_ctime);
128 inode->i_mtime = be64_to_timespec(di->di_mtime); 128 inode->i_mtime = be64_to_timespec(di->di_mtime);
129 inode->i_nlink = be32_to_cpu(di->di_refcount); 129 set_nlink(inode, be32_to_cpu(di->di_refcount));
130 inode->i_generation = be32_to_cpu(di->di_generation); 130 inode->i_generation = be32_to_cpu(di->di_generation);
131 131
132 switch (inode->i_mode & S_IFMT) { 132 switch (inode->i_mode & S_IFMT) {
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index b9b3154b0485..e795c234ea33 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -92,28 +92,6 @@ void logfs_crash_dump(struct super_block *sb)
92} 92}
93 93
94/* 94/*
95 * TODO: move to lib/string.c
96 */
97/**
98 * memchr_inv - Find a character in an area of memory.
99 * @s: The memory area
100 * @c: The byte to search for
101 * @n: The size of the area.
102 *
103 * returns the address of the first character other than @c, or %NULL
104 * if the whole buffer contains just @c.
105 */
106void *memchr_inv(const void *s, int c, size_t n)
107{
108 const unsigned char *p = s;
109 while (n-- != 0)
110 if ((unsigned char)c != *p++)
111 return (void *)(p - 1);
112
113 return NULL;
114}
115
116/*
117 * FIXME: There should be a reserve for root, similar to ext2. 95 * FIXME: There should be a reserve for root, similar to ext2.
118 */ 96 */
119int logfs_statfs(struct dentry *dentry, struct kstatfs *stats) 97int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e7d23e25bf1d..64cdcd662ffc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -446,7 +446,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
446 inode->i_mode = raw_inode->i_mode; 446 inode->i_mode = raw_inode->i_mode;
447 inode->i_uid = (uid_t)raw_inode->i_uid; 447 inode->i_uid = (uid_t)raw_inode->i_uid;
448 inode->i_gid = (gid_t)raw_inode->i_gid; 448 inode->i_gid = (gid_t)raw_inode->i_gid;
449 inode->i_nlink = raw_inode->i_nlinks; 449 set_nlink(inode, raw_inode->i_nlinks);
450 inode->i_size = raw_inode->i_size; 450 inode->i_size = raw_inode->i_size;
451 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time; 451 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
452 inode->i_mtime.tv_nsec = 0; 452 inode->i_mtime.tv_nsec = 0;
@@ -479,7 +479,7 @@ static struct inode *V2_minix_iget(struct inode *inode)
479 inode->i_mode = raw_inode->i_mode; 479 inode->i_mode = raw_inode->i_mode;
480 inode->i_uid = (uid_t)raw_inode->i_uid; 480 inode->i_uid = (uid_t)raw_inode->i_uid;
481 inode->i_gid = (gid_t)raw_inode->i_gid; 481 inode->i_gid = (gid_t)raw_inode->i_gid;
482 inode->i_nlink = raw_inode->i_nlinks; 482 set_nlink(inode, raw_inode->i_nlinks);
483 inode->i_size = raw_inode->i_size; 483 inode->i_size = raw_inode->i_size;
484 inode->i_mtime.tv_sec = raw_inode->i_mtime; 484 inode->i_mtime.tv_sec = raw_inode->i_mtime;
485 inode->i_atime.tv_sec = raw_inode->i_atime; 485 inode->i_atime.tv_sec = raw_inode->i_atime;
diff --git a/fs/namei.c b/fs/namei.c
index 7657be4352bf..ac6d214da827 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -137,7 +137,7 @@ static int do_getname(const char __user *filename, char *page)
137 return retval; 137 return retval;
138} 138}
139 139
140static char *getname_flags(const char __user * filename, int flags) 140static char *getname_flags(const char __user *filename, int flags, int *empty)
141{ 141{
142 char *tmp, *result; 142 char *tmp, *result;
143 143
@@ -148,6 +148,8 @@ static char *getname_flags(const char __user * filename, int flags)
148 148
149 result = tmp; 149 result = tmp;
150 if (retval < 0) { 150 if (retval < 0) {
151 if (retval == -ENOENT && empty)
152 *empty = 1;
151 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { 153 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
152 __putname(tmp); 154 __putname(tmp);
153 result = ERR_PTR(retval); 155 result = ERR_PTR(retval);
@@ -160,7 +162,7 @@ static char *getname_flags(const char __user * filename, int flags)
160 162
161char *getname(const char __user * filename) 163char *getname(const char __user * filename)
162{ 164{
163 return getname_flags(filename, 0); 165 return getname_flags(filename, 0, 0);
164} 166}
165 167
166#ifdef CONFIG_AUDITSYSCALL 168#ifdef CONFIG_AUDITSYSCALL
@@ -1798,11 +1800,11 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1798 return __lookup_hash(&this, base, NULL); 1800 return __lookup_hash(&this, base, NULL);
1799} 1801}
1800 1802
1801int user_path_at(int dfd, const char __user *name, unsigned flags, 1803int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
1802 struct path *path) 1804 struct path *path, int *empty)
1803{ 1805{
1804 struct nameidata nd; 1806 struct nameidata nd;
1805 char *tmp = getname_flags(name, flags); 1807 char *tmp = getname_flags(name, flags, empty);
1806 int err = PTR_ERR(tmp); 1808 int err = PTR_ERR(tmp);
1807 if (!IS_ERR(tmp)) { 1809 if (!IS_ERR(tmp)) {
1808 1810
@@ -1816,6 +1818,12 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1816 return err; 1818 return err;
1817} 1819}
1818 1820
1821int user_path_at(int dfd, const char __user *name, unsigned flags,
1822 struct path *path)
1823{
1824 return user_path_at_empty(dfd, name, flags, path, 0);
1825}
1826
1819static int user_path_parent(int dfd, const char __user *path, 1827static int user_path_parent(int dfd, const char __user *path,
1820 struct nameidata *nd, char **name) 1828 struct nameidata *nd, char **name)
1821{ 1829{
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 202f370526a7..5b5fa33b6b9d 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -228,7 +228,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
228 228
229 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode); 229 DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
230 230
231 inode->i_nlink = 1; 231 set_nlink(inode, 1);
232 inode->i_uid = server->m.uid; 232 inode->i_uid = server->m.uid;
233 inode->i_gid = server->m.gid; 233 inode->i_gid = server->m.gid;
234 234
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 918ad647afea..726e59a9e50f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -488,17 +488,18 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
488 struct xdr_stream *xdr, 488 struct xdr_stream *xdr,
489 struct cb_recallanyargs *args) 489 struct cb_recallanyargs *args)
490{ 490{
491 __be32 *p; 491 uint32_t bitmap[2];
492 __be32 *p, status;
492 493
493 args->craa_addr = svc_addr(rqstp); 494 args->craa_addr = svc_addr(rqstp);
494 p = read_buf(xdr, 4); 495 p = read_buf(xdr, 4);
495 if (unlikely(p == NULL)) 496 if (unlikely(p == NULL))
496 return htonl(NFS4ERR_BADXDR); 497 return htonl(NFS4ERR_BADXDR);
497 args->craa_objs_to_keep = ntohl(*p++); 498 args->craa_objs_to_keep = ntohl(*p++);
498 p = read_buf(xdr, 4); 499 status = decode_bitmap(xdr, bitmap);
499 if (unlikely(p == NULL)) 500 if (unlikely(status))
500 return htonl(NFS4ERR_BADXDR); 501 return status;
501 args->craa_type_mask = ntohl(*p); 502 args->craa_type_mask = bitmap[0];
502 503
503 return 0; 504 return 0;
504} 505}
@@ -986,4 +987,5 @@ struct svc_version nfs4_callback_version4 = {
986 .vs_proc = nfs4_callback_procedures1, 987 .vs_proc = nfs4_callback_procedures1,
987 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, 988 .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
988 .vs_dispatch = NULL, 989 .vs_dispatch = NULL,
990 .vs_hidden = 1,
989}; 991};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 91c01f0a4c3b..0a1f8312b4dc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -137,11 +137,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
137static int 137static int
138nfs_file_release(struct inode *inode, struct file *filp) 138nfs_file_release(struct inode *inode, struct file *filp)
139{ 139{
140 struct dentry *dentry = filp->f_path.dentry;
141
142 dprintk("NFS: release(%s/%s)\n", 140 dprintk("NFS: release(%s/%s)\n",
143 dentry->d_parent->d_name.name, 141 filp->f_path.dentry->d_parent->d_name.name,
144 dentry->d_name.name); 142 filp->f_path.dentry->d_name.name);
145 143
146 nfs_inc_stats(inode, NFSIOS_VFSRELEASE); 144 nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
147 return nfs_release(inode, filp); 145 return nfs_release(inode, filp);
@@ -228,14 +226,13 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
228 struct dentry * dentry = iocb->ki_filp->f_path.dentry; 226 struct dentry * dentry = iocb->ki_filp->f_path.dentry;
229 struct inode * inode = dentry->d_inode; 227 struct inode * inode = dentry->d_inode;
230 ssize_t result; 228 ssize_t result;
231 size_t count = iov_length(iov, nr_segs);
232 229
233 if (iocb->ki_filp->f_flags & O_DIRECT) 230 if (iocb->ki_filp->f_flags & O_DIRECT)
234 return nfs_file_direct_read(iocb, iov, nr_segs, pos); 231 return nfs_file_direct_read(iocb, iov, nr_segs, pos);
235 232
236 dprintk("NFS: read(%s/%s, %lu@%lu)\n", 233 dprintk("NFS: read(%s/%s, %lu@%lu)\n",
237 dentry->d_parent->d_name.name, dentry->d_name.name, 234 dentry->d_parent->d_name.name, dentry->d_name.name,
238 (unsigned long) count, (unsigned long) pos); 235 (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
239 236
240 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 237 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
241 if (!result) { 238 if (!result) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4dc6d078f108..c07a55aec838 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -320,7 +320,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); 320 memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
321 inode->i_version = 0; 321 inode->i_version = 0;
322 inode->i_size = 0; 322 inode->i_size = 0;
323 inode->i_nlink = 0; 323 clear_nlink(inode);
324 inode->i_uid = -2; 324 inode->i_uid = -2;
325 inode->i_gid = -2; 325 inode->i_gid = -2;
326 inode->i_blocks = 0; 326 inode->i_blocks = 0;
@@ -355,7 +355,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
355 | NFS_INO_INVALID_DATA 355 | NFS_INO_INVALID_DATA
356 | NFS_INO_REVAL_PAGECACHE; 356 | NFS_INO_REVAL_PAGECACHE;
357 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 357 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
358 inode->i_nlink = fattr->nlink; 358 set_nlink(inode, fattr->nlink);
359 else if (nfs_server_capable(inode, NFS_CAP_NLINK)) 359 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR; 360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
361 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 361 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
@@ -1361,7 +1361,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1361 invalid |= NFS_INO_INVALID_ATTR; 1361 invalid |= NFS_INO_INVALID_ATTR;
1362 if (S_ISDIR(inode->i_mode)) 1362 if (S_ISDIR(inode->i_mode))
1363 invalid |= NFS_INO_INVALID_DATA; 1363 invalid |= NFS_INO_INVALID_DATA;
1364 inode->i_nlink = fattr->nlink; 1364 set_nlink(inode, fattr->nlink);
1365 } 1365 }
1366 } else if (server->caps & NFS_CAP_NLINK) 1366 } else if (server->caps & NFS_CAP_NLINK)
1367 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR 1367 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 955699515e70..a62d36b9a99e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -450,9 +450,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
450 450
451 fl->dsaddr = dsaddr; 451 fl->dsaddr = dsaddr;
452 452
453 if (fl->first_stripe_index < 0 || 453 if (fl->first_stripe_index >= dsaddr->stripe_count) {
454 fl->first_stripe_index >= dsaddr->stripe_count) { 454 dprintk("%s Bad first_stripe_index %u\n",
455 dprintk("%s Bad first_stripe_index %d\n",
456 __func__, fl->first_stripe_index); 455 __func__, fl->first_stripe_index);
457 goto out_put; 456 goto out_put;
458 } 457 }
@@ -553,7 +552,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
553 552
554 /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. 553 /* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
555 * Futher checking is done in filelayout_check_layout */ 554 * Futher checking is done in filelayout_check_layout */
556 if (fl->num_fh < 0 || fl->num_fh > 555 if (fl->num_fh >
557 max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) 556 max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
558 goto out_err; 557 goto out_err;
559 558
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2ae413c986a..b60fddf606f7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5950,6 +5950,7 @@ static void nfs4_layoutcommit_release(void *calldata)
5950{ 5950{
5951 struct nfs4_layoutcommit_data *data = calldata; 5951 struct nfs4_layoutcommit_data *data = calldata;
5952 struct pnfs_layout_segment *lseg, *tmp; 5952 struct pnfs_layout_segment *lseg, *tmp;
5953 unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
5953 5954
5954 pnfs_cleanup_layoutcommit(data); 5955 pnfs_cleanup_layoutcommit(data);
5955 /* Matched by references in pnfs_set_layoutcommit */ 5956 /* Matched by references in pnfs_set_layoutcommit */
@@ -5959,6 +5960,11 @@ static void nfs4_layoutcommit_release(void *calldata)
5959 &lseg->pls_flags)) 5960 &lseg->pls_flags))
5960 put_lseg(lseg); 5961 put_lseg(lseg);
5961 } 5962 }
5963
5964 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
5965 smp_mb__after_clear_bit();
5966 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
5967
5962 put_rpccred(data->cred); 5968 put_rpccred(data->cred);
5963 kfree(data); 5969 kfree(data);
5964} 5970}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1dce12f41a4f..e6161b213ed1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6602,8 +6602,6 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
6602 if (status) 6602 if (status)
6603 goto out; 6603 goto out;
6604 status = decode_secinfo(xdr, res); 6604 status = decode_secinfo(xdr, res);
6605 if (status)
6606 goto out;
6607out: 6605out:
6608 return status; 6606 return status;
6609} 6607}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index d0cda12fddc3..c807ab93140e 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -38,21 +38,15 @@
38 */ 38 */
39 39
40#include <linux/module.h> 40#include <linux/module.h>
41#include <scsi/osd_initiator.h> 41#include <scsi/osd_ore.h>
42 42
43#include "objlayout.h" 43#include "objlayout.h"
44 44
45#define NFSDBG_FACILITY NFSDBG_PNFS_LD 45#define NFSDBG_FACILITY NFSDBG_PNFS_LD
46 46
47#define _LLU(x) ((unsigned long long)x)
48
49enum { BIO_MAX_PAGES_KMALLOC =
50 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
51};
52
53struct objio_dev_ent { 47struct objio_dev_ent {
54 struct nfs4_deviceid_node id_node; 48 struct nfs4_deviceid_node id_node;
55 struct osd_dev *od; 49 struct ore_dev od;
56}; 50};
57 51
58static void 52static void
@@ -60,8 +54,8 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
60{ 54{
61 struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); 55 struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
62 56
63 dprintk("%s: free od=%p\n", __func__, de->od); 57 dprintk("%s: free od=%p\n", __func__, de->od.od);
64 osduld_put_device(de->od); 58 osduld_put_device(de->od.od);
65 kfree(de); 59 kfree(de);
66} 60}
67 61
@@ -98,12 +92,12 @@ _dev_list_add(const struct nfs_server *nfss,
98 nfss->pnfs_curr_ld, 92 nfss->pnfs_curr_ld,
99 nfss->nfs_client, 93 nfss->nfs_client,
100 d_id); 94 d_id);
101 de->od = od; 95 de->od.od = od;
102 96
103 d = nfs4_insert_deviceid_node(&de->id_node); 97 d = nfs4_insert_deviceid_node(&de->id_node);
104 n = container_of(d, struct objio_dev_ent, id_node); 98 n = container_of(d, struct objio_dev_ent, id_node);
105 if (n != de) { 99 if (n != de) {
106 dprintk("%s: Race with other n->od=%p\n", __func__, n->od); 100 dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od);
107 objio_free_deviceid_node(&de->id_node); 101 objio_free_deviceid_node(&de->id_node);
108 de = n; 102 de = n;
109 } 103 }
@@ -111,28 +105,11 @@ _dev_list_add(const struct nfs_server *nfss,
111 return de; 105 return de;
112} 106}
113 107
114struct caps_buffers {
115 u8 caps_key[OSD_CRYPTO_KEYID_SIZE];
116 u8 creds[OSD_CAP_LEN];
117};
118
119struct objio_segment { 108struct objio_segment {
120 struct pnfs_layout_segment lseg; 109 struct pnfs_layout_segment lseg;
121 110
122 struct pnfs_osd_object_cred *comps; 111 struct ore_layout layout;
123 112 struct ore_components oc;
124 unsigned mirrors_p1;
125 unsigned stripe_unit;
126 unsigned group_width; /* Data stripe_units without integrity comps */
127 u64 group_depth;
128 unsigned group_count;
129
130 unsigned max_io_size;
131
132 unsigned comps_index;
133 unsigned num_comps;
134 /* variable length */
135 struct objio_dev_ent *ods[];
136}; 113};
137 114
138static inline struct objio_segment * 115static inline struct objio_segment *
@@ -141,59 +118,44 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg)
141 return container_of(lseg, struct objio_segment, lseg); 118 return container_of(lseg, struct objio_segment, lseg);
142} 119}
143 120
144struct objio_state;
145typedef ssize_t (*objio_done_fn)(struct objio_state *ios);
146
147struct objio_state { 121struct objio_state {
148 /* Generic layer */ 122 /* Generic layer */
149 struct objlayout_io_state ol_state; 123 struct objlayout_io_res oir;
150 124
151 struct objio_segment *layout; 125 bool sync;
152 126 /*FIXME: Support for extra_bytes at ore_get_rw_state() */
153 struct kref kref; 127 struct ore_io_state *ios;
154 objio_done_fn done;
155 void *private;
156
157 unsigned long length;
158 unsigned numdevs; /* Actually used devs in this IO */
159 /* A per-device variable array of size numdevs */
160 struct _objio_per_comp {
161 struct bio *bio;
162 struct osd_request *or;
163 unsigned long length;
164 u64 offset;
165 unsigned dev;
166 } per_dev[];
167}; 128};
168 129
169/* Send and wait for a get_device_info of devices in the layout, 130/* Send and wait for a get_device_info of devices in the layout,
170 then look them up with the osd_initiator library */ 131 then look them up with the osd_initiator library */
171static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, 132static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
172 struct objio_segment *objio_seg, unsigned comp, 133 struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id,
173 gfp_t gfp_flags) 134 gfp_t gfp_flags)
174{ 135{
175 struct pnfs_osd_deviceaddr *deviceaddr; 136 struct pnfs_osd_deviceaddr *deviceaddr;
176 struct nfs4_deviceid *d_id;
177 struct objio_dev_ent *ode; 137 struct objio_dev_ent *ode;
178 struct osd_dev *od; 138 struct osd_dev *od;
179 struct osd_dev_info odi; 139 struct osd_dev_info odi;
180 int err; 140 int err;
181 141
182 d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id;
183
184 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); 142 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
185 if (ode) 143 if (ode) {
186 return ode; 144 objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
145 return 0;
146 }
187 147
188 err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); 148 err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags);
189 if (unlikely(err)) { 149 if (unlikely(err)) {
190 dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", 150 dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
191 __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); 151 __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err);
192 return ERR_PTR(err); 152 return err;
193 } 153 }
194 154
195 odi.systemid_len = deviceaddr->oda_systemid.len; 155 odi.systemid_len = deviceaddr->oda_systemid.len;
196 if (odi.systemid_len > sizeof(odi.systemid)) { 156 if (odi.systemid_len > sizeof(odi.systemid)) {
157 dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
158 __func__, sizeof(odi.systemid));
197 err = -EINVAL; 159 err = -EINVAL;
198 goto out; 160 goto out;
199 } else if (odi.systemid_len) 161 } else if (odi.systemid_len)
@@ -218,96 +180,53 @@ static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay,
218 180
219 ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, 181 ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od,
220 gfp_flags); 182 gfp_flags);
221 183 objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
184 dprintk("Adding new dev_id(%llx:%llx)\n",
185 _DEVID_LO(d_id), _DEVID_HI(d_id));
222out: 186out:
223 dprintk("%s: return=%d\n", __func__, err);
224 objlayout_put_deviceinfo(deviceaddr); 187 objlayout_put_deviceinfo(deviceaddr);
225 return err ? ERR_PTR(err) : ode; 188 return err;
226} 189}
227 190
228static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, 191static void copy_single_comp(struct ore_components *oc, unsigned c,
229 struct objio_segment *objio_seg, 192 struct pnfs_osd_object_cred *src_comp)
230 gfp_t gfp_flags)
231{ 193{
232 unsigned i; 194 struct ore_comp *ocomp = &oc->comps[c];
233 int err;
234 195
235 /* lookup all devices */ 196 WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
236 for (i = 0; i < objio_seg->num_comps; i++) { 197 WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
237 struct objio_dev_ent *ode;
238 198
239 ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); 199 ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
240 if (unlikely(IS_ERR(ode))) { 200 ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
241 err = PTR_ERR(ode);
242 goto out;
243 }
244 objio_seg->ods[i] = ode;
245 }
246 err = 0;
247 201
248out: 202 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
249 dprintk("%s: return=%d\n", __func__, err);
250 return err;
251} 203}
252 204
253static int _verify_data_map(struct pnfs_osd_layout *layout) 205int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
206 struct objio_segment **pseg)
254{ 207{
255 struct pnfs_osd_data_map *data_map = &layout->olo_map; 208 struct __alloc_objio_segment {
256 u64 stripe_length; 209 struct objio_segment olseg;
257 u32 group_width; 210 struct ore_dev *ods[numdevs];
258 211 struct ore_comp comps[numdevs];
259/* FIXME: Only raid0 for now. if not go through MDS */ 212 } *aolseg;
260 if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
261 printk(KERN_ERR "Only RAID_0 for now\n");
262 return -ENOTSUPP;
263 }
264 if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) {
265 printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n",
266 data_map->odm_num_comps, data_map->odm_mirror_cnt);
267 return -EINVAL;
268 }
269 213
270 if (data_map->odm_group_width) 214 aolseg = kzalloc(sizeof(*aolseg), gfp_flags);
271 group_width = data_map->odm_group_width; 215 if (unlikely(!aolseg)) {
272 else 216 dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
273 group_width = data_map->odm_num_comps / 217 numdevs, sizeof(*aolseg));
274 (data_map->odm_mirror_cnt + 1); 218 return -ENOMEM;
275
276 stripe_length = (u64)data_map->odm_stripe_unit * group_width;
277 if (stripe_length >= (1ULL << 32)) {
278 printk(KERN_ERR "Total Stripe length(0x%llx)"
279 " >= 32bit is not supported\n", _LLU(stripe_length));
280 return -ENOTSUPP;
281 } 219 }
282 220
283 if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { 221 aolseg->olseg.oc.numdevs = numdevs;
284 printk(KERN_ERR "Stripe Unit(0x%llx)" 222 aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS;
285 " must be Multples of PAGE_SIZE(0x%lx)\n", 223 aolseg->olseg.oc.comps = aolseg->comps;
286 _LLU(data_map->odm_stripe_unit), PAGE_SIZE); 224 aolseg->olseg.oc.ods = aolseg->ods;
287 return -ENOTSUPP;
288 }
289 225
226 *pseg = &aolseg->olseg;
290 return 0; 227 return 0;
291} 228}
292 229
293static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp,
294 struct pnfs_osd_object_cred *src_comp,
295 struct caps_buffers *caps_p)
296{
297 WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key));
298 WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds));
299
300 *cur_comp = *src_comp;
301
302 memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred,
303 sizeof(caps_p->caps_key));
304 cur_comp->oc_cap_key.cred = caps_p->caps_key;
305
306 memcpy(caps_p->creds, src_comp->oc_cap.cred,
307 sizeof(caps_p->creds));
308 cur_comp->oc_cap.cred = caps_p->creds;
309}
310
311int objio_alloc_lseg(struct pnfs_layout_segment **outp, 230int objio_alloc_lseg(struct pnfs_layout_segment **outp,
312 struct pnfs_layout_hdr *pnfslay, 231 struct pnfs_layout_hdr *pnfslay,
313 struct pnfs_layout_range *range, 232 struct pnfs_layout_range *range,
@@ -317,59 +236,43 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp,
317 struct objio_segment *objio_seg; 236 struct objio_segment *objio_seg;
318 struct pnfs_osd_xdr_decode_layout_iter iter; 237 struct pnfs_osd_xdr_decode_layout_iter iter;
319 struct pnfs_osd_layout layout; 238 struct pnfs_osd_layout layout;
320 struct pnfs_osd_object_cred *cur_comp, src_comp; 239 struct pnfs_osd_object_cred src_comp;
321 struct caps_buffers *caps_p; 240 unsigned cur_comp;
322 int err; 241 int err;
323 242
324 err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); 243 err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
325 if (unlikely(err)) 244 if (unlikely(err))
326 return err; 245 return err;
327 246
328 err = _verify_data_map(&layout); 247 err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
329 if (unlikely(err)) 248 if (unlikely(err))
330 return err; 249 return err;
331 250
332 objio_seg = kzalloc(sizeof(*objio_seg) + 251 objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
333 sizeof(objio_seg->ods[0]) * layout.olo_num_comps + 252 objio_seg->layout.group_width = layout.olo_map.odm_group_width;
334 sizeof(*objio_seg->comps) * layout.olo_num_comps + 253 objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
335 sizeof(struct caps_buffers) * layout.olo_num_comps, 254 objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
336 gfp_flags); 255 objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
337 if (!objio_seg)
338 return -ENOMEM;
339 256
340 objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); 257 err = ore_verify_layout(layout.olo_map.odm_num_comps,
341 cur_comp = objio_seg->comps; 258 &objio_seg->layout);
342 caps_p = (void *)(cur_comp + layout.olo_num_comps);
343 while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err))
344 copy_single_comp(cur_comp++, &src_comp, caps_p++);
345 if (unlikely(err)) 259 if (unlikely(err))
346 goto err; 260 goto err;
347 261
348 objio_seg->num_comps = layout.olo_num_comps; 262 objio_seg->oc.first_dev = layout.olo_comps_index;
349 objio_seg->comps_index = layout.olo_comps_index; 263 cur_comp = 0;
350 err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); 264 while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
351 if (err) 265 copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
352 goto err; 266 err = objio_devices_lookup(pnfslay, objio_seg, cur_comp,
353 267 &src_comp.oc_object_id.oid_device_id,
354 objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; 268 gfp_flags);
355 objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; 269 if (err)
356 if (layout.olo_map.odm_group_width) { 270 goto err;
357 objio_seg->group_width = layout.olo_map.odm_group_width; 271 ++cur_comp;
358 objio_seg->group_depth = layout.olo_map.odm_group_depth;
359 objio_seg->group_count = layout.olo_map.odm_num_comps /
360 objio_seg->mirrors_p1 /
361 objio_seg->group_width;
362 } else {
363 objio_seg->group_width = layout.olo_map.odm_num_comps /
364 objio_seg->mirrors_p1;
365 objio_seg->group_depth = -1;
366 objio_seg->group_count = 1;
367 } 272 }
368 273 /* pnfs_osd_xdr_decode_layout_comp returns false on error */
369 /* Cache this calculation it will hit for every page */ 274 if (unlikely(err))
370 objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - 275 goto err;
371 objio_seg->stripe_unit) *
372 objio_seg->group_width;
373 276
374 *outp = &objio_seg->lseg; 277 *outp = &objio_seg->lseg;
375 return 0; 278 return 0;
@@ -386,43 +289,63 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg)
386 int i; 289 int i;
387 struct objio_segment *objio_seg = OBJIO_LSEG(lseg); 290 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
388 291
389 for (i = 0; i < objio_seg->num_comps; i++) { 292 for (i = 0; i < objio_seg->oc.numdevs; i++) {
390 if (!objio_seg->ods[i]) 293 struct ore_dev *od = objio_seg->oc.ods[i];
294 struct objio_dev_ent *ode;
295
296 if (!od)
391 break; 297 break;
392 nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); 298 ode = container_of(od, typeof(*ode), od);
299 nfs4_put_deviceid_node(&ode->id_node);
393 } 300 }
394 kfree(objio_seg); 301 kfree(objio_seg);
395} 302}
396 303
397int objio_alloc_io_state(struct pnfs_layout_segment *lseg, 304static int
398 struct objlayout_io_state **outp, 305objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
399 gfp_t gfp_flags) 306 struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
307 loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
308 struct objio_state **outp)
400{ 309{
401 struct objio_segment *objio_seg = OBJIO_LSEG(lseg); 310 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
402 struct objio_state *ios; 311 struct ore_io_state *ios;
403 const unsigned first_size = sizeof(*ios) + 312 int ret;
404 objio_seg->num_comps * sizeof(ios->per_dev[0]); 313 struct __alloc_objio_state {
405 const unsigned sec_size = objio_seg->num_comps * 314 struct objio_state objios;
406 sizeof(ios->ol_state.ioerrs[0]); 315 struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
407 316 } *aos;
408 ios = kzalloc(first_size + sec_size, gfp_flags); 317
409 if (unlikely(!ios)) 318 aos = kzalloc(sizeof(*aos), gfp_flags);
319 if (unlikely(!aos))
410 return -ENOMEM; 320 return -ENOMEM;
411 321
412 ios->layout = objio_seg; 322 objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
413 ios->ol_state.ioerrs = ((void *)ios) + first_size; 323 aos->ioerrs, rpcdata, pnfs_layout_type);
414 ios->ol_state.num_comps = objio_seg->num_comps;
415 324
416 *outp = &ios->ol_state; 325 ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
326 offset, count, &ios);
327 if (unlikely(ret)) {
328 kfree(aos);
329 return ret;
330 }
331
332 ios->pages = pages;
333 ios->pgbase = pgbase;
334 ios->private = aos;
335 BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
336
337 aos->objios.sync = 0;
338 aos->objios.ios = ios;
339 *outp = &aos->objios;
417 return 0; 340 return 0;
418} 341}
419 342
420void objio_free_io_state(struct objlayout_io_state *ol_state) 343void objio_free_result(struct objlayout_io_res *oir)
421{ 344{
422 struct objio_state *ios = container_of(ol_state, struct objio_state, 345 struct objio_state *objios = container_of(oir, struct objio_state, oir);
423 ol_state);
424 346
425 kfree(ios); 347 ore_put_io_state(objios->ios);
348 kfree(objios);
426} 349}
427 350
428enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) 351enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
@@ -455,539 +378,152 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
455 } 378 }
456} 379}
457 380
458static void _clear_bio(struct bio *bio) 381static void __on_dev_error(struct ore_io_state *ios,
382 struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
383 u64 dev_offset, u64 dev_len)
459{ 384{
460 struct bio_vec *bv; 385 struct objio_state *objios = ios->private;
461 unsigned i; 386 struct pnfs_osd_objid pooid;
462 387 struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
463 __bio_for_each_segment(bv, bio, i, 0) { 388 /* FIXME: what to do with more-then-one-group layouts. We need to
464 unsigned this_count = bv->bv_len; 389 * translate from ore_io_state index to oc->comps index
465 390 */
466 if (likely(PAGE_SIZE == this_count)) 391 unsigned comp = dev_index;
467 clear_highpage(bv->bv_page);
468 else
469 zero_user(bv->bv_page, bv->bv_offset, this_count);
470 }
471}
472
473static int _io_check(struct objio_state *ios, bool is_write)
474{
475 enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR;
476 int lin_ret = 0;
477 int i;
478
479 for (i = 0; i < ios->numdevs; i++) {
480 struct osd_sense_info osi;
481 struct osd_request *or = ios->per_dev[i].or;
482 int ret;
483
484 if (!or)
485 continue;
486 392
487 ret = osd_req_decode_sense(or, &osi); 393 pooid.oid_device_id = ode->id_node.deviceid;
488 if (likely(!ret)) 394 pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
489 continue; 395 pooid.oid_object_id = ios->oc->comps[comp].obj.id;
490 396
491 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { 397 objlayout_io_set_result(&objios->oir, comp,
492 /* start read offset passed endof file */ 398 &pooid, osd_pri_2_pnfs_err(oep),
493 BUG_ON(is_write); 399 dev_offset, dev_len, !ios->reading);
494 _clear_bio(ios->per_dev[i].bio);
495 dprintk("%s: start read offset passed end of file "
496 "offset=0x%llx, length=0x%lx\n", __func__,
497 _LLU(ios->per_dev[i].offset),
498 ios->per_dev[i].length);
499
500 continue; /* we recovered */
501 }
502 objlayout_io_set_result(&ios->ol_state, i,
503 &ios->layout->comps[i].oc_object_id,
504 osd_pri_2_pnfs_err(osi.osd_err_pri),
505 ios->per_dev[i].offset,
506 ios->per_dev[i].length,
507 is_write);
508
509 if (osi.osd_err_pri >= oep) {
510 oep = osi.osd_err_pri;
511 lin_ret = ret;
512 }
513 }
514
515 return lin_ret;
516}
517
518/*
519 * Common IO state helpers.
520 */
521static void _io_free(struct objio_state *ios)
522{
523 unsigned i;
524
525 for (i = 0; i < ios->numdevs; i++) {
526 struct _objio_per_comp *per_dev = &ios->per_dev[i];
527
528 if (per_dev->or) {
529 osd_end_request(per_dev->or);
530 per_dev->or = NULL;
531 }
532
533 if (per_dev->bio) {
534 bio_put(per_dev->bio);
535 per_dev->bio = NULL;
536 }
537 }
538}
539
540struct osd_dev *_io_od(struct objio_state *ios, unsigned dev)
541{
542 unsigned min_dev = ios->layout->comps_index;
543 unsigned max_dev = min_dev + ios->layout->num_comps;
544
545 BUG_ON(dev < min_dev || max_dev <= dev);
546 return ios->layout->ods[dev - min_dev]->od;
547}
548
549struct _striping_info {
550 u64 obj_offset;
551 u64 group_length;
552 unsigned dev;
553 unsigned unit_off;
554};
555
556static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
557 struct _striping_info *si)
558{
559 u32 stripe_unit = ios->layout->stripe_unit;
560 u32 group_width = ios->layout->group_width;
561 u64 group_depth = ios->layout->group_depth;
562 u32 U = stripe_unit * group_width;
563
564 u64 T = U * group_depth;
565 u64 S = T * ios->layout->group_count;
566 u64 M = div64_u64(file_offset, S);
567
568 /*
569 G = (L - (M * S)) / T
570 H = (L - (M * S)) % T
571 */
572 u64 LmodU = file_offset - M * S;
573 u32 G = div64_u64(LmodU, T);
574 u64 H = LmodU - G * T;
575
576 u32 N = div_u64(H, U);
577
578 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
579 si->obj_offset = si->unit_off + (N * stripe_unit) +
580 (M * group_depth * stripe_unit);
581
582 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
583 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
584 si->dev *= ios->layout->mirrors_p1;
585
586 si->group_length = T - H;
587}
588
589static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg,
590 unsigned pgbase, struct _objio_per_comp *per_dev, int len,
591 gfp_t gfp_flags)
592{
593 unsigned pg = *cur_pg;
594 int cur_len = len;
595 struct request_queue *q =
596 osd_request_queue(_io_od(ios, per_dev->dev));
597
598 if (per_dev->bio == NULL) {
599 unsigned pages_in_stripe = ios->layout->group_width *
600 (ios->layout->stripe_unit / PAGE_SIZE);
601 unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
602 ios->layout->group_width;
603
604 if (BIO_MAX_PAGES_KMALLOC < bio_size)
605 bio_size = BIO_MAX_PAGES_KMALLOC;
606
607 per_dev->bio = bio_kmalloc(gfp_flags, bio_size);
608 if (unlikely(!per_dev->bio)) {
609 dprintk("Faild to allocate BIO size=%u\n", bio_size);
610 return -ENOMEM;
611 }
612 }
613
614 while (cur_len > 0) {
615 unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
616 unsigned added_len;
617
618 BUG_ON(ios->ol_state.nr_pages <= pg);
619 cur_len -= pglen;
620
621 added_len = bio_add_pc_page(q, per_dev->bio,
622 ios->ol_state.pages[pg], pglen, pgbase);
623 if (unlikely(pglen != added_len))
624 return -ENOMEM;
625 pgbase = 0;
626 ++pg;
627 }
628 BUG_ON(cur_len);
629
630 per_dev->length += len;
631 *cur_pg = pg;
632 return 0;
633}
634
/*
 * Build the per-device I/O descriptors for a single raid group: consume
 * @length bytes starting at the striping position in @si, handing out one
 * stripe_unit (or the remaining tail) at a time to the component device
 * that owns it, then advancing round-robin across the group's data
 * devices (mirrors are skipped here; they are cloned at submit time).
 * *last_pg is the page-array cursor, carried across calls for multi-group
 * I/Os.  On exit ios->numdevs reflects the highest component touched,
 * including its mirrors.
 */
635static int _prepare_one_group(struct objio_state *ios, u64 length,
636		struct _striping_info *si, unsigned *last_pg,
637		gfp_t gfp_flags)
638{
639	unsigned stripe_unit = ios->layout->stripe_unit;
640	unsigned mirrors_p1 = ios->layout->mirrors_p1;
641	unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
642	unsigned dev = si->dev;
643	unsigned first_dev = dev - (dev % devs_in_group);
644	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
645	unsigned cur_pg = *last_pg;
646	int ret = 0;
647
648	while (length) {
649		struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
650		unsigned cur_len, page_off = 0;
651
652		if (!per_dev->length) {
653			per_dev->dev = dev;
			/* First chunk for this component: its starting object
			 * offset depends on whether it sits before, on, or
			 * after the device holding the unaligned head
			 * (si->unit_off) of the I/O. */
654			if (dev < si->dev) {
655				per_dev->offset = si->obj_offset + stripe_unit -
656								si->unit_off;
657				cur_len = stripe_unit;
658			} else if (dev == si->dev) {
659				per_dev->offset = si->obj_offset;
660				cur_len = stripe_unit - si->unit_off;
661				page_off = si->unit_off & ~PAGE_MASK;
662				BUG_ON(page_off &&
663				      (page_off != ios->ol_state.pgbase));
664			} else { /* dev > si->dev */
665				per_dev->offset = si->obj_offset - si->unit_off;
666				cur_len = stripe_unit;
667			}
668
669			if (max_comp < dev - first_dev)
670				max_comp = dev - first_dev;
671		} else {
672			cur_len = stripe_unit;
673		}
		/* Never hand out more than what remains of this group. */
674		if (cur_len >= length)
675			cur_len = length;
676
677		ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
678				       cur_len, gfp_flags);
679		if (unlikely(ret))
680			goto out;
681
		/* Step to the next data device (skip this one's mirrors)
		 * and wrap within the group. */
682		dev += mirrors_p1;
683		dev = (dev % devs_in_group) + first_dev;
684
685		length -= cur_len;
686		ios->length += cur_len;
687	}
688out:
689	ios->numdevs = max_comp + mirrors_p1;
690	*last_pg = cur_pg;
691	return ret;
692}
693
/*
 * Split the I/O described by ios->ol_state into per-raid-group chunks
 * (one _calc_stripe_info()/_prepare_one_group() pass per group) and
 * prepare the per-device bios for each.  On a mid-way failure the error
 * is returned only if nothing at all was prepared; otherwise the
 * already-prepared prefix is kept and submitted as a short I/O.
 */
694static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags)
695{
696	u64 length = ios->ol_state.count;
697	u64 offset = ios->ol_state.offset;
698	struct _striping_info si;
699	unsigned last_pg = 0;
700	int ret = 0;
701
702	while (length) {
703		_calc_stripe_info(ios, offset, &si);
704
		/* The last group may be shorter than a full group_length. */
705		if (length < si.group_length)
706			si.group_length = length;
707
708		ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags);
709		if (unlikely(ret))
710			goto out;
711
712		offset += si.group_length;
713		length -= si.group_length;
714	}
715
716out:
	/* If any bytes were prepared, proceed with that much (short I/O)
	 * and swallow the error; only a total failure is reported. */
717	if (!ios->length)
718		return ret;
719
720	return 0;
721}
722
/*
 * Completion hook installed by _io_exec() in sync mode: wake the waiter
 * parked on the on-stack completion stored in ios->private.
 */
723static ssize_t _sync_done(struct objio_state *ios)
724{
725	struct completion *waiting = ios->private;
726
727	complete(waiting);
728	return 0;
729}
730
/* kref release function: the last in-flight request has completed, so
 * run the I/O's done hook. */
731static void _last_io(struct kref *kref)
732{
733	struct objio_state *ios = container_of(kref, struct objio_state, kref);
734
735	ios->done(ios);
736}
737
/* Async completion callback for one osd_request: drop its ref; the last
 * ref to go fires _last_io() and thus ios->done(). */
738static void _done_io(struct osd_request *or, void *p)
739{
740	struct objio_state *ios = p;
741
742	kref_put(&ios->kref, _last_io);
743}
744
/*
 * Launch every prepared per-device request asynchronously.  A kref
 * counts in-flight requests; when the last one completes, ios->done()
 * runs.  In sync mode the done hook is temporarily swapped for
 * _sync_done so this function can wait on the completion and then
 * return the I/O status from the saved (real) done hook.
 */
745static ssize_t _io_exec(struct objio_state *ios)
746{
747	DECLARE_COMPLETION_ONSTACK(wait);
748	ssize_t status = 0; /* sync status */
749	unsigned i;
750	objio_done_fn saved_done_fn = ios->done;
751	bool sync = ios->ol_state.sync;
752
753	if (sync) {
754		ios->done = _sync_done;
755		ios->private = &wait;
756	}
757
	/* Initial ref pins the state until all requests are launched;
	 * it is dropped below. */
758	kref_init(&ios->kref);
759
760	for (i = 0; i < ios->numdevs; i++) {
761		struct osd_request *or = ios->per_dev[i].or;
762
		/* Holes are legal: components with no I/O have no request. */
763		if (!or)
764			continue;
765
766		kref_get(&ios->kref);
767		osd_execute_request_async(or, _done_io, ios);
768	}
769
	/* Drop the launch ref; if everything already finished this fires
	 * _last_io() right here. */
770	kref_put(&ios->kref, _last_io);
771
772	if (sync) {
773		wait_for_completion(&wait);
774		status = saved_done_fn(ios);
775	}
776
777	return status;
778}                                                               400}
779 401
780/* 402/*
781 * read 403 * read
782 */ 404 */
783static ssize_t _read_done(struct objio_state *ios) 405static void _read_done(struct ore_io_state *ios, void *private)
784{ 406{
407 struct objio_state *objios = private;
785 ssize_t status; 408 ssize_t status;
786 int ret = _io_check(ios, false); 409 int ret = ore_check_io(ios, &__on_dev_error);
787 410
788	_io_free(ios); 411 /* FIXME: _io_free(ios) can we deallocate the libosd resources? */
789 412
790 if (likely(!ret)) 413 if (likely(!ret))
791 status = ios->length; 414 status = ios->length;
792 else 415 else
793 status = ret; 416 status = ret;
794 417
795 objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); 418 objlayout_read_done(&objios->oir, status, objios->sync);
796 return status;
797} 419}
798 420
799static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) 421int objio_read_pagelist(struct nfs_read_data *rdata)
800{ 422{
801 struct osd_request *or = NULL; 423 struct objio_state *objios;
802 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
803 unsigned dev = per_dev->dev;
804 struct pnfs_osd_object_cred *cred =
805 &ios->layout->comps[cur_comp];
806 struct osd_obj_id obj = {
807 .partition = cred->oc_object_id.oid_partition_id,
808 .id = cred->oc_object_id.oid_object_id,
809 };
810 int ret; 424 int ret;
811 425
812 or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); 426 ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
813 if (unlikely(!or)) { 427 rdata->lseg, rdata->args.pages, rdata->args.pgbase,
814 ret = -ENOMEM; 428 rdata->args.offset, rdata->args.count, rdata,
815 goto err; 429 GFP_KERNEL, &objios);
816 }
817 per_dev->or = or;
818
819 osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length);
820
821 ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL);
822 if (ret) {
823 dprintk("%s: Faild to osd_finalize_request() => %d\n",
824 __func__, ret);
825 goto err;
826 }
827
828 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
829 __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
830 per_dev->length);
831
832err:
833 return ret;
834}
835
836static ssize_t _read_exec(struct objio_state *ios)
837{
838 unsigned i;
839 int ret;
840
841 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
842 if (!ios->per_dev[i].length)
843 continue;
844 ret = _read_mirrors(ios, i);
845 if (unlikely(ret))
846 goto err;
847 }
848
849 ios->done = _read_done;
850 return _io_exec(ios); /* In sync mode exec returns the io status */
851
852err:
853 _io_free(ios);
854 return ret;
855}
856
857ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state)
858{
859 struct objio_state *ios = container_of(ol_state, struct objio_state,
860 ol_state);
861 int ret;
862
863 ret = _io_rw_pagelist(ios, GFP_KERNEL);
864 if (unlikely(ret)) 430 if (unlikely(ret))
865 return ret; 431 return ret;
866 432
867 return _read_exec(ios); 433 objios->ios->done = _read_done;
434 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
435 rdata->args.offset, rdata->args.count);
436 return ore_read(objios->ios);
868} 437}
869 438
870/* 439/*
871 * write 440 * write
872 */ 441 */
873static ssize_t _write_done(struct objio_state *ios) 442static void _write_done(struct ore_io_state *ios, void *private)
874{ 443{
444 struct objio_state *objios = private;
875 ssize_t status; 445 ssize_t status;
876 int ret = _io_check(ios, true); 446 int ret = ore_check_io(ios, &__on_dev_error);
877 447
878	_io_free(ios); 448 /* FIXME: _io_free(ios) can we deallocate the libosd resources? */
879 449
880 if (likely(!ret)) { 450 if (likely(!ret)) {
881 /* FIXME: should be based on the OSD's persistence model 451 /* FIXME: should be based on the OSD's persistence model
882 * See OSD2r05 Section 4.13 Data persistence model */ 452 * See OSD2r05 Section 4.13 Data persistence model */
883 ios->ol_state.committed = NFS_FILE_SYNC; 453 objios->oir.committed = NFS_FILE_SYNC;
884 status = ios->length; 454 status = ios->length;
885 } else { 455 } else {
886 status = ret; 456 status = ret;
887 } 457 }
888 458
889 objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); 459 objlayout_write_done(&objios->oir, status, objios->sync);
890 return status;
891} 460}
892 461
893static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) 462static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
894{ 463{
895 struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; 464 struct objio_state *objios = priv;
896 unsigned dev = ios->per_dev[cur_comp].dev; 465 struct nfs_write_data *wdata = objios->oir.rpcdata;
897 unsigned last_comp = cur_comp + ios->layout->mirrors_p1; 466 pgoff_t index = offset / PAGE_SIZE;
898 int ret; 467 struct page *page = find_get_page(wdata->inode->i_mapping, index);
899
900 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
901 struct osd_request *or = NULL;
902 struct pnfs_osd_object_cred *cred =
903 &ios->layout->comps[cur_comp];
904 struct osd_obj_id obj = {
905 .partition = cred->oc_object_id.oid_partition_id,
906 .id = cred->oc_object_id.oid_object_id,
907 };
908 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
909 struct bio *bio;
910
911 or = osd_start_request(_io_od(ios, dev), GFP_NOFS);
912 if (unlikely(!or)) {
913 ret = -ENOMEM;
914 goto err;
915 }
916 per_dev->or = or;
917
918 if (per_dev != master_dev) {
919 bio = bio_kmalloc(GFP_NOFS,
920 master_dev->bio->bi_max_vecs);
921 if (unlikely(!bio)) {
922 dprintk("Faild to allocate BIO size=%u\n",
923 master_dev->bio->bi_max_vecs);
924 ret = -ENOMEM;
925 goto err;
926 }
927
928 __bio_clone(bio, master_dev->bio);
929 bio->bi_bdev = NULL;
930 bio->bi_next = NULL;
931 per_dev->bio = bio;
932 per_dev->dev = dev;
933 per_dev->length = master_dev->length;
934 per_dev->offset = master_dev->offset;
935 } else {
936 bio = master_dev->bio;
937 bio->bi_rw |= REQ_WRITE;
938 }
939
940 osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
941 468
942 ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); 469 if (!page) {
943 if (ret) { 470 page = find_or_create_page(wdata->inode->i_mapping,
944 dprintk("%s: Faild to osd_finalize_request() => %d\n", 471 index, GFP_NOFS);
945 __func__, ret); 472 if (unlikely(!page)) {
946 goto err; 473 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
474 __func__, index);
475 return NULL;
947 } 476 }
948 477 unlock_page(page);
949 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
950 __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
951 per_dev->length);
952 } 478 }
479 if (PageDirty(page) || PageWriteback(page))
480 *uptodate = true;
481 else
482 *uptodate = PageUptodate(page);
483 dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
484 return page;
485}
953 486
954err: 487static void __r4w_put_page(void *priv, struct page *page)
955 return ret; 488{
489 dprintk("%s: index=0x%lx\n", __func__, page->index);
490 page_cache_release(page);
491 return;
956} 492}
957 493
958static ssize_t _write_exec(struct objio_state *ios) 494static const struct _ore_r4w_op _r4w_op = {
495 .get_page = &__r4w_get_page,
496 .put_page = &__r4w_put_page,
497};
498
499int objio_write_pagelist(struct nfs_write_data *wdata, int how)
959{ 500{
960 unsigned i; 501 struct objio_state *objios;
961 int ret; 502 int ret;
962 503
963 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 504 ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
964 if (!ios->per_dev[i].length) 505 wdata->lseg, wdata->args.pages, wdata->args.pgbase,
965 continue; 506 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
966 ret = _write_mirrors(ios, i); 507 &objios);
967 if (unlikely(ret)) 508 if (unlikely(ret))
968 goto err; 509 return ret;
969 }
970
971 ios->done = _write_done;
972 return _io_exec(ios); /* In sync mode exec returns the io->status */
973 510
974err: 511 objios->sync = 0 != (how & FLUSH_SYNC);
975 _io_free(ios); 512 objios->ios->r4w = &_r4w_op;
976 return ret;
977}
978 513
979ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) 514 if (!objios->sync)
980{ 515 objios->ios->done = _write_done;
981 struct objio_state *ios = container_of(ol_state, struct objio_state,
982 ol_state);
983 int ret;
984 516
985 /* TODO: ios->stable = stable; */ 517 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
986 ret = _io_rw_pagelist(ios, GFP_NOFS); 518 wdata->args.offset, wdata->args.count);
519 ret = ore_write(objios->ios);
987 if (unlikely(ret)) 520 if (unlikely(ret))
988 return ret; 521 return ret;
989 522
990 return _write_exec(ios); 523 if (objios->sync)
524 _write_done(objios->ios, objios);
525
526 return 0;
991} 527}
992 528
993static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, 529static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
@@ -997,7 +533,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
997 return false; 533 return false;
998 534
999 return pgio->pg_count + req->wb_bytes <= 535 return pgio->pg_count + req->wb_bytes <=
1000 OBJIO_LSEG(pgio->pg_lseg)->max_io_size; 536 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
1001} 537}
1002 538
1003static const struct nfs_pageio_ops objio_pg_read_ops = { 539static const struct nfs_pageio_ops objio_pg_read_ops = {
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 1d06f8e2adea..72074e3a04f9 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -156,77 +156,39 @@ last_byte_offset(u64 start, u64 len)
156 return end > start ? end - 1 : NFS4_MAX_UINT64; 156 return end > start ? end - 1 : NFS4_MAX_UINT64;
157} 157}
158 158
159static struct objlayout_io_state * 159void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
160objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, 160 struct page ***p_pages, unsigned *p_pgbase,
161 struct page **pages, 161 u64 offset, unsigned long count)
162 unsigned pgbase,
163 loff_t offset,
164 size_t count,
165 struct pnfs_layout_segment *lseg,
166 void *rpcdata,
167 gfp_t gfp_flags)
168{ 162{
169 struct objlayout_io_state *state;
170 u64 lseg_end_offset; 163 u64 lseg_end_offset;
171 164
172 dprintk("%s: allocating io_state\n", __func__);
173 if (objio_alloc_io_state(lseg, &state, gfp_flags))
174 return NULL;
175
176 BUG_ON(offset < lseg->pls_range.offset); 165 BUG_ON(offset < lseg->pls_range.offset);
177 lseg_end_offset = end_offset(lseg->pls_range.offset, 166 lseg_end_offset = end_offset(lseg->pls_range.offset,
178 lseg->pls_range.length); 167 lseg->pls_range.length);
179 BUG_ON(offset >= lseg_end_offset); 168 BUG_ON(offset >= lseg_end_offset);
180 if (offset + count > lseg_end_offset) { 169 WARN_ON(offset + count > lseg_end_offset);
181 count = lseg->pls_range.length -
182 (offset - lseg->pls_range.offset);
183 dprintk("%s: truncated count %Zd\n", __func__, count);
184 }
185 170
186 if (pgbase > PAGE_SIZE) { 171 if (*p_pgbase > PAGE_SIZE) {
187 pages += pgbase >> PAGE_SHIFT; 172 dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
188 pgbase &= ~PAGE_MASK; 173 *p_pages += *p_pgbase >> PAGE_SHIFT;
174 *p_pgbase &= ~PAGE_MASK;
189 } 175 }
190
191 INIT_LIST_HEAD(&state->err_list);
192 state->lseg = lseg;
193 state->rpcdata = rpcdata;
194 state->pages = pages;
195 state->pgbase = pgbase;
196 state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
197 state->offset = offset;
198 state->count = count;
199 state->sync = 0;
200
201 return state;
202}
203
204static void
205objlayout_free_io_state(struct objlayout_io_state *state)
206{
207 dprintk("%s: freeing io_state\n", __func__);
208 if (unlikely(!state))
209 return;
210
211 objio_free_io_state(state);
212} 176}
213 177
214/* 178/*
215 * I/O done common code 179 * I/O done common code
216 */ 180 */
217static void 181static void
218objlayout_iodone(struct objlayout_io_state *state) 182objlayout_iodone(struct objlayout_io_res *oir)
219{ 183{
220 dprintk("%s: state %p status\n", __func__, state); 184 if (likely(oir->status >= 0)) {
221 185 objio_free_result(oir);
222 if (likely(state->status >= 0)) {
223 objlayout_free_io_state(state);
224 } else { 186 } else {
225 struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); 187 struct objlayout *objlay = oir->objlay;
226 188
227 spin_lock(&objlay->lock); 189 spin_lock(&objlay->lock);
228 objlay->delta_space_valid = OBJ_DSU_INVALID; 190 objlay->delta_space_valid = OBJ_DSU_INVALID;
229 list_add(&objlay->err_list, &state->err_list); 191 list_add(&objlay->err_list, &oir->err_list);
230 spin_unlock(&objlay->lock); 192 spin_unlock(&objlay->lock);
231 } 193 }
232} 194}
@@ -238,13 +200,13 @@ objlayout_iodone(struct objlayout_io_state *state)
238 * the error for later reporting at layout-return. 200 * the error for later reporting at layout-return.
239 */ 201 */
240void 202void
241objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, 203objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
242 struct pnfs_osd_objid *pooid, int osd_error, 204 struct pnfs_osd_objid *pooid, int osd_error,
243 u64 offset, u64 length, bool is_write) 205 u64 offset, u64 length, bool is_write)
244{ 206{
245 struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; 207 struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
246 208
247 BUG_ON(index >= state->num_comps); 209 BUG_ON(index >= oir->num_comps);
248 if (osd_error) { 210 if (osd_error) {
249 ioerr->oer_component = *pooid; 211 ioerr->oer_component = *pooid;
250 ioerr->oer_comp_offset = offset; 212 ioerr->oer_comp_offset = offset;
@@ -285,21 +247,18 @@ static void _rpc_read_complete(struct work_struct *work)
285} 247}
286 248
287void 249void
288objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) 250objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
289{ 251{
290 int eof = state->eof; 252 struct nfs_read_data *rdata = oir->rpcdata;
291 struct nfs_read_data *rdata;
292 253
293 state->status = status; 254 oir->status = rdata->task.tk_status = status;
294 dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); 255 if (status >= 0)
295 rdata = state->rpcdata;
296 rdata->task.tk_status = status;
297 if (status >= 0) {
298 rdata->res.count = status; 256 rdata->res.count = status;
299 rdata->res.eof = eof; 257 objlayout_iodone(oir);
300 } 258 /* must not use oir after this point */
301 objlayout_iodone(state); 259
302 /* must not use state after this point */ 260 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
261 status, rdata->res.eof, sync);
303 262
304 if (sync) 263 if (sync)
305 pnfs_ld_read_done(rdata); 264 pnfs_ld_read_done(rdata);
@@ -317,40 +276,36 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
317{ 276{
318 loff_t offset = rdata->args.offset; 277 loff_t offset = rdata->args.offset;
319 size_t count = rdata->args.count; 278 size_t count = rdata->args.count;
320 struct objlayout_io_state *state; 279 int err;
321 ssize_t status = 0;
322 loff_t eof; 280 loff_t eof;
323 281
324 dprintk("%s: Begin inode %p offset %llu count %d\n",
325 __func__, rdata->inode, offset, (int)count);
326
327 eof = i_size_read(rdata->inode); 282 eof = i_size_read(rdata->inode);
328 if (unlikely(offset + count > eof)) { 283 if (unlikely(offset + count > eof)) {
329 if (offset >= eof) { 284 if (offset >= eof) {
330 status = 0; 285 err = 0;
331 rdata->res.count = 0; 286 rdata->res.count = 0;
332 rdata->res.eof = 1; 287 rdata->res.eof = 1;
288 /*FIXME: do we need to call pnfs_ld_read_done() */
333 goto out; 289 goto out;
334 } 290 }
335 count = eof - offset; 291 count = eof - offset;
336 } 292 }
337 293
338 state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, 294 rdata->res.eof = (offset + count) >= eof;
339 rdata->args.pages, rdata->args.pgbase, 295 _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
340 offset, count, 296 &rdata->args.pgbase,
341 rdata->lseg, rdata, 297 rdata->args.offset, rdata->args.count);
342 GFP_KERNEL);
343 if (unlikely(!state)) {
344 status = -ENOMEM;
345 goto out;
346 }
347 298
348 state->eof = state->offset + state->count >= eof; 299 dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
300 __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
349 301
350 status = objio_read_pagelist(state); 302 err = objio_read_pagelist(rdata);
351 out: 303 out:
352 dprintk("%s: Return status %Zd\n", __func__, status); 304 if (unlikely(err)) {
353 rdata->pnfs_error = status; 305 rdata->pnfs_error = err;
306 dprintk("%s: Returned Error %d\n", __func__, err);
307 return PNFS_NOT_ATTEMPTED;
308 }
354 return PNFS_ATTEMPTED; 309 return PNFS_ATTEMPTED;
355} 310}
356 311
@@ -371,26 +326,20 @@ static void _rpc_write_complete(struct work_struct *work)
371} 326}
372 327
373void 328void
374objlayout_write_done(struct objlayout_io_state *state, ssize_t status, 329objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
375 bool sync)
376{ 330{
377 struct nfs_write_data *wdata; 331 struct nfs_write_data *wdata = oir->rpcdata;
378 332
379 dprintk("%s: Begin\n", __func__); 333 oir->status = wdata->task.tk_status = status;
380 wdata = state->rpcdata;
381 state->status = status;
382 wdata->task.tk_status = status;
383 if (status >= 0) { 334 if (status >= 0) {
384 wdata->res.count = status; 335 wdata->res.count = status;
385 wdata->verf.committed = state->committed; 336 wdata->verf.committed = oir->committed;
386 dprintk("%s: Return status %d committed %d\n", 337 }
387 __func__, wdata->task.tk_status, 338 objlayout_iodone(oir);
388 wdata->verf.committed); 339 /* must not use oir after this point */
389 } else 340
390 dprintk("%s: Return status %d\n", 341 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
391 __func__, wdata->task.tk_status); 342 status, wdata->verf.committed, sync);
392 objlayout_iodone(state);
393 /* must not use state after this point */
394 343
395 if (sync) 344 if (sync)
396 pnfs_ld_write_done(wdata); 345 pnfs_ld_write_done(wdata);
@@ -407,30 +356,18 @@ enum pnfs_try_status
407objlayout_write_pagelist(struct nfs_write_data *wdata, 356objlayout_write_pagelist(struct nfs_write_data *wdata,
408 int how) 357 int how)
409{ 358{
410 struct objlayout_io_state *state; 359 int err;
411 ssize_t status;
412
413 dprintk("%s: Begin inode %p offset %llu count %u\n",
414 __func__, wdata->inode, wdata->args.offset, wdata->args.count);
415
416 state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
417 wdata->args.pages,
418 wdata->args.pgbase,
419 wdata->args.offset,
420 wdata->args.count,
421 wdata->lseg, wdata,
422 GFP_NOFS);
423 if (unlikely(!state)) {
424 status = -ENOMEM;
425 goto out;
426 }
427 360
428 state->sync = how & FLUSH_SYNC; 361 _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
362 &wdata->args.pgbase,
363 wdata->args.offset, wdata->args.count);
429 364
430 status = objio_write_pagelist(state, how & FLUSH_STABLE); 365 err = objio_write_pagelist(wdata, how);
431 out: 366 if (unlikely(err)) {
432 dprintk("%s: Return status %Zd\n", __func__, status); 367 wdata->pnfs_error = err;
433 wdata->pnfs_error = status; 368 dprintk("%s: Returned Error %d\n", __func__, err);
369 return PNFS_NOT_ATTEMPTED;
370 }
434 return PNFS_ATTEMPTED; 371 return PNFS_ATTEMPTED;
435} 372}
436 373
@@ -537,14 +474,14 @@ merge_ioerr(struct pnfs_osd_ioerr *dest_err,
537static void 474static void
538encode_accumulated_error(struct objlayout *objlay, __be32 *p) 475encode_accumulated_error(struct objlayout *objlay, __be32 *p)
539{ 476{
540 struct objlayout_io_state *state, *tmp; 477 struct objlayout_io_res *oir, *tmp;
541 struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; 478 struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
542 479
543 list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { 480 list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
544 unsigned i; 481 unsigned i;
545 482
546 for (i = 0; i < state->num_comps; i++) { 483 for (i = 0; i < oir->num_comps; i++) {
547 struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; 484 struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
548 485
549 if (!ioerr->oer_errno) 486 if (!ioerr->oer_errno)
550 continue; 487 continue;
@@ -563,8 +500,8 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p)
563 500
564 merge_ioerr(&accumulated_err, ioerr); 501 merge_ioerr(&accumulated_err, ioerr);
565 } 502 }
566 list_del(&state->err_list); 503 list_del(&oir->err_list);
567 objlayout_free_io_state(state); 504 objio_free_result(oir);
568 } 505 }
569 506
570 pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); 507 pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
@@ -576,7 +513,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
576 const struct nfs4_layoutreturn_args *args) 513 const struct nfs4_layoutreturn_args *args)
577{ 514{
578 struct objlayout *objlay = OBJLAYOUT(pnfslay); 515 struct objlayout *objlay = OBJLAYOUT(pnfslay);
579 struct objlayout_io_state *state, *tmp; 516 struct objlayout_io_res *oir, *tmp;
580 __be32 *start; 517 __be32 *start;
581 518
582 dprintk("%s: Begin\n", __func__); 519 dprintk("%s: Begin\n", __func__);
@@ -585,13 +522,13 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
585 522
586 spin_lock(&objlay->lock); 523 spin_lock(&objlay->lock);
587 524
588 list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { 525 list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
589 __be32 *last_xdr = NULL, *p; 526 __be32 *last_xdr = NULL, *p;
590 unsigned i; 527 unsigned i;
591 int res = 0; 528 int res = 0;
592 529
593 for (i = 0; i < state->num_comps; i++) { 530 for (i = 0; i < oir->num_comps; i++) {
594 struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; 531 struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
595 532
596 if (!ioerr->oer_errno) 533 if (!ioerr->oer_errno)
597 continue; 534 continue;
@@ -615,7 +552,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
615 } 552 }
616 553
617 last_xdr = p; 554 last_xdr = p;
618 pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); 555 pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
619 } 556 }
620 557
621 /* TODO: use xdr_write_pages */ 558 /* TODO: use xdr_write_pages */
@@ -631,8 +568,8 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
631 encode_accumulated_error(objlay, last_xdr); 568 encode_accumulated_error(objlay, last_xdr);
632 goto loop_done; 569 goto loop_done;
633 } 570 }
634 list_del(&state->err_list); 571 list_del(&oir->err_list);
635 objlayout_free_io_state(state); 572 objio_free_result(oir);
636 } 573 }
637loop_done: 574loop_done:
638 spin_unlock(&objlay->lock); 575 spin_unlock(&objlay->lock);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index a8244c8e042d..8ec34727ed21 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -74,19 +74,11 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo)
74 * per-I/O operation state 74 * per-I/O operation state
75 * embedded in objects provider io_state data structure 75 * embedded in objects provider io_state data structure
76 */ 76 */
77struct objlayout_io_state { 77struct objlayout_io_res {
78 struct pnfs_layout_segment *lseg; 78 struct objlayout *objlay;
79
80 struct page **pages;
81 unsigned pgbase;
82 unsigned nr_pages;
83 unsigned long count;
84 loff_t offset;
85 bool sync;
86 79
87 void *rpcdata; 80 void *rpcdata;
88 int status; /* res */ 81 int status; /* res */
89 int eof; /* res */
90 int committed; /* res */ 82 int committed; /* res */
91 83
92 /* Error reporting (layout_return) */ 84 /* Error reporting (layout_return) */
@@ -100,6 +92,18 @@ struct objlayout_io_state {
100 struct pnfs_osd_ioerr *ioerrs; 92 struct pnfs_osd_ioerr *ioerrs;
101}; 93};
102 94
95static inline
96void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
97 struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
98 struct pnfs_layout_hdr *pnfs_layout_type)
99{
100 oir->objlay = OBJLAYOUT(pnfs_layout_type);
101 oir->rpcdata = rpcdata;
102 INIT_LIST_HEAD(&oir->err_list);
103 oir->num_comps = num_comps;
104 oir->ioerrs = ioerrs;
105}
106
103/* 107/*
104 * Raid engine I/O API 108 * Raid engine I/O API
105 */ 109 */
@@ -110,28 +114,24 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
110 gfp_t gfp_flags); 114 gfp_t gfp_flags);
111extern void objio_free_lseg(struct pnfs_layout_segment *lseg); 115extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
112 116
113extern int objio_alloc_io_state( 117/* objio_free_result will free these @oir structs received from
114 struct pnfs_layout_segment *lseg, 118 * objlayout_{read,write}_done
115 struct objlayout_io_state **outp, 119 */
116 gfp_t gfp_flags); 120extern void objio_free_result(struct objlayout_io_res *oir);
117extern void objio_free_io_state(struct objlayout_io_state *state);
118 121
119extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); 122extern int objio_read_pagelist(struct nfs_read_data *rdata);
120extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, 123extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
121 bool stable);
122 124
123/* 125/*
124 * callback API 126 * callback API
125 */ 127 */
126extern void objlayout_io_set_result(struct objlayout_io_state *state, 128extern void objlayout_io_set_result(struct objlayout_io_res *oir,
127 unsigned index, struct pnfs_osd_objid *pooid, 129 unsigned index, struct pnfs_osd_objid *pooid,
128 int osd_error, u64 offset, u64 length, bool is_write); 130 int osd_error, u64 offset, u64 length, bool is_write);
129 131
130static inline void 132static inline void
131objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) 133objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
132{ 134{
133 struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
134
135 /* If one of the I/Os errored out and the delta_space_used was 135 /* If one of the I/Os errored out and the delta_space_used was
136 * invalid we render the complete report as invalid. Protocol mandate 136 * invalid we render the complete report as invalid. Protocol mandate
137 * the DSU be accurate or not reported. 137 * the DSU be accurate or not reported.
@@ -144,9 +144,9 @@ objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
144 spin_unlock(&objlay->lock); 144 spin_unlock(&objlay->lock);
145} 145}
146 146
147extern void objlayout_read_done(struct objlayout_io_state *state, 147extern void objlayout_read_done(struct objlayout_io_res *oir,
148 ssize_t status, bool sync); 148 ssize_t status, bool sync);
149extern void objlayout_write_done(struct objlayout_io_state *state, 149extern void objlayout_write_done(struct objlayout_io_res *oir,
150 ssize_t status, bool sync); 150 ssize_t status, bool sync);
151 151
152extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, 152extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a788d8522c88..5668f7c54c41 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -42,7 +42,7 @@ nfs_page_free(struct nfs_page *p)
42 42
43/** 43/**
44 * nfs_create_request - Create an NFS read/write request. 44 * nfs_create_request - Create an NFS read/write request.
45 * @file: file descriptor to use 45 * @ctx: open context to use
46 * @inode: inode to which the request is attached 46 * @inode: inode to which the request is attached
47 * @page: page to write 47 * @page: page to write
48 * @offset: starting offset within the page for the write 48 * @offset: starting offset within the page for the write
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ba1d5388fafd..baf73536bc04 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1444,17 +1444,31 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1444 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ 1444 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
1445 data = kzalloc(sizeof(*data), GFP_NOFS); 1445 data = kzalloc(sizeof(*data), GFP_NOFS);
1446 if (!data) { 1446 if (!data) {
1447 mark_inode_dirty_sync(inode);
1448 status = -ENOMEM; 1447 status = -ENOMEM;
1449 goto out; 1448 goto out;
1450 } 1449 }
1451 1450
1451 if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
1452 goto out_free;
1453
1454 if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
1455 if (!sync) {
1456 status = -EAGAIN;
1457 goto out_free;
1458 }
1459 status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING,
1460 nfs_wait_bit_killable, TASK_KILLABLE);
1461 if (status)
1462 goto out_free;
1463 }
1464
1452 INIT_LIST_HEAD(&data->lseg_list); 1465 INIT_LIST_HEAD(&data->lseg_list);
1453 spin_lock(&inode->i_lock); 1466 spin_lock(&inode->i_lock);
1454 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1467 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1468 clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
1455 spin_unlock(&inode->i_lock); 1469 spin_unlock(&inode->i_lock);
1456 kfree(data); 1470 wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
1457 goto out; 1471 goto out_free;
1458 } 1472 }
1459 1473
1460 pnfs_list_write_lseg(inode, &data->lseg_list); 1474 pnfs_list_write_lseg(inode, &data->lseg_list);
@@ -1476,6 +1490,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
1476 1490
1477 status = nfs4_proc_layoutcommit(data, sync); 1491 status = nfs4_proc_layoutcommit(data, sync);
1478out: 1492out:
1493 if (status)
1494 mark_inode_dirty_sync(inode);
1479 dprintk("<-- %s status %d\n", __func__, status); 1495 dprintk("<-- %s status %d\n", __func__, status);
1480 return status; 1496 return status;
1497out_free:
1498 kfree(data);
1499 goto out;
1481} 1500}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cd1edfd8c2d0..1dda78db6a73 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1244,7 +1244,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1244{ 1244{
1245 struct nfs_writeargs *argp = &data->args; 1245 struct nfs_writeargs *argp = &data->args;
1246 struct nfs_writeres *resp = &data->res; 1246 struct nfs_writeres *resp = &data->res;
1247 struct nfs_server *server = NFS_SERVER(data->inode);
1248 int status; 1247 int status;
1249 1248
1250 dprintk("NFS: %5u nfs_writeback_done (status %d)\n", 1249 dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1278,7 +1277,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1278 if (time_before(complain, jiffies)) { 1277 if (time_before(complain, jiffies)) {
1279 dprintk("NFS: faulty NFS server %s:" 1278 dprintk("NFS: faulty NFS server %s:"
1280 " (committed = %d) != (stable = %d)\n", 1279 " (committed = %d) != (stable = %d)\n",
1281 server->nfs_client->cl_hostname, 1280 NFS_SERVER(data->inode)->nfs_client->cl_hostname,
1282 resp->verf->committed, argp->stable); 1281 resp->verf->committed, argp->stable);
1283 complain = jiffies + 300 * HZ; 1282 complain = jiffies + 300 * HZ;
1284 } 1283 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 66d095d7955e..b6fa792d6b85 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -655,7 +655,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x)
655 default: 655 default:
656 return nfserr_bad_xdr; 656 return nfserr_bad_xdr;
657 } 657 }
658 w &= !NFS4_SHARE_ACCESS_MASK; 658 w &= ~NFS4_SHARE_ACCESS_MASK;
659 if (!w) 659 if (!w)
660 return nfs_ok; 660 return nfs_ok;
661 if (!argp->minorversion) 661 if (!argp->minorversion)
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4b8e828ae15f..eda7d7e55e05 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -257,6 +257,8 @@ static void nfsd_last_thread(struct svc_serv *serv)
257 nfsd_serv = NULL; 257 nfsd_serv = NULL;
258 nfsd_shutdown(); 258 nfsd_shutdown();
259 259
260 svc_rpcb_cleanup(serv);
261
260 printk(KERN_WARNING "nfsd: last server has exited, flushing export " 262 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
261 "cache\n"); 263 "cache\n");
262 nfsd_export_flush(); 264 nfsd_export_flush();
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 666628b395f1..b50ffb72e5b3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -354,7 +354,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
354 354
355 failed_acl: 355 failed_acl:
356 failed_bmap: 356 failed_bmap:
357 inode->i_nlink = 0; 357 clear_nlink(inode);
358 iput(inode); /* raw_inode will be deleted through 358 iput(inode); /* raw_inode will be deleted through
359 generic_delete_inode() */ 359 generic_delete_inode() */
360 goto failed; 360 goto failed;
@@ -396,7 +396,7 @@ int nilfs_read_inode_common(struct inode *inode,
396 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 396 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
397 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid); 397 inode->i_uid = (uid_t)le32_to_cpu(raw_inode->i_uid);
398 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid); 398 inode->i_gid = (gid_t)le32_to_cpu(raw_inode->i_gid);
399 inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); 399 set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
400 inode->i_size = le64_to_cpu(raw_inode->i_size); 400 inode->i_size = le64_to_cpu(raw_inode->i_size);
401 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); 401 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
402 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime); 402 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index a3141990061e..768982de10e4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -289,7 +289,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
289 nilfs_warning(inode->i_sb, __func__, 289 nilfs_warning(inode->i_sb, __func__,
290 "deleting nonexistent file (%lu), %d\n", 290 "deleting nonexistent file (%lu), %d\n",
291 inode->i_ino, inode->i_nlink); 291 inode->i_ino, inode->i_nlink);
292 inode->i_nlink = 1; 292 set_nlink(inode, 1);
293 } 293 }
294 err = nilfs_delete_entry(de, page); 294 err = nilfs_delete_entry(de, page);
295 if (err) 295 if (err)
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 255d5e1c03b7..3777d138f895 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -276,10 +276,10 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
276/* super.c */ 276/* super.c */
277extern struct inode *nilfs_alloc_inode(struct super_block *); 277extern struct inode *nilfs_alloc_inode(struct super_block *);
278extern void nilfs_destroy_inode(struct inode *); 278extern void nilfs_destroy_inode(struct inode *);
279extern void nilfs_error(struct super_block *, const char *, const char *, ...) 279extern __printf(3, 4)
280 __attribute__ ((format (printf, 3, 4))); 280void nilfs_error(struct super_block *, const char *, const char *, ...);
281extern void nilfs_warning(struct super_block *, const char *, const char *, ...) 281extern __printf(3, 4)
282 __attribute__ ((format (printf, 3, 4))); 282void nilfs_warning(struct super_block *, const char *, const char *, ...);
283extern struct nilfs_super_block * 283extern struct nilfs_super_block *
284nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); 284nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
285extern int nilfs_store_magic_and_option(struct super_block *, 285extern int nilfs_store_magic_and_option(struct super_block *,
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 2142b1c68b61..53c27eaf2307 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -30,8 +30,9 @@
30 30
31extern int debug_msgs; 31extern int debug_msgs;
32 32
33extern void __ntfs_debug(const char *file, int line, const char *function, 33extern __printf(4, 5)
34 const char *format, ...) __attribute__ ((format (printf, 4, 5))); 34void __ntfs_debug(const char *file, int line, const char *function,
35 const char *format, ...);
35/** 36/**
36 * ntfs_debug - write a debug level message to syslog 37 * ntfs_debug - write a debug level message to syslog
37 * @f: a printf format string containing the message 38 * @f: a printf format string containing the message
@@ -52,12 +53,14 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
52 53
53#endif /* !DEBUG */ 54#endif /* !DEBUG */
54 55
55extern void __ntfs_warning(const char *function, const struct super_block *sb, 56extern __printf(3, 4)
56 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 57void __ntfs_warning(const char *function, const struct super_block *sb,
58 const char *fmt, ...);
57#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a) 59#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
58 60
59extern void __ntfs_error(const char *function, const struct super_block *sb, 61extern __printf(3, 4)
60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 62void __ntfs_error(const char *function, const struct super_block *sb,
63 const char *fmt, ...);
61#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a) 64#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
62 65
63#endif /* _LINUX_NTFS_DEBUG_H */ 66#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 1371487da955..97e2dacbc867 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -612,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
612 * might be tricky due to vfs interactions. Need to think about this 612 * might be tricky due to vfs interactions. Need to think about this
613 * some more when implementing the unlink command. 613 * some more when implementing the unlink command.
614 */ 614 */
615 vi->i_nlink = le16_to_cpu(m->link_count); 615 set_nlink(vi, le16_to_cpu(m->link_count));
616 /* 616 /*
617 * FIXME: Reparse points can have the directory bit set even though 617 * FIXME: Reparse points can have the directory bit set even though
618 * they would be S_IFLNK. Need to deal with this further below when we 618 * they would be S_IFLNK. Need to deal with this further below when we
@@ -634,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
634 vi->i_mode &= ~vol->dmask; 634 vi->i_mode &= ~vol->dmask;
635 /* Things break without this kludge! */ 635 /* Things break without this kludge! */
636 if (vi->i_nlink > 1) 636 if (vi->i_nlink > 1)
637 vi->i_nlink = 1; 637 set_nlink(vi, 1);
638 } else { 638 } else {
639 vi->i_mode |= S_IFREG; 639 vi->i_mode |= S_IFREG;
640 /* Apply the file permissions mask set in the mount options. */ 640 /* Apply the file permissions mask set in the mount options. */
@@ -1242,7 +1242,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1242 vi->i_version = base_vi->i_version; 1242 vi->i_version = base_vi->i_version;
1243 vi->i_uid = base_vi->i_uid; 1243 vi->i_uid = base_vi->i_uid;
1244 vi->i_gid = base_vi->i_gid; 1244 vi->i_gid = base_vi->i_gid;
1245 vi->i_nlink = base_vi->i_nlink; 1245 set_nlink(vi, base_vi->i_nlink);
1246 vi->i_mtime = base_vi->i_mtime; 1246 vi->i_mtime = base_vi->i_mtime;
1247 vi->i_ctime = base_vi->i_ctime; 1247 vi->i_ctime = base_vi->i_ctime;
1248 vi->i_atime = base_vi->i_atime; 1248 vi->i_atime = base_vi->i_atime;
@@ -1508,7 +1508,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1508 vi->i_version = base_vi->i_version; 1508 vi->i_version = base_vi->i_version;
1509 vi->i_uid = base_vi->i_uid; 1509 vi->i_uid = base_vi->i_uid;
1510 vi->i_gid = base_vi->i_gid; 1510 vi->i_gid = base_vi->i_gid;
1511 vi->i_nlink = base_vi->i_nlink; 1511 set_nlink(vi, base_vi->i_nlink);
1512 vi->i_mtime = base_vi->i_mtime; 1512 vi->i_mtime = base_vi->i_mtime;
1513 vi->i_ctime = base_vi->i_ctime; 1513 vi->i_ctime = base_vi->i_ctime;
1514 vi->i_atime = base_vi->i_atime; 1514 vi->i_atime = base_vi->i_atime;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8582e3f4f120..e2878b5895fb 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2292,7 +2292,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2292 ocfs2_journal_dirty(handle, di_bh); 2292 ocfs2_journal_dirty(handle, di_bh);
2293 2293
2294 i_size_write(inode, size); 2294 i_size_write(inode, size);
2295 inode->i_nlink = 2; 2295 set_nlink(inode, 2);
2296 inode->i_blocks = ocfs2_inode_sector_count(inode); 2296 inode->i_blocks = ocfs2_inode_sector_count(inode);
2297 2297
2298 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); 2298 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
@@ -2354,7 +2354,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2354 ocfs2_journal_dirty(handle, new_bh); 2354 ocfs2_journal_dirty(handle, new_bh);
2355 2355
2356 i_size_write(inode, inode->i_sb->s_blocksize); 2356 i_size_write(inode, inode->i_sb->s_blocksize);
2357 inode->i_nlink = 2; 2357 set_nlink(inode, 2);
2358 inode->i_blocks = ocfs2_inode_sector_count(inode); 2358 inode->i_blocks = ocfs2_inode_sector_count(inode);
2359 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh); 2359 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
2360 if (status < 0) { 2360 if (status < 0) {
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7642d7ca73e5..e1ed5e502ff2 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2092,7 +2092,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
2092 inode->i_uid = be32_to_cpu(lvb->lvb_iuid); 2092 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
2093 inode->i_gid = be32_to_cpu(lvb->lvb_igid); 2093 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
2094 inode->i_mode = be16_to_cpu(lvb->lvb_imode); 2094 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
2095 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink); 2095 set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
2096 ocfs2_unpack_timespec(&inode->i_atime, 2096 ocfs2_unpack_timespec(&inode->i_atime,
2097 be64_to_cpu(lvb->lvb_iatime_packed)); 2097 be64_to_cpu(lvb->lvb_iatime_packed));
2098 ocfs2_unpack_timespec(&inode->i_mtime, 2098 ocfs2_unpack_timespec(&inode->i_mtime,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index b4c8bb6b8d28..a22d2c098890 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -291,7 +291,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
291 (unsigned long long)OCFS2_I(inode)->ip_blkno, 291 (unsigned long long)OCFS2_I(inode)->ip_blkno,
292 (unsigned long long)le64_to_cpu(fe->i_blkno)); 292 (unsigned long long)le64_to_cpu(fe->i_blkno));
293 293
294 inode->i_nlink = ocfs2_read_links_count(fe); 294 set_nlink(inode, ocfs2_read_links_count(fe));
295 295
296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno, 296 trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
297 le32_to_cpu(fe->i_flags)); 297 le32_to_cpu(fe->i_flags));
@@ -1290,7 +1290,7 @@ void ocfs2_refresh_inode(struct inode *inode,
1290 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); 1290 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
1291 ocfs2_set_inode_flags(inode); 1291 ocfs2_set_inode_flags(inode);
1292 i_size_write(inode, le64_to_cpu(fe->i_size)); 1292 i_size_write(inode, le64_to_cpu(fe->i_size));
1293 inode->i_nlink = ocfs2_read_links_count(fe); 1293 set_nlink(inode, ocfs2_read_links_count(fe));
1294 inode->i_uid = le32_to_cpu(fe->i_uid); 1294 inode->i_uid = le32_to_cpu(fe->i_uid);
1295 inode->i_gid = le32_to_cpu(fe->i_gid); 1295 inode->i_gid = le32_to_cpu(fe->i_gid);
1296 inode->i_mode = le16_to_cpu(fe->i_mode); 1296 inode->i_mode = le16_to_cpu(fe->i_mode);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 53aa41ed7bf3..a8b2bfea574e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -199,9 +199,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
199 * these are used by the support functions here and in 199 * these are used by the support functions here and in
200 * callers. */ 200 * callers. */
201 if (S_ISDIR(mode)) 201 if (S_ISDIR(mode))
202 inode->i_nlink = 2; 202 set_nlink(inode, 2);
203 else
204 inode->i_nlink = 1;
205 inode_init_owner(inode, dir, mode); 203 inode_init_owner(inode, dir, mode);
206 dquot_initialize(inode); 204 dquot_initialize(inode);
207 return inode; 205 return inode;
@@ -1379,7 +1377,7 @@ static int ocfs2_rename(struct inode *old_dir,
1379 } 1377 }
1380 1378
1381 if (new_inode) { 1379 if (new_inode) {
1382 new_inode->i_nlink--; 1380 drop_nlink(new_inode);
1383 new_inode->i_ctime = CURRENT_TIME; 1381 new_inode->i_ctime = CURRENT_TIME;
1384 } 1382 }
1385 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; 1383 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
@@ -1387,9 +1385,9 @@ static int ocfs2_rename(struct inode *old_dir,
1387 if (update_dot_dot) { 1385 if (update_dot_dot) {
1388 status = ocfs2_update_entry(old_inode, handle, 1386 status = ocfs2_update_entry(old_inode, handle,
1389 &old_inode_dot_dot_res, new_dir); 1387 &old_inode_dot_dot_res, new_dir);
1390 old_dir->i_nlink--; 1388 drop_nlink(old_dir);
1391 if (new_inode) { 1389 if (new_inode) {
1392 new_inode->i_nlink--; 1390 drop_nlink(new_inode);
1393 } else { 1391 } else {
1394 inc_nlink(new_dir); 1392 inc_nlink(new_dir);
1395 mark_inode_dirty(new_dir); 1393 mark_inode_dirty(new_dir);
@@ -2018,7 +2016,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
2018 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; 2016 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2019 if (S_ISDIR(inode->i_mode)) 2017 if (S_ISDIR(inode->i_mode))
2020 ocfs2_add_links_count(orphan_fe, 1); 2018 ocfs2_add_links_count(orphan_fe, 1);
2021 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2019 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2022 ocfs2_journal_dirty(handle, orphan_dir_bh); 2020 ocfs2_journal_dirty(handle, orphan_dir_bh);
2023 2021
2024 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 2022 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
@@ -2116,7 +2114,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2116 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; 2114 orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2117 if (S_ISDIR(inode->i_mode)) 2115 if (S_ISDIR(inode->i_mode))
2118 ocfs2_add_links_count(orphan_fe, -1); 2116 ocfs2_add_links_count(orphan_fe, -1);
2119 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2117 set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe));
2120 ocfs2_journal_dirty(handle, orphan_dir_bh); 2118 ocfs2_journal_dirty(handle, orphan_dir_bh);
2121 2119
2122leave: 2120leave:
@@ -2282,7 +2280,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
2282 goto leave; 2280 goto leave;
2283 } 2281 }
2284 2282
2285 inode->i_nlink = 0; 2283 clear_nlink(inode);
2286 /* do the real work now. */ 2284 /* do the real work now. */
2287 status = __ocfs2_mknod_locked(dir, inode, 2285 status = __ocfs2_mknod_locked(dir, inode,
2288 0, &new_di_bh, parent_di_bh, handle, 2286 0, &new_di_bh, parent_di_bh, handle,
@@ -2437,7 +2435,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2437 di = (struct ocfs2_dinode *)di_bh->b_data; 2435 di = (struct ocfs2_dinode *)di_bh->b_data;
2438 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL); 2436 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2439 di->i_orphaned_slot = 0; 2437 di->i_orphaned_slot = 0;
2440 inode->i_nlink = 1; 2438 set_nlink(inode, 1);
2441 ocfs2_set_links_count(di, inode->i_nlink); 2439 ocfs2_set_links_count(di, inode->i_nlink);
2442 ocfs2_journal_dirty(handle, di_bh); 2440 ocfs2_journal_dirty(handle, di_bh);
2443 2441
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 40c7de084c10..74ff74cf78fe 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -31,17 +31,15 @@ extern struct workqueue_struct *ocfs2_wq;
31int ocfs2_publish_get_mount_state(struct ocfs2_super *osb, 31int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
32 int node_num); 32 int node_num);
33 33
34void __ocfs2_error(struct super_block *sb, 34__printf(3, 4)
35 const char *function, 35void __ocfs2_error(struct super_block *sb, const char *function,
36 const char *fmt, ...) 36 const char *fmt, ...);
37 __attribute__ ((format (printf, 3, 4)));
38 37
39#define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args) 38#define ocfs2_error(sb, fmt, args...) __ocfs2_error(sb, __PRETTY_FUNCTION__, fmt, ##args)
40 39
41void __ocfs2_abort(struct super_block *sb, 40__printf(3, 4)
42 const char *function, 41void __ocfs2_abort(struct super_block *sb, const char *function,
43 const char *fmt, ...) 42 const char *fmt, ...);
44 __attribute__ ((format (printf, 3, 4)));
45 43
46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) 44#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
47 45
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a2a5bff774e3..e4e0ff7962e2 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -242,7 +242,7 @@ found:
242 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 242 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
243 inode->i_op = &openprom_inode_operations; 243 inode->i_op = &openprom_inode_operations;
244 inode->i_fop = &openprom_operations; 244 inode->i_fop = &openprom_operations;
245 inode->i_nlink = 2; 245 set_nlink(inode, 2);
246 break; 246 break;
247 case op_inode_prop: 247 case op_inode_prop:
248 if (!strcmp(dp->name, "options") && (len == 17) && 248 if (!strcmp(dp->name, "options") && (len == 17) &&
@@ -251,7 +251,7 @@ found:
251 else 251 else
252 inode->i_mode = S_IFREG | S_IRUGO; 252 inode->i_mode = S_IFREG | S_IRUGO;
253 inode->i_fop = &openpromfs_prop_ops; 253 inode->i_fop = &openpromfs_prop_ops;
254 inode->i_nlink = 1; 254 set_nlink(inode, 1);
255 inode->i_size = ent_oi->u.prop->length; 255 inode->i_size = ent_oi->u.prop->length;
256 break; 256 break;
257 } 257 }
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index af9fdf046769..bd8ae788f689 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -49,18 +49,20 @@
49#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a) 49#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
50#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a) 50#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
51 51
52__attribute__ ((format (printf, 3, 4))) 52static __printf(3, 4)
53static void _ldm_printk (const char *level, const char *function, 53void _ldm_printk(const char *level, const char *function, const char *fmt, ...)
54 const char *fmt, ...)
55{ 54{
56 static char buf[128]; 55 struct va_format vaf;
57 va_list args; 56 va_list args;
58 57
59 va_start (args, fmt); 58 va_start (args, fmt);
60 vsnprintf (buf, sizeof (buf), fmt, args);
61 va_end (args);
62 59
63 printk ("%s%s(): %s\n", level, function, buf); 60 vaf.fmt = fmt;
61 vaf.va = &args;
62
63 printk("%s%s(): %pV\n", level, function, &vaf);
64
65 va_end(args);
64} 66}
65 67
66/** 68/**
diff --git a/fs/pipe.c b/fs/pipe.c
index 0e0be1dc0f8e..4065f07366b3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1254,6 +1254,7 @@ out:
1254 1254
1255static const struct super_operations pipefs_ops = { 1255static const struct super_operations pipefs_ops = {
1256 .destroy_inode = free_inode_nonrcu, 1256 .destroy_inode = free_inode_nonrcu,
1257 .statfs = simple_statfs,
1257}; 1258};
1258 1259
1259/* 1260/*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5eb02069e1b8..2db1bd3173b2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1107,13 +1107,6 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1107 goto err_sighand; 1107 goto err_sighand;
1108 } 1108 }
1109 1109
1110 if (oom_adjust != task->signal->oom_adj) {
1111 if (oom_adjust == OOM_DISABLE)
1112 atomic_inc(&task->mm->oom_disable_count);
1113 if (task->signal->oom_adj == OOM_DISABLE)
1114 atomic_dec(&task->mm->oom_disable_count);
1115 }
1116
1117 /* 1110 /*
1118 * Warn that /proc/pid/oom_adj is deprecated, see 1111 * Warn that /proc/pid/oom_adj is deprecated, see
1119 * Documentation/feature-removal-schedule.txt. 1112 * Documentation/feature-removal-schedule.txt.
@@ -1215,12 +1208,6 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1215 goto err_sighand; 1208 goto err_sighand;
1216 } 1209 }
1217 1210
1218 if (oom_score_adj != task->signal->oom_score_adj) {
1219 if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1220 atomic_inc(&task->mm->oom_disable_count);
1221 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1222 atomic_dec(&task->mm->oom_disable_count);
1223 }
1224 task->signal->oom_score_adj = oom_score_adj; 1211 task->signal->oom_score_adj = oom_score_adj;
1225 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1212 if (has_capability_noaudit(current, CAP_SYS_RESOURCE))
1226 task->signal->oom_score_adj_min = oom_score_adj; 1213 task->signal->oom_score_adj_min = oom_score_adj;
@@ -1665,12 +1652,46 @@ out:
1665 return error; 1652 return error;
1666} 1653}
1667 1654
1655static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
1656 struct kstat *stat)
1657{
1658 struct inode *inode = dentry->d_inode;
1659 struct task_struct *task = get_proc_task(inode);
1660 int rc;
1661
1662 if (task == NULL)
1663 return -ESRCH;
1664
1665 rc = -EACCES;
1666 if (lock_trace(task))
1667 goto out_task;
1668
1669 generic_fillattr(inode, stat);
1670 unlock_trace(task);
1671 rc = 0;
1672out_task:
1673 put_task_struct(task);
1674 return rc;
1675}
1676
1668static const struct inode_operations proc_pid_link_inode_operations = { 1677static const struct inode_operations proc_pid_link_inode_operations = {
1669 .readlink = proc_pid_readlink, 1678 .readlink = proc_pid_readlink,
1670 .follow_link = proc_pid_follow_link, 1679 .follow_link = proc_pid_follow_link,
1671 .setattr = proc_setattr, 1680 .setattr = proc_setattr,
1672}; 1681};
1673 1682
1683static const struct inode_operations proc_fdinfo_link_inode_operations = {
1684 .setattr = proc_setattr,
1685 .getattr = proc_pid_fd_link_getattr,
1686};
1687
1688static const struct inode_operations proc_fd_link_inode_operations = {
1689 .readlink = proc_pid_readlink,
1690 .follow_link = proc_pid_follow_link,
1691 .setattr = proc_setattr,
1692 .getattr = proc_pid_fd_link_getattr,
1693};
1694
1674 1695
1675/* building an inode */ 1696/* building an inode */
1676 1697
@@ -1902,49 +1923,61 @@ out:
1902 1923
1903static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1924static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1904{ 1925{
1905 struct task_struct *task = get_proc_task(inode); 1926 struct task_struct *task;
1906 struct files_struct *files = NULL; 1927 struct files_struct *files;
1907 struct file *file; 1928 struct file *file;
1908 int fd = proc_fd(inode); 1929 int fd = proc_fd(inode);
1930 int rc;
1909 1931
1910 if (task) { 1932 task = get_proc_task(inode);
1911 files = get_files_struct(task); 1933 if (!task)
1912 put_task_struct(task); 1934 return -ENOENT;
1913 } 1935
1914 if (files) { 1936 rc = -EACCES;
1915 /* 1937 if (lock_trace(task))
1916 * We are not taking a ref to the file structure, so we must 1938 goto out_task;
1917 * hold ->file_lock. 1939
1918 */ 1940 rc = -ENOENT;
1919 spin_lock(&files->file_lock); 1941 files = get_files_struct(task);
1920 file = fcheck_files(files, fd); 1942 if (files == NULL)
1921 if (file) { 1943 goto out_unlock;
1922 unsigned int f_flags; 1944
1923 struct fdtable *fdt; 1945 /*
1924 1946 * We are not taking a ref to the file structure, so we must
1925 fdt = files_fdtable(files); 1947 * hold ->file_lock.
1926 f_flags = file->f_flags & ~O_CLOEXEC; 1948 */
1927 if (FD_ISSET(fd, fdt->close_on_exec)) 1949 spin_lock(&files->file_lock);
1928 f_flags |= O_CLOEXEC; 1950 file = fcheck_files(files, fd);
1929 1951 if (file) {
1930 if (path) { 1952 unsigned int f_flags;
1931 *path = file->f_path; 1953 struct fdtable *fdt;
1932 path_get(&file->f_path); 1954
1933 } 1955 fdt = files_fdtable(files);
1934 if (info) 1956 f_flags = file->f_flags & ~O_CLOEXEC;
1935 snprintf(info, PROC_FDINFO_MAX, 1957 if (FD_ISSET(fd, fdt->close_on_exec))
1936 "pos:\t%lli\n" 1958 f_flags |= O_CLOEXEC;
1937 "flags:\t0%o\n", 1959
1938 (long long) file->f_pos, 1960 if (path) {
1939 f_flags); 1961 *path = file->f_path;
1940 spin_unlock(&files->file_lock); 1962 path_get(&file->f_path);
1941 put_files_struct(files);
1942 return 0;
1943 } 1963 }
1944 spin_unlock(&files->file_lock); 1964 if (info)
1945 put_files_struct(files); 1965 snprintf(info, PROC_FDINFO_MAX,
1946 } 1966 "pos:\t%lli\n"
1947 return -ENOENT; 1967 "flags:\t0%o\n",
1968 (long long) file->f_pos,
1969 f_flags);
1970 rc = 0;
1971 } else
1972 rc = -ENOENT;
1973 spin_unlock(&files->file_lock);
1974 put_files_struct(files);
1975
1976out_unlock:
1977 unlock_trace(task);
1978out_task:
1979 put_task_struct(task);
1980 return rc;
1948} 1981}
1949 1982
1950static int proc_fd_link(struct inode *inode, struct path *path) 1983static int proc_fd_link(struct inode *inode, struct path *path)
@@ -2039,7 +2072,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
2039 spin_unlock(&files->file_lock); 2072 spin_unlock(&files->file_lock);
2040 put_files_struct(files); 2073 put_files_struct(files);
2041 2074
2042 inode->i_op = &proc_pid_link_inode_operations; 2075 inode->i_op = &proc_fd_link_inode_operations;
2043 inode->i_size = 64; 2076 inode->i_size = 64;
2044 ei->op.proc_get_link = proc_fd_link; 2077 ei->op.proc_get_link = proc_fd_link;
2045 d_set_d_op(dentry, &tid_fd_dentry_operations); 2078 d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2071,7 +2104,12 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
2071 if (fd == ~0U) 2104 if (fd == ~0U)
2072 goto out; 2105 goto out;
2073 2106
2107 result = ERR_PTR(-EACCES);
2108 if (lock_trace(task))
2109 goto out;
2110
2074 result = instantiate(dir, dentry, task, &fd); 2111 result = instantiate(dir, dentry, task, &fd);
2112 unlock_trace(task);
2075out: 2113out:
2076 put_task_struct(task); 2114 put_task_struct(task);
2077out_no_task: 2115out_no_task:
@@ -2091,23 +2129,28 @@ static int proc_readfd_common(struct file * filp, void * dirent,
2091 retval = -ENOENT; 2129 retval = -ENOENT;
2092 if (!p) 2130 if (!p)
2093 goto out_no_task; 2131 goto out_no_task;
2132
2133 retval = -EACCES;
2134 if (lock_trace(p))
2135 goto out;
2136
2094 retval = 0; 2137 retval = 0;
2095 2138
2096 fd = filp->f_pos; 2139 fd = filp->f_pos;
2097 switch (fd) { 2140 switch (fd) {
2098 case 0: 2141 case 0:
2099 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 2142 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
2100 goto out; 2143 goto out_unlock;
2101 filp->f_pos++; 2144 filp->f_pos++;
2102 case 1: 2145 case 1:
2103 ino = parent_ino(dentry); 2146 ino = parent_ino(dentry);
2104 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 2147 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
2105 goto out; 2148 goto out_unlock;
2106 filp->f_pos++; 2149 filp->f_pos++;
2107 default: 2150 default:
2108 files = get_files_struct(p); 2151 files = get_files_struct(p);
2109 if (!files) 2152 if (!files)
2110 goto out; 2153 goto out_unlock;
2111 rcu_read_lock(); 2154 rcu_read_lock();
2112 for (fd = filp->f_pos-2; 2155 for (fd = filp->f_pos-2;
2113 fd < files_fdtable(files)->max_fds; 2156 fd < files_fdtable(files)->max_fds;
@@ -2131,6 +2174,9 @@ static int proc_readfd_common(struct file * filp, void * dirent,
2131 rcu_read_unlock(); 2174 rcu_read_unlock();
2132 put_files_struct(files); 2175 put_files_struct(files);
2133 } 2176 }
2177
2178out_unlock:
2179 unlock_trace(p);
2134out: 2180out:
2135 put_task_struct(p); 2181 put_task_struct(p);
2136out_no_task: 2182out_no_task:
@@ -2208,6 +2254,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2208 ei->fd = fd; 2254 ei->fd = fd;
2209 inode->i_mode = S_IFREG | S_IRUSR; 2255 inode->i_mode = S_IFREG | S_IRUSR;
2210 inode->i_fop = &proc_fdinfo_file_operations; 2256 inode->i_fop = &proc_fdinfo_file_operations;
2257 inode->i_op = &proc_fdinfo_link_inode_operations;
2211 d_set_d_op(dentry, &tid_fd_dentry_operations); 2258 d_set_d_op(dentry, &tid_fd_dentry_operations);
2212 d_add(dentry, inode); 2259 d_add(dentry, inode);
2213 /* Close the race of the process dying before we return the dentry */ 2260 /* Close the race of the process dying before we return the dentry */
@@ -2261,7 +2308,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2261 ei = PROC_I(inode); 2308 ei = PROC_I(inode);
2262 inode->i_mode = p->mode; 2309 inode->i_mode = p->mode;
2263 if (S_ISDIR(inode->i_mode)) 2310 if (S_ISDIR(inode->i_mode))
2264 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 2311 set_nlink(inode, 2); /* Use getattr to fix if necessary */
2265 if (p->iop) 2312 if (p->iop)
2266 inode->i_op = p->iop; 2313 inode->i_op = p->iop;
2267 if (p->fop) 2314 if (p->fop)
@@ -2655,7 +2702,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2655 2702
2656 inode->i_mode = p->mode; 2703 inode->i_mode = p->mode;
2657 if (S_ISDIR(inode->i_mode)) 2704 if (S_ISDIR(inode->i_mode))
2658 inode->i_nlink = 2; 2705 set_nlink(inode, 2);
2659 if (S_ISLNK(inode->i_mode)) 2706 if (S_ISLNK(inode->i_mode))
2660 inode->i_size = 64; 2707 inode->i_size = 64;
2661 if (p->iop) 2708 if (p->iop)
@@ -2994,8 +3041,8 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2994 inode->i_fop = &proc_tgid_base_operations; 3041 inode->i_fop = &proc_tgid_base_operations;
2995 inode->i_flags|=S_IMMUTABLE; 3042 inode->i_flags|=S_IMMUTABLE;
2996 3043
2997 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 3044 set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
2998 ARRAY_SIZE(tgid_base_stuff)); 3045 ARRAY_SIZE(tgid_base_stuff)));
2999 3046
3000 d_set_d_op(dentry, &pid_dentry_operations); 3047 d_set_d_op(dentry, &pid_dentry_operations);
3001 3048
@@ -3246,8 +3293,8 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3246 inode->i_fop = &proc_tid_base_operations; 3293 inode->i_fop = &proc_tid_base_operations;
3247 inode->i_flags|=S_IMMUTABLE; 3294 inode->i_flags|=S_IMMUTABLE;
3248 3295
3249 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3296 set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
3250 ARRAY_SIZE(tid_base_stuff)); 3297 ARRAY_SIZE(tid_base_stuff)));
3251 3298
3252 d_set_d_op(dentry, &pid_dentry_operations); 3299 d_set_d_op(dentry, &pid_dentry_operations);
3253 3300
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9d99131d0d65..10090d9c7ad5 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -283,7 +283,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
283 struct inode *inode = dentry->d_inode; 283 struct inode *inode = dentry->d_inode;
284 struct proc_dir_entry *de = PROC_I(inode)->pde; 284 struct proc_dir_entry *de = PROC_I(inode)->pde;
285 if (de && de->nlink) 285 if (de && de->nlink)
286 inode->i_nlink = de->nlink; 286 set_nlink(inode, de->nlink);
287 287
288 generic_fillattr(inode, stat); 288 generic_fillattr(inode, stat);
289 return 0; 289 return 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7ed72d6c1c6f..7737c5468a40 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -445,7 +445,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
445 if (de->size) 445 if (de->size)
446 inode->i_size = de->size; 446 inode->i_size = de->size;
447 if (de->nlink) 447 if (de->nlink)
448 inode->i_nlink = de->nlink; 448 set_nlink(inode, de->nlink);
449 if (de->proc_iops) 449 if (de->proc_iops)
450 inode->i_op = de->proc_iops; 450 inode->i_op = de->proc_iops;
451 if (de->proc_fops) { 451 if (de->proc_fops) {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1a77dbef226f..a6b62173d4c3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -3,6 +3,7 @@
3 */ 3 */
4#include <linux/init.h> 4#include <linux/init.h>
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/poll.h>
6#include <linux/proc_fs.h> 7#include <linux/proc_fs.h>
7#include <linux/security.h> 8#include <linux/security.h>
8#include <linux/namei.h> 9#include <linux/namei.h>
@@ -14,6 +15,15 @@ static const struct inode_operations proc_sys_inode_operations;
14static const struct file_operations proc_sys_dir_file_operations; 15static const struct file_operations proc_sys_dir_file_operations;
15static const struct inode_operations proc_sys_dir_operations; 16static const struct inode_operations proc_sys_dir_operations;
16 17
18void proc_sys_poll_notify(struct ctl_table_poll *poll)
19{
20 if (!poll)
21 return;
22
23 atomic_inc(&poll->event);
24 wake_up_interruptible(&poll->wait);
25}
26
17static struct inode *proc_sys_make_inode(struct super_block *sb, 27static struct inode *proc_sys_make_inode(struct super_block *sb,
18 struct ctl_table_header *head, struct ctl_table *table) 28 struct ctl_table_header *head, struct ctl_table *table)
19{ 29{
@@ -39,7 +49,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
39 inode->i_fop = &proc_sys_file_operations; 49 inode->i_fop = &proc_sys_file_operations;
40 } else { 50 } else {
41 inode->i_mode |= S_IFDIR; 51 inode->i_mode |= S_IFDIR;
42 inode->i_nlink = 0; 52 clear_nlink(inode);
43 inode->i_op = &proc_sys_dir_operations; 53 inode->i_op = &proc_sys_dir_operations;
44 inode->i_fop = &proc_sys_dir_file_operations; 54 inode->i_fop = &proc_sys_dir_file_operations;
45 } 55 }
@@ -176,6 +186,39 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
176 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); 186 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
177} 187}
178 188
189static int proc_sys_open(struct inode *inode, struct file *filp)
190{
191 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
192
193 if (table->poll)
194 filp->private_data = proc_sys_poll_event(table->poll);
195
196 return 0;
197}
198
199static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
200{
201 struct inode *inode = filp->f_path.dentry->d_inode;
202 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
203 unsigned long event = (unsigned long)filp->private_data;
204 unsigned int ret = DEFAULT_POLLMASK;
205
206 if (!table->proc_handler)
207 goto out;
208
209 if (!table->poll)
210 goto out;
211
212 poll_wait(filp, &table->poll->wait, wait);
213
214 if (event != atomic_read(&table->poll->event)) {
215 filp->private_data = proc_sys_poll_event(table->poll);
216 ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
217 }
218
219out:
220 return ret;
221}
179 222
180static int proc_sys_fill_cache(struct file *filp, void *dirent, 223static int proc_sys_fill_cache(struct file *filp, void *dirent,
181 filldir_t filldir, 224 filldir_t filldir,
@@ -364,12 +407,15 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
364} 407}
365 408
366static const struct file_operations proc_sys_file_operations = { 409static const struct file_operations proc_sys_file_operations = {
410 .open = proc_sys_open,
411 .poll = proc_sys_poll,
367 .read = proc_sys_read, 412 .read = proc_sys_read,
368 .write = proc_sys_write, 413 .write = proc_sys_write,
369 .llseek = default_llseek, 414 .llseek = default_llseek,
370}; 415};
371 416
372static const struct file_operations proc_sys_dir_file_operations = { 417static const struct file_operations proc_sys_dir_file_operations = {
418 .read = generic_read_dir,
373 .readdir = proc_sys_readdir, 419 .readdir = proc_sys_readdir,
374 .llseek = generic_file_llseek, 420 .llseek = generic_file_llseek,
375}; 421};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5afaa58a8630..e418c5abdb0e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -44,6 +44,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
44 "VmPeak:\t%8lu kB\n" 44 "VmPeak:\t%8lu kB\n"
45 "VmSize:\t%8lu kB\n" 45 "VmSize:\t%8lu kB\n"
46 "VmLck:\t%8lu kB\n" 46 "VmLck:\t%8lu kB\n"
47 "VmPin:\t%8lu kB\n"
47 "VmHWM:\t%8lu kB\n" 48 "VmHWM:\t%8lu kB\n"
48 "VmRSS:\t%8lu kB\n" 49 "VmRSS:\t%8lu kB\n"
49 "VmData:\t%8lu kB\n" 50 "VmData:\t%8lu kB\n"
@@ -55,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
55 hiwater_vm << (PAGE_SHIFT-10), 56 hiwater_vm << (PAGE_SHIFT-10),
56 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), 57 (total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
57 mm->locked_vm << (PAGE_SHIFT-10), 58 mm->locked_vm << (PAGE_SHIFT-10),
59 mm->pinned_vm << (PAGE_SHIFT-10),
58 hiwater_rss << (PAGE_SHIFT-10), 60 hiwater_rss << (PAGE_SHIFT-10),
59 total_rss << (PAGE_SHIFT-10), 61 total_rss << (PAGE_SHIFT-10),
60 data << (PAGE_SHIFT-10), 62 data << (PAGE_SHIFT-10),
@@ -1039,6 +1041,9 @@ static int show_numa_map(struct seq_file *m, void *v)
1039 seq_printf(m, " stack"); 1041 seq_printf(m, " stack");
1040 } 1042 }
1041 1043
1044 if (is_vm_hugetlb_page(vma))
1045 seq_printf(m, " huge");
1046
1042 walk_page_range(vma->vm_start, vma->vm_end, &walk); 1047 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1043 1048
1044 if (!md->pages) 1049 if (!md->pages)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 893b961dcfd8..379a02dc1217 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -24,6 +24,7 @@
24#include <linux/highmem.h> 24#include <linux/highmem.h>
25#include <linux/time.h> 25#include <linux/time.h>
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/list.h>
27#include <linux/string.h> 28#include <linux/string.h>
28#include <linux/mount.h> 29#include <linux/mount.h>
29#include <linux/ramfs.h> 30#include <linux/ramfs.h>
@@ -32,13 +33,18 @@
32#include <linux/magic.h> 33#include <linux/magic.h>
33#include <linux/pstore.h> 34#include <linux/pstore.h>
34#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/spinlock.h>
35#include <linux/uaccess.h> 37#include <linux/uaccess.h>
36 38
37#include "internal.h" 39#include "internal.h"
38 40
39#define PSTORE_NAMELEN 64 41#define PSTORE_NAMELEN 64
40 42
43static DEFINE_SPINLOCK(allpstore_lock);
44static LIST_HEAD(allpstore);
45
41struct pstore_private { 46struct pstore_private {
47 struct list_head list;
42 struct pstore_info *psi; 48 struct pstore_info *psi;
43 enum pstore_type_id type; 49 enum pstore_type_id type;
44 u64 id; 50 u64 id;
@@ -81,8 +87,16 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
81 87
82static void pstore_evict_inode(struct inode *inode) 88static void pstore_evict_inode(struct inode *inode)
83{ 89{
90 struct pstore_private *p = inode->i_private;
91 unsigned long flags;
92
84 end_writeback(inode); 93 end_writeback(inode);
85 kfree(inode->i_private); 94 if (p) {
95 spin_lock_irqsave(&allpstore_lock, flags);
96 list_del(&p->list);
97 spin_unlock_irqrestore(&allpstore_lock, flags);
98 kfree(p);
99 }
86} 100}
87 101
88static const struct inode_operations pstore_dir_inode_operations = { 102static const struct inode_operations pstore_dir_inode_operations = {
@@ -182,9 +196,23 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
182 struct dentry *root = pstore_sb->s_root; 196 struct dentry *root = pstore_sb->s_root;
183 struct dentry *dentry; 197 struct dentry *dentry;
184 struct inode *inode; 198 struct inode *inode;
185 int rc; 199 int rc = 0;
186 char name[PSTORE_NAMELEN]; 200 char name[PSTORE_NAMELEN];
187 struct pstore_private *private; 201 struct pstore_private *private, *pos;
202 unsigned long flags;
203
204 spin_lock_irqsave(&allpstore_lock, flags);
205 list_for_each_entry(pos, &allpstore, list) {
206 if (pos->type == type &&
207 pos->id == id &&
208 pos->psi == psi) {
209 rc = -EEXIST;
210 break;
211 }
212 }
213 spin_unlock_irqrestore(&allpstore_lock, flags);
214 if (rc)
215 return rc;
188 216
189 rc = -ENOMEM; 217 rc = -ENOMEM;
190 inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0); 218 inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0);
@@ -229,6 +257,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
229 257
230 d_add(dentry, inode); 258 d_add(dentry, inode);
231 259
260 spin_lock_irqsave(&allpstore_lock, flags);
261 list_add(&private->list, &allpstore);
262 spin_unlock_irqrestore(&allpstore_lock, flags);
263
232 mutex_unlock(&root->d_inode->i_mutex); 264 mutex_unlock(&root->d_inode->i_mutex);
233 265
234 return 0; 266 return 0;
@@ -277,7 +309,7 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
277 goto fail; 309 goto fail;
278 } 310 }
279 311
280 pstore_get_records(); 312 pstore_get_records(0);
281 313
282 return 0; 314 return 0;
283fail: 315fail:
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 611c1b3c46fa..3bde461c3f34 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -1,5 +1,5 @@
1extern void pstore_set_kmsg_bytes(int); 1extern void pstore_set_kmsg_bytes(int);
2extern void pstore_get_records(void); 2extern void pstore_get_records(int);
3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, 3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
4 char *data, size_t size, 4 char *data, size_t size,
5 struct timespec time, struct pstore_info *psi); 5 struct timespec time, struct pstore_info *psi);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index c5300ec31696..2bd620f0d796 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -25,12 +25,30 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/pstore.h> 26#include <linux/pstore.h>
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/timer.h>
28#include <linux/slab.h> 29#include <linux/slab.h>
29#include <linux/uaccess.h> 30#include <linux/uaccess.h>
31#include <linux/hardirq.h>
32#include <linux/workqueue.h>
30 33
31#include "internal.h" 34#include "internal.h"
32 35
33/* 36/*
37 * We defer making "oops" entries appear in pstore - see
38 * whether the system is actually still running well enough
39 * to let someone see the entry
40 */
41#define PSTORE_INTERVAL (60 * HZ)
42
43static int pstore_new_entry;
44
45static void pstore_timefunc(unsigned long);
46static DEFINE_TIMER(pstore_timer, pstore_timefunc, 0, 0);
47
48static void pstore_dowork(struct work_struct *);
49static DECLARE_WORK(pstore_work, pstore_dowork);
50
51/*
34 * pstore_lock just protects "psinfo" during 52 * pstore_lock just protects "psinfo" during
35 * calls to pstore_register() 53 * calls to pstore_register()
36 */ 54 */
@@ -69,15 +87,22 @@ static void pstore_dump(struct kmsg_dumper *dumper,
69 unsigned long size, total = 0; 87 unsigned long size, total = 0;
70 char *dst, *why; 88 char *dst, *why;
71 u64 id; 89 u64 id;
72 int hsize; 90 int hsize, ret;
73 unsigned int part = 1; 91 unsigned int part = 1;
92 unsigned long flags = 0;
93 int is_locked = 0;
74 94
75 if (reason < ARRAY_SIZE(reason_str)) 95 if (reason < ARRAY_SIZE(reason_str))
76 why = reason_str[reason]; 96 why = reason_str[reason];
77 else 97 else
78 why = "Unknown"; 98 why = "Unknown";
79 99
80 mutex_lock(&psinfo->buf_mutex); 100 if (in_nmi()) {
101 is_locked = spin_trylock(&psinfo->buf_lock);
102 if (!is_locked)
103 pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
104 } else
105 spin_lock_irqsave(&psinfo->buf_lock, flags);
81 oopscount++; 106 oopscount++;
82 while (total < kmsg_bytes) { 107 while (total < kmsg_bytes) {
83 dst = psinfo->buf; 108 dst = psinfo->buf;
@@ -97,18 +122,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
97 memcpy(dst, s1 + s1_start, l1_cpy); 122 memcpy(dst, s1 + s1_start, l1_cpy);
98 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 123 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
99 124
100 id = psinfo->write(PSTORE_TYPE_DMESG, part, 125 ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part,
101 hsize + l1_cpy + l2_cpy, psinfo); 126 hsize + l1_cpy + l2_cpy, psinfo);
102 if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) 127 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
103 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, 128 pstore_new_entry = 1;
104 psinfo->buf, hsize + l1_cpy + l2_cpy,
105 CURRENT_TIME, psinfo);
106 l1 -= l1_cpy; 129 l1 -= l1_cpy;
107 l2 -= l2_cpy; 130 l2 -= l2_cpy;
108 total += l1_cpy + l2_cpy; 131 total += l1_cpy + l2_cpy;
109 part++; 132 part++;
110 } 133 }
111 mutex_unlock(&psinfo->buf_mutex); 134 if (in_nmi()) {
135 if (is_locked)
136 spin_unlock(&psinfo->buf_lock);
137 } else
138 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
112} 139}
113 140
114static struct kmsg_dumper pstore_dumper = { 141static struct kmsg_dumper pstore_dumper = {
@@ -148,19 +175,24 @@ int pstore_register(struct pstore_info *psi)
148 } 175 }
149 176
150 if (pstore_is_mounted()) 177 if (pstore_is_mounted())
151 pstore_get_records(); 178 pstore_get_records(0);
152 179
153 kmsg_dump_register(&pstore_dumper); 180 kmsg_dump_register(&pstore_dumper);
154 181
182 pstore_timer.expires = jiffies + PSTORE_INTERVAL;
183 add_timer(&pstore_timer);
184
155 return 0; 185 return 0;
156} 186}
157EXPORT_SYMBOL_GPL(pstore_register); 187EXPORT_SYMBOL_GPL(pstore_register);
158 188
159/* 189/*
160 * Read all the records from the persistent store. Create and 190 * Read all the records from the persistent store. Create
161 * file files in our filesystem. 191 * files in our filesystem. Don't warn about -EEXIST errors
192 * when we are re-scanning the backing store looking to add new
193 * error records.
162 */ 194 */
163void pstore_get_records(void) 195void pstore_get_records(int quiet)
164{ 196{
165 struct pstore_info *psi = psinfo; 197 struct pstore_info *psi = psinfo;
166 ssize_t size; 198 ssize_t size;
@@ -168,36 +200,55 @@ void pstore_get_records(void)
168 enum pstore_type_id type; 200 enum pstore_type_id type;
169 struct timespec time; 201 struct timespec time;
170 int failed = 0, rc; 202 int failed = 0, rc;
203 unsigned long flags;
171 204
172 if (!psi) 205 if (!psi)
173 return; 206 return;
174 207
175 mutex_lock(&psinfo->buf_mutex); 208 spin_lock_irqsave(&psinfo->buf_lock, flags);
176 rc = psi->open(psi); 209 rc = psi->open(psi);
177 if (rc) 210 if (rc)
178 goto out; 211 goto out;
179 212
180 while ((size = psi->read(&id, &type, &time, psi)) > 0) { 213 while ((size = psi->read(&id, &type, &time, psi)) > 0) {
181 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, 214 rc = pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
182 time, psi)) 215 time, psi);
216 if (rc && (rc != -EEXIST || !quiet))
183 failed++; 217 failed++;
184 } 218 }
185 psi->close(psi); 219 psi->close(psi);
186out: 220out:
187 mutex_unlock(&psinfo->buf_mutex); 221 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
188 222
189 if (failed) 223 if (failed)
190 printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n", 224 printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
191 failed, psi->name); 225 failed, psi->name);
192} 226}
193 227
228static void pstore_dowork(struct work_struct *work)
229{
230 pstore_get_records(1);
231}
232
233static void pstore_timefunc(unsigned long dummy)
234{
235 if (pstore_new_entry) {
236 pstore_new_entry = 0;
237 schedule_work(&pstore_work);
238 }
239
240 mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
241}
242
194/* 243/*
195 * Call platform driver to write a record to the 244 * Call platform driver to write a record to the
196 * persistent store. 245 * persistent store.
197 */ 246 */
198int pstore_write(enum pstore_type_id type, char *buf, size_t size) 247int pstore_write(enum pstore_type_id type, char *buf, size_t size)
199{ 248{
200 u64 id; 249 u64 id;
250 int ret;
251 unsigned long flags;
201 252
202 if (!psinfo) 253 if (!psinfo)
203 return -ENODEV; 254 return -ENODEV;
@@ -205,13 +256,13 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size)
205 if (size > psinfo->bufsize) 256 if (size > psinfo->bufsize)
206 return -EFBIG; 257 return -EFBIG;
207 258
208 mutex_lock(&psinfo->buf_mutex); 259 spin_lock_irqsave(&psinfo->buf_lock, flags);
209 memcpy(psinfo->buf, buf, size); 260 memcpy(psinfo->buf, buf, size);
210 id = psinfo->write(type, 0, size, psinfo); 261 ret = psinfo->write(type, &id, 0, size, psinfo);
211 if (pstore_is_mounted()) 262 if (ret == 0 && pstore_is_mounted())
212 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, 263 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
213 size, CURRENT_TIME, psinfo); 264 size, CURRENT_TIME, psinfo);
214 mutex_unlock(&psinfo->buf_mutex); 265 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
215 266
216 return 0; 267 return 0;
217} 268}
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2b0646613f5a..3bdd21418432 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -379,7 +379,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
379 inode->i_mode = le16_to_cpu(raw_inode->di_mode); 379 inode->i_mode = le16_to_cpu(raw_inode->di_mode);
380 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid); 380 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid);
381 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid); 381 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid);
382 inode->i_nlink = le16_to_cpu(raw_inode->di_nlink); 382 set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
383 inode->i_size = le32_to_cpu(raw_inode->di_size); 383 inode->i_size = le32_to_cpu(raw_inode->di_size);
384 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime); 384 inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime);
385 inode->i_mtime.tv_nsec = 0; 385 inode->i_mtime.tv_nsec = 0;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 10b6be3ca280..35f4b0ecdeb3 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
286 /* caller already holds s_umount */ 286 /* caller already holds s_umount */
287 if (sb->s_flags & MS_RDONLY) 287 if (sb->s_flags & MS_RDONLY)
288 return -EROFS; 288 return -EROFS;
289 writeback_inodes_sb(sb); 289 writeback_inodes_sb(sb, WB_REASON_SYNC);
290 return 0; 290 return 0;
291 default: 291 default:
292 return -EINVAL; 292 return -EINVAL;
@@ -363,12 +363,15 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
363 } 363 }
364 364
365 sb = quotactl_block(special); 365 sb = quotactl_block(special);
366 if (IS_ERR(sb)) 366 if (IS_ERR(sb)) {
367 return PTR_ERR(sb); 367 ret = PTR_ERR(sb);
368 goto out;
369 }
368 370
369 ret = do_quotactl(sb, type, cmds, id, addr, pathp); 371 ret = do_quotactl(sb, type, cmds, id, addr, pathp);
370 372
371 drop_super(sb); 373 drop_super(sb);
374out:
372 if (pathp && !IS_ERR(pathp)) 375 if (pathp && !IS_ERR(pathp))
373 path_put(pathp); 376 path_put(pathp);
374 return ret; 377 return ret;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index eacb166fb259..462ceb38fec6 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -23,7 +23,6 @@
23 * caches is sufficient. 23 * caches is sufficient.
24 */ 24 */
25 25
26#include <linux/module.h>
27#include <linux/fs.h> 26#include <linux/fs.h>
28#include <linux/pagemap.h> 27#include <linux/pagemap.h>
29#include <linux/highmem.h> 28#include <linux/highmem.h>
@@ -288,14 +287,7 @@ static int __init init_ramfs_fs(void)
288{ 287{
289 return register_filesystem(&ramfs_fs_type); 288 return register_filesystem(&ramfs_fs_type);
290} 289}
291
292static void __exit exit_ramfs_fs(void)
293{
294 unregister_filesystem(&ramfs_fs_type);
295}
296
297module_init(init_ramfs_fs) 290module_init(init_ramfs_fs)
298module_exit(exit_ramfs_fs)
299 291
300int __init init_rootfs(void) 292int __init init_rootfs(void)
301{ 293{
@@ -311,5 +303,3 @@ int __init init_rootfs(void)
311 303
312 return err; 304 return err;
313} 305}
314
315MODULE_LICENSE("GPL");
diff --git a/fs/read_write.c b/fs/read_write.c
index dfd125798791..5ad4248b0cd8 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -633,7 +633,8 @@ ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
633ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 633ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
634 unsigned long nr_segs, unsigned long fast_segs, 634 unsigned long nr_segs, unsigned long fast_segs,
635 struct iovec *fast_pointer, 635 struct iovec *fast_pointer,
636 struct iovec **ret_pointer) 636 struct iovec **ret_pointer,
637 int check_access)
637{ 638{
638 unsigned long seg; 639 unsigned long seg;
639 ssize_t ret; 640 ssize_t ret;
@@ -689,7 +690,8 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
689 ret = -EINVAL; 690 ret = -EINVAL;
690 goto out; 691 goto out;
691 } 692 }
692 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 693 if (check_access
694 && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
693 ret = -EFAULT; 695 ret = -EFAULT;
694 goto out; 696 goto out;
695 } 697 }
@@ -721,7 +723,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
721 } 723 }
722 724
723 ret = rw_copy_check_uvector(type, uvector, nr_segs, 725 ret = rw_copy_check_uvector(type, uvector, nr_segs,
724 ARRAY_SIZE(iovstack), iovstack, &iov); 726 ARRAY_SIZE(iovstack), iovstack, &iov, 1);
725 if (ret <= 0) 727 if (ret <= 0)
726 goto out; 728 goto out;
727 729
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9b0d4b78b4fb..950f13af0951 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1154,7 +1154,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1154 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1154 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1155 set_inode_sd_version(inode, STAT_DATA_V1); 1155 set_inode_sd_version(inode, STAT_DATA_V1);
1156 inode->i_mode = sd_v1_mode(sd); 1156 inode->i_mode = sd_v1_mode(sd);
1157 inode->i_nlink = sd_v1_nlink(sd); 1157 set_nlink(inode, sd_v1_nlink(sd));
1158 inode->i_uid = sd_v1_uid(sd); 1158 inode->i_uid = sd_v1_uid(sd);
1159 inode->i_gid = sd_v1_gid(sd); 1159 inode->i_gid = sd_v1_gid(sd);
1160 inode->i_size = sd_v1_size(sd); 1160 inode->i_size = sd_v1_size(sd);
@@ -1199,7 +1199,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1199 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); 1199 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1200 1200
1201 inode->i_mode = sd_v2_mode(sd); 1201 inode->i_mode = sd_v2_mode(sd);
1202 inode->i_nlink = sd_v2_nlink(sd); 1202 set_nlink(inode, sd_v2_nlink(sd));
1203 inode->i_uid = sd_v2_uid(sd); 1203 inode->i_uid = sd_v2_uid(sd);
1204 inode->i_size = sd_v2_size(sd); 1204 inode->i_size = sd_v2_size(sd);
1205 inode->i_gid = sd_v2_gid(sd); 1205 inode->i_gid = sd_v2_gid(sd);
@@ -1444,7 +1444,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
1444 /* a stale NFS handle can trigger this without it being an error */ 1444 /* a stale NFS handle can trigger this without it being an error */
1445 pathrelse(&path_to_sd); 1445 pathrelse(&path_to_sd);
1446 reiserfs_make_bad_inode(inode); 1446 reiserfs_make_bad_inode(inode);
1447 inode->i_nlink = 0; 1447 clear_nlink(inode);
1448 return; 1448 return;
1449 } 1449 }
1450 1450
@@ -1832,7 +1832,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1832#endif 1832#endif
1833 1833
1834 /* fill stat data */ 1834 /* fill stat data */
1835 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1); 1835 set_nlink(inode, (S_ISDIR(mode) ? 2 : 1));
1836 1836
1837 /* uid and gid must already be set by the caller for quota init */ 1837 /* uid and gid must already be set by the caller for quota init */
1838 1838
@@ -1987,7 +1987,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1987 make_bad_inode(inode); 1987 make_bad_inode(inode);
1988 1988
1989 out_inserted_sd: 1989 out_inserted_sd:
1990 inode->i_nlink = 0; 1990 clear_nlink(inode);
1991 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1991 th->t_trans_id = 0; /* so the caller can't use this handle later */
1992 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ 1992 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1993 iput(inode); 1993 iput(inode);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ef392324bbf1..80058e8ce361 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -19,7 +19,7 @@
19#include <linux/reiserfs_xattr.h> 19#include <linux/reiserfs_xattr.h>
20#include <linux/quotaops.h> 20#include <linux/quotaops.h>
21 21
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); 23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
24 24
25// directory item contains array of entry headers. This performs 25// directory item contains array of entry headers. This performs
@@ -622,7 +622,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
622 dentry->d_name.len, inode, 1 /*visible */ ); 622 dentry->d_name.len, inode, 1 /*visible */ );
623 if (retval) { 623 if (retval) {
624 int err; 624 int err;
625 inode->i_nlink--; 625 drop_nlink(inode);
626 reiserfs_update_sd(&th, inode); 626 reiserfs_update_sd(&th, inode);
627 err = journal_end(&th, dir->i_sb, jbegin_count); 627 err = journal_end(&th, dir->i_sb, jbegin_count);
628 if (err) 628 if (err)
@@ -702,7 +702,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
702 dentry->d_name.len, inode, 1 /*visible */ ); 702 dentry->d_name.len, inode, 1 /*visible */ );
703 if (retval) { 703 if (retval) {
704 int err; 704 int err;
705 inode->i_nlink--; 705 drop_nlink(inode);
706 reiserfs_update_sd(&th, inode); 706 reiserfs_update_sd(&th, inode);
707 err = journal_end(&th, dir->i_sb, jbegin_count); 707 err = journal_end(&th, dir->i_sb, jbegin_count);
708 if (err) 708 if (err)
@@ -787,7 +787,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
787 dentry->d_name.len, inode, 1 /*visible */ ); 787 dentry->d_name.len, inode, 1 /*visible */ );
788 if (retval) { 788 if (retval) {
789 int err; 789 int err;
790 inode->i_nlink = 0; 790 clear_nlink(inode);
791 DEC_DIR_INODE_NLINK(dir); 791 DEC_DIR_INODE_NLINK(dir);
792 reiserfs_update_sd(&th, inode); 792 reiserfs_update_sd(&th, inode);
793 err = journal_end(&th, dir->i_sb, jbegin_count); 793 err = journal_end(&th, dir->i_sb, jbegin_count);
@@ -964,7 +964,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
964 reiserfs_warning(inode->i_sb, "reiserfs-7042", 964 reiserfs_warning(inode->i_sb, "reiserfs-7042",
965 "deleting nonexistent file (%lu), %d", 965 "deleting nonexistent file (%lu), %d",
966 inode->i_ino, inode->i_nlink); 966 inode->i_ino, inode->i_nlink);
967 inode->i_nlink = 1; 967 set_nlink(inode, 1);
968 } 968 }
969 969
970 drop_nlink(inode); 970 drop_nlink(inode);
@@ -1086,7 +1086,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
1086 dentry->d_name.len, inode, 1 /*visible */ ); 1086 dentry->d_name.len, inode, 1 /*visible */ );
1087 if (retval) { 1087 if (retval) {
1088 int err; 1088 int err;
1089 inode->i_nlink--; 1089 drop_nlink(inode);
1090 reiserfs_update_sd(&th, inode); 1090 reiserfs_update_sd(&th, inode);
1091 err = journal_end(&th, parent_dir->i_sb, jbegin_count); 1091 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1092 if (err) 1092 if (err)
@@ -1129,7 +1129,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1129 1129
1130 retval = journal_begin(&th, dir->i_sb, jbegin_count); 1130 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1131 if (retval) { 1131 if (retval) {
1132 inode->i_nlink--; 1132 drop_nlink(inode);
1133 reiserfs_write_unlock(dir->i_sb); 1133 reiserfs_write_unlock(dir->i_sb);
1134 return retval; 1134 return retval;
1135 } 1135 }
@@ -1144,7 +1144,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1144 1144
1145 if (retval) { 1145 if (retval) {
1146 int err; 1146 int err;
1147 inode->i_nlink--; 1147 drop_nlink(inode);
1148 err = journal_end(&th, dir->i_sb, jbegin_count); 1148 err = journal_end(&th, dir->i_sb, jbegin_count);
1149 reiserfs_write_unlock(dir->i_sb); 1149 reiserfs_write_unlock(dir->i_sb);
1150 return err ? err : retval; 1150 return err ? err : retval;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2305e3121cb1..8b4089f30408 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -337,7 +337,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
337 inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK; 337 inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK;
338 inode->i_dataoffset = pos + inode->i_metasize; 338 inode->i_dataoffset = pos + inode->i_metasize;
339 339
340 i->i_nlink = 1; /* Hard to decide.. */ 340 set_nlink(i, 1); /* Hard to decide.. */
341 i->i_size = be32_to_cpu(ri.size); 341 i->i_size = be32_to_cpu(ri.size);
342 i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0; 342 i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
343 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0; 343 i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 048b59d5b2f0..c70111ebefd4 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -78,6 +78,28 @@ config SQUASHFS_XZ
78 78
79 If unsure, say N. 79 If unsure, say N.
80 80
81config SQUASHFS_4K_DEVBLK_SIZE
82 bool "Use 4K device block size?"
83 depends on SQUASHFS
84 help
85 By default Squashfs sets the dev block size (sb_min_blocksize)
86 to 1K or the smallest block size supported by the block device
87 (if larger). This, because blocks are packed together and
88 unaligned in Squashfs, should reduce latency.
89
90 This, however, gives poor performance on MTD NAND devices where
91 the optimal I/O size is 4K (even though the devices can support
92 smaller block sizes).
93
94 Using a 4K device block size may also improve overall I/O
95 performance for some file access patterns (e.g. sequential
96 accesses of files in filesystem order) on all media.
97
98 Setting this option will force Squashfs to use a 4K device block
99 size by default.
100
101 If unsure, say N.
102
81config SQUASHFS_EMBEDDED 103config SQUASHFS_EMBEDDED
82 bool "Additional option for memory-constrained systems" 104 bool "Additional option for memory-constrained systems"
83 depends on SQUASHFS 105 depends on SQUASHFS
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 04bebcaa2373..fd7b3b3bda13 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -159,7 +159,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
159 frag_offset = 0; 159 frag_offset = 0;
160 } 160 }
161 161
162 inode->i_nlink = 1; 162 set_nlink(inode, 1);
163 inode->i_size = le32_to_cpu(sqsh_ino->file_size); 163 inode->i_size = le32_to_cpu(sqsh_ino->file_size);
164 inode->i_fop = &generic_ro_fops; 164 inode->i_fop = &generic_ro_fops;
165 inode->i_mode |= S_IFREG; 165 inode->i_mode |= S_IFREG;
@@ -203,7 +203,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
203 } 203 }
204 204
205 xattr_id = le32_to_cpu(sqsh_ino->xattr); 205 xattr_id = le32_to_cpu(sqsh_ino->xattr);
206 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 206 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
207 inode->i_size = le64_to_cpu(sqsh_ino->file_size); 207 inode->i_size = le64_to_cpu(sqsh_ino->file_size);
208 inode->i_op = &squashfs_inode_ops; 208 inode->i_op = &squashfs_inode_ops;
209 inode->i_fop = &generic_ro_fops; 209 inode->i_fop = &generic_ro_fops;
@@ -232,7 +232,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
232 if (err < 0) 232 if (err < 0)
233 goto failed_read; 233 goto failed_read;
234 234
235 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 235 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
236 inode->i_size = le16_to_cpu(sqsh_ino->file_size); 236 inode->i_size = le16_to_cpu(sqsh_ino->file_size);
237 inode->i_op = &squashfs_dir_inode_ops; 237 inode->i_op = &squashfs_dir_inode_ops;
238 inode->i_fop = &squashfs_dir_ops; 238 inode->i_fop = &squashfs_dir_ops;
@@ -257,7 +257,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
257 goto failed_read; 257 goto failed_read;
258 258
259 xattr_id = le32_to_cpu(sqsh_ino->xattr); 259 xattr_id = le32_to_cpu(sqsh_ino->xattr);
260 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 260 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
261 inode->i_size = le32_to_cpu(sqsh_ino->file_size); 261 inode->i_size = le32_to_cpu(sqsh_ino->file_size);
262 inode->i_op = &squashfs_dir_inode_ops; 262 inode->i_op = &squashfs_dir_inode_ops;
263 inode->i_fop = &squashfs_dir_ops; 263 inode->i_fop = &squashfs_dir_ops;
@@ -284,7 +284,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
284 if (err < 0) 284 if (err < 0)
285 goto failed_read; 285 goto failed_read;
286 286
287 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 287 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
288 inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); 288 inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
289 inode->i_op = &squashfs_symlink_inode_ops; 289 inode->i_op = &squashfs_symlink_inode_ops;
290 inode->i_data.a_ops = &squashfs_symlink_aops; 290 inode->i_data.a_ops = &squashfs_symlink_aops;
@@ -325,7 +325,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
325 inode->i_mode |= S_IFCHR; 325 inode->i_mode |= S_IFCHR;
326 else 326 else
327 inode->i_mode |= S_IFBLK; 327 inode->i_mode |= S_IFBLK;
328 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 328 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
329 rdev = le32_to_cpu(sqsh_ino->rdev); 329 rdev = le32_to_cpu(sqsh_ino->rdev);
330 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 330 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
331 331
@@ -349,7 +349,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
349 inode->i_mode |= S_IFBLK; 349 inode->i_mode |= S_IFBLK;
350 xattr_id = le32_to_cpu(sqsh_ino->xattr); 350 xattr_id = le32_to_cpu(sqsh_ino->xattr);
351 inode->i_op = &squashfs_inode_ops; 351 inode->i_op = &squashfs_inode_ops;
352 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 352 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
353 rdev = le32_to_cpu(sqsh_ino->rdev); 353 rdev = le32_to_cpu(sqsh_ino->rdev);
354 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 354 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
355 355
@@ -370,7 +370,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
370 inode->i_mode |= S_IFIFO; 370 inode->i_mode |= S_IFIFO;
371 else 371 else
372 inode->i_mode |= S_IFSOCK; 372 inode->i_mode |= S_IFSOCK;
373 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 373 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
374 init_special_inode(inode, inode->i_mode, 0); 374 init_special_inode(inode, inode->i_mode, 0);
375 break; 375 break;
376 } 376 }
@@ -389,7 +389,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
389 inode->i_mode |= S_IFSOCK; 389 inode->i_mode |= S_IFSOCK;
390 xattr_id = le32_to_cpu(sqsh_ino->xattr); 390 xattr_id = le32_to_cpu(sqsh_ino->xattr);
391 inode->i_op = &squashfs_inode_ops; 391 inode->i_op = &squashfs_inode_ops;
392 inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); 392 set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
393 init_special_inode(inode, inode->i_mode, 0); 393 init_special_inode(inode, inode->i_mode, 0);
394 break; 394 break;
395 } 395 }
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index b4a4e539a08c..e8e14645de9a 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -36,6 +36,13 @@
36#define SQUASHFS_FILE_SIZE 131072 36#define SQUASHFS_FILE_SIZE 131072
37#define SQUASHFS_FILE_LOG 17 37#define SQUASHFS_FILE_LOG 17
38 38
39/* default size of block device I/O */
40#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
41#define SQUASHFS_DEVBLK_SIZE 4096
42#else
43#define SQUASHFS_DEVBLK_SIZE 1024
44#endif
45
39#define SQUASHFS_FILE_MAX_SIZE 1048576 46#define SQUASHFS_FILE_MAX_SIZE 1048576
40#define SQUASHFS_FILE_MAX_LOG 20 47#define SQUASHFS_FILE_MAX_LOG 20
41 48
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 7438850c62d0..2da1715452ac 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -95,7 +95,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
95 } 95 }
96 msblk = sb->s_fs_info; 96 msblk = sb->s_fs_info;
97 97
98 msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE); 98 msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
99 msblk->devblksize_log2 = ffz(~msblk->devblksize); 99 msblk->devblksize_log2 = ffz(~msblk->devblksize);
100 100
101 mutex_init(&msblk->read_data_mutex); 101 mutex_init(&msblk->read_data_mutex);
diff --git a/fs/stack.c b/fs/stack.c
index b4f2ab48a61f..9c11519245a6 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -71,6 +71,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
71 dest->i_ctime = src->i_ctime; 71 dest->i_ctime = src->i_ctime;
72 dest->i_blkbits = src->i_blkbits; 72 dest->i_blkbits = src->i_blkbits;
73 dest->i_flags = src->i_flags; 73 dest->i_flags = src->i_flags;
74 dest->i_nlink = src->i_nlink; 74 set_nlink(dest, src->i_nlink);
75} 75}
76EXPORT_SYMBOL_GPL(fsstack_copy_attr_all); 76EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 78a3aa83c7ea..8806b8997d2e 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -294,15 +294,16 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
294{ 294{
295 struct path path; 295 struct path path;
296 int error; 296 int error;
297 int empty = 0;
297 298
298 if (bufsiz <= 0) 299 if (bufsiz <= 0)
299 return -EINVAL; 300 return -EINVAL;
300 301
301 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path); 302 error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
302 if (!error) { 303 if (!error) {
303 struct inode *inode = path.dentry->d_inode; 304 struct inode *inode = path.dentry->d_inode;
304 305
305 error = -EINVAL; 306 error = empty ? -ENOENT : -EINVAL;
306 if (inode->i_op->readlink) { 307 if (inode->i_op->readlink) {
307 error = security_inode_readlink(path.dentry); 308 error = security_inode_readlink(path.dentry);
308 if (!error) { 309 if (!error) {
diff --git a/fs/statfs.c b/fs/statfs.c
index 8244924dec55..9cf04a118965 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -76,7 +76,7 @@ EXPORT_SYMBOL(vfs_statfs);
76int user_statfs(const char __user *pathname, struct kstatfs *st) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct path path; 78 struct path path;
79 int error = user_path(pathname, &path); 79 int error = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
80 if (!error) { 80 if (!error) {
81 error = vfs_statfs(&path, st); 81 error = vfs_statfs(&path, st);
82 path_put(&path); 82 path_put(&path);
diff --git a/fs/super.c b/fs/super.c
index 3f56a269a4f4..afd0f1ad45e0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -61,7 +61,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
61 return -1; 61 return -1;
62 62
63 if (!grab_super_passive(sb)) 63 if (!grab_super_passive(sb))
64 return -1; 64 return !sc->nr_to_scan ? 0 : -1;
65 65
66 if (sb->s_op && sb->s_op->nr_cached_objects) 66 if (sb->s_op && sb->s_op->nr_cached_objects)
67 fs_objects = sb->s_op->nr_cached_objects(sb); 67 fs_objects = sb->s_op->nr_cached_objects(sb);
@@ -727,8 +727,13 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
727 727
728 if (sb->s_op->remount_fs) { 728 if (sb->s_op->remount_fs) {
729 retval = sb->s_op->remount_fs(sb, &flags, data); 729 retval = sb->s_op->remount_fs(sb, &flags, data);
730 if (retval) 730 if (retval) {
731 return retval; 731 if (!force)
732 return retval;
733 /* If forced remount, go ahead despite any errors */
734 WARN(1, "forced remount of a %s fs returned %i\n",
735 sb->s_type->name, retval);
736 }
732 } 737 }
733 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 738 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
734 739
diff --git a/fs/sync.c b/fs/sync.c
index c98a7477edfd..101b8ef901d7 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -43,7 +43,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
43 if (wait) 43 if (wait)
44 sync_inodes_sb(sb); 44 sync_inodes_sb(sb);
45 else 45 else
46 writeback_inodes_sb(sb); 46 writeback_inodes_sb(sb, WB_REASON_SYNC);
47 47
48 if (sb->s_op->sync_fs) 48 if (sb->s_op->sync_fs)
49 sb->s_op->sync_fs(sb, wait); 49 sb->s_op->sync_fs(sb, wait);
@@ -98,7 +98,7 @@ static void sync_filesystems(int wait)
98 */ 98 */
99SYSCALL_DEFINE0(sync) 99SYSCALL_DEFINE0(sync)
100{ 100{
101 wakeup_flusher_threads(0); 101 wakeup_flusher_threads(0, WB_REASON_SYNC);
102 sync_filesystems(0); 102 sync_filesystems(0);
103 sync_filesystems(1); 103 sync_filesystems(1);
104 if (unlikely(laptop_mode)) 104 if (unlikely(laptop_mode))
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 48ffbdf0d017..7fdf6a7b7436 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -865,15 +865,13 @@ int sysfs_rename(struct sysfs_dirent *sd,
865 sd->s_name = new_name; 865 sd->s_name = new_name;
866 } 866 }
867 867
868 /* Remove from old parent's list and insert into new parent's list. */ 868 /* Move to the appropriate place in the appropriate directories rbtree. */
869 if (sd->s_parent != new_parent_sd) { 869 sysfs_unlink_sibling(sd);
870 sysfs_unlink_sibling(sd); 870 sysfs_get(new_parent_sd);
871 sysfs_get(new_parent_sd); 871 sysfs_put(sd->s_parent);
872 sysfs_put(sd->s_parent);
873 sd->s_parent = new_parent_sd;
874 sysfs_link_sibling(sd);
875 }
876 sd->s_ns = new_ns; 872 sd->s_ns = new_ns;
873 sd->s_parent = new_parent_sd;
874 sysfs_link_sibling(sd);
877 875
878 error = 0; 876 error = 0;
879 out: 877 out:
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e23f28894a3a..c81b22f3ace1 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -218,7 +218,7 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
218 } 218 }
219 219
220 if (sysfs_type(sd) == SYSFS_DIR) 220 if (sysfs_type(sd) == SYSFS_DIR)
221 inode->i_nlink = sd->s_dir.subdirs + 2; 221 set_nlink(inode, sd->s_dir.subdirs + 2);
222} 222}
223 223
224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 224int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0630eb969a28..25ffb3e9a3f8 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -219,7 +219,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
219 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode); 219 inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
220 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid); 220 inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
221 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid); 221 inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
222 inode->i_nlink = fs16_to_cpu(sbi, raw_inode->i_nlink); 222 set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
223 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size); 223 inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
224 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime); 224 inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
225 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime); 225 inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 315de66e52b2..bc4f94b28706 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -63,7 +63,7 @@
63static void shrink_liability(struct ubifs_info *c, int nr_to_write) 63static void shrink_liability(struct ubifs_info *c, int nr_to_write)
64{ 64{
65 down_read(&c->vfs_sb->s_umount); 65 down_read(&c->vfs_sb->s_umount);
66 writeback_inodes_sb(c->vfs_sb); 66 writeback_inodes_sb(c->vfs_sb, WB_REASON_FS_FREE_SPACE);
67 up_read(&c->vfs_sb->s_umount); 67 up_read(&c->vfs_sb->s_umount);
68} 68}
69 69
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b28121278d46..20403dc5d437 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -129,7 +129,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
129 goto out_ino; 129 goto out_ino;
130 130
131 inode->i_flags |= (S_NOCMTIME | S_NOATIME); 131 inode->i_flags |= (S_NOCMTIME | S_NOATIME);
132 inode->i_nlink = le32_to_cpu(ino->nlink); 132 set_nlink(inode, le32_to_cpu(ino->nlink));
133 inode->i_uid = le32_to_cpu(ino->uid); 133 inode->i_uid = le32_to_cpu(ino->uid);
134 inode->i_gid = le32_to_cpu(ino->gid); 134 inode->i_gid = le32_to_cpu(ino->gid);
135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec); 135 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 16f19f55e63f..bf18f7a04544 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -558,10 +558,10 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
558 } 558 }
559 559
560 ubifs_assert(inode->i_nlink == 1); 560 ubifs_assert(inode->i_nlink == 1);
561 inode->i_nlink = 0; 561 clear_nlink(inode);
562 err = remove_xattr(c, host, inode, &nm); 562 err = remove_xattr(c, host, inode, &nm);
563 if (err) 563 if (err)
564 inode->i_nlink = 1; 564 set_nlink(inode, 1);
565 565
566 /* If @i_nlink is 0, 'iput()' will delete the inode */ 566 /* If @i_nlink is 0, 'iput()' will delete the inode */
567 iput(inode); 567 iput(inode);
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 95518a9f589e..987585bb0a1d 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -59,8 +59,8 @@ static int __load_block_bitmap(struct super_block *sb,
59 int nr_groups = bitmap->s_nr_groups; 59 int nr_groups = bitmap->s_nr_groups;
60 60
61 if (block_group >= nr_groups) { 61 if (block_group >= nr_groups) {
62 udf_debug("block_group (%d) > nr_groups (%d)\n", block_group, 62 udf_debug("block_group (%d) > nr_groups (%d)\n",
63 nr_groups); 63 block_group, nr_groups);
64 } 64 }
65 65
66 if (bitmap->s_block_bitmap[block_group]) { 66 if (bitmap->s_block_bitmap[block_group]) {
@@ -126,8 +126,9 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
126 if (bloc->logicalBlockNum + count < count || 126 if (bloc->logicalBlockNum + count < count ||
127 (bloc->logicalBlockNum + count) > partmap->s_partition_len) { 127 (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
128 udf_debug("%d < %d || %d + %d > %d\n", 128 udf_debug("%d < %d || %d + %d > %d\n",
129 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, 129 bloc->logicalBlockNum, 0,
130 count, partmap->s_partition_len); 130 bloc->logicalBlockNum, count,
131 partmap->s_partition_len);
131 goto error_return; 132 goto error_return;
132 } 133 }
133 134
@@ -155,7 +156,7 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
155 if (udf_set_bit(bit + i, bh->b_data)) { 156 if (udf_set_bit(bit + i, bh->b_data)) {
156 udf_debug("bit %ld already set\n", bit + i); 157 udf_debug("bit %ld already set\n", bit + i);
157 udf_debug("byte=%2x\n", 158 udf_debug("byte=%2x\n",
158 ((char *)bh->b_data)[(bit + i) >> 3]); 159 ((char *)bh->b_data)[(bit + i) >> 3]);
159 } 160 }
160 } 161 }
161 udf_add_free_space(sb, sbi->s_partition, count); 162 udf_add_free_space(sb, sbi->s_partition, count);
@@ -369,7 +370,8 @@ static void udf_table_free_blocks(struct super_block *sb,
369 if (bloc->logicalBlockNum + count < count || 370 if (bloc->logicalBlockNum + count < count ||
370 (bloc->logicalBlockNum + count) > partmap->s_partition_len) { 371 (bloc->logicalBlockNum + count) > partmap->s_partition_len) {
371 udf_debug("%d < %d || %d + %d > %d\n", 372 udf_debug("%d < %d || %d + %d > %d\n",
372 bloc->logicalBlockNum, 0, bloc->logicalBlockNum, count, 373 bloc->logicalBlockNum, 0,
374 bloc->logicalBlockNum, count,
373 partmap->s_partition_len); 375 partmap->s_partition_len);
374 goto error_return; 376 goto error_return;
375 } 377 }
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 2ffdb6733af1..3e44f575fb9c 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -162,8 +162,8 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
162 int padlen; 162 int padlen;
163 163
164 if ((!buffer) || (!offset)) { 164 if ((!buffer) || (!offset)) {
165 udf_debug("invalidparms\n, buffer=%p, offset=%p\n", buffer, 165 udf_debug("invalidparms, buffer=%p, offset=%p\n",
166 offset); 166 buffer, offset);
167 return NULL; 167 return NULL;
168 } 168 }
169 169
@@ -201,7 +201,7 @@ struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offs
201 struct short_ad *sa; 201 struct short_ad *sa;
202 202
203 if ((!ptr) || (!offset)) { 203 if ((!ptr) || (!offset)) {
204 printk(KERN_ERR "udf: udf_get_fileshortad() invalidparms\n"); 204 pr_err("%s: invalidparms\n", __func__);
205 return NULL; 205 return NULL;
206 } 206 }
207 207
@@ -223,7 +223,7 @@ struct long_ad *udf_get_filelongad(uint8_t *ptr, int maxoffset, uint32_t *offset
223 struct long_ad *la; 223 struct long_ad *la;
224 224
225 if ((!ptr) || (!offset)) { 225 if ((!ptr) || (!offset)) {
226 printk(KERN_ERR "udf: udf_get_filelongad() invalidparms\n"); 226 pr_err("%s: invalidparms\n", __func__);
227 return NULL; 227 return NULL;
228 } 228 }
229 229
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d1358ed80c1..4fd1d809738c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -37,6 +37,7 @@
37#include <linux/writeback.h> 37#include <linux/writeback.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/crc-itu-t.h> 39#include <linux/crc-itu-t.h>
40#include <linux/mpage.h>
40 41
41#include "udf_i.h" 42#include "udf_i.h"
42#include "udf_sb.h" 43#include "udf_sb.h"
@@ -83,12 +84,10 @@ void udf_evict_inode(struct inode *inode)
83 end_writeback(inode); 84 end_writeback(inode);
84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 85 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
85 inode->i_size != iinfo->i_lenExtents) { 86 inode->i_size != iinfo->i_lenExtents) {
86 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " 87 udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
87 "inode size %llu different from extent length %llu. " 88 inode->i_ino, inode->i_mode,
88 "Filesystem need not be standards compliant.\n", 89 (unsigned long long)inode->i_size,
89 inode->i_sb->s_id, inode->i_ino, inode->i_mode, 90 (unsigned long long)iinfo->i_lenExtents);
90 (unsigned long long)inode->i_size,
91 (unsigned long long)iinfo->i_lenExtents);
92 } 91 }
93 kfree(iinfo->i_ext.i_data); 92 kfree(iinfo->i_ext.i_data);
94 iinfo->i_ext.i_data = NULL; 93 iinfo->i_ext.i_data = NULL;
@@ -104,7 +103,13 @@ static int udf_writepage(struct page *page, struct writeback_control *wbc)
104 103
105static int udf_readpage(struct file *file, struct page *page) 104static int udf_readpage(struct file *file, struct page *page)
106{ 105{
107 return block_read_full_page(page, udf_get_block); 106 return mpage_readpage(page, udf_get_block);
107}
108
109static int udf_readpages(struct file *file, struct address_space *mapping,
110 struct list_head *pages, unsigned nr_pages)
111{
112 return mpage_readpages(mapping, pages, nr_pages, udf_get_block);
108} 113}
109 114
110static int udf_write_begin(struct file *file, struct address_space *mapping, 115static int udf_write_begin(struct file *file, struct address_space *mapping,
@@ -139,6 +144,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
139 144
140const struct address_space_operations udf_aops = { 145const struct address_space_operations udf_aops = {
141 .readpage = udf_readpage, 146 .readpage = udf_readpage,
147 .readpages = udf_readpages,
142 .writepage = udf_writepage, 148 .writepage = udf_writepage,
143 .write_begin = udf_write_begin, 149 .write_begin = udf_write_begin,
144 .write_end = generic_write_end, 150 .write_end = generic_write_end,
@@ -1169,16 +1175,15 @@ static void __udf_read_inode(struct inode *inode)
1169 */ 1175 */
1170 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); 1176 bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident);
1171 if (!bh) { 1177 if (!bh) {
1172 printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", 1178 udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino);
1173 inode->i_ino);
1174 make_bad_inode(inode); 1179 make_bad_inode(inode);
1175 return; 1180 return;
1176 } 1181 }
1177 1182
1178 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && 1183 if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE &&
1179 ident != TAG_IDENT_USE) { 1184 ident != TAG_IDENT_USE) {
1180 printk(KERN_ERR "udf: udf_read_inode(ino %ld) " 1185 udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n",
1181 "failed ident=%d\n", inode->i_ino, ident); 1186 inode->i_ino, ident);
1182 brelse(bh); 1187 brelse(bh);
1183 make_bad_inode(inode); 1188 make_bad_inode(inode);
1184 return; 1189 return;
@@ -1218,8 +1223,8 @@ static void __udf_read_inode(struct inode *inode)
1218 } 1223 }
1219 brelse(ibh); 1224 brelse(ibh);
1220 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { 1225 } else if (fe->icbTag.strategyType != cpu_to_le16(4)) {
1221 printk(KERN_ERR "udf: unsupported strategy type: %d\n", 1226 udf_err(inode->i_sb, "unsupported strategy type: %d\n",
1222 le16_to_cpu(fe->icbTag.strategyType)); 1227 le16_to_cpu(fe->icbTag.strategyType));
1223 brelse(bh); 1228 brelse(bh);
1224 make_bad_inode(inode); 1229 make_bad_inode(inode);
1225 return; 1230 return;
@@ -1236,6 +1241,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1236 int offset; 1241 int offset;
1237 struct udf_sb_info *sbi = UDF_SB(inode->i_sb); 1242 struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
1238 struct udf_inode_info *iinfo = UDF_I(inode); 1243 struct udf_inode_info *iinfo = UDF_I(inode);
1244 unsigned int link_count;
1239 1245
1240 fe = (struct fileEntry *)bh->b_data; 1246 fe = (struct fileEntry *)bh->b_data;
1241 efe = (struct extendedFileEntry *)bh->b_data; 1247 efe = (struct extendedFileEntry *)bh->b_data;
@@ -1318,9 +1324,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1318 inode->i_mode &= ~sbi->s_umask; 1324 inode->i_mode &= ~sbi->s_umask;
1319 read_unlock(&sbi->s_cred_lock); 1325 read_unlock(&sbi->s_cred_lock);
1320 1326
1321 inode->i_nlink = le16_to_cpu(fe->fileLinkCount); 1327 link_count = le16_to_cpu(fe->fileLinkCount);
1322 if (!inode->i_nlink) 1328 if (!link_count)
1323 inode->i_nlink = 1; 1329 link_count = 1;
1330 set_nlink(inode, link_count);
1324 1331
1325 inode->i_size = le64_to_cpu(fe->informationLength); 1332 inode->i_size = le64_to_cpu(fe->informationLength);
1326 iinfo->i_lenExtents = inode->i_size; 1333 iinfo->i_lenExtents = inode->i_size;
@@ -1413,9 +1420,8 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1413 udf_debug("METADATA BITMAP FILE-----\n"); 1420 udf_debug("METADATA BITMAP FILE-----\n");
1414 break; 1421 break;
1415 default: 1422 default:
1416 printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown " 1423 udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n",
1417 "file type=%d\n", inode->i_ino, 1424 inode->i_ino, fe->icbTag.fileType);
1418 fe->icbTag.fileType);
1419 make_bad_inode(inode); 1425 make_bad_inode(inode);
1420 return; 1426 return;
1421 } 1427 }
@@ -1438,8 +1444,8 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
1438 iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL); 1444 iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
1439 1445
1440 if (!iinfo->i_ext.i_data) { 1446 if (!iinfo->i_ext.i_data) {
1441 printk(KERN_ERR "udf:udf_alloc_i_data (ino %ld) " 1447 udf_err(inode->i_sb, "(ino %ld) no free memory\n",
1442 "no free memory\n", inode->i_ino); 1448 inode->i_ino);
1443 return -ENOMEM; 1449 return -ENOMEM;
1444 } 1450 }
1445 1451
@@ -1689,9 +1695,8 @@ out:
1689 if (do_sync) { 1695 if (do_sync) {
1690 sync_dirty_buffer(bh); 1696 sync_dirty_buffer(bh);
1691 if (buffer_write_io_error(bh)) { 1697 if (buffer_write_io_error(bh)) {
1692 printk(KERN_WARNING "IO error syncing udf inode " 1698 udf_warn(inode->i_sb, "IO error syncing udf inode [%08lx]\n",
1693 "[%s:%08lx]\n", inode->i_sb->s_id, 1699 inode->i_ino);
1694 inode->i_ino);
1695 err = -EIO; 1700 err = -EIO;
1696 } 1701 }
1697 } 1702 }
@@ -1982,8 +1987,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
1982 *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; 1987 *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK;
1983 break; 1988 break;
1984 default: 1989 default:
1985 udf_debug("alloc_type = %d unsupported\n", 1990 udf_debug("alloc_type = %d unsupported\n", iinfo->i_alloc_type);
1986 iinfo->i_alloc_type);
1987 return -1; 1991 return -1;
1988 } 1992 }
1989 1993
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 43e24a3b8e10..6583fe9b0645 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -38,7 +38,7 @@ unsigned int udf_get_last_session(struct super_block *sb)
38 38
39 if (i == 0) { 39 if (i == 0) {
40 udf_debug("XA disk: %s, vol_desc_start=%d\n", 40 udf_debug("XA disk: %s, vol_desc_start=%d\n",
41 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); 41 ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba);
42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ 42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
43 vol_desc_start = ms_info.addr.lba; 43 vol_desc_start = ms_info.addr.lba;
44 } else { 44 } else {
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 9215700c00a4..c175b4dabc14 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -204,6 +204,7 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
204{ 204{
205 struct tag *tag_p; 205 struct tag *tag_p;
206 struct buffer_head *bh = NULL; 206 struct buffer_head *bh = NULL;
207 u8 checksum;
207 208
208 /* Read the block */ 209 /* Read the block */
209 if (block == 0xFFFFFFFF) 210 if (block == 0xFFFFFFFF)
@@ -211,8 +212,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
211 212
212 bh = udf_tread(sb, block); 213 bh = udf_tread(sb, block);
213 if (!bh) { 214 if (!bh) {
214 udf_debug("block=%d, location=%d: read failed\n", 215 udf_err(sb, "read failed, block=%u, location=%d\n",
215 block, location); 216 block, location);
216 return NULL; 217 return NULL;
217 } 218 }
218 219
@@ -227,16 +228,18 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
227 } 228 }
228 229
229 /* Verify the tag checksum */ 230 /* Verify the tag checksum */
230 if (udf_tag_checksum(tag_p) != tag_p->tagChecksum) { 231 checksum = udf_tag_checksum(tag_p);
231 printk(KERN_ERR "udf: tag checksum failed block %d\n", block); 232 if (checksum != tag_p->tagChecksum) {
233 udf_err(sb, "tag checksum failed, block %u: 0x%02x != 0x%02x\n",
234 block, checksum, tag_p->tagChecksum);
232 goto error_out; 235 goto error_out;
233 } 236 }
234 237
235 /* Verify the tag version */ 238 /* Verify the tag version */
236 if (tag_p->descVersion != cpu_to_le16(0x0002U) && 239 if (tag_p->descVersion != cpu_to_le16(0x0002U) &&
237 tag_p->descVersion != cpu_to_le16(0x0003U)) { 240 tag_p->descVersion != cpu_to_le16(0x0003U)) {
238 udf_debug("tag version 0x%04x != 0x0002 || 0x0003 block %d\n", 241 udf_err(sb, "tag version 0x%04x != 0x0002 || 0x0003, block %u\n",
239 le16_to_cpu(tag_p->descVersion), block); 242 le16_to_cpu(tag_p->descVersion), block);
240 goto error_out; 243 goto error_out;
241 } 244 }
242 245
@@ -248,8 +251,8 @@ struct buffer_head *udf_read_tagged(struct super_block *sb, uint32_t block,
248 return bh; 251 return bh;
249 252
250 udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block, 253 udf_debug("Crc failure block %d: crc = %d, crclen = %d\n", block,
251 le16_to_cpu(tag_p->descCRC), le16_to_cpu(tag_p->descCRCLength)); 254 le16_to_cpu(tag_p->descCRC),
252 255 le16_to_cpu(tag_p->descCRCLength));
253error_out: 256error_out:
254 brelse(bh); 257 brelse(bh);
255 return NULL; 258 return NULL;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index f1dce848ef96..4639e137222f 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -577,8 +577,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
577 577
578 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 578 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
579 if (!fi) { 579 if (!fi) {
580 inode->i_nlink--; 580 inode_dec_link_count(inode);
581 mark_inode_dirty(inode);
582 iput(inode); 581 iput(inode);
583 return err; 582 return err;
584 } 583 }
@@ -618,8 +617,7 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
618 init_special_inode(inode, mode, rdev); 617 init_special_inode(inode, mode, rdev);
619 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 618 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
620 if (!fi) { 619 if (!fi) {
621 inode->i_nlink--; 620 inode_dec_link_count(inode);
622 mark_inode_dirty(inode);
623 iput(inode); 621 iput(inode);
624 return err; 622 return err;
625 } 623 }
@@ -665,12 +663,11 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
665 inode->i_fop = &udf_dir_operations; 663 inode->i_fop = &udf_dir_operations;
666 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); 664 fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err);
667 if (!fi) { 665 if (!fi) {
668 inode->i_nlink--; 666 inode_dec_link_count(inode);
669 mark_inode_dirty(inode);
670 iput(inode); 667 iput(inode);
671 goto out; 668 goto out;
672 } 669 }
673 inode->i_nlink = 2; 670 set_nlink(inode, 2);
674 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); 671 cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize);
675 cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location); 672 cfi.icb.extLocation = cpu_to_lelb(dinfo->i_location);
676 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = 673 *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
@@ -683,7 +680,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
683 680
684 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 681 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
685 if (!fi) { 682 if (!fi) {
686 inode->i_nlink = 0; 683 clear_nlink(inode);
687 mark_inode_dirty(inode); 684 mark_inode_dirty(inode);
688 iput(inode); 685 iput(inode);
689 goto out; 686 goto out;
@@ -799,9 +796,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
799 if (retval) 796 if (retval)
800 goto end_rmdir; 797 goto end_rmdir;
801 if (inode->i_nlink != 2) 798 if (inode->i_nlink != 2)
802 udf_warning(inode->i_sb, "udf_rmdir", 799 udf_warn(inode->i_sb, "empty directory has nlink != 2 (%d)\n",
803 "empty directory has nlink != 2 (%d)", 800 inode->i_nlink);
804 inode->i_nlink);
805 clear_nlink(inode); 801 clear_nlink(inode);
806 inode->i_size = 0; 802 inode->i_size = 0;
807 inode_dec_link_count(dir); 803 inode_dec_link_count(dir);
@@ -840,7 +836,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
840 if (!inode->i_nlink) { 836 if (!inode->i_nlink) {
841 udf_debug("Deleting nonexistent file (%lu), %d\n", 837 udf_debug("Deleting nonexistent file (%lu), %d\n",
842 inode->i_ino, inode->i_nlink); 838 inode->i_ino, inode->i_nlink);
843 inode->i_nlink = 1; 839 set_nlink(inode, 1);
844 } 840 }
845 retval = udf_delete_entry(dir, fi, &fibh, &cfi); 841 retval = udf_delete_entry(dir, fi, &fibh, &cfi);
846 if (retval) 842 if (retval)
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index a71090ea0e07..d6caf01a2097 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -33,8 +33,8 @@ uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
33 struct udf_sb_info *sbi = UDF_SB(sb); 33 struct udf_sb_info *sbi = UDF_SB(sb);
34 struct udf_part_map *map; 34 struct udf_part_map *map;
35 if (partition >= sbi->s_partitions) { 35 if (partition >= sbi->s_partitions) {
36 udf_debug("block=%d, partition=%d, offset=%d: " 36 udf_debug("block=%d, partition=%d, offset=%d: invalid partition\n",
37 "invalid partition\n", block, partition, offset); 37 block, partition, offset);
38 return 0xFFFFFFFF; 38 return 0xFFFFFFFF;
39 } 39 }
40 map = &sbi->s_partmaps[partition]; 40 map = &sbi->s_partmaps[partition];
@@ -60,8 +60,8 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
60 vdata = &map->s_type_specific.s_virtual; 60 vdata = &map->s_type_specific.s_virtual;
61 61
62 if (block > vdata->s_num_entries) { 62 if (block > vdata->s_num_entries) {
63 udf_debug("Trying to access block beyond end of VAT " 63 udf_debug("Trying to access block beyond end of VAT (%d max %d)\n",
64 "(%d max %d)\n", block, vdata->s_num_entries); 64 block, vdata->s_num_entries);
65 return 0xFFFFFFFF; 65 return 0xFFFFFFFF;
66 } 66 }
67 67
@@ -321,9 +321,14 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
321 /* We shouldn't mount such media... */ 321 /* We shouldn't mount such media... */
322 BUG_ON(!inode); 322 BUG_ON(!inode);
323 retblk = udf_try_read_meta(inode, block, partition, offset); 323 retblk = udf_try_read_meta(inode, block, partition, offset);
324 if (retblk == 0xFFFFFFFF) { 324 if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
325 udf_warning(sb, __func__, "error reading from METADATA, " 325 udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
326 "trying to read from MIRROR"); 326 if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
327 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
328 mdata->s_mirror_file_loc, map->s_partition_num);
329 mdata->s_flags |= MF_MIRROR_FE_LOADED;
330 }
331
327 inode = mdata->s_mirror_fe; 332 inode = mdata->s_mirror_fe;
328 if (!inode) 333 if (!inode)
329 return 0xFFFFFFFF; 334 return 0xFFFFFFFF;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7b27b063ff6d..e185253470df 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -75,8 +75,6 @@
75 75
76#define UDF_DEFAULT_BLOCKSIZE 2048 76#define UDF_DEFAULT_BLOCKSIZE 2048
77 77
78static char error_buf[1024];
79
80/* These are the "meat" - everything else is stuffing */ 78/* These are the "meat" - everything else is stuffing */
81static int udf_fill_super(struct super_block *, void *, int); 79static int udf_fill_super(struct super_block *, void *, int);
82static void udf_put_super(struct super_block *); 80static void udf_put_super(struct super_block *);
@@ -92,8 +90,6 @@ static void udf_close_lvid(struct super_block *);
92static unsigned int udf_count_free(struct super_block *); 90static unsigned int udf_count_free(struct super_block *);
93static int udf_statfs(struct dentry *, struct kstatfs *); 91static int udf_statfs(struct dentry *, struct kstatfs *);
94static int udf_show_options(struct seq_file *, struct vfsmount *); 92static int udf_show_options(struct seq_file *, struct vfsmount *);
95static void udf_error(struct super_block *sb, const char *function,
96 const char *fmt, ...);
97 93
98struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) 94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
99{ 95{
@@ -244,9 +240,8 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
244 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
245 GFP_KERNEL); 241 GFP_KERNEL);
246 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
247 udf_error(sb, __func__, 243 udf_err(sb, "Unable to allocate space for %d partition maps\n",
248 "Unable to allocate space for %d partition maps", 244 count);
249 count);
250 sbi->s_partitions = 0; 245 sbi->s_partitions = 0;
251 return -ENOMEM; 246 return -ENOMEM;
252 } 247 }
@@ -550,8 +545,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
550 uopt->dmode = option & 0777; 545 uopt->dmode = option & 0777;
551 break; 546 break;
552 default: 547 default:
553 printk(KERN_ERR "udf: bad mount option \"%s\" " 548 pr_err("bad mount option \"%s\" or missing value\n", p);
554 "or missing value\n", p);
555 return 0; 549 return 0;
556 } 550 }
557 } 551 }
@@ -645,20 +639,16 @@ static loff_t udf_check_vsd(struct super_block *sb)
645 udf_debug("ISO9660 Boot Record found\n"); 639 udf_debug("ISO9660 Boot Record found\n");
646 break; 640 break;
647 case 1: 641 case 1:
648 udf_debug("ISO9660 Primary Volume Descriptor " 642 udf_debug("ISO9660 Primary Volume Descriptor found\n");
649 "found\n");
650 break; 643 break;
651 case 2: 644 case 2:
652 udf_debug("ISO9660 Supplementary Volume " 645 udf_debug("ISO9660 Supplementary Volume Descriptor found\n");
653 "Descriptor found\n");
654 break; 646 break;
655 case 3: 647 case 3:
656 udf_debug("ISO9660 Volume Partition Descriptor " 648 udf_debug("ISO9660 Volume Partition Descriptor found\n");
657 "found\n");
658 break; 649 break;
659 case 255: 650 case 255:
660 udf_debug("ISO9660 Volume Descriptor Set " 651 udf_debug("ISO9660 Volume Descriptor Set Terminator found\n");
661 "Terminator found\n");
662 break; 652 break;
663 default: 653 default:
664 udf_debug("ISO9660 VRS (%u) found\n", 654 udf_debug("ISO9660 VRS (%u) found\n",
@@ -809,8 +799,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
809 pvoldesc->recordingDateAndTime)) { 799 pvoldesc->recordingDateAndTime)) {
810#ifdef UDFFS_DEBUG 800#ifdef UDFFS_DEBUG
811 struct timestamp *ts = &pvoldesc->recordingDateAndTime; 801 struct timestamp *ts = &pvoldesc->recordingDateAndTime;
812 udf_debug("recording time %04u/%02u/%02u" 802 udf_debug("recording time %04u/%02u/%02u %02u:%02u (%x)\n",
813 " %02u:%02u (%x)\n",
814 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour, 803 le16_to_cpu(ts->year), ts->month, ts->day, ts->hour,
815 ts->minute, le16_to_cpu(ts->typeAndTimezone)); 804 ts->minute, le16_to_cpu(ts->typeAndTimezone));
816#endif 805#endif
@@ -821,7 +810,7 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
821 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name, 810 strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
822 outstr->u_len > 31 ? 31 : outstr->u_len); 811 outstr->u_len > 31 ? 31 : outstr->u_len);
823 udf_debug("volIdent[] = '%s'\n", 812 udf_debug("volIdent[] = '%s'\n",
824 UDF_SB(sb)->s_volume_ident); 813 UDF_SB(sb)->s_volume_ident);
825 } 814 }
826 815
827 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) 816 if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128))
@@ -837,64 +826,57 @@ out1:
837 return ret; 826 return ret;
838} 827}
839 828
829struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
830 u32 meta_file_loc, u32 partition_num)
831{
832 struct kernel_lb_addr addr;
833 struct inode *metadata_fe;
834
835 addr.logicalBlockNum = meta_file_loc;
836 addr.partitionReferenceNum = partition_num;
837
838 metadata_fe = udf_iget(sb, &addr);
839
840 if (metadata_fe == NULL)
841 udf_warn(sb, "metadata inode efe not found\n");
842 else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) {
843 udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n");
844 iput(metadata_fe);
845 metadata_fe = NULL;
846 }
847
848 return metadata_fe;
849}
850
840static int udf_load_metadata_files(struct super_block *sb, int partition) 851static int udf_load_metadata_files(struct super_block *sb, int partition)
841{ 852{
842 struct udf_sb_info *sbi = UDF_SB(sb); 853 struct udf_sb_info *sbi = UDF_SB(sb);
843 struct udf_part_map *map; 854 struct udf_part_map *map;
844 struct udf_meta_data *mdata; 855 struct udf_meta_data *mdata;
845 struct kernel_lb_addr addr; 856 struct kernel_lb_addr addr;
846 int fe_error = 0;
847 857
848 map = &sbi->s_partmaps[partition]; 858 map = &sbi->s_partmaps[partition];
849 mdata = &map->s_type_specific.s_metadata; 859 mdata = &map->s_type_specific.s_metadata;
850 860
851 /* metadata address */ 861 /* metadata address */
852 addr.logicalBlockNum = mdata->s_meta_file_loc;
853 addr.partitionReferenceNum = map->s_partition_num;
854
855 udf_debug("Metadata file location: block = %d part = %d\n", 862 udf_debug("Metadata file location: block = %d part = %d\n",
856 addr.logicalBlockNum, addr.partitionReferenceNum); 863 mdata->s_meta_file_loc, map->s_partition_num);
857 864
858 mdata->s_metadata_fe = udf_iget(sb, &addr); 865 mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb,
866 mdata->s_meta_file_loc, map->s_partition_num);
859 867
860 if (mdata->s_metadata_fe == NULL) { 868 if (mdata->s_metadata_fe == NULL) {
861 udf_warning(sb, __func__, "metadata inode efe not found, " 869 /* mirror file entry */
862 "will try mirror inode."); 870 udf_debug("Mirror metadata file location: block = %d part = %d\n",
863 fe_error = 1; 871 mdata->s_mirror_file_loc, map->s_partition_num);
864 } else if (UDF_I(mdata->s_metadata_fe)->i_alloc_type !=
865 ICBTAG_FLAG_AD_SHORT) {
866 udf_warning(sb, __func__, "metadata inode efe does not have "
867 "short allocation descriptors!");
868 fe_error = 1;
869 iput(mdata->s_metadata_fe);
870 mdata->s_metadata_fe = NULL;
871 }
872 872
873 /* mirror file entry */ 873 mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
874 addr.logicalBlockNum = mdata->s_mirror_file_loc; 874 mdata->s_mirror_file_loc, map->s_partition_num);
875 addr.partitionReferenceNum = map->s_partition_num;
876
877 udf_debug("Mirror metadata file location: block = %d part = %d\n",
878 addr.logicalBlockNum, addr.partitionReferenceNum);
879 875
880 mdata->s_mirror_fe = udf_iget(sb, &addr); 876 if (mdata->s_mirror_fe == NULL) {
881 877 udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
882 if (mdata->s_mirror_fe == NULL) {
883 if (fe_error) {
884 udf_error(sb, __func__, "mirror inode efe not found "
885 "and metadata inode is missing too, exiting...");
886 goto error_exit;
887 } else
888 udf_warning(sb, __func__, "mirror inode efe not found,"
889 " but metadata inode is OK");
890 } else if (UDF_I(mdata->s_mirror_fe)->i_alloc_type !=
891 ICBTAG_FLAG_AD_SHORT) {
892 udf_warning(sb, __func__, "mirror inode efe does not have "
893 "short allocation descriptors!");
894 iput(mdata->s_mirror_fe);
895 mdata->s_mirror_fe = NULL;
896 if (fe_error)
897 goto error_exit; 878 goto error_exit;
879 }
898 } 880 }
899 881
900 /* 882 /*
@@ -907,18 +889,15 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
907 addr.partitionReferenceNum = map->s_partition_num; 889 addr.partitionReferenceNum = map->s_partition_num;
908 890
909 udf_debug("Bitmap file location: block = %d part = %d\n", 891 udf_debug("Bitmap file location: block = %d part = %d\n",
910 addr.logicalBlockNum, addr.partitionReferenceNum); 892 addr.logicalBlockNum, addr.partitionReferenceNum);
911 893
912 mdata->s_bitmap_fe = udf_iget(sb, &addr); 894 mdata->s_bitmap_fe = udf_iget(sb, &addr);
913 895
914 if (mdata->s_bitmap_fe == NULL) { 896 if (mdata->s_bitmap_fe == NULL) {
915 if (sb->s_flags & MS_RDONLY) 897 if (sb->s_flags & MS_RDONLY)
916 udf_warning(sb, __func__, "bitmap inode efe " 898 udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
917 "not found but it's ok since the disc"
918 " is mounted read-only");
919 else { 899 else {
920 udf_error(sb, __func__, "bitmap inode efe not " 900 udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
921 "found and attempted read-write mount");
922 goto error_exit; 901 goto error_exit;
923 } 902 }
924 } 903 }
@@ -971,9 +950,8 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
971 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */ 950 bitmap = vzalloc(size); /* TODO: get rid of vzalloc */
972 951
973 if (bitmap == NULL) { 952 if (bitmap == NULL) {
974 udf_error(sb, __func__, 953 udf_err(sb, "Unable to allocate space for bitmap and %d buffer_head pointers\n",
975 "Unable to allocate space for bitmap " 954 nr_groups);
976 "and %d buffer_head pointers", nr_groups);
977 return NULL; 955 return NULL;
978 } 956 }
979 957
@@ -1003,10 +981,9 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1003 if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE)) 981 if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_OVERWRITABLE))
1004 map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE; 982 map->s_partition_flags |= UDF_PART_FLAG_OVERWRITABLE;
1005 983
1006 udf_debug("Partition (%d type %x) starts at physical %d, " 984 udf_debug("Partition (%d type %x) starts at physical %d, block length %d\n",
1007 "block length %d\n", p_index, 985 p_index, map->s_partition_type,
1008 map->s_partition_type, map->s_partition_root, 986 map->s_partition_root, map->s_partition_len);
1009 map->s_partition_len);
1010 987
1011 if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) && 988 if (strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR02) &&
1012 strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03)) 989 strcmp(p->partitionContents.ident, PD_PARTITION_CONTENTS_NSR03))
@@ -1023,12 +1000,12 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1023 map->s_uspace.s_table = udf_iget(sb, &loc); 1000 map->s_uspace.s_table = udf_iget(sb, &loc);
1024 if (!map->s_uspace.s_table) { 1001 if (!map->s_uspace.s_table) {
1025 udf_debug("cannot load unallocSpaceTable (part %d)\n", 1002 udf_debug("cannot load unallocSpaceTable (part %d)\n",
1026 p_index); 1003 p_index);
1027 return 1; 1004 return 1;
1028 } 1005 }
1029 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; 1006 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
1030 udf_debug("unallocSpaceTable (part %d) @ %ld\n", 1007 udf_debug("unallocSpaceTable (part %d) @ %ld\n",
1031 p_index, map->s_uspace.s_table->i_ino); 1008 p_index, map->s_uspace.s_table->i_ino);
1032 } 1009 }
1033 1010
1034 if (phd->unallocSpaceBitmap.extLength) { 1011 if (phd->unallocSpaceBitmap.extLength) {
@@ -1041,8 +1018,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1041 bitmap->s_extPosition = le32_to_cpu( 1018 bitmap->s_extPosition = le32_to_cpu(
1042 phd->unallocSpaceBitmap.extPosition); 1019 phd->unallocSpaceBitmap.extPosition);
1043 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; 1020 map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP;
1044 udf_debug("unallocSpaceBitmap (part %d) @ %d\n", p_index, 1021 udf_debug("unallocSpaceBitmap (part %d) @ %d\n",
1045 bitmap->s_extPosition); 1022 p_index, bitmap->s_extPosition);
1046 } 1023 }
1047 1024
1048 if (phd->partitionIntegrityTable.extLength) 1025 if (phd->partitionIntegrityTable.extLength)
@@ -1058,13 +1035,13 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1058 map->s_fspace.s_table = udf_iget(sb, &loc); 1035 map->s_fspace.s_table = udf_iget(sb, &loc);
1059 if (!map->s_fspace.s_table) { 1036 if (!map->s_fspace.s_table) {
1060 udf_debug("cannot load freedSpaceTable (part %d)\n", 1037 udf_debug("cannot load freedSpaceTable (part %d)\n",
1061 p_index); 1038 p_index);
1062 return 1; 1039 return 1;
1063 } 1040 }
1064 1041
1065 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; 1042 map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
1066 udf_debug("freedSpaceTable (part %d) @ %ld\n", 1043 udf_debug("freedSpaceTable (part %d) @ %ld\n",
1067 p_index, map->s_fspace.s_table->i_ino); 1044 p_index, map->s_fspace.s_table->i_ino);
1068 } 1045 }
1069 1046
1070 if (phd->freedSpaceBitmap.extLength) { 1047 if (phd->freedSpaceBitmap.extLength) {
@@ -1077,8 +1054,8 @@ static int udf_fill_partdesc_info(struct super_block *sb,
1077 bitmap->s_extPosition = le32_to_cpu( 1054 bitmap->s_extPosition = le32_to_cpu(
1078 phd->freedSpaceBitmap.extPosition); 1055 phd->freedSpaceBitmap.extPosition);
1079 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP; 1056 map->s_partition_flags |= UDF_PART_FLAG_FREED_BITMAP;
1080 udf_debug("freedSpaceBitmap (part %d) @ %d\n", p_index, 1057 udf_debug("freedSpaceBitmap (part %d) @ %d\n",
1081 bitmap->s_extPosition); 1058 p_index, bitmap->s_extPosition);
1082 } 1059 }
1083 return 0; 1060 return 0;
1084} 1061}
@@ -1118,11 +1095,9 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1118 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block); 1095 udf_find_vat_block(sb, p_index, type1_index, sbi->s_last_block);
1119 if (!sbi->s_vat_inode && 1096 if (!sbi->s_vat_inode &&
1120 sbi->s_last_block != blocks - 1) { 1097 sbi->s_last_block != blocks - 1) {
1121 printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the" 1098 pr_notice("Failed to read VAT inode from the last recorded block (%lu), retrying with the last block of the device (%lu).\n",
1122 " last recorded block (%lu), retrying with the last " 1099 (unsigned long)sbi->s_last_block,
1123 "block of the device (%lu).\n", 1100 (unsigned long)blocks - 1);
1124 (unsigned long)sbi->s_last_block,
1125 (unsigned long)blocks - 1);
1126 udf_find_vat_block(sb, p_index, type1_index, blocks - 1); 1101 udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
1127 } 1102 }
1128 if (!sbi->s_vat_inode) 1103 if (!sbi->s_vat_inode)
@@ -1220,8 +1195,8 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1220 if (map->s_partition_type == UDF_METADATA_MAP25) { 1195 if (map->s_partition_type == UDF_METADATA_MAP25) {
1221 ret = udf_load_metadata_files(sb, i); 1196 ret = udf_load_metadata_files(sb, i);
1222 if (ret) { 1197 if (ret) {
1223 printk(KERN_ERR "UDF-fs: error loading MetaData " 1198 udf_err(sb, "error loading MetaData partition map %d\n",
1224 "partition map %d\n", i); 1199 i);
1225 goto out_bh; 1200 goto out_bh;
1226 } 1201 }
1227 } else { 1202 } else {
@@ -1234,9 +1209,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
1234 * overwrite blocks instead of relocating them). 1209 * overwrite blocks instead of relocating them).
1235 */ 1210 */
1236 sb->s_flags |= MS_RDONLY; 1211 sb->s_flags |= MS_RDONLY;
1237 printk(KERN_NOTICE "UDF-fs: Filesystem marked read-only " 1212 pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
1238 "because writing to pseudooverwrite partition is "
1239 "not implemented.\n");
1240 } 1213 }
1241out_bh: 1214out_bh:
1242 /* In case loading failed, we handle cleanup in udf_fill_super */ 1215 /* In case loading failed, we handle cleanup in udf_fill_super */
@@ -1344,9 +1317,8 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1344 struct metadataPartitionMap *mdm = 1317 struct metadataPartitionMap *mdm =
1345 (struct metadataPartitionMap *) 1318 (struct metadataPartitionMap *)
1346 &(lvd->partitionMaps[offset]); 1319 &(lvd->partitionMaps[offset]);
1347 udf_debug("Parsing Logical vol part %d " 1320 udf_debug("Parsing Logical vol part %d type %d id=%s\n",
1348 "type %d id=%s\n", i, type, 1321 i, type, UDF_ID_METADATA);
1349 UDF_ID_METADATA);
1350 1322
1351 map->s_partition_type = UDF_METADATA_MAP25; 1323 map->s_partition_type = UDF_METADATA_MAP25;
1352 map->s_partition_func = udf_get_pblock_meta25; 1324 map->s_partition_func = udf_get_pblock_meta25;
@@ -1361,25 +1333,24 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1361 le32_to_cpu(mdm->allocUnitSize); 1333 le32_to_cpu(mdm->allocUnitSize);
1362 mdata->s_align_unit_size = 1334 mdata->s_align_unit_size =
1363 le16_to_cpu(mdm->alignUnitSize); 1335 le16_to_cpu(mdm->alignUnitSize);
1364 mdata->s_dup_md_flag = 1336 if (mdm->flags & 0x01)
1365 mdm->flags & 0x01; 1337 mdata->s_flags |= MF_DUPLICATE_MD;
1366 1338
1367 udf_debug("Metadata Ident suffix=0x%x\n", 1339 udf_debug("Metadata Ident suffix=0x%x\n",
1368 (le16_to_cpu( 1340 le16_to_cpu(*(__le16 *)
1369 ((__le16 *) 1341 mdm->partIdent.identSuffix));
1370 mdm->partIdent.identSuffix)[0])));
1371 udf_debug("Metadata part num=%d\n", 1342 udf_debug("Metadata part num=%d\n",
1372 le16_to_cpu(mdm->partitionNum)); 1343 le16_to_cpu(mdm->partitionNum));
1373 udf_debug("Metadata part alloc unit size=%d\n", 1344 udf_debug("Metadata part alloc unit size=%d\n",
1374 le32_to_cpu(mdm->allocUnitSize)); 1345 le32_to_cpu(mdm->allocUnitSize));
1375 udf_debug("Metadata file loc=%d\n", 1346 udf_debug("Metadata file loc=%d\n",
1376 le32_to_cpu(mdm->metadataFileLoc)); 1347 le32_to_cpu(mdm->metadataFileLoc));
1377 udf_debug("Mirror file loc=%d\n", 1348 udf_debug("Mirror file loc=%d\n",
1378 le32_to_cpu(mdm->metadataMirrorFileLoc)); 1349 le32_to_cpu(mdm->metadataMirrorFileLoc));
1379 udf_debug("Bitmap file loc=%d\n", 1350 udf_debug("Bitmap file loc=%d\n",
1380 le32_to_cpu(mdm->metadataBitmapFileLoc)); 1351 le32_to_cpu(mdm->metadataBitmapFileLoc));
1381 udf_debug("Duplicate Flag: %d %d\n", 1352 udf_debug("Flags: %d %d\n",
1382 mdata->s_dup_md_flag, mdm->flags); 1353 mdata->s_flags, mdm->flags);
1383 } else { 1354 } else {
1384 udf_debug("Unknown ident: %s\n", 1355 udf_debug("Unknown ident: %s\n",
1385 upm2->partIdent.ident); 1356 upm2->partIdent.ident);
@@ -1389,16 +1360,15 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1389 map->s_partition_num = le16_to_cpu(upm2->partitionNum); 1360 map->s_partition_num = le16_to_cpu(upm2->partitionNum);
1390 } 1361 }
1391 udf_debug("Partition (%d:%d) type %d on volume %d\n", 1362 udf_debug("Partition (%d:%d) type %d on volume %d\n",
1392 i, map->s_partition_num, type, 1363 i, map->s_partition_num, type, map->s_volumeseqnum);
1393 map->s_volumeseqnum);
1394 } 1364 }
1395 1365
1396 if (fileset) { 1366 if (fileset) {
1397 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]); 1367 struct long_ad *la = (struct long_ad *)&(lvd->logicalVolContentsUse[0]);
1398 1368
1399 *fileset = lelb_to_cpu(la->extLocation); 1369 *fileset = lelb_to_cpu(la->extLocation);
1400 udf_debug("FileSet found in LogicalVolDesc at block=%d, " 1370 udf_debug("FileSet found in LogicalVolDesc at block=%d, partition=%d\n",
1401 "partition=%d\n", fileset->logicalBlockNum, 1371 fileset->logicalBlockNum,
1402 fileset->partitionReferenceNum); 1372 fileset->partitionReferenceNum);
1403 } 1373 }
1404 if (lvd->integritySeqExt.extLength) 1374 if (lvd->integritySeqExt.extLength)
@@ -1478,9 +1448,9 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1478 1448
1479 bh = udf_read_tagged(sb, block, block, &ident); 1449 bh = udf_read_tagged(sb, block, block, &ident);
1480 if (!bh) { 1450 if (!bh) {
1481 printk(KERN_ERR "udf: Block %Lu of volume descriptor " 1451 udf_err(sb,
1482 "sequence is corrupted or we could not read " 1452 "Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
1483 "it.\n", (unsigned long long)block); 1453 (unsigned long long)block);
1484 return 1; 1454 return 1;
1485 } 1455 }
1486 1456
@@ -1553,7 +1523,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
1553 * in a suitable order 1523 * in a suitable order
1554 */ 1524 */
1555 if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { 1525 if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
1556 printk(KERN_ERR "udf: Primary Volume Descriptor not found!\n"); 1526 udf_err(sb, "Primary Volume Descriptor not found!\n");
1557 return 1; 1527 return 1;
1558 } 1528 }
1559 if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) 1529 if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
@@ -1740,7 +1710,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1740 1710
1741 if (!sb_set_blocksize(sb, uopt->blocksize)) { 1711 if (!sb_set_blocksize(sb, uopt->blocksize)) {
1742 if (!silent) 1712 if (!silent)
1743 printk(KERN_WARNING "UDF-fs: Bad block size\n"); 1713 udf_warn(sb, "Bad block size\n");
1744 return 0; 1714 return 0;
1745 } 1715 }
1746 sbi->s_last_block = uopt->lastblock; 1716 sbi->s_last_block = uopt->lastblock;
@@ -1749,12 +1719,11 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1749 nsr_off = udf_check_vsd(sb); 1719 nsr_off = udf_check_vsd(sb);
1750 if (!nsr_off) { 1720 if (!nsr_off) {
1751 if (!silent) 1721 if (!silent)
1752 printk(KERN_WARNING "UDF-fs: No VRS found\n"); 1722 udf_warn(sb, "No VRS found\n");
1753 return 0; 1723 return 0;
1754 } 1724 }
1755 if (nsr_off == -1) 1725 if (nsr_off == -1)
1756 udf_debug("Failed to read byte 32768. Assuming open " 1726 udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n");
1757 "disc. Skipping validity check\n");
1758 if (!sbi->s_last_block) 1727 if (!sbi->s_last_block)
1759 sbi->s_last_block = udf_get_last_block(sb); 1728 sbi->s_last_block = udf_get_last_block(sb);
1760 } else { 1729 } else {
@@ -1765,7 +1734,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
1765 sbi->s_anchor = uopt->anchor; 1734 sbi->s_anchor = uopt->anchor;
1766 if (!udf_find_anchor(sb, fileset)) { 1735 if (!udf_find_anchor(sb, fileset)) {
1767 if (!silent) 1736 if (!silent)
1768 printk(KERN_WARNING "UDF-fs: No anchor found\n"); 1737 udf_warn(sb, "No anchor found\n");
1769 return 0; 1738 return 0;
1770 } 1739 }
1771 return 1; 1740 return 1;
@@ -1937,8 +1906,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1937 1906
1938 if (uopt.flags & (1 << UDF_FLAG_UTF8) && 1907 if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
1939 uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { 1908 uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
1940 udf_error(sb, "udf_read_super", 1909 udf_err(sb, "utf8 cannot be combined with iocharset\n");
1941 "utf8 cannot be combined with iocharset\n");
1942 goto error_out; 1910 goto error_out;
1943 } 1911 }
1944#ifdef CONFIG_UDF_NLS 1912#ifdef CONFIG_UDF_NLS
@@ -1987,15 +1955,14 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1987 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 1955 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1988 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { 1956 if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
1989 if (!silent) 1957 if (!silent)
1990 printk(KERN_NOTICE 1958 pr_notice("Rescanning with blocksize %d\n",
1991 "UDF-fs: Rescanning with blocksize " 1959 UDF_DEFAULT_BLOCKSIZE);
1992 "%d\n", UDF_DEFAULT_BLOCKSIZE);
1993 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; 1960 uopt.blocksize = UDF_DEFAULT_BLOCKSIZE;
1994 ret = udf_load_vrs(sb, &uopt, silent, &fileset); 1961 ret = udf_load_vrs(sb, &uopt, silent, &fileset);
1995 } 1962 }
1996 } 1963 }
1997 if (!ret) { 1964 if (!ret) {
1998 printk(KERN_WARNING "UDF-fs: No partition found (1)\n"); 1965 udf_warn(sb, "No partition found (1)\n");
1999 goto error_out; 1966 goto error_out;
2000 } 1967 }
2001 1968
@@ -2010,10 +1977,9 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2010 le16_to_cpu(lvidiu->maxUDFWriteRev); */ 1977 le16_to_cpu(lvidiu->maxUDFWriteRev); */
2011 1978
2012 if (minUDFReadRev > UDF_MAX_READ_VERSION) { 1979 if (minUDFReadRev > UDF_MAX_READ_VERSION) {
2013 printk(KERN_ERR "UDF-fs: minUDFReadRev=%x " 1980 udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
2014 "(max is %x)\n", 1981 le16_to_cpu(lvidiu->minUDFReadRev),
2015 le16_to_cpu(lvidiu->minUDFReadRev), 1982 UDF_MAX_READ_VERSION);
2016 UDF_MAX_READ_VERSION);
2017 goto error_out; 1983 goto error_out;
2018 } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) 1984 } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
2019 sb->s_flags |= MS_RDONLY; 1985 sb->s_flags |= MS_RDONLY;
@@ -2027,28 +1993,27 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2027 } 1993 }
2028 1994
2029 if (!sbi->s_partitions) { 1995 if (!sbi->s_partitions) {
2030 printk(KERN_WARNING "UDF-fs: No partition found (2)\n"); 1996 udf_warn(sb, "No partition found (2)\n");
2031 goto error_out; 1997 goto error_out;
2032 } 1998 }
2033 1999
2034 if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & 2000 if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
2035 UDF_PART_FLAG_READ_ONLY) { 2001 UDF_PART_FLAG_READ_ONLY) {
2036 printk(KERN_NOTICE "UDF-fs: Partition marked readonly; " 2002 pr_notice("Partition marked readonly; forcing readonly mount\n");
2037 "forcing readonly mount\n");
2038 sb->s_flags |= MS_RDONLY; 2003 sb->s_flags |= MS_RDONLY;
2039 } 2004 }
2040 2005
2041 if (udf_find_fileset(sb, &fileset, &rootdir)) { 2006 if (udf_find_fileset(sb, &fileset, &rootdir)) {
2042 printk(KERN_WARNING "UDF-fs: No fileset found\n"); 2007 udf_warn(sb, "No fileset found\n");
2043 goto error_out; 2008 goto error_out;
2044 } 2009 }
2045 2010
2046 if (!silent) { 2011 if (!silent) {
2047 struct timestamp ts; 2012 struct timestamp ts;
2048 udf_time_to_disk_stamp(&ts, sbi->s_record_time); 2013 udf_time_to_disk_stamp(&ts, sbi->s_record_time);
2049 udf_info("UDF: Mounting volume '%s', " 2014 udf_info("Mounting volume '%s', timestamp %04u/%02u/%02u %02u:%02u (%x)\n",
2050 "timestamp %04u/%02u/%02u %02u:%02u (%x)\n", 2015 sbi->s_volume_ident,
2051 sbi->s_volume_ident, le16_to_cpu(ts.year), ts.month, ts.day, 2016 le16_to_cpu(ts.year), ts.month, ts.day,
2052 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone)); 2017 ts.hour, ts.minute, le16_to_cpu(ts.typeAndTimezone));
2053 } 2018 }
2054 if (!(sb->s_flags & MS_RDONLY)) 2019 if (!(sb->s_flags & MS_RDONLY))
@@ -2059,8 +2024,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2059 /* perhaps it's not extensible enough, but for now ... */ 2024 /* perhaps it's not extensible enough, but for now ... */
2060 inode = udf_iget(sb, &rootdir); 2025 inode = udf_iget(sb, &rootdir);
2061 if (!inode) { 2026 if (!inode) {
2062 printk(KERN_ERR "UDF-fs: Error in udf_iget, block=%d, " 2027 udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
2063 "partition=%d\n",
2064 rootdir.logicalBlockNum, rootdir.partitionReferenceNum); 2028 rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
2065 goto error_out; 2029 goto error_out;
2066 } 2030 }
@@ -2068,7 +2032,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2068 /* Allocate a dentry for the root inode */ 2032 /* Allocate a dentry for the root inode */
2069 sb->s_root = d_alloc_root(inode); 2033 sb->s_root = d_alloc_root(inode);
2070 if (!sb->s_root) { 2034 if (!sb->s_root) {
2071 printk(KERN_ERR "UDF-fs: Couldn't allocate root dentry\n"); 2035 udf_err(sb, "Couldn't allocate root dentry\n");
2072 iput(inode); 2036 iput(inode);
2073 goto error_out; 2037 goto error_out;
2074 } 2038 }
@@ -2096,32 +2060,40 @@ error_out:
2096 return -EINVAL; 2060 return -EINVAL;
2097} 2061}
2098 2062
2099static void udf_error(struct super_block *sb, const char *function, 2063void _udf_err(struct super_block *sb, const char *function,
2100 const char *fmt, ...) 2064 const char *fmt, ...)
2101{ 2065{
2066 struct va_format vaf;
2102 va_list args; 2067 va_list args;
2103 2068
2104 if (!(sb->s_flags & MS_RDONLY)) { 2069 /* mark sb error */
2105 /* mark sb error */ 2070 if (!(sb->s_flags & MS_RDONLY))
2106 sb->s_dirt = 1; 2071 sb->s_dirt = 1;
2107 } 2072
2108 va_start(args, fmt); 2073 va_start(args, fmt);
2109 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2074
2075 vaf.fmt = fmt;
2076 vaf.va = &args;
2077
2078 pr_err("error (device %s): %s: %pV", sb->s_id, function, &vaf);
2079
2110 va_end(args); 2080 va_end(args);
2111 printk(KERN_CRIT "UDF-fs error (device %s): %s: %s\n",
2112 sb->s_id, function, error_buf);
2113} 2081}
2114 2082
2115void udf_warning(struct super_block *sb, const char *function, 2083void _udf_warn(struct super_block *sb, const char *function,
2116 const char *fmt, ...) 2084 const char *fmt, ...)
2117{ 2085{
2086 struct va_format vaf;
2118 va_list args; 2087 va_list args;
2119 2088
2120 va_start(args, fmt); 2089 va_start(args, fmt);
2121 vsnprintf(error_buf, sizeof(error_buf), fmt, args); 2090
2091 vaf.fmt = fmt;
2092 vaf.va = &args;
2093
2094 pr_warn("warning (device %s): %s: %pV", sb->s_id, function, &vaf);
2095
2122 va_end(args); 2096 va_end(args);
2123 printk(KERN_WARNING "UDF-fs warning (device %s): %s: %s\n",
2124 sb->s_id, function, error_buf);
2125} 2097}
2126 2098
2127static void udf_put_super(struct super_block *sb) 2099static void udf_put_super(struct super_block *sb)
@@ -2213,11 +2185,11 @@ static unsigned int udf_count_free_bitmap(struct super_block *sb,
2213 bh = udf_read_ptagged(sb, &loc, 0, &ident); 2185 bh = udf_read_ptagged(sb, &loc, 0, &ident);
2214 2186
2215 if (!bh) { 2187 if (!bh) {
2216 printk(KERN_ERR "udf: udf_count_free failed\n"); 2188 udf_err(sb, "udf_count_free failed\n");
2217 goto out; 2189 goto out;
2218 } else if (ident != TAG_IDENT_SBD) { 2190 } else if (ident != TAG_IDENT_SBD) {
2219 brelse(bh); 2191 brelse(bh);
2220 printk(KERN_ERR "udf: udf_count_free failed\n"); 2192 udf_err(sb, "udf_count_free failed\n");
2221 goto out; 2193 goto out;
2222 } 2194 }
2223 2195
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8424308db4b4..4b98fee8e161 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -95,23 +95,21 @@ void udf_truncate_tail_extent(struct inode *inode)
95 lbcount += elen; 95 lbcount += elen;
96 if (lbcount > inode->i_size) { 96 if (lbcount > inode->i_size) {
97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize) 97 if (lbcount - inode->i_size >= inode->i_sb->s_blocksize)
98 printk(KERN_WARNING 98 udf_warn(inode->i_sb,
99 "udf_truncate_tail_extent(): Too long " 99 "Too long extent after EOF in inode %u: i_size: %lld lbcount: %lld extent %u+%u\n",
100 "extent after EOF in inode %u: i_size: " 100 (unsigned)inode->i_ino,
101 "%Ld lbcount: %Ld extent %u+%u\n", 101 (long long)inode->i_size,
102 (unsigned)inode->i_ino, 102 (long long)lbcount,
103 (long long)inode->i_size, 103 (unsigned)eloc.logicalBlockNum,
104 (long long)lbcount, 104 (unsigned)elen);
105 (unsigned)eloc.logicalBlockNum,
106 (unsigned)elen);
107 nelen = elen - (lbcount - inode->i_size); 105 nelen = elen - (lbcount - inode->i_size);
108 epos.offset -= adsize; 106 epos.offset -= adsize;
109 extent_trunc(inode, &epos, &eloc, etype, elen, nelen); 107 extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
110 epos.offset += adsize; 108 epos.offset += adsize;
111 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1) 109 if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
112 printk(KERN_ERR "udf_truncate_tail_extent(): " 110 udf_err(inode->i_sb,
113 "Extent after EOF in inode %u.\n", 111 "Extent after EOF in inode %u\n",
114 (unsigned)inode->i_ino); 112 (unsigned)inode->i_ino);
115 break; 113 break;
116 } 114 }
117 } 115 }
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 4858c191242b..5142a82e3276 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -54,13 +54,16 @@
54 54
55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ 55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
56 56
57#define MF_DUPLICATE_MD 0x01
58#define MF_MIRROR_FE_LOADED 0x02
59
57struct udf_meta_data { 60struct udf_meta_data {
58 __u32 s_meta_file_loc; 61 __u32 s_meta_file_loc;
59 __u32 s_mirror_file_loc; 62 __u32 s_mirror_file_loc;
60 __u32 s_bitmap_file_loc; 63 __u32 s_bitmap_file_loc;
61 __u32 s_alloc_unit_size; 64 __u32 s_alloc_unit_size;
62 __u16 s_align_unit_size; 65 __u16 s_align_unit_size;
63 __u8 s_dup_md_flag; 66 int s_flags;
64 struct inode *s_metadata_fe; 67 struct inode *s_metadata_fe;
65 struct inode *s_mirror_fe; 68 struct inode *s_mirror_fe;
66 struct inode *s_bitmap_fe; 69 struct inode *s_bitmap_fe;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index dbd52d4b5eed..f34e6fc0cdaa 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -1,6 +1,8 @@
1#ifndef __UDF_DECL_H 1#ifndef __UDF_DECL_H
2#define __UDF_DECL_H 2#define __UDF_DECL_H
3 3
4#define pr_fmt(fmt) "UDF-fs: " fmt
5
4#include "ecma_167.h" 6#include "ecma_167.h"
5#include "osta_udf.h" 7#include "osta_udf.h"
6 8
@@ -16,23 +18,30 @@
16#define UDF_PREALLOCATE 18#define UDF_PREALLOCATE
17#define UDF_DEFAULT_PREALLOC_BLOCKS 8 19#define UDF_DEFAULT_PREALLOC_BLOCKS 8
18 20
21extern __printf(3, 4) void _udf_err(struct super_block *sb,
22 const char *function, const char *fmt, ...);
23#define udf_err(sb, fmt, ...) \
24 _udf_err(sb, __func__, fmt, ##__VA_ARGS__)
25
26extern __printf(3, 4) void _udf_warn(struct super_block *sb,
27 const char *function, const char *fmt, ...);
28#define udf_warn(sb, fmt, ...) \
29 _udf_warn(sb, __func__, fmt, ##__VA_ARGS__)
30
31#define udf_info(fmt, ...) \
32 pr_info("INFO " fmt, ##__VA_ARGS__)
33
19#undef UDFFS_DEBUG 34#undef UDFFS_DEBUG
20 35
21#ifdef UDFFS_DEBUG 36#ifdef UDFFS_DEBUG
22#define udf_debug(f, a...) \ 37#define udf_debug(fmt, ...) \
23do { \ 38 printk(KERN_DEBUG pr_fmt("%s:%d:%s: " fmt), \
24 printk(KERN_DEBUG "UDF-fs DEBUG %s:%d:%s: ", \ 39 __FILE__, __LINE__, __func__, ##__VA_ARGS__)
25 __FILE__, __LINE__, __func__); \
26 printk(f, ##a); \
27} while (0)
28#else 40#else
29#define udf_debug(f, a...) /**/ 41#define udf_debug(fmt, ...) \
42 no_printk(fmt, ##__VA_ARGS__)
30#endif 43#endif
31 44
32#define udf_info(f, a...) \
33 printk(KERN_INFO "UDF-fs INFO " f, ##a);
34
35
36#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) ) 45#define udf_fixed_to_variable(x) ( ( ( (x) >> 5 ) * 39 ) + ( (x) & 0x0000001F ) )
37#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) ) 46#define udf_variable_to_fixed(x) ( ( ( (x) / 39 ) << 5 ) + ( (x) % 39 ) )
38 47
@@ -112,8 +121,6 @@ struct extent_position {
112 121
113/* super.c */ 122/* super.c */
114 123
115__attribute__((format(printf, 3, 4)))
116extern void udf_warning(struct super_block *, const char *, const char *, ...);
117static inline void udf_updated_lvid(struct super_block *sb) 124static inline void udf_updated_lvid(struct super_block *sb)
118{ 125{
119 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh; 126 struct buffer_head *bh = UDF_SB(sb)->s_lvid_bh;
@@ -126,6 +133,8 @@ static inline void udf_updated_lvid(struct super_block *sb)
126 UDF_SB(sb)->s_lvid_dirty = 1; 133 UDF_SB(sb)->s_lvid_dirty = 1;
127} 134}
128extern u64 lvid_get_unique_id(struct super_block *sb); 135extern u64 lvid_get_unique_id(struct super_block *sb);
136struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
137 u32 meta_file_loc, u32 partition_num);
129 138
130/* namei.c */ 139/* namei.c */
131extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, 140extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c
index b8c828c4d200..1f11483eba6a 100644
--- a/fs/udf/udftime.c
+++ b/fs/udf/udftime.c
@@ -34,9 +34,10 @@
34 * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm 34 * http://www.boulder.nist.gov/timefreq/pubs/bulletin/leapsecond.htm
35 */ 35 */
36 36
37#include "udfdecl.h"
38
37#include <linux/types.h> 39#include <linux/types.h>
38#include <linux/kernel.h> 40#include <linux/kernel.h>
39#include "udfdecl.h"
40 41
41#define EPOCH_YEAR 1970 42#define EPOCH_YEAR 1970
42 43
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index d03a90b6ad69..44b815e57f94 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -114,7 +114,7 @@ int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
114 cmp_id = ocu_i->u_cmpID; 114 cmp_id = ocu_i->u_cmpID;
115 if (cmp_id != 8 && cmp_id != 16) { 115 if (cmp_id != 8 && cmp_id != 16) {
116 memset(utf_o, 0, sizeof(struct ustr)); 116 memset(utf_o, 0, sizeof(struct ustr));
117 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", 117 pr_err("unknown compression code (%d) stri=%s\n",
118 cmp_id, ocu_i->u_name); 118 cmp_id, ocu_i->u_name);
119 return 0; 119 return 0;
120 } 120 }
@@ -242,7 +242,7 @@ try_again:
242 if (utf_cnt) { 242 if (utf_cnt) {
243error_out: 243error_out:
244 ocu[++u_len] = '?'; 244 ocu[++u_len] = '?';
245 printk(KERN_DEBUG "udf: bad UTF-8 character\n"); 245 printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
246 } 246 }
247 247
248 ocu[length - 1] = (uint8_t)u_len + 1; 248 ocu[length - 1] = (uint8_t)u_len + 1;
@@ -267,7 +267,7 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
267 cmp_id = ocu_i->u_cmpID; 267 cmp_id = ocu_i->u_cmpID;
268 if (cmp_id != 8 && cmp_id != 16) { 268 if (cmp_id != 8 && cmp_id != 16) {
269 memset(utf_o, 0, sizeof(struct ustr)); 269 memset(utf_o, 0, sizeof(struct ustr));
270 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", 270 pr_err("unknown compression code (%d) stri=%s\n",
271 cmp_id, ocu_i->u_name); 271 cmp_id, ocu_i->u_name);
272 return 0; 272 return 0;
273 } 273 }
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 2eabf04af3de..78a4c70d46b5 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -341,7 +341,7 @@ cg_found:
341 341
342fail_remove_inode: 342fail_remove_inode:
343 unlock_super(sb); 343 unlock_super(sb);
344 inode->i_nlink = 0; 344 clear_nlink(inode);
345 iput(inode); 345 iput(inode);
346 UFSD("EXIT (FAILED): err %d\n", err); 346 UFSD("EXIT (FAILED): err %d\n", err);
347 return ERR_PTR(err); 347 return ERR_PTR(err);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index b4d791a83207..879b13436fa4 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -589,7 +589,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
589 * Copy data to the in-core inode. 589 * Copy data to the in-core inode.
590 */ 590 */
591 inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode); 591 inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
592 inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink); 592 set_nlink(inode, fs16_to_cpu(sb, ufs_inode->ui_nlink));
593 if (inode->i_nlink == 0) { 593 if (inode->i_nlink == 0) {
594 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 594 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
595 return -1; 595 return -1;
@@ -637,7 +637,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
637 * Copy data to the in-core inode. 637 * Copy data to the in-core inode.
638 */ 638 */
639 inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode); 639 inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
640 inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink); 640 set_nlink(inode, fs16_to_cpu(sb, ufs2_inode->ui_nlink));
641 if (inode->i_nlink == 0) { 641 if (inode->i_nlink == 0) {
642 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino); 642 ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
643 return -1; 643 return -1;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 5be2755dd715..c26f2bcec264 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -117,9 +117,12 @@ extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buf
117extern const struct file_operations ufs_dir_operations; 117extern const struct file_operations ufs_dir_operations;
118 118
119/* super.c */ 119/* super.c */
120extern void ufs_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); 120extern __printf(3, 4)
121extern void ufs_error (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); 121void ufs_warning(struct super_block *, const char *, const char *, ...);
122extern void ufs_panic (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); 122extern __printf(3, 4)
123void ufs_error(struct super_block *, const char *, const char *, ...);
124extern __printf(3, 4)
125void ufs_panic(struct super_block *, const char *, const char *, ...);
123 126
124/* symlink.c */ 127/* symlink.c */
125extern const struct inode_operations ufs_fast_symlink_inode_operations; 128extern const struct inode_operations ufs_fast_symlink_inode_operations;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11b2aad982d4..33b13310ee0c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -902,11 +902,11 @@ xfs_vm_writepage(
902 * random callers for direct reclaim or memcg reclaim. We explicitly 902 * random callers for direct reclaim or memcg reclaim. We explicitly
903 * allow reclaim from kswapd as the stack usage there is relatively low. 903 * allow reclaim from kswapd as the stack usage there is relatively low.
904 * 904 *
905 * This should really be done by the core VM, but until that happens 905 * This should never happen except in the case of a VM regression so
906 * filesystems like XFS, btrfs and ext4 have to take care of this 906 * warn about it.
907 * by themselves.
908 */ 907 */
909 if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) 908 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
909 PF_MEMALLOC))
910 goto redirty; 910 goto redirty;
911 911
912 /* 912 /*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ba2a07b7343..23ce927973a4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1153,7 +1153,7 @@ xfs_setup_inode(
1153 hlist_add_fake(&inode->i_hash); 1153 hlist_add_fake(&inode->i_hash);
1154 1154
1155 inode->i_mode = ip->i_d.di_mode; 1155 inode->i_mode = ip->i_d.di_mode;
1156 inode->i_nlink = ip->i_d.di_nlink; 1156 set_nlink(inode, ip->i_d.di_nlink);
1157 inode->i_uid = ip->i_d.di_uid; 1157 inode->i_uid = ip->i_d.di_uid;
1158 inode->i_gid = ip->i_d.di_gid; 1158 inode->i_gid = ip->i_d.di_gid;
1159 1159
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 7fb7ea007672..56dc0c17f16a 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -3,31 +3,29 @@
3 3
4struct xfs_mount; 4struct xfs_mount;
5 5
6extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 6extern __printf(2, 3)
7 __attribute__ ((format (printf, 2, 3))); 7void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...);
8extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 8extern __printf(2, 3)
9 __attribute__ ((format (printf, 2, 3))); 9void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...);
10extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, 10extern __printf(3, 4)
11 const char *fmt, ...) 11void xfs_alert_tag(const struct xfs_mount *mp, int tag, const char *fmt, ...);
12 __attribute__ ((format (printf, 3, 4))); 12extern __printf(2, 3)
13extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) 13void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...);
14 __attribute__ ((format (printf, 2, 3))); 14extern __printf(2, 3)
15extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) 15void xfs_err(const struct xfs_mount *mp, const char *fmt, ...);
16 __attribute__ ((format (printf, 2, 3))); 16extern __printf(2, 3)
17extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) 17void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...);
18 __attribute__ ((format (printf, 2, 3))); 18extern __printf(2, 3)
19extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) 19void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...);
20 __attribute__ ((format (printf, 2, 3))); 20extern __printf(2, 3)
21extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) 21void xfs_info(const struct xfs_mount *mp, const char *fmt, ...);
22 __attribute__ ((format (printf, 2, 3)));
23 22
24#ifdef DEBUG 23#ifdef DEBUG
25extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 24extern __printf(2, 3)
26 __attribute__ ((format (printf, 2, 3))); 25void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...);
27#else 26#else
28static inline void 27static inline __printf(2, 3)
29__attribute__ ((format (printf, 2, 3))) 28void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
30xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
31{ 29{
32} 30}
33#endif 31#endif