aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/adfs/adfs.h2
-rw-r--r--fs/adfs/dir_f.c4
-rw-r--r--fs/affs/file.c25
-rw-r--r--fs/affs/super.c3
-rw-r--r--fs/afs/afs_cm.h3
-rw-r--r--fs/afs/cell.c2
-rw-r--r--fs/afs/cmservice.c133
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/afs/internal.h8
-rw-r--r--fs/afs/proc.c33
-rw-r--r--fs/aio.c91
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/expire.c26
-rw-r--r--fs/autofs4/root.c40
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/befs/linuxvfs.c6
-rw-r--r--fs/bfs/bfs.h2
-rw-r--r--fs/binfmt_aout.c12
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/binfmt_elf_fdpic.c14
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/binfmt_flat.c9
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/binfmt_script.c2
-rw-r--r--fs/bio.c90
-rw-r--r--fs/buffer.c44
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/cifs_debug.c4
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c6
-rw-r--r--fs/compat.c14
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/configfs/file.c2
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/devpts/inode.c43
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/dnotify.c11
-rw-r--r--fs/dquot.c109
-rw-r--r--fs/drop_caches.c14
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c33
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h102
-rw-r--r--fs/ecryptfs/file.c2
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/keystore.c89
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ecryptfs/messaging.c524
-rw-r--r--fs/ecryptfs/miscdev.c598
-rw-r--r--fs/ecryptfs/mmap.c18
-rw-r--r--fs/ecryptfs/netlink.c33
-rw-r--r--fs/ecryptfs/read_write.c16
-rw-r--r--fs/eventpoll.c34
-rw-r--r--fs/exec.c23
-rw-r--r--fs/exportfs/expfs.c10
-rw-r--r--fs/ext2/balloc.c29
-rw-r--r--fs/ext2/dir.c20
-rw-r--r--fs/ext2/ialloc.c12
-rw-r--r--fs/ext2/inode.c15
-rw-r--r--fs/ext2/super.c27
-rw-r--r--fs/ext2/xattr.c15
-rw-r--r--fs/ext2/xip.c53
-rw-r--r--fs/ext2/xip.h9
-rw-r--r--fs/ext3/balloc.c30
-rw-r--r--fs/ext3/ext3_jbd.c12
-rw-r--r--fs/ext3/fsync.c3
-rw-r--r--fs/ext3/ialloc.c6
-rw-r--r--fs/ext3/inode.c43
-rw-r--r--fs/ext3/namei.c37
-rw-r--r--fs/ext3/resize.c71
-rw-r--r--fs/ext3/super.c53
-rw-r--r--fs/ext3/xattr.c24
-rw-r--r--fs/ext3/xattr.h7
-rw-r--r--fs/ext4/acl.c12
-rw-r--r--fs/ext4/balloc.c33
-rw-r--r--fs/ext4/bitmap.c2
-rw-r--r--fs/ext4/dir.c4
-rw-r--r--fs/ext4/ext4.h1205
-rw-r--r--fs/ext4/ext4_extents.h232
-rw-r--r--fs/ext4/ext4_i.h167
-rw-r--r--fs/ext4/ext4_jbd2.c14
-rw-r--r--fs/ext4/ext4_jbd2.h231
-rw-r--r--fs/ext4/ext4_sb.h148
-rw-r--r--fs/ext4/extents.c354
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/ialloc.c44
-rw-r--r--fs/ext4/inode.c57
-rw-r--r--fs/ext4/ioctl.c16
-rw-r--r--fs/ext4/mballoc.c459
-rw-r--r--fs/ext4/mballoc.h304
-rw-r--r--fs/ext4/migrate.c43
-rw-r--r--fs/ext4/namei.c44
-rw-r--r--fs/ext4/resize.c83
-rw-r--r--fs/ext4/super.c81
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/ext4/xattr.h7
-rw-r--r--fs/ext4/xattr_security.c4
-rw-r--r--fs/ext4/xattr_trusted.c4
-rw-r--r--fs/ext4/xattr_user.c4
-rw-r--r--fs/fat/cache.c6
-rw-r--r--fs/fat/dir.c52
-rw-r--r--fs/fat/fatent.c11
-rw-r--r--fs/fat/file.c206
-rw-r--r--fs/fat/inode.c46
-rw-r--r--fs/freevxfs/vxfs_extern.h5
-rw-r--r--fs/freevxfs/vxfs_immed.c1
-rw-r--r--fs/freevxfs/vxfs_inode.c5
-rw-r--r--fs/fs-writeback.c78
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/dev.c23
-rw-r--r--fs/fuse/dir.c86
-rw-r--r--fs/fuse/file.c633
-rw-r--r--fs/fuse/fuse_i.h52
-rw-r--r--fs/fuse/inode.c90
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c2
-rw-r--r--fs/gfs2/ops_address.c2
-rw-r--r--fs/gfs2/util.h18
-rw-r--r--fs/hfs/btree.c10
-rw-r--r--fs/hfs/mdb.c2
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/btree.c10
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c3
-rw-r--r--fs/hfsplus/options.c3
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c6
-rw-r--r--fs/inotify_user.c2
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/dir.c8
-rw-r--r--fs/isofs/isofs.h12
-rw-r--r--fs/isofs/namei.c7
-rw-r--r--fs/jbd/commit.c19
-rw-r--r--fs/jbd/journal.c18
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jbd/transaction.c38
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c55
-rw-r--r--fs/jbd2/revoke.c165
-rw-r--r--fs/jbd2/transaction.c41
-rw-r--r--fs/jffs2/debug.h8
-rw-r--r--fs/jffs2/xattr.c4
-rw-r--r--fs/jfs/jfs_debug.c4
-rw-r--r--fs/lockd/clntproc.c2
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/msdos/namei.c4
-rw-r--r--fs/namei.c9
-rw-r--r--fs/namespace.c19
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/ncpfs/ioctl.c17
-rw-r--r--fs/ncpfs/ncplib_kernel.c39
-rw-r--r--fs/ncpfs/ncpsign_kernel.c2
-rw-r--r--fs/nfs/client.c20
-rw-r--r--fs/nfs/super.c26
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfsctl.c4
-rw-r--r--fs/ntfs/debug.h6
-rw-r--r--fs/ntfs/mft.c6
-rw-r--r--fs/ocfs2/dlm/dlmfs.c2
-rw-r--r--fs/open.c2
-rw-r--r--fs/partitions/ldm.c8
-rw-r--r--fs/partitions/msdos.c20
-rw-r--r--fs/proc/array.c6
-rw-r--r--fs/proc/base.c113
-rw-r--r--fs/proc/generic.c132
-rw-r--r--fs/proc/inode.c69
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/proc_misc.c85
-rw-r--r--fs/proc/proc_sysctl.c52
-rw-r--r--fs/proc/proc_tty.c87
-rw-r--r--fs/proc/root.c14
-rw-r--r--fs/proc/task_mmu.c52
-rw-r--r--fs/proc/task_nommu.c34
-rw-r--r--fs/quota.c5
-rw-r--r--fs/quota_v1.c3
-rw-r--r--fs/quota_v2.c7
-rw-r--r--fs/ramfs/file-mmu.c3
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/ramfs/internal.h1
-rw-r--r--fs/reiserfs/bitmap.c8
-rw-r--r--fs/reiserfs/do_balan.c14
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/journal.c60
-rw-r--r--fs/reiserfs/namei.c8
-rw-r--r--fs/reiserfs/objectid.c7
-rw-r--r--fs/reiserfs/procfs.c9
-rw-r--r--fs/reiserfs/stree.c3
-rw-r--r--fs/reiserfs/super.c38
-rw-r--r--fs/select.c12
-rw-r--r--fs/smbfs/smb_debug.h6
-rw-r--r--fs/splice.c2
-rw-r--r--fs/super.c12
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/file.c16
-rw-r--r--fs/sysfs/group.c83
-rw-r--r--fs/sysfs/inode.c4
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/sysv.h8
-rw-r--r--fs/timerfd.c1
-rw-r--r--fs/udf/namei.c8
-rw-r--r--fs/udf/super.c4
-rw-r--r--fs/ufs/balloc.c4
-rw-r--r--fs/ufs/dir.c14
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/ufs/swab.h36
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/utimes.c17
-rw-r--r--fs/vfat/namei.c39
-rw-r--r--fs/xattr.c41
-rw-r--r--fs/xfs/Kconfig13
-rw-r--r--fs/xfs/linux-2.6/mrlock.h60
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c75
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h24
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c27
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h5
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/support/debug.h2
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c53
-rw-r--r--fs/xfs/xfs_attr.c93
-rw-r--r--fs/xfs/xfs_attr.h6
-rw-r--r--fs/xfs/xfs_bmap.c1
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/xfs_iget.c140
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c83
-rw-r--r--fs/xfs/xfs_mount.h7
-rw-r--r--fs/xfs/xfs_rename.c252
-rw-r--r--fs/xfs/xfs_trans_inode.c12
-rw-r--r--fs/xfs/xfs_utils.c45
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vfsops.c1
-rw-r--r--fs/xfs/xfs_vnodeops.c274
-rw-r--r--fs/xfs/xfs_vnodeops.h8
260 files changed, 7284 insertions, 3558 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2e43d46f65d6..cf12c403b8c7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
1005 1005
1006config HUGETLBFS 1006config HUGETLBFS
1007 bool "HugeTLB file system support" 1007 bool "HugeTLB file system support"
1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN 1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
1009 (S390 && 64BIT) || BROKEN
1009 help 1010 help
1010 hugetlbfs is a filesystem backing for HugeTLB pages, based on 1011 hugetlbfs is a filesystem backing for HugeTLB pages, based on
1011 ramfs. For architectures that support it, say Y here and read 1012 ramfs. For architectures that support it, say Y here and read
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 853845abcca6..55e8ee1900a5 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config BINFMT_FLAT 43config BINFMT_FLAT
44 tristate "Kernel support for flat binaries" 44 bool "Kernel support for flat binaries"
45 depends on !MMU 45 depends on !MMU
46 help 46 help
47 Support uClinux FLAT format binaries. 47 Support uClinux FLAT format binaries.
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 936f2af39c43..831157502d5a 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb);
75/* Misc */ 75/* Misc */
76void __adfs_error(struct super_block *sb, const char *function, 76void __adfs_error(struct super_block *sb, const char *function,
77 const char *fmt, ...); 77 const char *fmt, ...);
78#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) 78#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
79 79
80/* super.c */ 80/* super.c */
81 81
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index b9b2b27b68c3..ea7df2146921 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir)
122 ptr.ptr8 = bufoff(bh, i); 122 ptr.ptr8 = bufoff(bh, i);
123 end.ptr8 = ptr.ptr8 + last - i; 123 end.ptr8 = ptr.ptr8 + last - i;
124 124
125 do 125 do {
126 dircheck = *ptr.ptr8++ ^ ror13(dircheck); 126 dircheck = *ptr.ptr8++ ^ ror13(dircheck);
127 while (ptr.ptr8 < end.ptr8); 127 } while (ptr.ptr8 < end.ptr8);
128 } 128 }
129 129
130 /* 130 /*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6e0c9399200e..1a4f092f24ef 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -325,8 +325,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); 325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
326 326
327 327
328 if (block > (sector_t)0x7fffffffUL) 328 BUG_ON(block > (sector_t)0x7fffffffUL);
329 BUG();
330 329
331 if (block >= AFFS_I(inode)->i_blkcnt) { 330 if (block >= AFFS_I(inode)->i_blkcnt) {
332 if (block > AFFS_I(inode)->i_blkcnt || !create) 331 if (block > AFFS_I(inode)->i_blkcnt || !create)
@@ -493,8 +492,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
493 u32 tmp; 492 u32 tmp;
494 493
495 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 494 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
496 if (from > to || to > PAGE_CACHE_SIZE) 495 BUG_ON(from > to || to > PAGE_CACHE_SIZE);
497 BUG();
498 kmap(page); 496 kmap(page);
499 data = page_address(page); 497 data = page_address(page);
500 bsize = AFFS_SB(sb)->s_data_blksize; 498 bsize = AFFS_SB(sb)->s_data_blksize;
@@ -507,8 +505,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
507 if (IS_ERR(bh)) 505 if (IS_ERR(bh))
508 return PTR_ERR(bh); 506 return PTR_ERR(bh);
509 tmp = min(bsize - boff, to - from); 507 tmp = min(bsize - boff, to - from);
510 if (from + tmp > to || tmp > bsize) 508 BUG_ON(from + tmp > to || tmp > bsize);
511 BUG();
512 memcpy(data + from, AFFS_DATA(bh) + boff, tmp); 509 memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
513 affs_brelse(bh); 510 affs_brelse(bh);
514 bidx++; 511 bidx++;
@@ -540,10 +537,9 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
540 if (IS_ERR(bh)) 537 if (IS_ERR(bh))
541 return PTR_ERR(bh); 538 return PTR_ERR(bh);
542 tmp = min(bsize - boff, newsize - size); 539 tmp = min(bsize - boff, newsize - size);
543 if (boff + tmp > bsize || tmp > bsize) 540 BUG_ON(boff + tmp > bsize || tmp > bsize);
544 BUG();
545 memset(AFFS_DATA(bh) + boff, 0, tmp); 541 memset(AFFS_DATA(bh) + boff, 0, tmp);
546 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 542 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
547 affs_fix_checksum(sb, bh); 543 affs_fix_checksum(sb, bh);
548 mark_buffer_dirty_inode(bh, inode); 544 mark_buffer_dirty_inode(bh, inode);
549 size += tmp; 545 size += tmp;
@@ -560,8 +556,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
560 if (IS_ERR(bh)) 556 if (IS_ERR(bh))
561 goto out; 557 goto out;
562 tmp = min(bsize, newsize - size); 558 tmp = min(bsize, newsize - size);
563 if (tmp > bsize) 559 BUG_ON(tmp > bsize);
564 BUG();
565 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 560 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
566 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); 561 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
567 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); 562 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
@@ -683,10 +678,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
683 if (IS_ERR(bh)) 678 if (IS_ERR(bh))
684 return PTR_ERR(bh); 679 return PTR_ERR(bh);
685 tmp = min(bsize - boff, to - from); 680 tmp = min(bsize - boff, to - from);
686 if (boff + tmp > bsize || tmp > bsize) 681 BUG_ON(boff + tmp > bsize || tmp > bsize);
687 BUG();
688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 682 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
689 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 683 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
690 affs_fix_checksum(sb, bh); 684 affs_fix_checksum(sb, bh);
691 mark_buffer_dirty_inode(bh, inode); 685 mark_buffer_dirty_inode(bh, inode);
692 written += tmp; 686 written += tmp;
@@ -732,8 +726,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
732 if (IS_ERR(bh)) 726 if (IS_ERR(bh))
733 goto out; 727 goto out;
734 tmp = min(bsize, to - from); 728 tmp = min(bsize, to - from);
735 if (tmp > bsize) 729 BUG_ON(tmp > bsize);
736 BUG();
737 memcpy(AFFS_DATA(bh), data + from, tmp); 730 memcpy(AFFS_DATA(bh), data + from, tmp);
738 if (buffer_new(bh)) { 731 if (buffer_new(bh)) {
739 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 732 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d2dc047cb479..01d25d532541 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -199,7 +199,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
199 case Opt_prefix: 199 case Opt_prefix:
200 /* Free any previous prefix */ 200 /* Free any previous prefix */
201 kfree(*prefix); 201 kfree(*prefix);
202 *prefix = NULL;
203 *prefix = match_strdup(&args[0]); 202 *prefix = match_strdup(&args[0]);
204 if (!*prefix) 203 if (!*prefix)
205 return 0; 204 return 0;
@@ -233,6 +232,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 232 break;
234 case Opt_volume: { 233 case Opt_volume: {
235 char *vol = match_strdup(&args[0]); 234 char *vol = match_strdup(&args[0]);
235 if (!vol)
236 return 0;
236 strlcpy(volume, vol, 32); 237 strlcpy(volume, vol, 32);
237 kfree(vol); 238 kfree(vol);
238 break; 239 break;
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 7b4d4fab4c80..255f5dd6040c 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -24,7 +24,8 @@ enum AFS_CM_Operations {
24 CBGetXStatsVersion = 209, /* get version of extended statistics */ 24 CBGetXStatsVersion = 209, /* get version of extended statistics */
25 CBGetXStats = 210, /* get contents of extended statistics data */ 25 CBGetXStats = 210, /* get contents of extended statistics data */
26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ 26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
27 CBGetCapabilities = 65538, /* get client capabilities */ 27 CBProbeUuid = 214, /* check the client hasn't rebooted */
28 CBTellMeAboutYourself = 65538, /* get client capabilities */
28}; 29};
29 30
30#define AFS_CAP_ERROR_TRANSLATION 0x1 31#define AFS_CAP_ERROR_TRANSLATION 0x1
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 584bb0f9c36a..5e1df14e16b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,7 +20,7 @@
20DECLARE_RWSEM(afs_proc_cells_sem); 20DECLARE_RWSEM(afs_proc_cells_sem);
21LIST_HEAD(afs_proc_cells); 21LIST_HEAD(afs_proc_cells);
22 22
23static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); 23static LIST_HEAD(afs_cells);
24static DEFINE_RWLOCK(afs_cells_lock); 24static DEFINE_RWLOCK(afs_cells_lock);
25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); 26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 47b71c8947f9..eb765489164f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
26 struct sk_buff *, bool); 26 struct sk_buff *, bool);
27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); 27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); 28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
29static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, 29static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
30 bool); 30static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
31 struct sk_buff *, bool);
31static void afs_cm_destructor(struct afs_call *); 32static void afs_cm_destructor(struct afs_call *);
32 33
33/* 34/*
@@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = {
71}; 72};
72 73
73/* 74/*
74 * CB.GetCapabilities operation type 75 * CB.ProbeUuid operation type
75 */ 76 */
76static const struct afs_call_type afs_SRXCBGetCapabilites = { 77static const struct afs_call_type afs_SRXCBProbeUuid = {
77 .name = "CB.GetCapabilities", 78 .name = "CB.ProbeUuid",
78 .deliver = afs_deliver_cb_get_capabilities, 79 .deliver = afs_deliver_cb_probe_uuid,
80 .abort_to_error = afs_abort_to_error,
81 .destructor = afs_cm_destructor,
82};
83
84/*
85 * CB.TellMeAboutYourself operation type
86 */
87static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
88 .name = "CB.TellMeAboutYourself",
89 .deliver = afs_deliver_cb_tell_me_about_yourself,
79 .abort_to_error = afs_abort_to_error, 90 .abort_to_error = afs_abort_to_error,
80 .destructor = afs_cm_destructor, 91 .destructor = afs_cm_destructor,
81}; 92};
@@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
103 case CBProbe: 114 case CBProbe:
104 call->type = &afs_SRXCBProbe; 115 call->type = &afs_SRXCBProbe;
105 return true; 116 return true;
106 case CBGetCapabilities: 117 case CBTellMeAboutYourself:
107 call->type = &afs_SRXCBGetCapabilites; 118 call->type = &afs_SRXCBTellMeAboutYourself;
108 return true; 119 return true;
109 default: 120 default:
110 return false; 121 return false;
@@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
393} 404}
394 405
395/* 406/*
407 * allow the fileserver to quickly find out if the fileserver has been rebooted
408 */
409static void SRXAFSCB_ProbeUuid(struct work_struct *work)
410{
411 struct afs_call *call = container_of(work, struct afs_call, work);
412 struct afs_uuid *r = call->request;
413
414 struct {
415 __be32 match;
416 } reply;
417
418 _enter("");
419
420
421 if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
422 reply.match = htonl(0);
423 else
424 reply.match = htonl(1);
425
426 afs_send_simple_reply(call, &reply, sizeof(reply));
427 _leave("");
428}
429
430/*
431 * deliver request data to a CB.ProbeUuid call
432 */
433static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
434 bool last)
435{
436 struct afs_uuid *r;
437 unsigned loop;
438 __be32 *b;
439 int ret;
440
441 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
442
443 if (skb->len > 0)
444 return -EBADMSG;
445 if (!last)
446 return 0;
447
448 switch (call->unmarshall) {
449 case 0:
450 call->offset = 0;
451 call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
452 if (!call->buffer)
453 return -ENOMEM;
454 call->unmarshall++;
455
456 case 1:
457 _debug("extract UUID");
458 ret = afs_extract_data(call, skb, last, call->buffer,
459 11 * sizeof(__be32));
460 switch (ret) {
461 case 0: break;
462 case -EAGAIN: return 0;
463 default: return ret;
464 }
465
466 _debug("unmarshall UUID");
467 call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
468 if (!call->request)
469 return -ENOMEM;
470
471 b = call->buffer;
472 r = call->request;
473 r->time_low = ntohl(b[0]);
474 r->time_mid = ntohl(b[1]);
475 r->time_hi_and_version = ntohl(b[2]);
476 r->clock_seq_hi_and_reserved = ntohl(b[3]);
477 r->clock_seq_low = ntohl(b[4]);
478
479 for (loop = 0; loop < 6; loop++)
480 r->node[loop] = ntohl(b[loop + 5]);
481
482 call->offset = 0;
483 call->unmarshall++;
484
485 case 2:
486 _debug("trailer");
487 if (skb->len != 0)
488 return -EBADMSG;
489 break;
490 }
491
492 if (!last)
493 return 0;
494
495 call->state = AFS_CALL_REPLYING;
496
497 INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
498 schedule_work(&call->work);
499 return 0;
500}
501
502/*
396 * allow the fileserver to ask about the cache manager's capabilities 503 * allow the fileserver to ask about the cache manager's capabilities
397 */ 504 */
398static void SRXAFSCB_GetCapabilities(struct work_struct *work) 505static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
399{ 506{
400 struct afs_interface *ifs; 507 struct afs_interface *ifs;
401 struct afs_call *call = container_of(work, struct afs_call, work); 508 struct afs_call *call = container_of(work, struct afs_call, work);
@@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work)
456} 563}
457 564
458/* 565/*
459 * deliver request data to a CB.GetCapabilities call 566 * deliver request data to a CB.TellMeAboutYourself call
460 */ 567 */
461static int afs_deliver_cb_get_capabilities(struct afs_call *call, 568static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
462 struct sk_buff *skb, bool last) 569 struct sk_buff *skb, bool last)
463{ 570{
464 _enter(",{%u},%d", skb->len, last); 571 _enter(",{%u},%d", skb->len, last);
465 572
@@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call,
471 /* no unmarshalling required */ 578 /* no unmarshalling required */
472 call->state = AFS_CALL_REPLYING; 579 call->state = AFS_CALL_REPLYING;
473 580
474 INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); 581 INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
475 schedule_work(&call->work); 582 schedule_work(&call->work);
476 return 0; 583 return 0;
477} 584}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b58af8f18bc4..dfda03d4397d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
140 140
141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { 141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", 142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
143 __FUNCTION__, dir->i_ino, qty, 143 __func__, dir->i_ino, qty,
144 ntohs(dbuf->blocks[0].pagehdr.npages)); 144 ntohs(dbuf->blocks[0].pagehdr.npages));
145 goto error; 145 goto error;
146 } 146 }
@@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
159 for (tmp = 0; tmp < qty; tmp++) { 159 for (tmp = 0; tmp < qty; tmp++) {
160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { 160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", 161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
162 __FUNCTION__, dir->i_ino, tmp, qty, 162 __func__, dir->i_ino, tmp, qty,
163 ntohs(dbuf->blocks[tmp].pagehdr.magic)); 163 ntohs(dbuf->blocks[tmp].pagehdr.magic));
164 goto error; 164 goto error;
165 } 165 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index eec41c76de72..7102824ba847 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...)
757{ 757{
758} 758}
759 759
760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) 762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
763 763
764 764
@@ -791,8 +791,8 @@ do { \
791} while (0) 791} while (0)
792 792
793#else 793#else
794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) 796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
797#endif 797#endif
798 798
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 846c7615ac9e..9f7d1ae70269 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = {
41 .write = afs_proc_cells_write, 41 .write = afs_proc_cells_write,
42 .llseek = seq_lseek, 42 .llseek = seq_lseek,
43 .release = seq_release, 43 .release = seq_release,
44 .owner = THIS_MODULE,
44}; 45};
45 46
46static int afs_proc_rootcell_open(struct inode *inode, struct file *file); 47static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
@@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = {
56 .read = afs_proc_rootcell_read, 57 .read = afs_proc_rootcell_read,
57 .write = afs_proc_rootcell_write, 58 .write = afs_proc_rootcell_write,
58 .llseek = no_llseek, 59 .llseek = no_llseek,
59 .release = afs_proc_rootcell_release 60 .release = afs_proc_rootcell_release,
61 .owner = THIS_MODULE,
60}; 62};
61 63
62static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); 64static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
@@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = {
80 .read = seq_read, 82 .read = seq_read,
81 .llseek = seq_lseek, 83 .llseek = seq_lseek,
82 .release = afs_proc_cell_volumes_release, 84 .release = afs_proc_cell_volumes_release,
85 .owner = THIS_MODULE,
83}; 86};
84 87
85static int afs_proc_cell_vlservers_open(struct inode *inode, 88static int afs_proc_cell_vlservers_open(struct inode *inode,
@@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
104 .read = seq_read, 107 .read = seq_read,
105 .llseek = seq_lseek, 108 .llseek = seq_lseek,
106 .release = afs_proc_cell_vlservers_release, 109 .release = afs_proc_cell_vlservers_release,
110 .owner = THIS_MODULE,
107}; 111};
108 112
109static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); 113static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
@@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = {
127 .read = seq_read, 131 .read = seq_read,
128 .llseek = seq_lseek, 132 .llseek = seq_lseek,
129 .release = afs_proc_cell_servers_release, 133 .release = afs_proc_cell_servers_release,
134 .owner = THIS_MODULE,
130}; 135};
131 136
132/* 137/*
@@ -143,17 +148,13 @@ int afs_proc_init(void)
143 goto error_dir; 148 goto error_dir;
144 proc_afs->owner = THIS_MODULE; 149 proc_afs->owner = THIS_MODULE;
145 150
146 p = create_proc_entry("cells", 0, proc_afs); 151 p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
147 if (!p) 152 if (!p)
148 goto error_cells; 153 goto error_cells;
149 p->proc_fops = &afs_proc_cells_fops;
150 p->owner = THIS_MODULE;
151 154
152 p = create_proc_entry("rootcell", 0, proc_afs); 155 p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
153 if (!p) 156 if (!p)
154 goto error_rootcell; 157 goto error_rootcell;
155 p->proc_fops = &afs_proc_rootcell_fops;
156 p->owner = THIS_MODULE;
157 158
158 _leave(" = 0"); 159 _leave(" = 0");
159 return 0; 160 return 0;
@@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell)
395 if (!cell->proc_dir) 396 if (!cell->proc_dir)
396 goto error_dir; 397 goto error_dir;
397 398
398 p = create_proc_entry("servers", 0, cell->proc_dir); 399 p = proc_create_data("servers", 0, cell->proc_dir,
400 &afs_proc_cell_servers_fops, cell);
399 if (!p) 401 if (!p)
400 goto error_servers; 402 goto error_servers;
401 p->proc_fops = &afs_proc_cell_servers_fops;
402 p->owner = THIS_MODULE;
403 p->data = cell;
404 403
405 p = create_proc_entry("vlservers", 0, cell->proc_dir); 404 p = proc_create_data("vlservers", 0, cell->proc_dir,
405 &afs_proc_cell_vlservers_fops, cell);
406 if (!p) 406 if (!p)
407 goto error_vlservers; 407 goto error_vlservers;
408 p->proc_fops = &afs_proc_cell_vlservers_fops;
409 p->owner = THIS_MODULE;
410 p->data = cell;
411 408
412 p = create_proc_entry("volumes", 0, cell->proc_dir); 409 p = proc_create_data("volumes", 0, cell->proc_dir,
410 &afs_proc_cell_volumes_fops, cell);
413 if (!p) 411 if (!p)
414 goto error_volumes; 412 goto error_volumes;
415 p->proc_fops = &afs_proc_cell_volumes_fops;
416 p->owner = THIS_MODULE;
417 p->data = cell;
418 413
419 _leave(" = 0"); 414 _leave(" = 0");
420 return 0; 415 return 0;
diff --git a/fs/aio.c b/fs/aio.c
index 228368610dfa..b5253e77eb2f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194
195/* __put_ioctx
196 * Called when the last user of an aio context has gone away,
197 * and the struct needs to be freed.
198 */
199static void __put_ioctx(struct kioctx *ctx)
200{
201 unsigned nr_events = ctx->max_reqs;
202
203 BUG_ON(ctx->reqs_active);
204
205 cancel_delayed_work(&ctx->wq);
206 cancel_work_sync(&ctx->wq.work);
207 aio_free_ring(ctx);
208 mmdrop(ctx->mm);
209 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx);
212
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219}
220
221#define get_ioctx(kioctx) do { \
222 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
223 atomic_inc(&(kioctx)->users); \
224} while (0)
225#define put_ioctx(kioctx) do { \
226 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
227 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
228 __put_ioctx(kioctx); \
229} while (0)
230
194/* ioctx_alloc 231/* ioctx_alloc
195 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 232 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
196 */ 233 */
@@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
240 if (ctx->max_reqs == 0) 277 if (ctx->max_reqs == 0)
241 goto out_cleanup; 278 goto out_cleanup;
242 279
243 /* now link into global list. kludge. FIXME */ 280 /* now link into global list. */
244 write_lock(&mm->ioctx_list_lock); 281 write_lock(&mm->ioctx_list_lock);
245 ctx->next = mm->ioctx_list; 282 ctx->next = mm->ioctx_list;
246 mm->ioctx_list = ctx; 283 mm->ioctx_list = ctx;
@@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm)
361 } 398 }
362} 399}
363 400
364/* __put_ioctx
365 * Called when the last user of an aio context has gone away,
366 * and the struct needs to be freed.
367 */
368void __put_ioctx(struct kioctx *ctx)
369{
370 unsigned nr_events = ctx->max_reqs;
371
372 BUG_ON(ctx->reqs_active);
373
374 cancel_delayed_work(&ctx->wq);
375 cancel_work_sync(&ctx->wq.work);
376 aio_free_ring(ctx);
377 mmdrop(ctx->mm);
378 ctx->mm = NULL;
379 pr_debug("__put_ioctx: freeing %p\n", ctx);
380 kmem_cache_free(kioctx_cachep, ctx);
381
382 if (nr_events) {
383 spin_lock(&aio_nr_lock);
384 BUG_ON(aio_nr - nr_events > aio_nr);
385 aio_nr -= nr_events;
386 spin_unlock(&aio_nr_lock);
387 }
388}
389
390/* aio_get_req 401/* aio_get_req
391 * Allocate a slot for an aio request. Increments the users count 402 * Allocate a slot for an aio request. Increments the users count
392 * of the kioctx so that the kioctx stays around until all requests are 403 * of the kioctx so that the kioctx stays around until all requests are
@@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req)
542 return ret; 553 return ret;
543} 554}
544 555
545/* Lookup an ioctx id. ioctx_list is lockless for reads. 556static struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 * FIXME: this is O(n) and is only suitable for development.
547 */
548struct kioctx *lookup_ioctx(unsigned long ctx_id)
549{ 557{
550 struct kioctx *ioctx; 558 struct kioctx *ioctx;
551 struct mm_struct *mm; 559 struct mm_struct *mm;
@@ -1070,9 +1078,7 @@ static void timeout_func(unsigned long data)
1070 1078
1071static inline void init_timeout(struct aio_timeout *to) 1079static inline void init_timeout(struct aio_timeout *to)
1072{ 1080{
1073 init_timer(&to->timer); 1081 setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
1074 to->timer.data = (unsigned long)to;
1075 to->timer.function = timeout_func;
1076 to->timed_out = 0; 1082 to->timed_out = 0;
1077 to->p = current; 1083 to->p = current;
1078} 1084}
@@ -1166,7 +1172,10 @@ retry:
1166 break; 1172 break;
1167 if (min_nr <= i) 1173 if (min_nr <= i)
1168 break; 1174 break;
1169 ret = 0; 1175 if (unlikely(ctx->dead)) {
1176 ret = -EINVAL;
1177 break;
1178 }
1170 if (to.timed_out) /* Only check after read evt */ 1179 if (to.timed_out) /* Only check after read evt */
1171 break; 1180 break;
1172 /* Try to only show up in io wait if there are ops 1181 /* Try to only show up in io wait if there are ops
@@ -1202,6 +1211,7 @@ retry:
1202 if (timeout) 1211 if (timeout)
1203 clear_timeout(&to); 1212 clear_timeout(&to);
1204out: 1213out:
1214 destroy_timer_on_stack(&to.timer);
1205 return i ? i : ret; 1215 return i ? i : ret;
1206} 1216}
1207 1217
@@ -1231,6 +1241,13 @@ static void io_destroy(struct kioctx *ioctx)
1231 1241
1232 aio_cancel_all(ioctx); 1242 aio_cancel_all(ioctx);
1233 wait_for_all_aios(ioctx); 1243 wait_for_all_aios(ioctx);
1244
1245 /*
1246 * Wake up any waiters. The setting of ctx->dead must be seen
1247 * by other CPUs at this point. Right now, we rely on the
1248 * locking done by the above calls to ensure this consistency.
1249 */
1250 wake_up(&ioctx->wait);
1234 put_ioctx(ioctx); /* once for the lookup */ 1251 put_ioctx(ioctx); /* once for the lookup */
1235} 1252}
1236 1253
@@ -1542,7 +1559,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1542 return 1; 1559 return 1;
1543} 1560}
1544 1561
1545int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1562static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1546 struct iocb *iocb) 1563 struct iocb *iocb)
1547{ 1564{
1548 struct kiocb *req; 1565 struct kiocb *req;
@@ -1583,7 +1600,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1583 * event using the eventfd_signal() function. 1600 * event using the eventfd_signal() function.
1584 */ 1601 */
1585 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1602 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1586 if (unlikely(IS_ERR(req->ki_eventfd))) { 1603 if (IS_ERR(req->ki_eventfd)) {
1587 ret = PTR_ERR(req->ki_eventfd); 1604 ret = PTR_ERR(req->ki_eventfd);
1588 goto out_put_req; 1605 goto out_put_req;
1589 } 1606 }
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2d4ae40718d9..c3d352d7fa93 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -35,7 +35,7 @@
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37#ifdef DEBUG 37#ifdef DEBUG
38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) 38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
39#else 39#else
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d96e5c14a9ca..894fee54d4d8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
73 status = 0; 73 status = 0;
74done: 74done:
75 DPRINTK("returning = %d", status); 75 DPRINTK("returning = %d", status);
76 mntput(mnt);
77 dput(dentry); 76 dput(dentry);
77 mntput(mnt);
78 return status; 78 return status;
79} 79}
80 80
@@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
333 /* Can we expire this guy */ 333 /* Can we expire this guy */
334 if (autofs4_can_expire(dentry, timeout, do_now)) { 334 if (autofs4_can_expire(dentry, timeout, do_now)) {
335 expired = dentry; 335 expired = dentry;
336 break; 336 goto found;
337 } 337 }
338 goto next; 338 goto next;
339 } 339 }
@@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
352 inf->flags |= AUTOFS_INF_EXPIRING; 352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock); 353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 354 expired = dentry;
355 break; 355 goto found;
356 } 356 }
357 spin_unlock(&sbi->fs_lock); 357 spin_unlock(&sbi->fs_lock);
358 /* 358 /*
@@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 364 if (expired) {
365 dput(dentry); 365 dput(dentry);
366 break; 366 goto found;
367 } 367 }
368 } 368 }
369next: 369next:
@@ -371,18 +371,16 @@ next:
371 spin_lock(&dcache_lock); 371 spin_lock(&dcache_lock);
372 next = next->next; 372 next = next->next;
373 } 373 }
374
375 if (expired) {
376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock);
379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 spin_unlock(&dcache_lock);
381 return expired;
382 }
383 spin_unlock(&dcache_lock); 374 spin_unlock(&dcache_lock);
384
385 return NULL; 375 return NULL;
376
377found:
378 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name);
380 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock);
383 return expired;
386} 384}
387 385
388/* Perform an expiry operation */ 386/* Perform an expiry operation */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a54a946a50ae..edf5b6bddb52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
146 146
147 if (d_mountpoint(dentry)) { 147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL; 148 struct file *fp = NULL;
149 struct vfsmount *fp_mnt = mntget(mnt); 149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150 struct dentry *fp_dentry = dget(dentry);
151 150
152 if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { 151 path_get(&fp_path);
153 dput(fp_dentry); 152
154 mntput(fp_mnt); 153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file); 155 dcache_dir_close(inode, file);
156 goto out; 156 goto out;
157 } 157 }
158 158
159 fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); 159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp); 160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) { 161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file); 162 dcache_dir_close(inode, file);
@@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 242{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 244 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 int status = 0; 245 struct dentry *new;
246 int status;
246 247
247 /* Block on any pending expiry here; invalidate the dentry 248 /* Block on any pending expiry here; invalidate the dentry
248 when expiration is done to trigger mount request with a new 249 when expiration is done to trigger mount request with a new
@@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
318 spin_lock(&dentry->d_lock); 319 spin_lock(&dentry->d_lock);
319 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
320 spin_unlock(&dentry->d_lock); 321 spin_unlock(&dentry->d_lock);
321 return status; 322
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0;
322} 344}
323 345
324/* For autofs direct mounts the follow link triggers the mount */ 346/* For autofs direct mounts the follow link triggers the mount */
@@ -533,9 +555,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
533 goto next; 555 goto next;
534 556
535 if (d_unhashed(dentry)) { 557 if (d_unhashed(dentry)) {
536 struct autofs_info *ino = autofs4_dentry_ino(dentry);
537 struct inode *inode = dentry->d_inode; 558 struct inode *inode = dentry->d_inode;
538 559
560 ino = autofs4_dentry_ino(dentry);
539 list_del_init(&ino->rehash); 561 list_del_init(&ino->rehash);
540 dget(dentry); 562 dget(dentry);
541 /* 563 /*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1fe28e4754c2..75e5955c3f6d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
172 len += tmp->d_name.len + 1; 172 len += tmp->d_name.len + 1;
173 173
174 if (--len > NAME_MAX) { 174 if (!len || --len > NAME_MAX) {
175 spin_unlock(&dcache_lock); 175 spin_unlock(&dcache_lock);
176 return 0; 176 return 0;
177 } 177 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 82123ff3e1dd..e8717de3bab3 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
489{ 489{
490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
492 char *p = nd_get_link(nd); 492 char *link = nd_get_link(nd);
493 if (!IS_ERR(p)) 493 if (!IS_ERR(link))
494 kfree(p); 494 kfree(link);
495 } 495 }
496} 496}
497 497
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 71faf4d23908..70f5d3a8eede 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
42 42
43 43
44#define printf(format, args...) \ 44#define printf(format, args...) \
45 printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) 45 printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a1bb2244cac7..ba4cddb92f1d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
372 372
373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); 373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
374 } else { 374 } else {
375 static unsigned long error_time, error_time2;
376 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && 375 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
377 (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) 376 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
378 { 377 {
379 printk(KERN_NOTICE "executable not page aligned\n"); 378 printk(KERN_NOTICE "executable not page aligned\n");
380 error_time2 = jiffies;
381 } 379 }
382 380
383 if ((fd_offset & ~PAGE_MASK) != 0 && 381 if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
384 (jiffies-error_time) > 5*HZ)
385 { 382 {
386 printk(KERN_WARNING 383 printk(KERN_WARNING
387 "fd_offset is not page aligned. Please convert program: %s\n", 384 "fd_offset is not page aligned. Please convert program: %s\n",
388 bprm->file->f_path.dentry->d_name.name); 385 bprm->file->f_path.dentry->d_name.name);
389 error_time = jiffies;
390 } 386 }
391 387
392 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 388 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -495,15 +491,13 @@ static int load_aout_library(struct file *file)
495 start_addr = ex.a_entry & 0xfffff000; 491 start_addr = ex.a_entry & 0xfffff000;
496 492
497 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { 493 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
498 static unsigned long error_time;
499 loff_t pos = N_TXTOFF(ex); 494 loff_t pos = N_TXTOFF(ex);
500 495
501 if ((jiffies-error_time) > 5*HZ) 496 if (printk_ratelimit())
502 { 497 {
503 printk(KERN_WARNING 498 printk(KERN_WARNING
504 "N_TXTOFF is not page aligned. Please convert library: %s\n", 499 "N_TXTOFF is not page aligned. Please convert library: %s\n",
505 file->f_path.dentry->d_name.name); 500 file->f_path.dentry->d_name.name);
506 error_time = jiffies;
507 } 501 }
508 down_write(&current->mm->mmap_sem); 502 down_write(&current->mm->mmap_sem);
509 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 503 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9924581df6f6..b25707fee2cc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file,
1255static void fill_elf_header(struct elfhdr *elf, int segs, 1255static void fill_elf_header(struct elfhdr *elf, int segs,
1256 u16 machine, u32 flags, u8 osabi) 1256 u16 machine, u32 flags, u8 osabi)
1257{ 1257{
1258 memset(elf, 0, sizeof(*elf));
1259
1258 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1260 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1259 elf->e_ident[EI_CLASS] = ELF_CLASS; 1261 elf->e_ident[EI_CLASS] = ELF_CLASS;
1260 elf->e_ident[EI_DATA] = ELF_DATA; 1262 elf->e_ident[EI_DATA] = ELF_DATA;
1261 elf->e_ident[EI_VERSION] = EV_CURRENT; 1263 elf->e_ident[EI_VERSION] = EV_CURRENT;
1262 elf->e_ident[EI_OSABI] = ELF_OSABI; 1264 elf->e_ident[EI_OSABI] = ELF_OSABI;
1263 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1264 1265
1265 elf->e_type = ET_CORE; 1266 elf->e_type = ET_CORE;
1266 elf->e_machine = machine; 1267 elf->e_machine = machine;
1267 elf->e_version = EV_CURRENT; 1268 elf->e_version = EV_CURRENT;
1268 elf->e_entry = 0;
1269 elf->e_phoff = sizeof(struct elfhdr); 1269 elf->e_phoff = sizeof(struct elfhdr);
1270 elf->e_shoff = 0;
1271 elf->e_flags = flags; 1270 elf->e_flags = flags;
1272 elf->e_ehsize = sizeof(struct elfhdr); 1271 elf->e_ehsize = sizeof(struct elfhdr);
1273 elf->e_phentsize = sizeof(struct elf_phdr); 1272 elf->e_phentsize = sizeof(struct elf_phdr);
1274 elf->e_phnum = segs; 1273 elf->e_phnum = segs;
1275 elf->e_shentsize = 0; 1274
1276 elf->e_shnum = 0;
1277 elf->e_shstrndx = 0;
1278 return; 1275 return;
1279} 1276}
1280 1277
@@ -1725,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1725 1722
1726 info->thread_status_size = 0; 1723 info->thread_status_size = 0;
1727 if (signr) { 1724 if (signr) {
1728 struct elf_thread_status *tmp; 1725 struct elf_thread_status *ets;
1729 rcu_read_lock(); 1726 rcu_read_lock();
1730 do_each_thread(g, p) 1727 do_each_thread(g, p)
1731 if (current->mm == p->mm && current != p) { 1728 if (current->mm == p->mm && current != p) {
1732 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1729 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1733 if (!tmp) { 1730 if (!ets) {
1734 rcu_read_unlock(); 1731 rcu_read_unlock();
1735 return 0; 1732 return 0;
1736 } 1733 }
1737 tmp->thread = p; 1734 ets->thread = p;
1738 list_add(&tmp->list, &info->thread_list); 1735 list_add(&ets->list, &info->thread_list);
1739 } 1736 }
1740 while_each_thread(g, p); 1737 while_each_thread(g, p);
1741 rcu_read_unlock(); 1738 rcu_read_unlock();
1742 list_for_each(t, &info->thread_list) { 1739 list_for_each(t, &info->thread_list) {
1743 struct elf_thread_status *tmp;
1744 int sz; 1740 int sz;
1745 1741
1746 tmp = list_entry(t, struct elf_thread_status, list); 1742 ets = list_entry(t, struct elf_thread_status, list);
1747 sz = elf_dump_thread_status(signr, tmp); 1743 sz = elf_dump_thread_status(signr, ets);
1748 info->thread_status_size += sz; 1744 info->thread_status_size += sz;
1749 } 1745 }
1750 } 1746 }
@@ -2000,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2000 1996
2001 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 1997 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2002 struct page *page; 1998 struct page *page;
2003 struct vm_area_struct *vma; 1999 struct vm_area_struct *tmp_vma;
2004 2000
2005 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 2001 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2006 &page, &vma) <= 0) { 2002 &page, &tmp_vma) <= 0) {
2007 DUMP_SEEK(PAGE_SIZE); 2003 DUMP_SEEK(PAGE_SIZE);
2008 } else { 2004 } else {
2009 if (page == ZERO_PAGE(0)) { 2005 if (page == ZERO_PAGE(0)) {
@@ -2013,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2013 } 2009 }
2014 } else { 2010 } else {
2015 void *kaddr; 2011 void *kaddr;
2016 flush_cache_page(vma, addr, 2012 flush_cache_page(tmp_vma, addr,
2017 page_to_pfn(page)); 2013 page_to_pfn(page));
2018 kaddr = kmap(page); 2014 kaddr = kmap(page);
2019 if ((size += PAGE_SIZE) > limit || 2015 if ((size += PAGE_SIZE) > limit ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 32649f2a1654..ddd35d873391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
136 136
137 retval = kernel_read(file, params->hdr.e_phoff, 137 retval = kernel_read(file, params->hdr.e_phoff,
138 (char *) params->phdrs, size); 138 (char *) params->phdrs, size);
139 if (retval < 0) 139 if (unlikely(retval != size))
140 return retval; 140 return retval < 0 ? retval : -ENOEXEC;
141 141
142 /* determine stack size for this binary */ 142 /* determine stack size for this binary */
143 phdr = params->phdrs; 143 phdr = params->phdrs;
@@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
218 phdr->p_offset, 218 phdr->p_offset,
219 interpreter_name, 219 interpreter_name,
220 phdr->p_filesz); 220 phdr->p_filesz);
221 if (retval < 0) 221 if (unlikely(retval != phdr->p_filesz)) {
222 if (retval >= 0)
223 retval = -ENOEXEC;
222 goto error; 224 goto error;
225 }
223 226
224 retval = -ENOENT; 227 retval = -ENOENT;
225 if (interpreter_name[phdr->p_filesz - 1] != '\0') 228 if (interpreter_name[phdr->p_filesz - 1] != '\0')
@@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 248
246 retval = kernel_read(interpreter, 0, bprm->buf, 249 retval = kernel_read(interpreter, 0, bprm->buf,
247 BINPRM_BUF_SIZE); 250 BINPRM_BUF_SIZE);
248 if (retval < 0) 251 if (unlikely(retval != BINPRM_BUF_SIZE)) {
252 if (retval >= 0)
253 retval = -ENOEXEC;
249 goto error; 254 goto error;
255 }
250 256
251 interp_params.hdr = *((struct elfhdr *) bprm->buf); 257 interp_params.hdr = *((struct elfhdr *) bprm->buf);
252 break; 258 break;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f95ae9789c91..f9c88d0c8ced 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
43 return -ENOEXEC; 43 return -ENOEXEC;
44 } 44 }
45 45
46 bprm->sh_bang++; /* Well, the bang-shell is implicit... */ 46 bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */
47 allow_write_access(bprm->file); 47 allow_write_access(bprm->file);
48 fput(bprm->file); 48 fput(bprm->file);
49 bprm->file = NULL; 49 bprm->file = NULL;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 0498b181dd52..3b40d45a3a16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm,
531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
532 532
533 down_write(&current->mm->mmap_sem); 533 down_write(&current->mm->mmap_sem);
534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); 534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
535 MAP_PRIVATE|MAP_EXECUTABLE, 0);
535 up_write(&current->mm->mmap_sem); 536 up_write(&current->mm->mmap_sem);
536 if (!textpos || textpos >= (unsigned long) -4096) { 537 if (!textpos || textpos >= (unsigned long) -4096) {
537 if (!textpos) 538 if (!textpos)
@@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void)
932 return register_binfmt(&flat_format); 933 return register_binfmt(&flat_format);
933} 934}
934 935
935static void __exit exit_flat_binfmt(void)
936{
937 unregister_binfmt(&flat_format);
938}
939
940/****************************************************************************/ 936/****************************************************************************/
941 937
942core_initcall(init_flat_binfmt); 938core_initcall(init_flat_binfmt);
943module_exit(exit_flat_binfmt);
944 939
945/****************************************************************************/ 940/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index dbf0ac0523de..7191306367c5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
115 if (!enabled) 115 if (!enabled)
116 goto _ret; 116 goto _ret;
117 117
118 retval = -ENOEXEC;
119 if (bprm->misc_bang)
120 goto _ret;
121
122 bprm->misc_bang = 1;
123
118 /* to keep locking time low, we copy the interpreter string */ 124 /* to keep locking time low, we copy the interpreter string */
119 read_lock(&entries_lock); 125 read_lock(&entries_lock);
120 fmt = check_file(bprm); 126 fmt = check_file(bprm);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index ab33939b12a7..9e3963f7ebf1 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
29 * Sorta complicated, but hopefully it will work. -TYT 29 * Sorta complicated, but hopefully it will work. -TYT
30 */ 30 */
31 31
32 bprm->sh_bang++; 32 bprm->sh_bang = 1;
33 allow_write_access(bprm->file); 33 allow_write_access(bprm->file);
34 fput(bprm->file); 34 fput(bprm->file);
35 bprm->file = NULL; 35 bprm->file = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 6e0b6f66df03..799f86deff24 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
937 return ERR_PTR(-EINVAL); 937 return ERR_PTR(-EINVAL);
938} 938}
939 939
940static void bio_copy_kern_endio(struct bio *bio, int err)
941{
942 struct bio_vec *bvec;
943 const int read = bio_data_dir(bio) == READ;
944 char *p = bio->bi_private;
945 int i;
946
947 __bio_for_each_segment(bvec, bio, i, 0) {
948 char *addr = page_address(bvec->bv_page);
949
950 if (read && !err)
951 memcpy(p, addr, bvec->bv_len);
952
953 __free_page(bvec->bv_page);
954 p += bvec->bv_len;
955 }
956
957 bio_put(bio);
958}
959
960/**
961 * bio_copy_kern - copy kernel address into bio
962 * @q: the struct request_queue for the bio
963 * @data: pointer to buffer to copy
964 * @len: length in bytes
965 * @gfp_mask: allocation flags for bio and page allocation
966 *
967 * copy the kernel address into a bio suitable for io to a block
968 * device. Returns an error pointer in case of error.
969 */
970struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
971 gfp_t gfp_mask, int reading)
972{
973 unsigned long kaddr = (unsigned long)data;
974 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
975 unsigned long start = kaddr >> PAGE_SHIFT;
976 const int nr_pages = end - start;
977 struct bio *bio;
978 struct bio_vec *bvec;
979 int i, ret;
980
981 bio = bio_alloc(gfp_mask, nr_pages);
982 if (!bio)
983 return ERR_PTR(-ENOMEM);
984
985 while (len) {
986 struct page *page;
987 unsigned int bytes = PAGE_SIZE;
988
989 if (bytes > len)
990 bytes = len;
991
992 page = alloc_page(q->bounce_gfp | gfp_mask);
993 if (!page) {
994 ret = -ENOMEM;
995 goto cleanup;
996 }
997
998 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
999 ret = -EINVAL;
1000 goto cleanup;
1001 }
1002
1003 len -= bytes;
1004 }
1005
1006 if (!reading) {
1007 void *p = data;
1008
1009 bio_for_each_segment(bvec, bio, i) {
1010 char *addr = page_address(bvec->bv_page);
1011
1012 memcpy(addr, p, bvec->bv_len);
1013 p += bvec->bv_len;
1014 }
1015 }
1016
1017 bio->bi_private = data;
1018 bio->bi_end_io = bio_copy_kern_endio;
1019 return bio;
1020cleanup:
1021 bio_for_each_segment(bvec, bio, i)
1022 __free_page(bvec->bv_page);
1023
1024 bio_put(bio);
1025
1026 return ERR_PTR(ret);
1027}
1028
940/* 1029/*
941 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 1030 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
942 * for performing direct-IO in BIOs. 1031 * for performing direct-IO in BIOs.
@@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
1273EXPORT_SYMBOL(bio_map_user); 1362EXPORT_SYMBOL(bio_map_user);
1274EXPORT_SYMBOL(bio_unmap_user); 1363EXPORT_SYMBOL(bio_unmap_user);
1275EXPORT_SYMBOL(bio_map_kern); 1364EXPORT_SYMBOL(bio_map_kern);
1365EXPORT_SYMBOL(bio_copy_kern);
1276EXPORT_SYMBOL(bio_pair_release); 1366EXPORT_SYMBOL(bio_pair_release);
1277EXPORT_SYMBOL(bio_split); 1367EXPORT_SYMBOL(bio_split);
1278EXPORT_SYMBOL(bio_split_pool); 1368EXPORT_SYMBOL(bio_split_pool);
diff --git a/fs/buffer.c b/fs/buffer.c
index 39ff14403d13..a073f3f4f013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,16 +360,19 @@ void invalidate_bdev(struct block_device *bdev)
360 */ 360 */
361static void free_more_memory(void) 361static void free_more_memory(void)
362{ 362{
363 struct zone **zones; 363 struct zone *zone;
364 pg_data_t *pgdat; 364 int nid;
365 365
366 wakeup_pdflush(1024); 366 wakeup_pdflush(1024);
367 yield(); 367 yield();
368 368
369 for_each_online_pgdat(pgdat) { 369 for_each_online_node(nid) {
370 zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones; 370 (void)first_zones_zonelist(node_zonelist(nid, GFP_NOFS),
371 if (*zones) 371 gfp_zone(GFP_NOFS), NULL,
372 try_to_free_pages(zones, 0, GFP_NOFS); 372 &zone);
373 if (zone)
374 try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
375 GFP_NOFS);
373 } 376 }
374} 377}
375 378
@@ -1098,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1098 1101
1099 printk(KERN_ERR "%s: requested out-of-range block %llu for " 1102 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1100 "device %s\n", 1103 "device %s\n",
1101 __FUNCTION__, (unsigned long long)block, 1104 __func__, (unsigned long long)block,
1102 bdevname(bdev, b)); 1105 bdevname(bdev, b));
1103 return -EIO; 1106 return -EIO;
1104 } 1107 }
@@ -2208,8 +2211,8 @@ out:
2208 return err; 2211 return err;
2209} 2212}
2210 2213
2211int cont_expand_zero(struct file *file, struct address_space *mapping, 2214static int cont_expand_zero(struct file *file, struct address_space *mapping,
2212 loff_t pos, loff_t *bytes) 2215 loff_t pos, loff_t *bytes)
2213{ 2216{
2214 struct inode *inode = mapping->host; 2217 struct inode *inode = mapping->host;
2215 unsigned blocksize = 1 << inode->i_blkbits; 2218 unsigned blocksize = 1 << inode->i_blkbits;
@@ -2243,6 +2246,8 @@ int cont_expand_zero(struct file *file, struct address_space *mapping,
2243 goto out; 2246 goto out;
2244 BUG_ON(err != len); 2247 BUG_ON(err != len);
2245 err = 0; 2248 err = 0;
2249
2250 balance_dirty_pages_ratelimited(mapping);
2246 } 2251 }
2247 2252
2248 /* page covers the boundary, find the boundary offset */ 2253 /* page covers the boundary, find the boundary offset */
@@ -2323,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2323 return 0; 2328 return 0;
2324} 2329}
2325 2330
2326int generic_commit_write(struct file *file, struct page *page,
2327 unsigned from, unsigned to)
2328{
2329 struct inode *inode = page->mapping->host;
2330 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2331 __block_commit_write(inode,page,from,to);
2332 /*
2333 * No need to use i_size_read() here, the i_size
2334 * cannot change under us because we hold i_mutex.
2335 */
2336 if (pos > inode->i_size) {
2337 i_size_write(inode, pos);
2338 mark_inode_dirty(inode);
2339 }
2340 return 0;
2341}
2342
2343/* 2331/*
2344 * block_page_mkwrite() is not allowed to change the file size as it gets 2332 * block_page_mkwrite() is not allowed to change the file size as it gets
2345 * called from a page fault handler when a page is first dirtied. Hence we must 2333 * called from a page fault handler when a page is first dirtied. Hence we must
@@ -3180,8 +3168,7 @@ static void recalc_bh_state(void)
3180 3168
3181struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 3169struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3182{ 3170{
3183 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, 3171 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags);
3184 set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
3185 if (ret) { 3172 if (ret) {
3186 INIT_LIST_HEAD(&ret->b_assoc_buffers); 3173 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3187 get_cpu_var(bh_accounting).nr++; 3174 get_cpu_var(bh_accounting).nr++;
@@ -3311,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync);
3311EXPORT_SYMBOL(file_fsync); 3298EXPORT_SYMBOL(file_fsync);
3312EXPORT_SYMBOL(fsync_bdev); 3299EXPORT_SYMBOL(fsync_bdev);
3313EXPORT_SYMBOL(generic_block_bmap); 3300EXPORT_SYMBOL(generic_block_bmap);
3314EXPORT_SYMBOL(generic_commit_write);
3315EXPORT_SYMBOL(generic_cont_expand_simple); 3301EXPORT_SYMBOL(generic_cont_expand_simple);
3316EXPORT_SYMBOL(init_buffer); 3302EXPORT_SYMBOL(init_buffer);
3317EXPORT_SYMBOL(invalidate_bdev); 3303EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 038674aa88a7..68e510b88457 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,6 @@ static struct char_device_struct {
55 unsigned int baseminor; 55 unsigned int baseminor;
56 int minorct; 56 int minorct;
57 char name[64]; 57 char name[64];
58 struct file_operations *fops;
59 struct cdev *cdev; /* will die */ 58 struct cdev *cdev; /* will die */
60} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
61 60
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0228ed06069e..cc950f69e51e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -468,7 +468,7 @@ cifs_proc_init(void)
468{ 468{
469 struct proc_dir_entry *pde; 469 struct proc_dir_entry *pde;
470 470
471 proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); 471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 472 if (proc_fs_cifs == NULL)
473 return; 473 return;
474 474
@@ -559,7 +559,7 @@ cifs_proc_clean(void)
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 560 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("cifs", proc_root_fs); 562 remove_proc_entry("fs/cifs", NULL);
563} 563}
564 564
565static int 565static int
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 95a54253c047..e1c854890f94 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
134 unsigned int valid; 134 unsigned int valid;
135 135
136 /* clean out */ 136 /* clean out */
137 vattr->va_mode = (umode_t) -1; 137 vattr->va_mode = -1;
138 vattr->va_uid = (vuid_t) -1; 138 vattr->va_uid = (vuid_t) -1;
139 vattr->va_gid = (vgid_t) -1; 139 vattr->va_gid = (vgid_t) -1;
140 vattr->va_size = (off_t) -1; 140 vattr->va_size = (off_t) -1;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index f89ff083079b..3d2580e00a3e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
345} 345}
346 346
347/* destruction routines: unlink, rmdir */ 347/* destruction routines: unlink, rmdir */
348int coda_unlink(struct inode *dir, struct dentry *de) 348static int coda_unlink(struct inode *dir, struct dentry *de)
349{ 349{
350 int error; 350 int error;
351 const char *name = de->d_name.name; 351 const char *name = de->d_name.name;
@@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de)
365 return 0; 365 return 0;
366} 366}
367 367
368int coda_rmdir(struct inode *dir, struct dentry *de) 368static int coda_rmdir(struct inode *dir, struct dentry *de)
369{ 369{
370 const char *name = de->d_name.name; 370 const char *name = de->d_name.name;
371 int len = de->d_name.len; 371 int len = de->d_name.len;
@@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
424 424
425 425
426/* file operations for directories */ 426/* file operations for directories */
427int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) 427static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
428{ 428{
429 struct coda_file_info *cfi; 429 struct coda_file_info *cfi;
430 struct file *host_file; 430 struct file *host_file;
diff --git a/fs/compat.c b/fs/compat.c
index 2ce4456aad30..139dc93c092d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1634,7 +1634,7 @@ sticky:
1634 return ret; 1634 return ret;
1635} 1635}
1636 1636
1637#ifdef TIF_RESTORE_SIGMASK 1637#ifdef HAVE_SET_RESTORE_SIGMASK
1638asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, 1638asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1639 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1639 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1640 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, 1640 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1720,7 +1720,7 @@ sticky:
1720 if (sigmask) { 1720 if (sigmask) {
1721 memcpy(&current->saved_sigmask, &sigsaved, 1721 memcpy(&current->saved_sigmask, &sigsaved,
1722 sizeof(sigsaved)); 1722 sizeof(sigsaved));
1723 set_thread_flag(TIF_RESTORE_SIGMASK); 1723 set_restore_sigmask();
1724 } 1724 }
1725 } else if (sigmask) 1725 } else if (sigmask)
1726 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1726 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1791,7 +1791,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1791 if (sigmask) { 1791 if (sigmask) {
1792 memcpy(&current->saved_sigmask, &sigsaved, 1792 memcpy(&current->saved_sigmask, &sigsaved,
1793 sizeof(sigsaved)); 1793 sizeof(sigsaved));
1794 set_thread_flag(TIF_RESTORE_SIGMASK); 1794 set_restore_sigmask();
1795 } 1795 }
1796 ret = -ERESTARTNOHAND; 1796 ret = -ERESTARTNOHAND;
1797 } else if (sigmask) 1797 } else if (sigmask)
@@ -1825,7 +1825,7 @@ sticky:
1825 1825
1826 return ret; 1826 return ret;
1827} 1827}
1828#endif /* TIF_RESTORE_SIGMASK */ 1828#endif /* HAVE_SET_RESTORE_SIGMASK */
1829 1829
1830#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1830#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1831/* Stuff for NFS server syscalls... */ 1831/* Stuff for NFS server syscalls... */
@@ -2080,7 +2080,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
2080 2080
2081#ifdef CONFIG_EPOLL 2081#ifdef CONFIG_EPOLL
2082 2082
2083#ifdef TIF_RESTORE_SIGMASK 2083#ifdef HAVE_SET_RESTORE_SIGMASK
2084asmlinkage long compat_sys_epoll_pwait(int epfd, 2084asmlinkage long compat_sys_epoll_pwait(int epfd,
2085 struct compat_epoll_event __user *events, 2085 struct compat_epoll_event __user *events,
2086 int maxevents, int timeout, 2086 int maxevents, int timeout,
@@ -2117,14 +2117,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2117 if (err == -EINTR) { 2117 if (err == -EINTR) {
2118 memcpy(&current->saved_sigmask, &sigsaved, 2118 memcpy(&current->saved_sigmask, &sigsaved,
2119 sizeof(sigsaved)); 2119 sizeof(sigsaved));
2120 set_thread_flag(TIF_RESTORE_SIGMASK); 2120 set_restore_sigmask();
2121 } else 2121 } else
2122 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 2122 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2123 } 2123 }
2124 2124
2125 return err; 2125 return err;
2126} 2126}
2127#endif /* TIF_RESTORE_SIGMASK */ 2127#endif /* HAVE_SET_RESTORE_SIGMASK */
2128 2128
2129#endif /* CONFIG_EPOLL */ 2129#endif /* CONFIG_EPOLL */
2130 2130
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c6e72aebd16b..97dba0d92348 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1046,14 +1046,14 @@ static int vt_check(struct file *file)
1046 struct inode *inode = file->f_path.dentry->d_inode; 1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct vc_data *vc; 1047 struct vc_data *vc;
1048 1048
1049 if (file->f_op->ioctl != tty_ioctl) 1049 if (file->f_op->unlocked_ioctl != tty_ioctl)
1050 return -EINVAL; 1050 return -EINVAL;
1051 1051
1052 tty = (struct tty_struct *)file->private_data; 1052 tty = (struct tty_struct *)file->private_data;
1053 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 1053 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1054 return -EINVAL; 1054 return -EINVAL;
1055 1055
1056 if (tty->driver->ioctl != vt_ioctl) 1056 if (tty->ops->ioctl != vt_ioctl)
1057 return -EINVAL; 1057 return -EINVAL;
1058 1058
1059 vc = (struct vc_data *)tty->driver_data; 1059 vc = (struct vc_data *)tty->driver_data;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 397cb503a180..2b6cb23dd14e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
115 goto out; 115 goto out;
116 } 116 }
117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
118 __FUNCTION__, count, *ppos, buffer->page); 118 __func__, count, *ppos, buffer->page);
119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
120 buffer->count); 120 buffer->count);
121out: 121out:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4c1ebff778ee..b9a1d810346d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = {
47 47
48static struct backing_dev_info configfs_backing_dev_info = { 48static struct backing_dev_info configfs_backing_dev_info = {
49 .ra_pages = 0, /* No readahead */ 49 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
51}; 51};
52 52
53static const struct inode_operations configfs_inode_operations ={ 53static const struct inode_operations configfs_inode_operations ={
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index de3b31d0a37d..8421cea7d8c7 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
92 92
93 root = d_alloc_root(inode); 93 root = d_alloc_root(inode);
94 if (!root) { 94 if (!root) {
95 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 95 pr_debug("%s: could not get root dentry!\n",__func__);
96 iput(inode); 96 iput(inode);
97 return -ENOMEM; 97 return -ENOMEM;
98 } 98 }
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78929ea84ff2..2a731ef5f305 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite
210 if (size > PATH_MAX) 210 if (size > PATH_MAX)
211 return -ENAMETOOLONG; 211 return -ENAMETOOLONG;
212 212
213 pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); 213 pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size);
214 214
215 for (s = path; depth--; s += 3) 215 for (s = path; depth--; s += 3)
216 strcpy(s,"../"); 216 strcpy(s,"../");
217 217
218 fill_item_path(target, path, size); 218 fill_item_path(target, path, size);
219 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 219 pr_debug("%s: path = '%s'\n", __func__, path);
220 220
221 return 0; 221 return 0;
222} 222}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fddffe4851f5..159a5efd6a8a 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
9 * 2 as published by the Free Software Foundation. 9 * 2 as published by the Free Software Foundation.
10 * 10 *
11 * debugfs is for people to use instead of /proc or /sys. 11 * debugfs is for people to use instead of /proc or /sys.
12 * See Documentation/DocBook/kernel-api for more details. 12 * See Documentation/DocBook/filesystems for more details.
13 * 13 *
14 */ 14 */
15 15
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f120e1207874..285b64a8b06e 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -17,6 +17,8 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/mutex.h>
21#include <linux/idr.h>
20#include <linux/devpts_fs.h> 22#include <linux/devpts_fs.h>
21#include <linux/parser.h> 23#include <linux/parser.h>
22#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
@@ -26,6 +28,10 @@
26 28
27#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
28 30
31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock);
34
29static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
30static struct dentry *devpts_root; 36static struct dentry *devpts_root;
31 37
@@ -171,9 +177,44 @@ static struct dentry *get_node(int num)
171 return lookup_one_len(s, root, sprintf(s, "%d", num)); 177 return lookup_one_len(s, root, sprintf(s, "%d", num));
172} 178}
173 179
180int devpts_new_index(void)
181{
182 int index;
183 int idr_ret;
184
185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM;
188 }
189
190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
192 if (idr_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN)
195 goto retry;
196 return -EIO;
197 }
198
199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO;
203 }
204 mutex_unlock(&allocated_ptys_lock);
205 return index;
206}
207
208void devpts_kill_index(int idx)
209{
210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock);
213}
214
174int devpts_pty_new(struct tty_struct *tty) 215int devpts_pty_new(struct tty_struct *tty)
175{ 216{
176 int number = tty->index; 217 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
177 struct tty_driver *driver = tty->driver; 218 struct tty_driver *driver = tty->driver;
178 dev_t device = MKDEV(driver->major, driver->minor_start+number); 219 dev_t device = MKDEV(driver->major, driver->minor_start+number);
179 struct dentry *dentry; 220 struct dentry *dentry;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b64e55e0515d..499e16759e96 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void)
200 200
201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); 201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
202 if (!dlm_kset) { 202 if (!dlm_kset) {
203 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 203 printk(KERN_WARNING "%s: can not create kset\n", __func__);
204 return -ENOMEM; 204 return -ENOMEM;
205 } 205 }
206 return 0; 206 return 0;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 28d01ed66de0..eaecc4cfe540 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/file.h>
23 24
24int dir_notify_enable __read_mostly = 1; 25int dir_notify_enable __read_mostly = 1;
25 26
@@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
66 struct dnotify_struct **prev; 67 struct dnotify_struct **prev;
67 struct inode *inode; 68 struct inode *inode;
68 fl_owner_t id = current->files; 69 fl_owner_t id = current->files;
70 struct file *f;
69 int error = 0; 71 int error = 0;
70 72
71 if ((arg & ~DN_MULTISHOT) == 0) { 73 if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
92 prev = &odn->dn_next; 94 prev = &odn->dn_next;
93 } 95 }
94 96
97 rcu_read_lock();
98 f = fcheck(fd);
99 rcu_read_unlock();
100 /* we'd lost the race with close(), sod off silently */
101 /* note that inode->i_lock prevents reordering problems
102 * between accesses to descriptor table and ->i_dnotify */
103 if (f != filp)
104 goto out_free;
105
95 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 106 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
96 if (error) 107 if (error)
97 goto out_free; 108 goto out_free;
diff --git a/fs/dquot.c b/fs/dquot.c
index 41b9dbd68b0e..dfba1623cccb 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -289,7 +289,15 @@ static void wait_on_dquot(struct dquot *dquot)
289 mutex_unlock(&dquot->dq_lock); 289 mutex_unlock(&dquot->dq_lock);
290} 290}
291 291
292#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) 292static inline int dquot_dirty(struct dquot *dquot)
293{
294 return test_bit(DQ_MOD_B, &dquot->dq_flags);
295}
296
297static inline int mark_dquot_dirty(struct dquot *dquot)
298{
299 return dquot->dq_sb->dq_op->mark_dirty(dquot);
300}
293 301
294int dquot_mark_dquot_dirty(struct dquot *dquot) 302int dquot_mark_dquot_dirty(struct dquot *dquot)
295{ 303{
@@ -1441,31 +1449,43 @@ static inline void set_enable_flags(struct quota_info *dqopt, int type)
1441 switch (type) { 1449 switch (type) {
1442 case USRQUOTA: 1450 case USRQUOTA:
1443 dqopt->flags |= DQUOT_USR_ENABLED; 1451 dqopt->flags |= DQUOT_USR_ENABLED;
1452 dqopt->flags &= ~DQUOT_USR_SUSPENDED;
1444 break; 1453 break;
1445 case GRPQUOTA: 1454 case GRPQUOTA:
1446 dqopt->flags |= DQUOT_GRP_ENABLED; 1455 dqopt->flags |= DQUOT_GRP_ENABLED;
1456 dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
1447 break; 1457 break;
1448 } 1458 }
1449} 1459}
1450 1460
1451static inline void reset_enable_flags(struct quota_info *dqopt, int type) 1461static inline void reset_enable_flags(struct quota_info *dqopt, int type,
1462 int remount)
1452{ 1463{
1453 switch (type) { 1464 switch (type) {
1454 case USRQUOTA: 1465 case USRQUOTA:
1455 dqopt->flags &= ~DQUOT_USR_ENABLED; 1466 dqopt->flags &= ~DQUOT_USR_ENABLED;
1467 if (remount)
1468 dqopt->flags |= DQUOT_USR_SUSPENDED;
1469 else
1470 dqopt->flags &= ~DQUOT_USR_SUSPENDED;
1456 break; 1471 break;
1457 case GRPQUOTA: 1472 case GRPQUOTA:
1458 dqopt->flags &= ~DQUOT_GRP_ENABLED; 1473 dqopt->flags &= ~DQUOT_GRP_ENABLED;
1474 if (remount)
1475 dqopt->flags |= DQUOT_GRP_SUSPENDED;
1476 else
1477 dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
1459 break; 1478 break;
1460 } 1479 }
1461} 1480}
1462 1481
1482
1463/* 1483/*
1464 * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) 1484 * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
1465 */ 1485 */
1466int vfs_quota_off(struct super_block *sb, int type) 1486int vfs_quota_off(struct super_block *sb, int type, int remount)
1467{ 1487{
1468 int cnt; 1488 int cnt, ret = 0;
1469 struct quota_info *dqopt = sb_dqopt(sb); 1489 struct quota_info *dqopt = sb_dqopt(sb);
1470 struct inode *toputinode[MAXQUOTAS]; 1490 struct inode *toputinode[MAXQUOTAS];
1471 1491
@@ -1475,9 +1495,17 @@ int vfs_quota_off(struct super_block *sb, int type)
1475 toputinode[cnt] = NULL; 1495 toputinode[cnt] = NULL;
1476 if (type != -1 && cnt != type) 1496 if (type != -1 && cnt != type)
1477 continue; 1497 continue;
1498 /* If we keep inodes of quota files after remount and quotaoff
1499 * is called, drop kept inodes. */
1500 if (!remount && sb_has_quota_suspended(sb, cnt)) {
1501 iput(dqopt->files[cnt]);
1502 dqopt->files[cnt] = NULL;
1503 reset_enable_flags(dqopt, cnt, 0);
1504 continue;
1505 }
1478 if (!sb_has_quota_enabled(sb, cnt)) 1506 if (!sb_has_quota_enabled(sb, cnt))
1479 continue; 1507 continue;
1480 reset_enable_flags(dqopt, cnt); 1508 reset_enable_flags(dqopt, cnt, remount);
1481 1509
1482 /* Note: these are blocking operations */ 1510 /* Note: these are blocking operations */
1483 drop_dquot_ref(sb, cnt); 1511 drop_dquot_ref(sb, cnt);
@@ -1493,7 +1521,8 @@ int vfs_quota_off(struct super_block *sb, int type)
1493 put_quota_format(dqopt->info[cnt].dqi_format); 1521 put_quota_format(dqopt->info[cnt].dqi_format);
1494 1522
1495 toputinode[cnt] = dqopt->files[cnt]; 1523 toputinode[cnt] = dqopt->files[cnt];
1496 dqopt->files[cnt] = NULL; 1524 if (!remount)
1525 dqopt->files[cnt] = NULL;
1497 dqopt->info[cnt].dqi_flags = 0; 1526 dqopt->info[cnt].dqi_flags = 0;
1498 dqopt->info[cnt].dqi_igrace = 0; 1527 dqopt->info[cnt].dqi_igrace = 0;
1499 dqopt->info[cnt].dqi_bgrace = 0; 1528 dqopt->info[cnt].dqi_bgrace = 0;
@@ -1523,12 +1552,19 @@ int vfs_quota_off(struct super_block *sb, int type)
1523 mutex_unlock(&toputinode[cnt]->i_mutex); 1552 mutex_unlock(&toputinode[cnt]->i_mutex);
1524 mark_inode_dirty(toputinode[cnt]); 1553 mark_inode_dirty(toputinode[cnt]);
1525 } 1554 }
1526 iput(toputinode[cnt]);
1527 mutex_unlock(&dqopt->dqonoff_mutex); 1555 mutex_unlock(&dqopt->dqonoff_mutex);
1556 /* On remount RO, we keep the inode pointer so that we
1557 * can reenable quota on the subsequent remount RW.
1558 * But we have better not keep inode pointer when there
1559 * is pending delete on the quota file... */
1560 if (!remount)
1561 iput(toputinode[cnt]);
1562 else if (!toputinode[cnt]->i_nlink)
1563 ret = -EBUSY;
1528 } 1564 }
1529 if (sb->s_bdev) 1565 if (sb->s_bdev)
1530 invalidate_bdev(sb->s_bdev); 1566 invalidate_bdev(sb->s_bdev);
1531 return 0; 1567 return ret;
1532} 1568}
1533 1569
1534/* 1570/*
@@ -1566,7 +1602,8 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1566 invalidate_bdev(sb->s_bdev); 1602 invalidate_bdev(sb->s_bdev);
1567 mutex_lock(&inode->i_mutex); 1603 mutex_lock(&inode->i_mutex);
1568 mutex_lock(&dqopt->dqonoff_mutex); 1604 mutex_lock(&dqopt->dqonoff_mutex);
1569 if (sb_has_quota_enabled(sb, type)) { 1605 if (sb_has_quota_enabled(sb, type) ||
1606 sb_has_quota_suspended(sb, type)) {
1570 error = -EBUSY; 1607 error = -EBUSY;
1571 goto out_lock; 1608 goto out_lock;
1572 } 1609 }
@@ -1589,6 +1626,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
1589 1626
1590 dqopt->ops[type] = fmt->qf_ops; 1627 dqopt->ops[type] = fmt->qf_ops;
1591 dqopt->info[type].dqi_format = fmt; 1628 dqopt->info[type].dqi_format = fmt;
1629 dqopt->info[type].dqi_fmt_id = format_id;
1592 INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); 1630 INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
1593 mutex_lock(&dqopt->dqio_mutex); 1631 mutex_lock(&dqopt->dqio_mutex);
1594 if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) { 1632 if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) {
@@ -1624,12 +1662,41 @@ out_fmt:
1624 return error; 1662 return error;
1625} 1663}
1626 1664
1665/* Reenable quotas on remount RW */
1666static int vfs_quota_on_remount(struct super_block *sb, int type)
1667{
1668 struct quota_info *dqopt = sb_dqopt(sb);
1669 struct inode *inode;
1670 int ret;
1671
1672 mutex_lock(&dqopt->dqonoff_mutex);
1673 if (!sb_has_quota_suspended(sb, type)) {
1674 mutex_unlock(&dqopt->dqonoff_mutex);
1675 return 0;
1676 }
1677 BUG_ON(sb_has_quota_enabled(sb, type));
1678
1679 inode = dqopt->files[type];
1680 dqopt->files[type] = NULL;
1681 reset_enable_flags(dqopt, type, 0);
1682 mutex_unlock(&dqopt->dqonoff_mutex);
1683
1684 ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
1685 iput(inode);
1686
1687 return ret;
1688}
1689
1627/* Actual function called from quotactl() */ 1690/* Actual function called from quotactl() */
1628int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) 1691int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1692 int remount)
1629{ 1693{
1630 struct nameidata nd; 1694 struct nameidata nd;
1631 int error; 1695 int error;
1632 1696
1697 if (remount)
1698 return vfs_quota_on_remount(sb, type);
1699
1633 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1700 error = path_lookup(path, LOOKUP_FOLLOW, &nd);
1634 if (error < 0) 1701 if (error < 0)
1635 return error; 1702 return error;
@@ -1709,10 +1776,19 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
1709} 1776}
1710 1777
1711/* Generic routine for setting common part of quota structure */ 1778/* Generic routine for setting common part of quota structure */
1712static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) 1779static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1713{ 1780{
1714 struct mem_dqblk *dm = &dquot->dq_dqb; 1781 struct mem_dqblk *dm = &dquot->dq_dqb;
1715 int check_blim = 0, check_ilim = 0; 1782 int check_blim = 0, check_ilim = 0;
1783 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
1784
1785 if ((di->dqb_valid & QIF_BLIMITS &&
1786 (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
1787 di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
1788 (di->dqb_valid & QIF_ILIMITS &&
1789 (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
1790 di->dqb_isoftlimit > dqi->dqi_maxilimit)))
1791 return -ERANGE;
1716 1792
1717 spin_lock(&dq_data_lock); 1793 spin_lock(&dq_data_lock);
1718 if (di->dqb_valid & QIF_SPACE) { 1794 if (di->dqb_valid & QIF_SPACE) {
@@ -1744,7 +1820,7 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1744 clear_bit(DQ_BLKS_B, &dquot->dq_flags); 1820 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
1745 } 1821 }
1746 else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */ 1822 else if (!(di->dqb_valid & QIF_BTIME)) /* Set grace only if user hasn't provided his own... */
1747 dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace; 1823 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
1748 } 1824 }
1749 if (check_ilim) { 1825 if (check_ilim) {
1750 if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) { 1826 if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
@@ -1752,7 +1828,7 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1752 clear_bit(DQ_INODES_B, &dquot->dq_flags); 1828 clear_bit(DQ_INODES_B, &dquot->dq_flags);
1753 } 1829 }
1754 else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */ 1830 else if (!(di->dqb_valid & QIF_ITIME)) /* Set grace only if user hasn't provided his own... */
1755 dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace; 1831 dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
1756 } 1832 }
1757 if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit) 1833 if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
1758 clear_bit(DQ_FAKE_B, &dquot->dq_flags); 1834 clear_bit(DQ_FAKE_B, &dquot->dq_flags);
@@ -1760,21 +1836,24 @@ static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
1760 set_bit(DQ_FAKE_B, &dquot->dq_flags); 1836 set_bit(DQ_FAKE_B, &dquot->dq_flags);
1761 spin_unlock(&dq_data_lock); 1837 spin_unlock(&dq_data_lock);
1762 mark_dquot_dirty(dquot); 1838 mark_dquot_dirty(dquot);
1839
1840 return 0;
1763} 1841}
1764 1842
1765int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) 1843int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
1766{ 1844{
1767 struct dquot *dquot; 1845 struct dquot *dquot;
1846 int rc;
1768 1847
1769 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 1848 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
1770 if (!(dquot = dqget(sb, id, type))) { 1849 if (!(dquot = dqget(sb, id, type))) {
1771 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 1850 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
1772 return -ESRCH; 1851 return -ESRCH;
1773 } 1852 }
1774 do_set_dqblk(dquot, di); 1853 rc = do_set_dqblk(dquot, di);
1775 dqput(dquot); 1854 dqput(dquot);
1776 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 1855 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
1777 return 0; 1856 return rc;
1778} 1857}
1779 1858
1780/* Generic routine for getting common part of quota file information */ 1859/* Generic routine for getting common part of quota file information */
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 59375efcf39d..3e5637fc3779 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,18 +14,26 @@ int sysctl_drop_caches;
14 14
15static void drop_pagecache_sb(struct super_block *sb) 15static void drop_pagecache_sb(struct super_block *sb)
16{ 16{
17 struct inode *inode; 17 struct inode *inode, *toput_inode = NULL;
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0)
24 continue;
25 __iget(inode);
26 spin_unlock(&inode_lock);
23 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); 27 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
28 iput(toput_inode);
29 toput_inode = inode;
30 spin_lock(&inode_lock);
24 } 31 }
25 spin_unlock(&inode_lock); 32 spin_unlock(&inode_lock);
33 iput(toput_inode);
26} 34}
27 35
28void drop_pagecache(void) 36static void drop_pagecache(void)
29{ 37{
30 struct super_block *sb; 38 struct super_block *sb;
31 39
@@ -45,7 +53,7 @@ restart:
45 spin_unlock(&sb_lock); 53 spin_unlock(&sb_lock);
46} 54}
47 55
48void drop_slab(void) 56static void drop_slab(void)
49{ 57{
50 int nr_objects; 58 int nr_objects;
51 59
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 768857015516..1e34a7fd4884 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a066e109ad9c..cd62d75b2cc0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst,
119 if (rc) { 119 if (rc) {
120 printk(KERN_ERR 120 printk(KERN_ERR
121 "%s: Error initializing crypto hash; rc = [%d]\n", 121 "%s: Error initializing crypto hash; rc = [%d]\n",
122 __FUNCTION__, rc); 122 __func__, rc);
123 goto out; 123 goto out;
124 } 124 }
125 rc = crypto_hash_update(&desc, &sg, len); 125 rc = crypto_hash_update(&desc, &sg, len);
126 if (rc) { 126 if (rc) {
127 printk(KERN_ERR 127 printk(KERN_ERR
128 "%s: Error updating crypto hash; rc = [%d]\n", 128 "%s: Error updating crypto hash; rc = [%d]\n",
129 __FUNCTION__, rc); 129 __func__, rc);
130 goto out; 130 goto out;
131 } 131 }
132 rc = crypto_hash_final(&desc, dst); 132 rc = crypto_hash_final(&desc, dst);
133 if (rc) { 133 if (rc) {
134 printk(KERN_ERR 134 printk(KERN_ERR
135 "%s: Error finalizing crypto hash; rc = [%d]\n", 135 "%s: Error finalizing crypto hash; rc = [%d]\n",
136 __FUNCTION__, rc); 136 __func__, rc);
137 goto out; 137 goto out;
138 } 138 }
139out: 139out:
@@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
437 if (rc < 0) { 437 if (rc < 0) {
438 printk(KERN_ERR "%s: Error attempting to encrypt page with " 438 printk(KERN_ERR "%s: Error attempting to encrypt page with "
439 "page->index = [%ld], extent_offset = [%ld]; " 439 "page->index = [%ld], extent_offset = [%ld]; "
440 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 440 "rc = [%d]\n", __func__, page->index, extent_offset,
441 rc); 441 rc);
442 goto out; 442 goto out;
443 } 443 }
@@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page)
487 0, PAGE_CACHE_SIZE); 487 0, PAGE_CACHE_SIZE);
488 if (rc) 488 if (rc)
489 printk(KERN_ERR "%s: Error attempting to copy " 489 printk(KERN_ERR "%s: Error attempting to copy "
490 "page at index [%ld]\n", __FUNCTION__, 490 "page at index [%ld]\n", __func__,
491 page->index); 491 page->index);
492 goto out; 492 goto out;
493 } 493 }
@@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page)
508 extent_offset); 508 extent_offset);
509 if (rc) { 509 if (rc) {
510 printk(KERN_ERR "%s: Error encrypting extent; " 510 printk(KERN_ERR "%s: Error encrypting extent; "
511 "rc = [%d]\n", __FUNCTION__, rc); 511 "rc = [%d]\n", __func__, rc);
512 goto out; 512 goto out;
513 } 513 }
514 ecryptfs_lower_offset_for_extent( 514 ecryptfs_lower_offset_for_extent(
@@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page,
569 if (rc < 0) { 569 if (rc < 0) {
570 printk(KERN_ERR "%s: Error attempting to decrypt to page with " 570 printk(KERN_ERR "%s: Error attempting to decrypt to page with "
571 "page->index = [%ld], extent_offset = [%ld]; " 571 "page->index = [%ld], extent_offset = [%ld]; "
572 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 572 "rc = [%d]\n", __func__, page->index, extent_offset,
573 rc); 573 rc);
574 goto out; 574 goto out;
575 } 575 }
@@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page)
622 ecryptfs_inode); 622 ecryptfs_inode);
623 if (rc) 623 if (rc)
624 printk(KERN_ERR "%s: Error attempting to copy " 624 printk(KERN_ERR "%s: Error attempting to copy "
625 "page at index [%ld]\n", __FUNCTION__, 625 "page at index [%ld]\n", __func__,
626 page->index); 626 page->index);
627 goto out; 627 goto out;
628 } 628 }
@@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page)
656 extent_offset); 656 extent_offset);
657 if (rc) { 657 if (rc) {
658 printk(KERN_ERR "%s: Error encrypting extent; " 658 printk(KERN_ERR "%s: Error encrypting extent; "
659 "rc = [%d]\n", __FUNCTION__, rc); 659 "rc = [%d]\n", __func__, rc);
660 goto out; 660 goto out;
661 } 661 }
662 } 662 }
@@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data,
1215 ecryptfs_inode); 1215 ecryptfs_inode);
1216 if (rc) { 1216 if (rc) {
1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", 1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1218 __FUNCTION__, rc); 1218 __func__, rc);
1219 goto out; 1219 goto out;
1220 } 1220 }
1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt,
1246 (*written) = 6; 1246 (*written) = 6;
1247} 1247}
1248 1248
1249struct kmem_cache *ecryptfs_header_cache_0;
1250struct kmem_cache *ecryptfs_header_cache_1; 1249struct kmem_cache *ecryptfs_header_cache_1;
1251struct kmem_cache *ecryptfs_header_cache_2; 1250struct kmem_cache *ecryptfs_header_cache_2;
1252 1251
@@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1320 0, crypt_stat->num_header_bytes_at_front); 1319 0, crypt_stat->num_header_bytes_at_front);
1321 if (rc) 1320 if (rc)
1322 printk(KERN_ERR "%s: Error attempting to write header " 1321 printk(KERN_ERR "%s: Error attempting to write header "
1323 "information to lower file; rc = [%d]\n", __FUNCTION__, 1322 "information to lower file; rc = [%d]\n", __func__,
1324 rc); 1323 rc);
1325 return rc; 1324 return rc;
1326} 1325}
@@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1365 } 1364 }
1366 } else { 1365 } else {
1367 printk(KERN_WARNING "%s: Encrypted flag not set\n", 1366 printk(KERN_WARNING "%s: Encrypted flag not set\n",
1368 __FUNCTION__); 1367 __func__);
1369 rc = -EINVAL; 1368 rc = -EINVAL;
1370 goto out; 1369 goto out;
1371 } 1370 }
1372 /* Released in this function */ 1371 /* Released in this function */
1373 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1372 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL);
1374 if (!virt) { 1373 if (!virt) {
1375 printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); 1374 printk(KERN_ERR "%s: Out of memory\n", __func__);
1376 rc = -ENOMEM; 1375 rc = -ENOMEM;
1377 goto out; 1376 goto out;
1378 } 1377 }
@@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1380 ecryptfs_dentry); 1379 ecryptfs_dentry);
1381 if (unlikely(rc)) { 1380 if (unlikely(rc)) {
1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1381 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1383 __FUNCTION__, rc); 1382 __func__, rc);
1384 goto out_free; 1383 goto out_free;
1385 } 1384 }
1386 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 1385 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
@@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1391 ecryptfs_dentry, virt); 1390 ecryptfs_dentry, virt);
1392 if (rc) { 1391 if (rc) {
1393 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1392 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
1394 "rc = [%d]\n", __FUNCTION__, rc); 1393 "rc = [%d]\n", __func__, rc);
1395 goto out_free; 1394 goto out_free;
1396 } 1395 }
1397out_free: 1396out_free:
@@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1585 if (!page_virt) { 1584 if (!page_virt) {
1586 rc = -ENOMEM; 1585 rc = -ENOMEM;
1587 printk(KERN_ERR "%s: Unable to allocate page_virt\n", 1586 printk(KERN_ERR "%s: Unable to allocate page_virt\n",
1588 __FUNCTION__); 1587 __func__);
1589 goto out; 1588 goto out;
1590 } 1589 }
1591 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, 1590 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 5007f788da01..951ee33a022d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,7 +4,7 @@
4 * 4 *
5 * Copyright (C) 1997-2003 Erez Zadok 5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University 6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2007 International Business Machines Corp. 7 * Copyright (C) 2004-2008 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 * Trevor S. Highland <trevor.highland@gmail.com> 9 * Trevor S. Highland <trevor.highland@gmail.com>
10 * Tyler Hicks <tyhicks@ou.edu> 10 * Tyler Hicks <tyhicks@ou.edu>
@@ -34,6 +34,7 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h>
37 38
38/* Version verification for shared data structures w/ userspace */ 39/* Version verification for shared data structures w/ userspace */
39#define ECRYPTFS_VERSION_MAJOR 0x00 40#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -49,11 +50,13 @@
49#define ECRYPTFS_VERSIONING_POLICY 0x00000008 50#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010 51#define ECRYPTFS_VERSIONING_XATTR 0x00000010
51#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
52#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 54#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
53 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 55 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
54 | ECRYPTFS_VERSIONING_PUBKEY \ 56 | ECRYPTFS_VERSIONING_PUBKEY \
55 | ECRYPTFS_VERSIONING_XATTR \ 57 | ECRYPTFS_VERSIONING_XATTR \
56 | ECRYPTFS_VERSIONING_MULTKEY) 58 | ECRYPTFS_VERSIONING_MULTKEY \
59 | ECRYPTFS_VERSIONING_DEVMISC)
57#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 60#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
58#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 61#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
59#define ECRYPTFS_SALT_SIZE 8 62#define ECRYPTFS_SALT_SIZE 8
@@ -73,17 +76,14 @@
73#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 76#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
74#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ 77#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
75#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) 78#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
76#define ECRYPTFS_NLMSG_HELO 100
77#define ECRYPTFS_NLMSG_QUIT 101
78#define ECRYPTFS_NLMSG_REQUEST 102
79#define ECRYPTFS_NLMSG_RESPONSE 103
80#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
81#define ECRYPTFS_DEFAULT_NUM_USERS 4 80#define ECRYPTFS_DEFAULT_NUM_USERS 4
82#define ECRYPTFS_MAX_NUM_USERS 32768 81#define ECRYPTFS_MAX_NUM_USERS 32768
83#define ECRYPTFS_TRANSPORT_NETLINK 0 82#define ECRYPTFS_TRANSPORT_NETLINK 0
84#define ECRYPTFS_TRANSPORT_CONNECTOR 1 83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
85#define ECRYPTFS_TRANSPORT_RELAYFS 2 84#define ECRYPTFS_TRANSPORT_RELAYFS 2
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK 85#define ECRYPTFS_TRANSPORT_MISCDEV 3
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV
87#define ECRYPTFS_XATTR_NAME "user.ecryptfs" 87#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
88 88
89#define RFC2440_CIPHER_DES3_EDE 0x02 89#define RFC2440_CIPHER_DES3_EDE 0x02
@@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item {
366}; 366};
367 367
368struct ecryptfs_message { 368struct ecryptfs_message {
369 /* Can never be greater than ecryptfs_message_buf_len */
370 /* Used to find the parent msg_ctx */
371 /* Inherits from msg_ctx->index */
369 u32 index; 372 u32 index;
370 u32 data_len; 373 u32 data_len;
371 u8 data[]; 374 u8 data[];
372}; 375};
373 376
374struct ecryptfs_msg_ctx { 377struct ecryptfs_msg_ctx {
375#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 378#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01
376#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 379#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02
377#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 380#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03
378 u32 state; 381#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04
379 unsigned int index; 382 u8 state;
380 unsigned int counter; 383#define ECRYPTFS_MSG_HELO 100
384#define ECRYPTFS_MSG_QUIT 101
385#define ECRYPTFS_MSG_REQUEST 102
386#define ECRYPTFS_MSG_RESPONSE 103
387 u8 type;
388 u32 index;
389 /* Counter converts to a sequence number. Each message sent
390 * out for which we expect a response has an associated
391 * sequence number. The response must have the same sequence
392 * number as the counter for the msg_stc for the message to be
393 * valid. */
394 u32 counter;
395 size_t msg_size;
381 struct ecryptfs_message *msg; 396 struct ecryptfs_message *msg;
382 struct task_struct *task; 397 struct task_struct *task;
383 struct list_head node; 398 struct list_head node;
399 struct list_head daemon_out_list;
384 struct mutex mux; 400 struct mutex mux;
385}; 401};
386 402
387extern unsigned int ecryptfs_transport; 403extern unsigned int ecryptfs_transport;
388 404
389struct ecryptfs_daemon_id { 405struct ecryptfs_daemon;
390 pid_t pid; 406
391 uid_t uid; 407struct ecryptfs_daemon {
392 struct hlist_node id_chain; 408#define ECRYPTFS_DAEMON_IN_READ 0x00000001
409#define ECRYPTFS_DAEMON_IN_POLL 0x00000002
410#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004
411#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008
412 u32 flags;
413 u32 num_queued_msg_ctx;
414 struct pid *pid;
415 uid_t euid;
416 struct user_namespace *user_ns;
417 struct task_struct *task;
418 struct mutex mux;
419 struct list_head msg_ctx_out_queue;
420 wait_queue_head_t wait;
421 struct hlist_node euid_chain;
393}; 422};
394 423
424extern struct mutex ecryptfs_daemon_hash_mux;
425
395static inline struct ecryptfs_file_info * 426static inline struct ecryptfs_file_info *
396ecryptfs_file_to_private(struct file *file) 427ecryptfs_file_to_private(struct file *file)
397{ 428{
@@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
500} 531}
501 532
502#define ecryptfs_printk(type, fmt, arg...) \ 533#define ecryptfs_printk(type, fmt, arg...) \
503 __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); 534 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
504void __ecryptfs_printk(const char *fmt, ...); 535void __ecryptfs_printk(const char *fmt, ...);
505 536
506extern const struct file_operations ecryptfs_main_fops; 537extern const struct file_operations ecryptfs_main_fops;
@@ -581,10 +612,13 @@ int
581ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 612ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
582 size_t size, int flags); 613 size_t size, int flags);
583int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 614int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
584int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); 615int ecryptfs_process_helo(unsigned int transport, uid_t euid,
585int ecryptfs_process_quit(uid_t uid, pid_t pid); 616 struct user_namespace *user_ns, struct pid *pid);
586int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 617int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
587 pid_t pid, u32 seq); 618 struct pid *pid);
619int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
620 struct user_namespace *user_ns, struct pid *pid,
621 u32 seq);
588int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 622int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
589 struct ecryptfs_msg_ctx **msg_ctx); 623 struct ecryptfs_msg_ctx **msg_ctx);
590int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 624int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
@@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport);
593void ecryptfs_release_messaging(unsigned int transport); 627void ecryptfs_release_messaging(unsigned int transport);
594 628
595int ecryptfs_send_netlink(char *data, int data_len, 629int ecryptfs_send_netlink(char *data, int data_len,
596 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 630 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
597 u16 msg_flags, pid_t daemon_pid); 631 u16 msg_flags, struct pid *daemon_pid);
598int ecryptfs_init_netlink(void); 632int ecryptfs_init_netlink(void);
599void ecryptfs_release_netlink(void); 633void ecryptfs_release_netlink(void);
600 634
601int ecryptfs_send_connector(char *data, int data_len, 635int ecryptfs_send_connector(char *data, int data_len,
602 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 636 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
603 u16 msg_flags, pid_t daemon_pid); 637 u16 msg_flags, struct pid *daemon_pid);
604int ecryptfs_init_connector(void); 638int ecryptfs_init_connector(void);
605void ecryptfs_release_connector(void); 639void ecryptfs_release_connector(void);
606void 640void
@@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
642 size_t offset_in_page, size_t size, 676 size_t offset_in_page, size_t size,
643 struct inode *ecryptfs_inode); 677 struct inode *ecryptfs_inode);
644struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); 678struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
679int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
680int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
681 struct user_namespace *user_ns);
682int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
683 size_t *length_size);
684int ecryptfs_write_packet_length(char *dest, size_t size,
685 size_t *packet_size_length);
686int ecryptfs_init_ecryptfs_miscdev(void);
687void ecryptfs_destroy_ecryptfs_miscdev(void);
688int ecryptfs_send_miscdev(char *data, size_t data_size,
689 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
690 u16 msg_flags, struct ecryptfs_daemon *daemon);
691void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
692int
693ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
694 struct user_namespace *user_ns, struct pid *pid);
645 695
646#endif /* #ifndef ECRYPTFS_KERNEL_H */ 696#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b8f5ed4adea..2258b8f654a6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
195 file, ecryptfs_inode_to_private(inode)->lower_file); 195 file, ecryptfs_inode_to_private(inode)->lower_file);
196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
198 mutex_lock(&crypt_stat->cs_mutex);
198 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 199 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
200 mutex_unlock(&crypt_stat->cs_mutex);
199 rc = 0; 201 rc = 0;
200 goto out; 202 goto out;
201 } 203 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23861152101..0a1397335a8e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -111,7 +111,7 @@ ecryptfs_do_create(struct inode *directory_inode,
111 111
112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
113 lower_dir_dentry = lock_parent(lower_dentry); 113 lower_dir_dentry = lock_parent(lower_dentry);
114 if (unlikely(IS_ERR(lower_dir_dentry))) { 114 if (IS_ERR(lower_dir_dentry)) {
115 ecryptfs_printk(KERN_ERR, "Error locking directory of " 115 ecryptfs_printk(KERN_ERR, "Error locking directory of "
116 "dentry\n"); 116 "dentry\n");
117 rc = PTR_ERR(lower_dir_dentry); 117 rc = PTR_ERR(lower_dir_dentry);
@@ -121,7 +121,7 @@ ecryptfs_do_create(struct inode *directory_inode,
121 ecryptfs_dentry, mode, nd); 121 ecryptfs_dentry, mode, nd);
122 if (rc) { 122 if (rc) {
123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
124 "rc = [%d]\n", __FUNCTION__, rc); 124 "rc = [%d]\n", __func__, rc);
125 goto out_lock; 125 goto out_lock;
126 } 126 }
127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
@@ -908,7 +908,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
909 ia->ia_valid &= ~ATTR_MODE; 909 ia->ia_valid &= ~ATTR_MODE;
910 910
911 mutex_lock(&lower_dentry->d_inode->i_mutex);
911 rc = notify_change(lower_dentry, ia); 912 rc = notify_change(lower_dentry, ia);
913 mutex_unlock(&lower_dentry->d_inode->i_mutex);
912out: 914out:
913 fsstack_copy_attr_all(inode, lower_inode, NULL); 915 fsstack_copy_attr_all(inode, lower_inode, NULL);
914 return rc; 916 return rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 682b1b2482c2..e82b457180be 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,7 +65,7 @@ static int process_request_key_err(long err_code)
65} 65}
66 66
67/** 67/**
68 * parse_packet_length 68 * ecryptfs_parse_packet_length
69 * @data: Pointer to memory containing length at offset 69 * @data: Pointer to memory containing length at offset
70 * @size: This function writes the decoded size to this memory 70 * @size: This function writes the decoded size to this memory
71 * address; zero on error 71 * address; zero on error
@@ -73,8 +73,8 @@ static int process_request_key_err(long err_code)
73 * 73 *
74 * Returns zero on success; non-zero on error 74 * Returns zero on success; non-zero on error
75 */ 75 */
76static int parse_packet_length(unsigned char *data, size_t *size, 76int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
77 size_t *length_size) 77 size_t *length_size)
78{ 78{
79 int rc = 0; 79 int rc = 0;
80 80
@@ -105,7 +105,7 @@ out:
105} 105}
106 106
107/** 107/**
108 * write_packet_length 108 * ecryptfs_write_packet_length
109 * @dest: The byte array target into which to write the length. Must 109 * @dest: The byte array target into which to write the length. Must
110 * have at least 5 bytes allocated. 110 * have at least 5 bytes allocated.
111 * @size: The length to write. 111 * @size: The length to write.
@@ -114,8 +114,8 @@ out:
114 * 114 *
115 * Returns zero on success; non-zero on error. 115 * Returns zero on success; non-zero on error.
116 */ 116 */
117static int write_packet_length(char *dest, size_t size, 117int ecryptfs_write_packet_length(char *dest, size_t size,
118 size_t *packet_size_length) 118 size_t *packet_size_length)
119{ 119{
120 int rc = 0; 120 int rc = 0;
121 121
@@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
162 goto out; 162 goto out;
163 } 163 }
164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; 164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
165 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 165 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
166 &packet_size_len); 166 &packet_size_len);
167 if (rc) { 167 if (rc) {
168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
169 "header; cannot generate packet length\n"); 169 "header; cannot generate packet length\n");
@@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
172 i += packet_size_len; 172 i += packet_size_len;
173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
174 i += ECRYPTFS_SIG_SIZE_HEX; 174 i += ECRYPTFS_SIG_SIZE_HEX;
175 rc = write_packet_length(&message[i], session_key->encrypted_key_size, 175 rc = ecryptfs_write_packet_length(&message[i],
176 &packet_size_len); 176 session_key->encrypted_key_size,
177 &packet_size_len);
177 if (rc) { 178 if (rc) {
178 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 179 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
179 "header; cannot generate packet length\n"); 180 "header; cannot generate packet length\n");
@@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code,
225 rc = -EIO; 226 rc = -EIO;
226 goto out; 227 goto out;
227 } 228 }
228 rc = parse_packet_length(&data[i], &m_size, &data_len); 229 rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len);
229 if (rc) { 230 if (rc) {
230 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 231 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
231 "rc = [%d]\n", rc); 232 "rc = [%d]\n", rc);
@@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
304 goto out; 305 goto out;
305 } 306 }
306 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; 307 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
307 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 308 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
308 &packet_size_len); 309 &packet_size_len);
309 if (rc) { 310 if (rc) {
310 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 311 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
311 "header; cannot generate packet length\n"); 312 "header; cannot generate packet length\n");
@@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
315 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 316 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
316 i += ECRYPTFS_SIG_SIZE_HEX; 317 i += ECRYPTFS_SIG_SIZE_HEX;
317 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ 318 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
318 rc = write_packet_length(&message[i], crypt_stat->key_size + 3, 319 rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
319 &packet_size_len); 320 &packet_size_len);
320 if (rc) { 321 if (rc) {
321 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 322 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
322 "header; cannot generate packet length\n"); 323 "header; cannot generate packet length\n");
@@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
357 /* verify that everything through the encrypted FEK size is present */ 358 /* verify that everything through the encrypted FEK size is present */
358 if (message_len < 4) { 359 if (message_len < 4) {
359 rc = -EIO; 360 rc = -EIO;
361 printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable "
362 "message length is [%d]\n", __func__, message_len, 4);
360 goto out; 363 goto out;
361 } 364 }
362 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { 365 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
363 ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
364 rc = -EIO; 366 rc = -EIO;
367 printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n",
368 __func__);
365 goto out; 369 goto out;
366 } 370 }
367 if (data[i++]) { 371 if (data[i++]) {
368 ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
369 " [%d]\n", data[i-1]);
370 rc = -EIO; 372 rc = -EIO;
373 printk(KERN_ERR "%s: Status indicator has non zero "
374 "value [%d]\n", __func__, data[i-1]);
375
371 goto out; 376 goto out;
372 } 377 }
373 rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); 378 rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size,
379 &data_len);
374 if (rc) { 380 if (rc) {
375 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 381 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
376 "rc = [%d]\n", rc); 382 "rc = [%d]\n", rc);
@@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
378 } 384 }
379 i += data_len; 385 i += data_len;
380 if (message_len < (i + key_rec->enc_key_size)) { 386 if (message_len < (i + key_rec->enc_key_size)) {
381 ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n",
382 message_len, (i + key_rec->enc_key_size));
383 rc = -EIO; 387 rc = -EIO;
388 printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n",
389 __func__, message_len, (i + key_rec->enc_key_size));
384 goto out; 390 goto out;
385 } 391 }
386 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
387 ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than "
388 "the maximum key size [%d]\n",
389 key_rec->enc_key_size,
390 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
391 rc = -EIO; 393 rc = -EIO;
394 printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than "
395 "the maximum key size [%d]\n", __func__,
396 key_rec->enc_key_size,
397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
392 goto out; 398 goto out;
393 } 399 }
394 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); 400 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
@@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
445 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), 451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
446 &netlink_message, &netlink_message_length); 452 &netlink_message, &netlink_message_length);
447 if (rc) { 453 if (rc) {
448 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); 454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
449 goto out; 455 goto out;
450 } 456 }
451 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, 457 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
@@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
570 goto out; 576 goto out;
571 } 577 }
572 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 578 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
573 rc = parse_packet_length(&data[(*packet_size)], &body_size, 579 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
574 &length_size); 580 &length_size);
575 if (rc) { 581 if (rc) {
576 printk(KERN_WARNING "Error parsing packet length; " 582 printk(KERN_WARNING "Error parsing packet length; "
577 "rc = [%d]\n", rc); 583 "rc = [%d]\n", rc);
@@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
704 goto out; 710 goto out;
705 } 711 }
706 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 712 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
707 rc = parse_packet_length(&data[(*packet_size)], &body_size, 713 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
708 &length_size); 714 &length_size);
709 if (rc) { 715 if (rc) {
710 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", 716 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
711 rc); 717 rc);
@@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
852 rc = -EINVAL; 858 rc = -EINVAL;
853 goto out; 859 goto out;
854 } 860 }
855 rc = parse_packet_length(&data[(*packet_size)], &body_size, 861 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
856 &length_size); 862 &length_size);
857 if (rc) { 863 if (rc) {
858 printk(KERN_WARNING "Invalid tag 11 packet format\n"); 864 printk(KERN_WARNING "Invalid tag 11 packet format\n");
859 goto out; 865 goto out;
@@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
1405 auth_tok->token.private_key.key_size; 1411 auth_tok->token.private_key.key_size;
1406 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); 1412 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
1407 if (rc) { 1413 if (rc) {
1408 ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " 1414 printk(KERN_ERR "Failed to encrypt session key via a key "
1409 "via a pki"); 1415 "module; rc = [%d]\n", rc);
1410 goto out; 1416 goto out;
1411 } 1417 }
1412 if (ecryptfs_verbosity > 0) { 1418 if (ecryptfs_verbosity > 0) {
@@ -1430,8 +1436,9 @@ encrypted_session_key_set:
1430 goto out; 1436 goto out;
1431 } 1437 }
1432 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; 1438 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1433 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1439 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1434 &packet_size_length); 1440 (max_packet_size - 4),
1441 &packet_size_length);
1435 if (rc) { 1442 if (rc) {
1436 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " 1443 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
1437 "header; cannot generate packet length\n"); 1444 "header; cannot generate packet length\n");
@@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
1489 goto out; 1496 goto out;
1490 } 1497 }
1491 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; 1498 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
1492 rc = write_packet_length(&dest[(*packet_length)], 1499 rc = ecryptfs_write_packet_length(&dest[(*packet_length)],
1493 (max_packet_size - 4), &packet_size_length); 1500 (max_packet_size - 4),
1501 &packet_size_length);
1494 if (rc) { 1502 if (rc) {
1495 printk(KERN_ERR "Error generating tag 11 packet header; cannot " 1503 printk(KERN_ERR "Error generating tag 11 packet header; cannot "
1496 "generate packet length. rc = [%d]\n", rc); 1504 "generate packet length. rc = [%d]\n", rc);
@@ -1682,8 +1690,9 @@ encrypted_session_key_set:
1682 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; 1690 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
1683 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) 1691 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
1684 * to get the number of octets in the actual Tag 3 packet */ 1692 * to get the number of octets in the actual Tag 3 packet */
1685 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1693 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1686 &packet_size_length); 1694 (max_packet_size - 4),
1695 &packet_size_length);
1687 if (rc) { 1696 if (rc) {
1688 printk(KERN_ERR "Error generating tag 3 packet header; cannot " 1697 printk(KERN_ERR "Error generating tag 3 packet header; cannot "
1689 "generate packet length. rc = [%d]\n", rc); 1698 "generate packet length. rc = [%d]\n", rc);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d25ac9500a92..d603631601eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
219 if (rc) { 219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the " 220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; " 221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); 222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out; 223 goto out;
224 } 224 }
225out: 225out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 9cc2aec27b0d..1b5c20058acb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * eCryptfs: Linux filesystem encryption layer 2 * eCryptfs: Linux filesystem encryption layer
3 * 3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp. 4 * Copyright (C) 2004-2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu> 6 * Tyler Hicks <tyhicks@ou.edu>
7 * 7 *
@@ -20,19 +20,21 @@
20 * 02111-1307, USA. 20 * 02111-1307, USA.
21 */ 21 */
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/user_namespace.h>
24#include <linux/nsproxy.h>
23#include "ecryptfs_kernel.h" 25#include "ecryptfs_kernel.h"
24 26
25static LIST_HEAD(ecryptfs_msg_ctx_free_list); 27static LIST_HEAD(ecryptfs_msg_ctx_free_list);
26static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); 28static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
27static struct mutex ecryptfs_msg_ctx_lists_mux; 29static struct mutex ecryptfs_msg_ctx_lists_mux;
28 30
29static struct hlist_head *ecryptfs_daemon_id_hash; 31static struct hlist_head *ecryptfs_daemon_hash;
30static struct mutex ecryptfs_daemon_id_hash_mux; 32struct mutex ecryptfs_daemon_hash_mux;
31static int ecryptfs_hash_buckets; 33static int ecryptfs_hash_buckets;
32#define ecryptfs_uid_hash(uid) \ 34#define ecryptfs_uid_hash(uid) \
33 hash_long((unsigned long)uid, ecryptfs_hash_buckets) 35 hash_long((unsigned long)uid, ecryptfs_hash_buckets)
34 36
35static unsigned int ecryptfs_msg_counter; 37static u32 ecryptfs_msg_counter;
36static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 38static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
37 39
38/** 40/**
@@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
40 * @msg_ctx: The context that was acquired from the free list 42 * @msg_ctx: The context that was acquired from the free list
41 * 43 *
42 * Acquires a context element from the free list and locks the mutex 44 * Acquires a context element from the free list and locks the mutex
43 * on the context. Returns zero on success; non-zero on error or upon 45 * on the context. Sets the msg_ctx task to current. Returns zero on
44 * failure to acquire a free context element. Be sure to lock the 46 * success; non-zero on error or upon failure to acquire a free
45 * list mutex before calling. 47 * context element. Must be called with ecryptfs_msg_ctx_lists_mux
48 * held.
46 */ 49 */
47static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) 50static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
48{ 51{
@@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
50 int rc; 53 int rc;
51 54
52 if (list_empty(&ecryptfs_msg_ctx_free_list)) { 55 if (list_empty(&ecryptfs_msg_ctx_free_list)) {
53 ecryptfs_printk(KERN_WARNING, "The eCryptfs free " 56 printk(KERN_WARNING "%s: The eCryptfs free "
54 "context list is empty. It may be helpful to " 57 "context list is empty. It may be helpful to "
55 "specify the ecryptfs_message_buf_len " 58 "specify the ecryptfs_message_buf_len "
56 "parameter to be greater than the current " 59 "parameter to be greater than the current "
57 "value of [%d]\n", ecryptfs_message_buf_len); 60 "value of [%d]\n", __func__, ecryptfs_message_buf_len);
58 rc = -ENOMEM; 61 rc = -ENOMEM;
59 goto out; 62 goto out;
60 } 63 }
@@ -75,8 +78,7 @@ out:
75 * ecryptfs_msg_ctx_free_to_alloc 78 * ecryptfs_msg_ctx_free_to_alloc
76 * @msg_ctx: The context to move from the free list to the alloc list 79 * @msg_ctx: The context to move from the free list to the alloc list
77 * 80 *
78 * Be sure to lock the list mutex and the context mutex before 81 * Must be called with ecryptfs_msg_ctx_lists_mux held.
79 * calling.
80 */ 82 */
81static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) 83static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
82{ 84{
@@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
89 * ecryptfs_msg_ctx_alloc_to_free 91 * ecryptfs_msg_ctx_alloc_to_free
90 * @msg_ctx: The context to move from the alloc list to the free list 92 * @msg_ctx: The context to move from the alloc list to the free list
91 * 93 *
92 * Be sure to lock the list mutex and the context mutex before 94 * Must be called with ecryptfs_msg_ctx_lists_mux held.
93 * calling.
94 */ 95 */
95static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) 96void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
96{ 97{
97 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); 98 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
98 if (msg_ctx->msg) 99 if (msg_ctx->msg)
99 kfree(msg_ctx->msg); 100 kfree(msg_ctx->msg);
101 msg_ctx->msg = NULL;
100 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; 102 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
101} 103}
102 104
103/** 105/**
104 * ecryptfs_find_daemon_id 106 * ecryptfs_find_daemon_by_euid
105 * @uid: The user id which maps to the desired daemon id 107 * @euid: The effective user id which maps to the desired daemon id
106 * @id: If return value is zero, points to the desired daemon id 108 * @user_ns: The namespace in which @euid applies
107 * pointer 109 * @daemon: If return value is zero, points to the desired daemon pointer
108 * 110 *
109 * Search the hash list for the given user id. Returns zero if the 111 * Must be called with ecryptfs_daemon_hash_mux held.
110 * user id exists in the list; non-zero otherwise. The daemon id hash 112 *
111 * mutex should be held before calling this function. 113 * Search the hash list for the given user id.
114 *
115 * Returns zero if the user id exists in the list; non-zero otherwise.
112 */ 116 */
113static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) 117int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
118 struct user_namespace *user_ns)
114{ 119{
115 struct hlist_node *elem; 120 struct hlist_node *elem;
116 int rc; 121 int rc;
117 122
118 hlist_for_each_entry(*id, elem, 123 hlist_for_each_entry(*daemon, elem,
119 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], 124 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)],
120 id_chain) { 125 euid_chain) {
121 if ((*id)->uid == uid) { 126 if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) {
122 rc = 0; 127 rc = 0;
123 goto out; 128 goto out;
124 } 129 }
@@ -128,181 +133,325 @@ out:
128 return rc; 133 return rc;
129} 134}
130 135
131static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, 136static int
132 pid_t pid) 137ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
138 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @transport: Transport type
143 * @msg_type: Message type
144 * @daemon: Daemon struct for recipient of message
145 *
146 * A raw message is one that does not include an ecryptfs_message
147 * struct. It simply has a type.
148 *
149 * Must be called with ecryptfs_daemon_hash_mux held.
150 *
151 * Returns zero on success; non-zero otherwise
152 */
153static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type,
154 struct ecryptfs_daemon *daemon)
133{ 155{
156 struct ecryptfs_msg_ctx *msg_ctx;
134 int rc; 157 int rc;
135 158
136 switch(transport) { 159 switch(transport) {
137 case ECRYPTFS_TRANSPORT_NETLINK: 160 case ECRYPTFS_TRANSPORT_NETLINK:
138 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); 161 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0,
162 daemon->pid);
163 break;
164 case ECRYPTFS_TRANSPORT_MISCDEV:
165 rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type,
166 &msg_ctx);
167 if (rc) {
168 printk(KERN_ERR "%s: Error whilst attempting to send "
169 "message via procfs; rc = [%d]\n", __func__, rc);
170 goto out;
171 }
172 /* Raw messages are logically context-free (e.g., no
173 * reply is expected), so we set the state of the
174 * ecryptfs_msg_ctx object to indicate that it should
175 * be freed as soon as the transport sends out the message. */
176 mutex_lock(&msg_ctx->mux);
177 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
178 mutex_unlock(&msg_ctx->mux);
139 break; 179 break;
140 case ECRYPTFS_TRANSPORT_CONNECTOR: 180 case ECRYPTFS_TRANSPORT_CONNECTOR:
141 case ECRYPTFS_TRANSPORT_RELAYFS: 181 case ECRYPTFS_TRANSPORT_RELAYFS:
142 default: 182 default:
143 rc = -ENOSYS; 183 rc = -ENOSYS;
144 } 184 }
185out:
186 return rc;
187}
188
189/**
190 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
191 * @daemon: Pointer to set to newly allocated daemon struct
192 * @euid: Effective user id for the daemon
193 * @user_ns: The namespace in which @euid applies
194 * @pid: Process id for the daemon
195 *
196 * Must be called ceremoniously while in possession of
197 * ecryptfs_sacred_daemon_hash_mux
198 *
199 * Returns zero on success; non-zero otherwise
200 */
201int
202ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
203 struct user_namespace *user_ns, struct pid *pid)
204{
205 int rc = 0;
206
207 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
208 if (!(*daemon)) {
209 rc = -ENOMEM;
210 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
211 "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
212 goto out;
213 }
214 (*daemon)->euid = euid;
215 (*daemon)->user_ns = get_user_ns(user_ns);
216 (*daemon)->pid = get_pid(pid);
217 (*daemon)->task = current;
218 mutex_init(&(*daemon)->mux);
219 INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue);
220 init_waitqueue_head(&(*daemon)->wait);
221 (*daemon)->num_queued_msg_ctx = 0;
222 hlist_add_head(&(*daemon)->euid_chain,
223 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]);
224out:
145 return rc; 225 return rc;
146} 226}
147 227
148/** 228/**
149 * ecryptfs_process_helo 229 * ecryptfs_process_helo
150 * @transport: The underlying transport (netlink, etc.) 230 * @transport: The underlying transport (netlink, etc.)
151 * @uid: The user ID owner of the message 231 * @euid: The user ID owner of the message
232 * @user_ns: The namespace in which @euid applies
152 * @pid: The process ID for the userspace program that sent the 233 * @pid: The process ID for the userspace program that sent the
153 * message 234 * message
154 * 235 *
155 * Adds the uid and pid values to the daemon id hash. If a uid 236 * Adds the euid and pid values to the daemon euid hash. If an euid
156 * already has a daemon pid registered, the daemon will be 237 * already has a daemon pid registered, the daemon will be
157 * unregistered before the new daemon id is put into the hash list. 238 * unregistered before the new daemon is put into the hash list.
158 * Returns zero after adding a new daemon id to the hash list; 239 * Returns zero after adding a new daemon to the hash list;
159 * non-zero otherwise. 240 * non-zero otherwise.
160 */ 241 */
161int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) 242int ecryptfs_process_helo(unsigned int transport, uid_t euid,
243 struct user_namespace *user_ns, struct pid *pid)
162{ 244{
163 struct ecryptfs_daemon_id *new_id; 245 struct ecryptfs_daemon *new_daemon;
164 struct ecryptfs_daemon_id *old_id; 246 struct ecryptfs_daemon *old_daemon;
165 int rc; 247 int rc;
166 248
167 mutex_lock(&ecryptfs_daemon_id_hash_mux); 249 mutex_lock(&ecryptfs_daemon_hash_mux);
168 new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); 250 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
169 if (!new_id) { 251 if (rc != 0) {
170 rc = -ENOMEM;
171 ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
172 "to register daemon [%d] for user [%d]\n",
173 pid, uid);
174 goto unlock;
175 }
176 if (!ecryptfs_find_daemon_id(uid, &old_id)) {
177 printk(KERN_WARNING "Received request from user [%d] " 252 printk(KERN_WARNING "Received request from user [%d] "
178 "to register daemon [%d]; unregistering daemon " 253 "to register daemon [0x%p]; unregistering daemon "
179 "[%d]\n", uid, pid, old_id->pid); 254 "[0x%p]\n", euid, pid, old_daemon->pid);
180 hlist_del(&old_id->id_chain); 255 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT,
181 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, 256 old_daemon);
182 old_id->pid);
183 if (rc) 257 if (rc)
184 printk(KERN_WARNING "Failed to send QUIT " 258 printk(KERN_WARNING "Failed to send QUIT "
185 "message to daemon [%d]; rc = [%d]\n", 259 "message to daemon [0x%p]; rc = [%d]\n",
186 old_id->pid, rc); 260 old_daemon->pid, rc);
187 kfree(old_id); 261 hlist_del(&old_daemon->euid_chain);
262 kfree(old_daemon);
188 } 263 }
189 new_id->uid = uid; 264 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
190 new_id->pid = pid; 265 if (rc)
191 hlist_add_head(&new_id->id_chain, 266 printk(KERN_ERR "%s: The gods are displeased with this attempt "
192 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); 267 "to create a new daemon object for euid [%d]; pid "
193 rc = 0; 268 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
194unlock: 269 mutex_unlock(&ecryptfs_daemon_hash_mux);
195 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 270 return rc;
271}
272
273/**
274 * ecryptfs_exorcise_daemon - Destroy the daemon struct
275 *
276 * Must be called ceremoniously while in possession of
277 * ecryptfs_daemon_hash_mux and the daemon's own mux.
278 */
279int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
280{
281 struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp;
282 int rc = 0;
283
284 mutex_lock(&daemon->mux);
285 if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ)
286 || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) {
287 rc = -EBUSY;
288 printk(KERN_WARNING "%s: Attempt to destroy daemon with pid "
289 "[0x%p], but it is in the midst of a read or a poll\n",
290 __func__, daemon->pid);
291 mutex_unlock(&daemon->mux);
292 goto out;
293 }
294 list_for_each_entry_safe(msg_ctx, msg_ctx_tmp,
295 &daemon->msg_ctx_out_queue, daemon_out_list) {
296 list_del(&msg_ctx->daemon_out_list);
297 daemon->num_queued_msg_ctx--;
298 printk(KERN_WARNING "%s: Warning: dropping message that is in "
299 "the out queue of a dying daemon\n", __func__);
300 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
301 }
302 hlist_del(&daemon->euid_chain);
303 if (daemon->task)
304 wake_up_process(daemon->task);
305 if (daemon->pid)
306 put_pid(daemon->pid);
307 if (daemon->user_ns)
308 put_user_ns(daemon->user_ns);
309 mutex_unlock(&daemon->mux);
310 memset(daemon, 0, sizeof(*daemon));
311 kfree(daemon);
312out:
196 return rc; 313 return rc;
197} 314}
198 315
199/** 316/**
200 * ecryptfs_process_quit 317 * ecryptfs_process_quit
201 * @uid: The user ID owner of the message 318 * @euid: The user ID owner of the message
319 * @user_ns: The namespace in which @euid applies
202 * @pid: The process ID for the userspace program that sent the 320 * @pid: The process ID for the userspace program that sent the
203 * message 321 * message
204 * 322 *
205 * Deletes the corresponding daemon id for the given uid and pid, if 323 * Deletes the corresponding daemon for the given euid and pid, if
206 * it is the registered that is requesting the deletion. Returns zero 324 * it is the registered that is requesting the deletion. Returns zero
207 * after deleting the desired daemon id; non-zero otherwise. 325 * after deleting the desired daemon; non-zero otherwise.
208 */ 326 */
209int ecryptfs_process_quit(uid_t uid, pid_t pid) 327int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
328 struct pid *pid)
210{ 329{
211 struct ecryptfs_daemon_id *id; 330 struct ecryptfs_daemon *daemon;
212 int rc; 331 int rc;
213 332
214 mutex_lock(&ecryptfs_daemon_id_hash_mux); 333 mutex_lock(&ecryptfs_daemon_hash_mux);
215 if (ecryptfs_find_daemon_id(uid, &id)) { 334 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns);
335 if (rc || !daemon) {
216 rc = -EINVAL; 336 rc = -EINVAL;
217 ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " 337 printk(KERN_ERR "Received request from user [%d] to "
218 "unregister unrecognized daemon [%d]\n", uid, 338 "unregister unrecognized daemon [0x%p]\n", euid, pid);
219 pid); 339 goto out_unlock;
220 goto unlock;
221 } 340 }
222 if (id->pid != pid) { 341 rc = ecryptfs_exorcise_daemon(daemon);
223 rc = -EINVAL; 342out_unlock:
224 ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " 343 mutex_unlock(&ecryptfs_daemon_hash_mux);
225 "with pid [%d] to unregister daemon [%d]\n",
226 uid, pid, id->pid);
227 goto unlock;
228 }
229 hlist_del(&id->id_chain);
230 kfree(id);
231 rc = 0;
232unlock:
233 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
234 return rc; 344 return rc;
235} 345}
236 346
237/** 347/**
238 * ecryptfs_process_reponse 348 * ecryptfs_process_reponse
239 * @msg: The ecryptfs message received; the caller should sanity check 349 * @msg: The ecryptfs message received; the caller should sanity check
240 * msg->data_len 350 * msg->data_len and free the memory
241 * @pid: The process ID of the userspace application that sent the 351 * @pid: The process ID of the userspace application that sent the
242 * message 352 * message
243 * @seq: The sequence number of the message 353 * @seq: The sequence number of the message; must match the sequence
354 * number for the existing message context waiting for this
355 * response
356 *
357 * Processes a response message after sending an operation request to
358 * userspace. Some other process is awaiting this response. Before
359 * sending out its first communications, the other process allocated a
360 * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The
361 * response message contains this index so that we can copy over the
362 * response message into the msg_ctx that the process holds a
363 * reference to. The other process is going to wake up, check to see
364 * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then
365 * proceed to read off and process the response message. Returns zero
366 * upon delivery to desired context element; non-zero upon delivery
367 * failure or error.
244 * 368 *
245 * Processes a response message after sending a operation request to 369 * Returns zero on success; non-zero otherwise
246 * userspace. Returns zero upon delivery to desired context element;
247 * non-zero upon delivery failure or error.
248 */ 370 */
249int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 371int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
250 pid_t pid, u32 seq) 372 struct user_namespace *user_ns, struct pid *pid,
373 u32 seq)
251{ 374{
252 struct ecryptfs_daemon_id *id; 375 struct ecryptfs_daemon *daemon;
253 struct ecryptfs_msg_ctx *msg_ctx; 376 struct ecryptfs_msg_ctx *msg_ctx;
254 int msg_size; 377 size_t msg_size;
378 struct nsproxy *nsproxy;
379 struct user_namespace *current_user_ns;
255 int rc; 380 int rc;
256 381
257 if (msg->index >= ecryptfs_message_buf_len) { 382 if (msg->index >= ecryptfs_message_buf_len) {
258 rc = -EINVAL; 383 rc = -EINVAL;
259 ecryptfs_printk(KERN_ERR, "Attempt to reference " 384 printk(KERN_ERR "%s: Attempt to reference "
260 "context buffer at index [%d]; maximum " 385 "context buffer at index [%d]; maximum "
261 "allowable is [%d]\n", msg->index, 386 "allowable is [%d]\n", __func__, msg->index,
262 (ecryptfs_message_buf_len - 1)); 387 (ecryptfs_message_buf_len - 1));
263 goto out; 388 goto out;
264 } 389 }
265 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; 390 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
266 mutex_lock(&msg_ctx->mux); 391 mutex_lock(&msg_ctx->mux);
267 if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { 392 mutex_lock(&ecryptfs_daemon_hash_mux);
393 rcu_read_lock();
394 nsproxy = task_nsproxy(msg_ctx->task);
395 if (nsproxy == NULL) {
268 rc = -EBADMSG; 396 rc = -EBADMSG;
269 ecryptfs_printk(KERN_WARNING, "User [%d] received a " 397 printk(KERN_ERR "%s: Receiving process is a zombie. Dropping "
270 "message response from process [%d] but does " 398 "message.\n", __func__);
271 "not have a registered daemon\n", 399 rcu_read_unlock();
272 msg_ctx->task->euid, pid); 400 mutex_unlock(&ecryptfs_daemon_hash_mux);
273 goto wake_up; 401 goto wake_up;
274 } 402 }
275 if (msg_ctx->task->euid != uid) { 403 current_user_ns = nsproxy->user_ns;
404 rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid,
405 current_user_ns);
406 rcu_read_unlock();
407 mutex_unlock(&ecryptfs_daemon_hash_mux);
408 if (rc) {
409 rc = -EBADMSG;
410 printk(KERN_WARNING "%s: User [%d] received a "
411 "message response from process [0x%p] but does "
412 "not have a registered daemon\n", __func__,
413 msg_ctx->task->euid, pid);
414 goto wake_up;
415 }
416 if (msg_ctx->task->euid != euid) {
276 rc = -EBADMSG; 417 rc = -EBADMSG;
277 ecryptfs_printk(KERN_WARNING, "Received message from user " 418 printk(KERN_WARNING "%s: Received message from user "
278 "[%d]; expected message from user [%d]\n", 419 "[%d]; expected message from user [%d]\n", __func__,
279 uid, msg_ctx->task->euid); 420 euid, msg_ctx->task->euid);
280 goto unlock; 421 goto unlock;
281 } 422 }
282 if (id->pid != pid) { 423 if (current_user_ns != user_ns) {
283 rc = -EBADMSG; 424 rc = -EBADMSG;
284 ecryptfs_printk(KERN_ERR, "User [%d] received a " 425 printk(KERN_WARNING "%s: Received message from user_ns "
285 "message response from an unrecognized " 426 "[0x%p]; expected message from user_ns [0x%p]\n",
286 "process [%d]\n", msg_ctx->task->euid, pid); 427 __func__, user_ns, nsproxy->user_ns);
428 goto unlock;
429 }
430 if (daemon->pid != pid) {
431 rc = -EBADMSG;
432 printk(KERN_ERR "%s: User [%d] sent a message response "
433 "from an unrecognized process [0x%p]\n",
434 __func__, msg_ctx->task->euid, pid);
287 goto unlock; 435 goto unlock;
288 } 436 }
289 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { 437 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
290 rc = -EINVAL; 438 rc = -EINVAL;
291 ecryptfs_printk(KERN_WARNING, "Desired context element is not " 439 printk(KERN_WARNING "%s: Desired context element is not "
292 "pending a response\n"); 440 "pending a response\n", __func__);
293 goto unlock; 441 goto unlock;
294 } else if (msg_ctx->counter != seq) { 442 } else if (msg_ctx->counter != seq) {
295 rc = -EINVAL; 443 rc = -EINVAL;
296 ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " 444 printk(KERN_WARNING "%s: Invalid message sequence; "
297 "expected [%d]; received [%d]\n", 445 "expected [%d]; received [%d]\n", __func__,
298 msg_ctx->counter, seq); 446 msg_ctx->counter, seq);
299 goto unlock; 447 goto unlock;
300 } 448 }
301 msg_size = sizeof(*msg) + msg->data_len; 449 msg_size = (sizeof(*msg) + msg->data_len);
302 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); 450 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
303 if (!msg_ctx->msg) { 451 if (!msg_ctx->msg) {
304 rc = -ENOMEM; 452 rc = -ENOMEM;
305 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 453 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
454 "GFP_KERNEL memory\n", __func__, msg_size);
306 goto unlock; 455 goto unlock;
307 } 456 }
308 memcpy(msg_ctx->msg, msg, msg_size); 457 memcpy(msg_ctx->msg, msg, msg_size);
@@ -317,34 +466,38 @@ out:
317} 466}
318 467
319/** 468/**
320 * ecryptfs_send_message 469 * ecryptfs_send_message_locked
321 * @transport: The transport over which to send the message (i.e., 470 * @transport: The transport over which to send the message (i.e.,
322 * netlink) 471 * netlink)
323 * @data: The data to send 472 * @data: The data to send
324 * @data_len: The length of data 473 * @data_len: The length of data
325 * @msg_ctx: The message context allocated for the send 474 * @msg_ctx: The message context allocated for the send
475 *
476 * Must be called with ecryptfs_daemon_hash_mux held.
477 *
478 * Returns zero on success; non-zero otherwise
326 */ 479 */
327int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 480static int
328 struct ecryptfs_msg_ctx **msg_ctx) 481ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
482 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx)
329{ 483{
330 struct ecryptfs_daemon_id *id; 484 struct ecryptfs_daemon *daemon;
331 int rc; 485 int rc;
332 486
333 mutex_lock(&ecryptfs_daemon_id_hash_mux); 487 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
334 if (ecryptfs_find_daemon_id(current->euid, &id)) { 488 current->nsproxy->user_ns);
335 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 489 if (rc || !daemon) {
336 rc = -ENOTCONN; 490 rc = -ENOTCONN;
337 ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " 491 printk(KERN_ERR "%s: User [%d] does not have a daemon "
338 "registered\n", current->euid); 492 "registered\n", __func__, current->euid);
339 goto out; 493 goto out;
340 } 494 }
341 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
342 mutex_lock(&ecryptfs_msg_ctx_lists_mux); 495 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
343 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); 496 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
344 if (rc) { 497 if (rc) {
345 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 498 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
346 ecryptfs_printk(KERN_WARNING, "Could not claim a free " 499 printk(KERN_WARNING "%s: Could not claim a free "
347 "context element\n"); 500 "context element\n", __func__);
348 goto out; 501 goto out;
349 } 502 }
350 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); 503 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
@@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
352 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 505 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
353 switch (transport) { 506 switch (transport) {
354 case ECRYPTFS_TRANSPORT_NETLINK: 507 case ECRYPTFS_TRANSPORT_NETLINK:
355 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, 508 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type,
356 ECRYPTFS_NLMSG_REQUEST, 0, id->pid); 509 0, daemon->pid);
510 break;
511 case ECRYPTFS_TRANSPORT_MISCDEV:
512 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type,
513 0, daemon);
357 break; 514 break;
358 case ECRYPTFS_TRANSPORT_CONNECTOR: 515 case ECRYPTFS_TRANSPORT_CONNECTOR:
359 case ECRYPTFS_TRANSPORT_RELAYFS: 516 case ECRYPTFS_TRANSPORT_RELAYFS:
360 default: 517 default:
361 rc = -ENOSYS; 518 rc = -ENOSYS;
362 } 519 }
363 if (rc) { 520 if (rc)
364 printk(KERN_ERR "Error attempting to send message to userspace " 521 printk(KERN_ERR "%s: Error attempting to send message to "
365 "daemon; rc = [%d]\n", rc); 522 "userspace daemon; rc = [%d]\n", __func__, rc);
366 }
367out: 523out:
368 return rc; 524 return rc;
369} 525}
370 526
371/** 527/**
528 * ecryptfs_send_message
529 * @transport: The transport over which to send the message (i.e.,
530 * netlink)
531 * @data: The data to send
532 * @data_len: The length of data
533 * @msg_ctx: The message context allocated for the send
534 *
535 * Grabs ecryptfs_daemon_hash_mux.
536 *
537 * Returns zero on success; non-zero otherwise
538 */
539int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
540 struct ecryptfs_msg_ctx **msg_ctx)
541{
542 int rc;
543
544 mutex_lock(&ecryptfs_daemon_hash_mux);
545 rc = ecryptfs_send_message_locked(transport, data, data_len,
546 ECRYPTFS_MSG_REQUEST, msg_ctx);
547 mutex_unlock(&ecryptfs_daemon_hash_mux);
548 return rc;
549}
550
551/**
372 * ecryptfs_wait_for_response 552 * ecryptfs_wait_for_response
373 * @msg_ctx: The context that was assigned when sending a message 553 * @msg_ctx: The context that was assigned when sending a message
374 * @msg: The incoming message from userspace; not set if rc != 0 554 * @msg: The incoming message from userspace; not set if rc != 0
@@ -377,7 +557,7 @@ out:
377 * of time exceeds ecryptfs_message_wait_timeout. If zero is 557 * of time exceeds ecryptfs_message_wait_timeout. If zero is
378 * returned, msg will point to a valid message from userspace; a 558 * returned, msg will point to a valid message from userspace; a
379 * non-zero value is returned upon failure to receive a message or an 559 * non-zero value is returned upon failure to receive a message or an
380 * error occurs. 560 * error occurs. Callee must free @msg on success.
381 */ 561 */
382int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 562int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
383 struct ecryptfs_message **msg) 563 struct ecryptfs_message **msg)
@@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport)
413 593
414 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { 594 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
415 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; 595 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
416 ecryptfs_printk(KERN_WARNING, "Specified number of users is " 596 printk(KERN_WARNING "%s: Specified number of users is "
417 "too large, defaulting to [%d] users\n", 597 "too large, defaulting to [%d] users\n", __func__,
418 ecryptfs_number_of_users); 598 ecryptfs_number_of_users);
419 } 599 }
420 mutex_init(&ecryptfs_daemon_id_hash_mux); 600 mutex_init(&ecryptfs_daemon_hash_mux);
421 mutex_lock(&ecryptfs_daemon_id_hash_mux); 601 mutex_lock(&ecryptfs_daemon_hash_mux);
422 ecryptfs_hash_buckets = 1; 602 ecryptfs_hash_buckets = 1;
423 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) 603 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
424 ecryptfs_hash_buckets++; 604 ecryptfs_hash_buckets++;
425 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) 605 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
426 * ecryptfs_hash_buckets, GFP_KERNEL); 606 * ecryptfs_hash_buckets), GFP_KERNEL);
427 if (!ecryptfs_daemon_id_hash) { 607 if (!ecryptfs_daemon_hash) {
428 rc = -ENOMEM; 608 rc = -ENOMEM;
429 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 609 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
430 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 610 mutex_unlock(&ecryptfs_daemon_hash_mux);
431 goto out; 611 goto out;
432 } 612 }
433 for (i = 0; i < ecryptfs_hash_buckets; i++) 613 for (i = 0; i < ecryptfs_hash_buckets; i++)
434 INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); 614 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
435 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 615 mutex_unlock(&ecryptfs_daemon_hash_mux);
436
437 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) 616 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
438 * ecryptfs_message_buf_len), GFP_KERNEL); 617 * ecryptfs_message_buf_len),
618 GFP_KERNEL);
439 if (!ecryptfs_msg_ctx_arr) { 619 if (!ecryptfs_msg_ctx_arr) {
440 rc = -ENOMEM; 620 rc = -ENOMEM;
441 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 621 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
442 goto out; 622 goto out;
443 } 623 }
444 mutex_init(&ecryptfs_msg_ctx_lists_mux); 624 mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport)
446 ecryptfs_msg_counter = 0; 626 ecryptfs_msg_counter = 0;
447 for (i = 0; i < ecryptfs_message_buf_len; i++) { 627 for (i = 0; i < ecryptfs_message_buf_len; i++) {
448 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); 628 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
629 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list);
449 mutex_init(&ecryptfs_msg_ctx_arr[i].mux); 630 mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
450 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); 631 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
451 ecryptfs_msg_ctx_arr[i].index = i; 632 ecryptfs_msg_ctx_arr[i].index = i;
@@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport)
464 if (rc) 645 if (rc)
465 ecryptfs_release_messaging(transport); 646 ecryptfs_release_messaging(transport);
466 break; 647 break;
648 case ECRYPTFS_TRANSPORT_MISCDEV:
649 rc = ecryptfs_init_ecryptfs_miscdev();
650 if (rc)
651 ecryptfs_release_messaging(transport);
652 break;
467 case ECRYPTFS_TRANSPORT_CONNECTOR: 653 case ECRYPTFS_TRANSPORT_CONNECTOR:
468 case ECRYPTFS_TRANSPORT_RELAYFS: 654 case ECRYPTFS_TRANSPORT_RELAYFS:
469 default: 655 default:
@@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport)
488 kfree(ecryptfs_msg_ctx_arr); 674 kfree(ecryptfs_msg_ctx_arr);
489 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 675 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
490 } 676 }
491 if (ecryptfs_daemon_id_hash) { 677 if (ecryptfs_daemon_hash) {
492 struct hlist_node *elem; 678 struct hlist_node *elem;
493 struct ecryptfs_daemon_id *id; 679 struct ecryptfs_daemon *daemon;
494 int i; 680 int i;
495 681
496 mutex_lock(&ecryptfs_daemon_id_hash_mux); 682 mutex_lock(&ecryptfs_daemon_hash_mux);
497 for (i = 0; i < ecryptfs_hash_buckets; i++) { 683 for (i = 0; i < ecryptfs_hash_buckets; i++) {
498 hlist_for_each_entry(id, elem, 684 int rc;
499 &ecryptfs_daemon_id_hash[i], 685
500 id_chain) { 686 hlist_for_each_entry(daemon, elem,
501 hlist_del(elem); 687 &ecryptfs_daemon_hash[i],
502 kfree(id); 688 euid_chain) {
689 rc = ecryptfs_exorcise_daemon(daemon);
690 if (rc)
691 printk(KERN_ERR "%s: Error whilst "
692 "attempting to destroy daemon; "
693 "rc = [%d]. Dazed and confused, "
694 "but trying to continue.\n",
695 __func__, rc);
503 } 696 }
504 } 697 }
505 kfree(ecryptfs_daemon_id_hash); 698 kfree(ecryptfs_daemon_hash);
506 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 699 mutex_unlock(&ecryptfs_daemon_hash_mux);
507 } 700 }
508 switch(transport) { 701 switch(transport) {
509 case ECRYPTFS_TRANSPORT_NETLINK: 702 case ECRYPTFS_TRANSPORT_NETLINK:
510 ecryptfs_release_netlink(); 703 ecryptfs_release_netlink();
511 break; 704 break;
705 case ECRYPTFS_TRANSPORT_MISCDEV:
706 ecryptfs_destroy_ecryptfs_miscdev();
707 break;
512 case ECRYPTFS_TRANSPORT_CONNECTOR: 708 case ECRYPTFS_TRANSPORT_CONNECTOR:
513 case ECRYPTFS_TRANSPORT_RELAYFS: 709 case ECRYPTFS_TRANSPORT_RELAYFS:
514 default: 710 default:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
new file mode 100644
index 000000000000..788995efd1d3
--- /dev/null
+++ b/fs/ecryptfs/miscdev.c
@@ -0,0 +1,598 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22#include <linux/fs.h>
23#include <linux/hash.h>
24#include <linux/random.h>
25#include <linux/miscdevice.h>
26#include <linux/poll.h>
27#include <linux/wait.h>
28#include <linux/module.h>
29#include "ecryptfs_kernel.h"
30
31static atomic_t ecryptfs_num_miscdev_opens;
32
33/**
34 * ecryptfs_miscdev_poll
35 * @file: dev file (ignored)
36 * @pt: dev poll table (ignored)
37 *
38 * Returns the poll mask
39 */
40static unsigned int
41ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
42{
43 struct ecryptfs_daemon *daemon;
44 unsigned int mask = 0;
45 int rc;
46
47 mutex_lock(&ecryptfs_daemon_hash_mux);
48 /* TODO: Just use file->private_data? */
49 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
50 current->nsproxy->user_ns);
51 BUG_ON(rc || !daemon);
52 mutex_lock(&daemon->mux);
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
55 printk(KERN_WARNING "%s: Attempt to poll on zombified "
56 "daemon\n", __func__);
57 goto out_unlock_daemon;
58 }
59 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ)
60 goto out_unlock_daemon;
61 if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)
62 goto out_unlock_daemon;
63 daemon->flags |= ECRYPTFS_DAEMON_IN_POLL;
64 mutex_unlock(&daemon->mux);
65 poll_wait(file, &daemon->wait, pt);
66 mutex_lock(&daemon->mux);
67 if (!list_empty(&daemon->msg_ctx_out_queue))
68 mask |= POLLIN | POLLRDNORM;
69out_unlock_daemon:
70 daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL;
71 mutex_unlock(&daemon->mux);
72 return mask;
73}
74
75/**
76 * ecryptfs_miscdev_open
77 * @inode: inode of miscdev handle (ignored)
78 * @file: file for miscdev handle (ignored)
79 *
80 * Returns zero on success; non-zero otherwise
81 */
82static int
83ecryptfs_miscdev_open(struct inode *inode, struct file *file)
84{
85 struct ecryptfs_daemon *daemon = NULL;
86 int rc;
87
88 mutex_lock(&ecryptfs_daemon_hash_mux);
89 rc = try_module_get(THIS_MODULE);
90 if (rc == 0) {
91 rc = -EIO;
92 printk(KERN_ERR "%s: Error attempting to increment module use "
93 "count; rc = [%d]\n", __func__, rc);
94 goto out_unlock_daemon_list;
95 }
96 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
97 current->nsproxy->user_ns);
98 if (rc || !daemon) {
99 rc = ecryptfs_spawn_daemon(&daemon, current->euid,
100 current->nsproxy->user_ns,
101 task_pid(current));
102 if (rc) {
103 printk(KERN_ERR "%s: Error attempting to spawn daemon; "
104 "rc = [%d]\n", __func__, rc);
105 goto out_module_put_unlock_daemon_list;
106 }
107 }
108 mutex_lock(&daemon->mux);
109 if (daemon->pid != task_pid(current)) {
110 rc = -EINVAL;
111 printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], "
112 "but pid [0x%p] has attempted to open the handle "
113 "instead\n", __func__, daemon->pid, daemon->euid,
114 task_pid(current));
115 goto out_unlock_daemon;
116 }
117 if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
118 rc = -EBUSY;
119 printk(KERN_ERR "%s: Miscellaneous device handle may only be "
120 "opened once per daemon; pid [0x%p] already has this "
121 "handle open\n", __func__, daemon->pid);
122 goto out_unlock_daemon;
123 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
125 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon:
127 mutex_unlock(&daemon->mux);
128out_module_put_unlock_daemon_list:
129 if (rc)
130 module_put(THIS_MODULE);
131out_unlock_daemon_list:
132 mutex_unlock(&ecryptfs_daemon_hash_mux);
133 return rc;
134}
135
136/**
137 * ecryptfs_miscdev_release
138 * @inode: inode of fs/ecryptfs/euid handle (ignored)
139 * @file: file for fs/ecryptfs/euid handle (ignored)
140 *
141 * This keeps the daemon registered until the daemon sends another
142 * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters.
143 *
144 * Returns zero on success; non-zero otherwise
145 */
146static int
147ecryptfs_miscdev_release(struct inode *inode, struct file *file)
148{
149 struct ecryptfs_daemon *daemon = NULL;
150 int rc;
151
152 mutex_lock(&ecryptfs_daemon_hash_mux);
153 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
154 current->nsproxy->user_ns);
155 BUG_ON(rc || !daemon);
156 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens);
161 mutex_unlock(&daemon->mux);
162 rc = ecryptfs_exorcise_daemon(daemon);
163 if (rc) {
164 printk(KERN_CRIT "%s: Fatal error whilst attempting to "
165 "shut down daemon; rc = [%d]. Please report this "
166 "bug.\n", __func__, rc);
167 BUG();
168 }
169 module_put(THIS_MODULE);
170 mutex_unlock(&ecryptfs_daemon_hash_mux);
171 return rc;
172}
173
174/**
175 * ecryptfs_send_miscdev
176 * @data: Data to send to daemon; may be NULL
177 * @data_size: Amount of data to send to daemon
178 * @msg_ctx: Message context, which is used to handle the reply. If
179 * this is NULL, then we do not expect a reply.
180 * @msg_type: Type of message
181 * @msg_flags: Flags for message
182 * @daemon: eCryptfs daemon object
183 *
184 * Add msg_ctx to queue and then, if it exists, notify the blocked
185 * miscdevess about the data being available. Must be called with
186 * ecryptfs_daemon_hash_mux held.
187 *
188 * Returns zero on success; non-zero otherwise
189 */
190int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{
194 int rc = 0;
195
196 mutex_lock(&msg_ctx->mux);
197 if (data) {
198 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
199 GFP_KERNEL);
200 if (!msg_ctx->msg) {
201 rc = -ENOMEM;
202 printk(KERN_ERR "%s: Out of memory whilst attempting "
203 "to kmalloc(%Zd, GFP_KERNEL)\n", __func__,
204 (sizeof(*msg_ctx->msg) + data_size));
205 goto out_unlock;
206 }
207 } else
208 msg_ctx->msg = NULL;
209 msg_ctx->msg->index = msg_ctx->index;
210 msg_ctx->msg->data_len = data_size;
211 msg_ctx->type = msg_type;
212 if (data) {
213 memcpy(msg_ctx->msg->data, data, data_size);
214 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
215 } else
216 msg_ctx->msg_size = 0;
217 mutex_lock(&daemon->mux);
218 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
219 daemon->num_queued_msg_ctx++;
220 wake_up_interruptible(&daemon->wait);
221 mutex_unlock(&daemon->mux);
222out_unlock:
223 mutex_unlock(&msg_ctx->mux);
224 return rc;
225}
226
227/**
228 * ecryptfs_miscdev_read - format and send message from queue
229 * @file: fs/ecryptfs/euid miscdevfs handle (ignored)
230 * @buf: User buffer into which to copy the next message on the daemon queue
231 * @count: Amount of space available in @buf
232 * @ppos: Offset in file (ignored)
233 *
234 * Pulls the most recent message from the daemon queue, formats it for
235 * being sent via a miscdevfs handle, and copies it into @buf
236 *
237 * Returns the number of bytes copied into the user buffer
238 */
239static ssize_t
240ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
241 loff_t *ppos)
242{
243 struct ecryptfs_daemon *daemon;
244 struct ecryptfs_msg_ctx *msg_ctx;
245 size_t packet_length_size;
246 u32 counter_nbo;
247 char packet_length[3];
248 size_t i;
249 size_t total_length;
250 int rc;
251
252 mutex_lock(&ecryptfs_daemon_hash_mux);
253 /* TODO: Just use file->private_data? */
254 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
255 current->nsproxy->user_ns);
256 BUG_ON(rc || !daemon);
257 mutex_lock(&daemon->mux);
258 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
259 rc = 0;
260 printk(KERN_WARNING "%s: Attempt to read from zombified "
261 "daemon\n", __func__);
262 goto out_unlock_daemon;
263 }
264 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) {
265 rc = 0;
266 goto out_unlock_daemon;
267 }
268 /* This daemon will not go away so long as this flag is set */
269 daemon->flags |= ECRYPTFS_DAEMON_IN_READ;
270 mutex_unlock(&ecryptfs_daemon_hash_mux);
271check_list:
272 if (list_empty(&daemon->msg_ctx_out_queue)) {
273 mutex_unlock(&daemon->mux);
274 rc = wait_event_interruptible(
275 daemon->wait, !list_empty(&daemon->msg_ctx_out_queue));
276 mutex_lock(&daemon->mux);
277 if (rc < 0) {
278 rc = 0;
279 goto out_unlock_daemon;
280 }
281 }
282 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
283 rc = 0;
284 goto out_unlock_daemon;
285 }
286 if (list_empty(&daemon->msg_ctx_out_queue)) {
287 /* Something else jumped in since the
288 * wait_event_interruptable() and removed the
289 * message from the queue; try again */
290 goto check_list;
291 }
292 BUG_ON(current->euid != daemon->euid);
293 BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
294 BUG_ON(task_pid(current) != daemon->pid);
295 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
296 struct ecryptfs_msg_ctx, daemon_out_list);
297 BUG_ON(!msg_ctx);
298 mutex_lock(&msg_ctx->mux);
299 if (msg_ctx->msg) {
300 rc = ecryptfs_write_packet_length(packet_length,
301 msg_ctx->msg_size,
302 &packet_length_size);
303 if (rc) {
304 rc = 0;
305 printk(KERN_WARNING "%s: Error writing packet length; "
306 "rc = [%d]\n", __func__, rc);
307 goto out_unlock_msg_ctx;
308 }
309 } else {
310 packet_length_size = 0;
311 msg_ctx->msg_size = 0;
312 }
313 /* miscdevfs packet format:
314 * Octet 0: Type
315 * Octets 1-4: network byte order msg_ctx->counter
316 * Octets 5-N0: Size of struct ecryptfs_message to follow
317 * Octets N0-N1: struct ecryptfs_message (including data)
318 *
319 * Octets 5-N1 not written if the packet type does not
320 * include a message */
321 total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size);
322 if (count < total_length) {
323 rc = 0;
324 printk(KERN_WARNING "%s: Only given user buffer of "
325 "size [%Zd], but we need [%Zd] to read the "
326 "pending message\n", __func__, count, total_length);
327 goto out_unlock_msg_ctx;
328 }
329 i = 0;
330 buf[i++] = msg_ctx->type;
331 counter_nbo = cpu_to_be32(msg_ctx->counter);
332 memcpy(&buf[i], (char *)&counter_nbo, 4);
333 i += 4;
334 if (msg_ctx->msg) {
335 memcpy(&buf[i], packet_length, packet_length_size);
336 i += packet_length_size;
337 rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size);
338 if (rc) {
339 printk(KERN_ERR "%s: copy_to_user returned error "
340 "[%d]\n", __func__, rc);
341 goto out_unlock_msg_ctx;
342 }
343 i += msg_ctx->msg_size;
344 }
345 rc = i;
346 list_del(&msg_ctx->daemon_out_list);
347 kfree(msg_ctx->msg);
348 msg_ctx->msg = NULL;
349 /* We do not expect a reply from the userspace daemon for any
350 * message type other than ECRYPTFS_MSG_REQUEST */
351 if (msg_ctx->type != ECRYPTFS_MSG_REQUEST)
352 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
353out_unlock_msg_ctx:
354 mutex_unlock(&msg_ctx->mux);
355out_unlock_daemon:
356 daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ;
357 mutex_unlock(&daemon->mux);
358 return rc;
359}
360
361/**
362 * ecryptfs_miscdev_helo
363 * @euid: effective user id of miscdevess sending helo packet
364 * @user_ns: The namespace in which @euid applies
365 * @pid: miscdevess id of miscdevess sending helo packet
366 *
367 * Returns zero on success; non-zero otherwise
368 */
369static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
370 struct pid *pid)
371{
372 int rc;
373
374 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
375 pid);
376 if (rc)
377 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
378 return rc;
379}
380
381/**
382 * ecryptfs_miscdev_quit
383 * @euid: effective user id of miscdevess sending quit packet
384 * @user_ns: The namespace in which @euid applies
385 * @pid: miscdevess id of miscdevess sending quit packet
386 *
387 * Returns zero on success; non-zero otherwise
388 */
389static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
390 struct pid *pid)
391{
392 int rc;
393
394 rc = ecryptfs_process_quit(euid, user_ns, pid);
395 if (rc)
396 printk(KERN_WARNING
397 "Error processing QUIT message; rc = [%d]\n", rc);
398 return rc;
399}
400
401/**
402 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
403 * @data: Bytes comprising struct ecryptfs_message
404 * @data_size: sizeof(struct ecryptfs_message) + data len
405 * @euid: Effective user id of miscdevess sending the miscdev response
406 * @user_ns: The namespace in which @euid applies
407 * @pid: Miscdevess id of miscdevess sending the miscdev response
408 * @seq: Sequence number for miscdev response packet
409 *
410 * Returns zero on success; non-zero otherwise
411 */
412static int ecryptfs_miscdev_response(char *data, size_t data_size,
413 uid_t euid, struct user_namespace *user_ns,
414 struct pid *pid, u32 seq)
415{
416 struct ecryptfs_message *msg = (struct ecryptfs_message *)data;
417 int rc;
418
419 if ((sizeof(*msg) + msg->data_len) != data_size) {
420 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
421 "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__,
422 (sizeof(*msg) + msg->data_len), data_size);
423 rc = -EINVAL;
424 goto out;
425 }
426 rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq);
427 if (rc)
428 printk(KERN_ERR
429 "Error processing response message; rc = [%d]\n", rc);
430out:
431 return rc;
432}
433
434/**
435 * ecryptfs_miscdev_write - handle write to daemon miscdev handle
436 * @file: File for misc dev handle (ignored)
437 * @buf: Buffer containing user data
438 * @count: Amount of data in @buf
439 * @ppos: Pointer to offset in file (ignored)
440 *
441 * miscdevfs packet format:
442 * Octet 0: Type
443 * Octets 1-4: network byte order msg_ctx->counter (0's for non-response)
444 * Octets 5-N0: Size of struct ecryptfs_message to follow
445 * Octets N0-N1: struct ecryptfs_message (including data)
446 *
447 * Returns the number of bytes read from @buf
448 */
449static ssize_t
450ecryptfs_miscdev_write(struct file *file, const char __user *buf,
451 size_t count, loff_t *ppos)
452{
453 u32 counter_nbo, seq;
454 size_t packet_size, packet_size_length, i;
455 ssize_t sz = 0;
456 char *data;
457 int rc;
458
459 if (count == 0)
460 goto out;
461 data = kmalloc(count, GFP_KERNEL);
462 if (!data) {
463 printk(KERN_ERR "%s: Out of memory whilst attempting to "
464 "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count);
465 goto out;
466 }
467 rc = copy_from_user(data, buf, count);
468 if (rc) {
469 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
470 __func__, rc);
471 goto out_free;
472 }
473 sz = count;
474 i = 0;
475 switch (data[i++]) {
476 case ECRYPTFS_MSG_RESPONSE:
477 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
478 printk(KERN_WARNING "%s: Minimum acceptable packet "
479 "size is [%Zd], but amount of data written is "
480 "only [%Zd]. Discarding response packet.\n",
481 __func__,
482 (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
483 count);
484 goto out_free;
485 }
486 memcpy((char *)&counter_nbo, &data[i], 4);
487 seq = be32_to_cpu(counter_nbo);
488 i += 4;
489 rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
490 &packet_size_length);
491 if (rc) {
492 printk(KERN_WARNING "%s: Error parsing packet length; "
493 "rc = [%d]\n", __func__, rc);
494 goto out_free;
495 }
496 i += packet_size_length;
497 if ((1 + 4 + packet_size_length + packet_size) != count) {
498 printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])"
499 " + packet_size([%Zd]))([%Zd]) != "
500 "count([%Zd]). Invalid packet format.\n",
501 __func__, packet_size_length, packet_size,
502 (1 + packet_size_length + packet_size), count);
503 goto out_free;
504 }
505 rc = ecryptfs_miscdev_response(&data[i], packet_size,
506 current->euid,
507 current->nsproxy->user_ns,
508 task_pid(current), seq);
509 if (rc)
510 printk(KERN_WARNING "%s: Failed to deliver miscdev "
511 "response to requesting operation; rc = [%d]\n",
512 __func__, rc);
513 break;
514 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break;
536 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
538 "message of unrecognized type [%d]\n",
539 data[0]);
540 break;
541 }
542out_free:
543 kfree(data);
544out:
545 return sz;
546}
547
548
549static const struct file_operations ecryptfs_miscdev_fops = {
550 .open = ecryptfs_miscdev_open,
551 .poll = ecryptfs_miscdev_poll,
552 .read = ecryptfs_miscdev_read,
553 .write = ecryptfs_miscdev_write,
554 .release = ecryptfs_miscdev_release,
555};
556
557static struct miscdevice ecryptfs_miscdev = {
558 .minor = MISC_DYNAMIC_MINOR,
559 .name = "ecryptfs",
560 .fops = &ecryptfs_miscdev_fops
561};
562
563/**
564 * ecryptfs_init_ecryptfs_miscdev
565 *
566 * Messages sent to the userspace daemon from the kernel are placed on
567 * a queue associated with the daemon. The next read against the
568 * miscdev handle by that daemon will return the oldest message placed
569 * on the message queue for the daemon.
570 *
571 * Returns zero on success; non-zero otherwise
572 */
573int ecryptfs_init_ecryptfs_miscdev(void)
574{
575 int rc;
576
577 atomic_set(&ecryptfs_num_miscdev_opens, 0);
578 mutex_lock(&ecryptfs_daemon_hash_mux);
579 rc = misc_register(&ecryptfs_miscdev);
580 if (rc)
581 printk(KERN_ERR "%s: Failed to register miscellaneous device "
582 "for communications with userspace daemons; rc = [%d]\n",
583 __func__, rc);
584 mutex_unlock(&ecryptfs_daemon_hash_mux);
585 return rc;
586}
587
588/**
589 * ecryptfs_destroy_ecryptfs_miscdev
590 *
591 * All of the daemons must be exorcised prior to calling this
592 * function.
593 */
594void ecryptfs_destroy_ecryptfs_miscdev(void)
595{
596 BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0);
597 misc_deregister(&ecryptfs_miscdev);
598}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 6df1debdccce..2b6fe1e6e8ba 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
153 flush_dcache_page(page); 153 flush_dcache_page(page);
154 if (rc) { 154 if (rc) {
155 printk(KERN_ERR "%s: Error reading xattr " 155 printk(KERN_ERR "%s: Error reading xattr "
156 "region; rc = [%d]\n", __FUNCTION__, rc); 156 "region; rc = [%d]\n", __func__, rc);
157 goto out; 157 goto out;
158 } 158 }
159 } else { 159 } else {
@@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
169 if (rc) { 169 if (rc) {
170 printk(KERN_ERR "%s: Error attempting to read " 170 printk(KERN_ERR "%s: Error attempting to read "
171 "extent at offset [%lld] in the lower " 171 "extent at offset [%lld] in the lower "
172 "file; rc = [%d]\n", __FUNCTION__, 172 "file; rc = [%d]\n", __func__,
173 lower_offset, rc); 173 lower_offset, rc);
174 goto out; 174 goto out;
175 } 175 }
@@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
212 "the encrypted content from the lower " 212 "the encrypted content from the lower "
213 "file whilst inserting the metadata " 213 "file whilst inserting the metadata "
214 "from the xattr into the header; rc = " 214 "from the xattr into the header; rc = "
215 "[%d]\n", __FUNCTION__, rc); 215 "[%d]\n", __func__, rc);
216 goto out; 216 goto out;
217 } 217 }
218 218
@@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
293 if (rc) { 293 if (rc) {
294 printk(KERN_ERR "%s: Error attemping to read " 294 printk(KERN_ERR "%s: Error attemping to read "
295 "lower page segment; rc = [%d]\n", 295 "lower page segment; rc = [%d]\n",
296 __FUNCTION__, rc); 296 __func__, rc);
297 ClearPageUptodate(page); 297 ClearPageUptodate(page);
298 goto out; 298 goto out;
299 } else 299 } else
@@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
308 "from the lower file whilst " 308 "from the lower file whilst "
309 "inserting the metadata from " 309 "inserting the metadata from "
310 "the xattr into the header; rc " 310 "the xattr into the header; rc "
311 "= [%d]\n", __FUNCTION__, rc); 311 "= [%d]\n", __func__, rc);
312 ClearPageUptodate(page); 312 ClearPageUptodate(page);
313 goto out; 313 goto out;
314 } 314 }
@@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
320 if (rc) { 320 if (rc) {
321 printk(KERN_ERR "%s: Error reading " 321 printk(KERN_ERR "%s: Error reading "
322 "page; rc = [%d]\n", 322 "page; rc = [%d]\n",
323 __FUNCTION__, rc); 323 __func__, rc);
324 ClearPageUptodate(page); 324 ClearPageUptodate(page);
325 goto out; 325 goto out;
326 } 326 }
@@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
331 if (rc) { 331 if (rc) {
332 printk(KERN_ERR "%s: Error decrypting page " 332 printk(KERN_ERR "%s: Error decrypting page "
333 "at index [%ld]; rc = [%d]\n", 333 "at index [%ld]; rc = [%d]\n",
334 __FUNCTION__, page->index, rc); 334 __func__, page->index, rc);
335 ClearPageUptodate(page); 335 ClearPageUptodate(page);
336 goto out; 336 goto out;
337 } 337 }
@@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
348 if (rc) { 348 if (rc) {
349 printk(KERN_ERR "%s: Error on attempt to " 349 printk(KERN_ERR "%s: Error on attempt to "
350 "truncate to (higher) offset [%lld];" 350 "truncate to (higher) offset [%lld];"
351 " rc = [%d]\n", __FUNCTION__, 351 " rc = [%d]\n", __func__,
352 prev_page_end_size, rc); 352 prev_page_end_size, rc);
353 goto out; 353 goto out;
354 } 354 }
@@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
389 kfree(file_size_virt); 389 kfree(file_size_virt);
390 if (rc) 390 if (rc)
391 printk(KERN_ERR "%s: Error writing file size to header; " 391 printk(KERN_ERR "%s: Error writing file size to header; "
392 "rc = [%d]\n", __FUNCTION__, rc); 392 "rc = [%d]\n", __func__, rc);
393out: 393out:
394 return rc; 394 return rc;
395} 395}
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index f638a698dc52..e0abad62b395 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock;
44 * upon sending the message; non-zero upon error. 44 * upon sending the message; non-zero upon error.
45 */ 45 */
46int ecryptfs_send_netlink(char *data, int data_len, 46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 47 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
48 u16 msg_flags, pid_t daemon_pid) 48 u16 msg_flags, struct pid *daemon_pid)
49{ 49{
50 struct sk_buff *skb; 50 struct sk_buff *skb;
51 struct nlmsghdr *nlh; 51 struct nlmsghdr *nlh;
@@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); 60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out; 61 goto out;
62 } 62 }
63 nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, 63 nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len); 64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags; 65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) { 66 if (msg_ctx && payload_len) {
@@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
69 msg->data_len = data_len; 69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len); 70 memcpy(msg->data, data, data_len);
71 } 71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); 72 rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0);
73 if (rc < 0) { 73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " 74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc); 75 "message; rc = [%d]\n", rc);
@@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{ 99{
100 struct nlmsghdr *nlh = nlmsg_hdr(skb); 100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh); 101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 struct pid *pid;
102 int rc; 103 int rc;
103 104
104 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { 105 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
@@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
107 "incorrectly specified data length\n"); 108 "incorrectly specified data length\n");
108 goto out; 109 goto out;
109 } 110 }
110 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, 111 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
111 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); 112 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL,
113 pid, nlh->nlmsg_seq);
114 put_pid(pid);
112 if (rc) 115 if (rc)
113 printk(KERN_ERR 116 printk(KERN_ERR
114 "Error processing response message; rc = [%d]\n", rc); 117 "Error processing response message; rc = [%d]\n", rc);
@@ -126,11 +129,13 @@ out:
126 */ 129 */
127static int ecryptfs_process_nl_helo(struct sk_buff *skb) 130static int ecryptfs_process_nl_helo(struct sk_buff *skb)
128{ 131{
132 struct pid *pid;
129 int rc; 133 int rc;
130 134
135 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
131 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, 136 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
132 NETLINK_CREDS(skb)->uid, 137 NETLINK_CREDS(skb)->uid, NULL, pid);
133 NETLINK_CREDS(skb)->pid); 138 put_pid(pid);
134 if (rc) 139 if (rc)
135 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); 140 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
136 return rc; 141 return rc;
@@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb)
147 */ 152 */
148static int ecryptfs_process_nl_quit(struct sk_buff *skb) 153static int ecryptfs_process_nl_quit(struct sk_buff *skb)
149{ 154{
155 struct pid *pid;
150 int rc; 156 int rc;
151 157
152 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, 158 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
153 NETLINK_CREDS(skb)->pid); 159 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid);
160 put_pid(pid);
154 if (rc) 161 if (rc)
155 printk(KERN_WARNING 162 printk(KERN_WARNING
156 "Error processing QUIT message; rc = [%d]\n", rc); 163 "Error processing QUIT message; rc = [%d]\n", rc);
@@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb)
176 goto free; 183 goto free;
177 } 184 }
178 switch (nlh->nlmsg_type) { 185 switch (nlh->nlmsg_type) {
179 case ECRYPTFS_NLMSG_RESPONSE: 186 case ECRYPTFS_MSG_RESPONSE:
180 if (ecryptfs_process_nl_response(skb)) { 187 if (ecryptfs_process_nl_response(skb)) {
181 ecryptfs_printk(KERN_WARNING, "Failed to " 188 ecryptfs_printk(KERN_WARNING, "Failed to "
182 "deliver netlink response to " 189 "deliver netlink response to "
183 "requesting operation\n"); 190 "requesting operation\n");
184 } 191 }
185 break; 192 break;
186 case ECRYPTFS_NLMSG_HELO: 193 case ECRYPTFS_MSG_HELO:
187 if (ecryptfs_process_nl_helo(skb)) { 194 if (ecryptfs_process_nl_helo(skb)) {
188 ecryptfs_printk(KERN_WARNING, "Failed to " 195 ecryptfs_printk(KERN_WARNING, "Failed to "
189 "fulfill HELO request\n"); 196 "fulfill HELO request\n");
190 } 197 }
191 break; 198 break;
192 case ECRYPTFS_NLMSG_QUIT: 199 case ECRYPTFS_MSG_QUIT:
193 if (ecryptfs_process_nl_quit(skb)) { 200 if (ecryptfs_process_nl_quit(skb)) {
194 ecryptfs_printk(KERN_WARNING, "Failed to " 201 ecryptfs_printk(KERN_WARNING, "Failed to "
195 "fulfill QUIT request\n"); 202 "fulfill QUIT request\n");
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0c4928623bbc..ebf55150be56 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
55 set_fs(fs_save); 55 set_fs(fs_save);
56 if (octets_written < 0) { 56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; " 57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __FUNCTION__, octets_written, size); 58 "expected [%td]\n", __func__, octets_written, size);
59 rc = -EINVAL; 59 rc = -EINVAL;
60 } 60 }
61 mutex_unlock(&inode_info->lower_file_mutex); 61 mutex_unlock(&inode_info->lower_file_mutex);
@@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
153 rc = PTR_ERR(ecryptfs_page); 153 rc = PTR_ERR(ecryptfs_page);
154 printk(KERN_ERR "%s: Error getting page at " 154 printk(KERN_ERR "%s: Error getting page at "
155 "index [%ld] from eCryptfs inode " 155 "index [%ld] from eCryptfs inode "
156 "mapping; rc = [%d]\n", __FUNCTION__, 156 "mapping; rc = [%d]\n", __func__,
157 ecryptfs_page_idx, rc); 157 ecryptfs_page_idx, rc);
158 goto out; 158 goto out;
159 } 159 }
@@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
165 if (rc) { 165 if (rc) {
166 printk(KERN_ERR "%s: Error decrypting " 166 printk(KERN_ERR "%s: Error decrypting "
167 "page; rc = [%d]\n", 167 "page; rc = [%d]\n",
168 __FUNCTION__, rc); 168 __func__, rc);
169 ClearPageUptodate(ecryptfs_page); 169 ClearPageUptodate(ecryptfs_page);
170 page_cache_release(ecryptfs_page); 170 page_cache_release(ecryptfs_page);
171 goto out; 171 goto out;
@@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
202 page_cache_release(ecryptfs_page); 202 page_cache_release(ecryptfs_page);
203 if (rc) { 203 if (rc) {
204 printk(KERN_ERR "%s: Error encrypting " 204 printk(KERN_ERR "%s: Error encrypting "
205 "page; rc = [%d]\n", __FUNCTION__, rc); 205 "page; rc = [%d]\n", __func__, rc);
206 goto out; 206 goto out;
207 } 207 }
208 pos += num_bytes; 208 pos += num_bytes;
@@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
254 set_fs(fs_save); 254 set_fs(fs_save);
255 if (octets_read < 0) { 255 if (octets_read < 0) {
256 printk(KERN_ERR "%s: octets_read = [%td]; " 256 printk(KERN_ERR "%s: octets_read = [%td]; "
257 "expected [%td]\n", __FUNCTION__, octets_read, size); 257 "expected [%td]\n", __func__, octets_read, size);
258 rc = -EINVAL; 258 rc = -EINVAL;
259 } 259 }
260 mutex_unlock(&inode_info->lower_file_mutex); 260 mutex_unlock(&inode_info->lower_file_mutex);
@@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
327 printk(KERN_ERR "%s: Attempt to read data past the end of the " 327 printk(KERN_ERR "%s: Attempt to read data past the end of the "
328 "file; offset = [%lld]; size = [%td]; " 328 "file; offset = [%lld]; size = [%td]; "
329 "ecryptfs_file_size = [%lld]\n", 329 "ecryptfs_file_size = [%lld]\n",
330 __FUNCTION__, offset, size, ecryptfs_file_size); 330 __func__, offset, size, ecryptfs_file_size);
331 goto out; 331 goto out;
332 } 332 }
333 pos = offset; 333 pos = offset;
@@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
345 rc = PTR_ERR(ecryptfs_page); 345 rc = PTR_ERR(ecryptfs_page);
346 printk(KERN_ERR "%s: Error getting page at " 346 printk(KERN_ERR "%s: Error getting page at "
347 "index [%ld] from eCryptfs inode " 347 "index [%ld] from eCryptfs inode "
348 "mapping; rc = [%d]\n", __FUNCTION__, 348 "mapping; rc = [%d]\n", __func__,
349 ecryptfs_page_idx, rc); 349 ecryptfs_page_idx, rc);
350 goto out; 350 goto out;
351 } 351 }
352 rc = ecryptfs_decrypt_page(ecryptfs_page); 352 rc = ecryptfs_decrypt_page(ecryptfs_page);
353 if (rc) { 353 if (rc) {
354 printk(KERN_ERR "%s: Error decrypting " 354 printk(KERN_ERR "%s: Error decrypting "
355 "page; rc = [%d]\n", __FUNCTION__, rc); 355 "page; rc = [%d]\n", __func__, rc);
356 ClearPageUptodate(ecryptfs_page); 356 ClearPageUptodate(ecryptfs_page);
357 page_cache_release(ecryptfs_page); 357 page_cache_release(ecryptfs_page);
358 goto out; 358 goto out;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a415f42d32cf..221086fef174 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
257 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
258} 258}
259 259
260/* Special initialization for the RB tree node to detect linkage */
261static inline void ep_rb_initnode(struct rb_node *n)
262{
263 rb_set_parent(n, n);
264}
265
266/* Removes a node from the RB tree and marks it for a fast is-linked check */
267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
268{
269 rb_erase(n, r);
270 rb_set_parent(n, n);
271}
272
273/* Fast check to verify that the item is linked to the main RB tree */
274static inline int ep_rb_linked(struct rb_node *n)
275{
276 return rb_parent(n) != n;
277}
278
279/* Tells us if the item is currently linked */ 260/* Tells us if the item is currently linked */
280static inline int ep_is_linked(struct list_head *p) 261static inline int ep_is_linked(struct list_head *p)
281{ 262{
@@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p)
283} 264}
284 265
285/* Get the "struct epitem" from a wait queue pointer */ 266/* Get the "struct epitem" from a wait queue pointer */
286static inline struct epitem * ep_item_from_wait(wait_queue_t *p) 267static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
287{ 268{
288 return container_of(p, struct eppoll_entry, wait)->base; 269 return container_of(p, struct eppoll_entry, wait)->base;
289} 270}
290 271
291/* Get the "struct epitem" from an epoll queue wrapper */ 272/* Get the "struct epitem" from an epoll queue wrapper */
292static inline struct epitem * ep_item_from_epqueue(poll_table *p) 273static inline struct epitem *ep_item_from_epqueue(poll_table *p)
293{ 274{
294 return container_of(p, struct ep_pqueue, pt)->epi; 275 return container_of(p, struct ep_pqueue, pt)->epi;
295} 276}
@@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
411 list_del_init(&epi->fllink); 392 list_del_init(&epi->fllink);
412 spin_unlock(&file->f_ep_lock); 393 spin_unlock(&file->f_ep_lock);
413 394
414 if (ep_rb_linked(&epi->rbn)) 395 rb_erase(&epi->rbn, &ep->rbr);
415 ep_rb_erase(&epi->rbn, &ep->rbr);
416 396
417 spin_lock_irqsave(&ep->lock, flags); 397 spin_lock_irqsave(&ep->lock, flags);
418 if (ep_is_linked(&epi->rdllink)) 398 if (ep_is_linked(&epi->rdllink))
@@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
728 goto error_return; 708 goto error_return;
729 709
730 /* Item initialization follow here ... */ 710 /* Item initialization follow here ... */
731 ep_rb_initnode(&epi->rbn);
732 INIT_LIST_HEAD(&epi->rdllink); 711 INIT_LIST_HEAD(&epi->rdllink);
733 INIT_LIST_HEAD(&epi->fllink); 712 INIT_LIST_HEAD(&epi->fllink);
734 INIT_LIST_HEAD(&epi->pwqlist); 713 INIT_LIST_HEAD(&epi->pwqlist);
@@ -1262,7 +1241,7 @@ error_return:
1262 return error; 1241 return error;
1263} 1242}
1264 1243
1265#ifdef TIF_RESTORE_SIGMASK 1244#ifdef HAVE_SET_RESTORE_SIGMASK
1266 1245
1267/* 1246/*
1268 * Implement the event wait interface for the eventpoll file. It is the kernel 1247 * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -1300,7 +1279,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1300 if (error == -EINTR) { 1279 if (error == -EINTR) {
1301 memcpy(&current->saved_sigmask, &sigsaved, 1280 memcpy(&current->saved_sigmask, &sigsaved,
1302 sizeof(sigsaved)); 1281 sizeof(sigsaved));
1303 set_thread_flag(TIF_RESTORE_SIGMASK); 1282 set_restore_sigmask();
1304 } else 1283 } else
1305 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1284 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1306 } 1285 }
@@ -1308,7 +1287,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1308 return error; 1287 return error;
1309} 1288}
1310 1289
1311#endif /* #ifdef TIF_RESTORE_SIGMASK */ 1290#endif /* HAVE_SET_RESTORE_SIGMASK */
1312 1291
1313static int __init eventpoll_init(void) 1292static int __init eventpoll_init(void)
1314{ 1293{
@@ -1330,4 +1309,3 @@ static int __init eventpoll_init(void)
1330 return 0; 1309 return 0;
1331} 1310}
1332fs_initcall(eventpoll_init); 1311fs_initcall(eventpoll_init);
1333
diff --git a/fs/exec.c b/fs/exec.c
index b152029f18f6..9f9f931ef949 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -735,6 +735,7 @@ static int exec_mmap(struct mm_struct *mm)
735 tsk->active_mm = mm; 735 tsk->active_mm = mm;
736 activate_mm(active_mm, mm); 736 activate_mm(active_mm, mm);
737 task_unlock(tsk); 737 task_unlock(tsk);
738 mm_update_next_owner(mm);
738 arch_pick_mmap_layout(mm); 739 arch_pick_mmap_layout(mm);
739 if (old_mm) { 740 if (old_mm) {
740 up_read(&old_mm->mmap_sem); 741 up_read(&old_mm->mmap_sem);
@@ -765,9 +766,7 @@ static int de_thread(struct task_struct *tsk)
765 766
766 /* 767 /*
767 * Kill all other threads in the thread group. 768 * Kill all other threads in the thread group.
768 * We must hold tasklist_lock to call zap_other_threads.
769 */ 769 */
770 read_lock(&tasklist_lock);
771 spin_lock_irq(lock); 770 spin_lock_irq(lock);
772 if (signal_group_exit(sig)) { 771 if (signal_group_exit(sig)) {
773 /* 772 /*
@@ -775,21 +774,10 @@ static int de_thread(struct task_struct *tsk)
775 * return so that the signal is processed. 774 * return so that the signal is processed.
776 */ 775 */
777 spin_unlock_irq(lock); 776 spin_unlock_irq(lock);
778 read_unlock(&tasklist_lock);
779 return -EAGAIN; 777 return -EAGAIN;
780 } 778 }
781
782 /*
783 * child_reaper ignores SIGKILL, change it now.
784 * Reparenting needs write_lock on tasklist_lock,
785 * so it is safe to do it under read_lock.
786 */
787 if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
788 task_active_pid_ns(tsk)->child_reaper = tsk;
789
790 sig->group_exit_task = tsk; 779 sig->group_exit_task = tsk;
791 zap_other_threads(tsk); 780 zap_other_threads(tsk);
792 read_unlock(&tasklist_lock);
793 781
794 /* Account for the thread group leader hanging around: */ 782 /* Account for the thread group leader hanging around: */
795 count = thread_group_leader(tsk) ? 1 : 2; 783 count = thread_group_leader(tsk) ? 1 : 2;
@@ -810,7 +798,7 @@ static int de_thread(struct task_struct *tsk)
810 if (!thread_group_leader(tsk)) { 798 if (!thread_group_leader(tsk)) {
811 leader = tsk->group_leader; 799 leader = tsk->group_leader;
812 800
813 sig->notify_count = -1; 801 sig->notify_count = -1; /* for exit_notify() */
814 for (;;) { 802 for (;;) {
815 write_lock_irq(&tasklist_lock); 803 write_lock_irq(&tasklist_lock);
816 if (likely(leader->exit_state)) 804 if (likely(leader->exit_state))
@@ -820,6 +808,8 @@ static int de_thread(struct task_struct *tsk)
820 schedule(); 808 schedule();
821 } 809 }
822 810
811 if (unlikely(task_child_reaper(tsk) == leader))
812 task_active_pid_ns(tsk)->child_reaper = tsk;
823 /* 813 /*
824 * The only record we have of the real-time age of a 814 * The only record we have of the real-time age of a
825 * process, regardless of execs it's done, is start_time. 815 * process, regardless of execs it's done, is start_time.
@@ -963,6 +953,8 @@ int flush_old_exec(struct linux_binprm * bprm)
963 if (retval) 953 if (retval)
964 goto out; 954 goto out;
965 955
956 set_mm_exe_file(bprm->mm, bprm->file);
957
966 /* 958 /*
967 * Release all of the old mmap stuff 959 * Release all of the old mmap stuff
968 */ 960 */
@@ -1268,7 +1260,6 @@ int do_execve(char * filename,
1268{ 1260{
1269 struct linux_binprm *bprm; 1261 struct linux_binprm *bprm;
1270 struct file *file; 1262 struct file *file;
1271 unsigned long env_p;
1272 struct files_struct *displaced; 1263 struct files_struct *displaced;
1273 int retval; 1264 int retval;
1274 1265
@@ -1321,11 +1312,9 @@ int do_execve(char * filename,
1321 if (retval < 0) 1312 if (retval < 0)
1322 goto out; 1313 goto out;
1323 1314
1324 env_p = bprm->p;
1325 retval = copy_strings(bprm->argc, argv, bprm); 1315 retval = copy_strings(bprm->argc, argv, bprm);
1326 if (retval < 0) 1316 if (retval < 0)
1327 goto out; 1317 goto out;
1328 bprm->argv_len = env_p - bprm->p;
1329 1318
1330 retval = search_binary_handler(bprm,regs); 1319 retval = search_binary_handler(bprm,regs);
1331 if (retval >= 0) { 1320 if (retval >= 0) {
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 109ab5e44eca..cc91227d3bb8 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
150 if (IS_ERR(ppd)) { 150 if (IS_ERR(ppd)) {
151 err = PTR_ERR(ppd); 151 err = PTR_ERR(ppd);
152 dprintk("%s: get_parent of %ld failed, err %d\n", 152 dprintk("%s: get_parent of %ld failed, err %d\n",
153 __FUNCTION__, pd->d_inode->i_ino, err); 153 __func__, pd->d_inode->i_ino, err);
154 dput(pd); 154 dput(pd);
155 break; 155 break;
156 } 156 }
157 157
158 dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, 158 dprintk("%s: find name of %lu in %lu\n", __func__,
159 pd->d_inode->i_ino, ppd->d_inode->i_ino); 159 pd->d_inode->i_ino, ppd->d_inode->i_ino);
160 err = exportfs_get_name(mnt, ppd, nbuf, pd); 160 err = exportfs_get_name(mnt, ppd, nbuf, pd);
161 if (err) { 161 if (err) {
@@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
168 continue; 168 continue;
169 break; 169 break;
170 } 170 }
171 dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); 171 dprintk("%s: found name: %s\n", __func__, nbuf);
172 mutex_lock(&ppd->d_inode->i_mutex); 172 mutex_lock(&ppd->d_inode->i_mutex);
173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); 173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
174 mutex_unlock(&ppd->d_inode->i_mutex); 174 mutex_unlock(&ppd->d_inode->i_mutex);
175 if (IS_ERR(npd)) { 175 if (IS_ERR(npd)) {
176 err = PTR_ERR(npd); 176 err = PTR_ERR(npd);
177 dprintk("%s: lookup failed: %d\n", 177 dprintk("%s: lookup failed: %d\n",
178 __FUNCTION__, err); 178 __func__, err);
179 dput(ppd); 179 dput(ppd);
180 dput(pd); 180 dput(pd);
181 break; 181 break;
@@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
188 if (npd == pd) 188 if (npd == pd)
189 noprogress = 0; 189 noprogress = 0;
190 else 190 else
191 printk("%s: npd != pd\n", __FUNCTION__); 191 printk("%s: npd != pd\n", __func__);
192 dput(npd); 192 dput(npd);
193 dput(ppd); 193 dput(ppd);
194 if (IS_ROOT(pd)) { 194 if (IS_ROOT(pd)) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e7b2bafa1dd9..10bb02c3f25c 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -106,7 +106,7 @@ static int ext2_valid_block_bitmap(struct super_block *sb,
106 return 1; 106 return 1;
107 107
108err_out: 108err_out:
109 ext2_error(sb, __FUNCTION__, 109 ext2_error(sb, __func__,
110 "Invalid block bitmap - " 110 "Invalid block bitmap - "
111 "block_group = %d, block = %lu", 111 "block_group = %d, block = %lu",
112 block_group, bitmap_blk); 112 block_group, bitmap_blk);
@@ -132,7 +132,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
132 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 132 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
133 bh = sb_getblk(sb, bitmap_blk); 133 bh = sb_getblk(sb, bitmap_blk);
134 if (unlikely(!bh)) { 134 if (unlikely(!bh)) {
135 ext2_error(sb, __FUNCTION__, 135 ext2_error(sb, __func__,
136 "Cannot read block bitmap - " 136 "Cannot read block bitmap - "
137 "block_group = %d, block_bitmap = %u", 137 "block_group = %d, block_bitmap = %u",
138 block_group, le32_to_cpu(desc->bg_block_bitmap)); 138 block_group, le32_to_cpu(desc->bg_block_bitmap));
@@ -143,17 +143,18 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
143 143
144 if (bh_submit_read(bh) < 0) { 144 if (bh_submit_read(bh) < 0) {
145 brelse(bh); 145 brelse(bh);
146 ext2_error(sb, __FUNCTION__, 146 ext2_error(sb, __func__,
147 "Cannot read block bitmap - " 147 "Cannot read block bitmap - "
148 "block_group = %d, block_bitmap = %u", 148 "block_group = %d, block_bitmap = %u",
149 block_group, le32_to_cpu(desc->bg_block_bitmap)); 149 block_group, le32_to_cpu(desc->bg_block_bitmap));
150 return NULL; 150 return NULL;
151 } 151 }
152 if (!ext2_valid_block_bitmap(sb, desc, block_group, bh)) {
153 brelse(bh);
154 return NULL;
155 }
156 152
153 ext2_valid_block_bitmap(sb, desc, block_group, bh);
154 /*
155 * file system mounted not to panic on error, continue with corrupt
156 * bitmap
157 */
157 return bh; 158 return bh;
158} 159}
159 160
@@ -245,11 +246,10 @@ restart:
245 prev = rsv; 246 prev = rsv;
246 } 247 }
247 printk("Window map complete.\n"); 248 printk("Window map complete.\n");
248 if (bad) 249 BUG_ON(bad);
249 BUG();
250} 250}
251#define rsv_window_dump(root, verbose) \ 251#define rsv_window_dump(root, verbose) \
252 __rsv_window_dump((root), (verbose), __FUNCTION__) 252 __rsv_window_dump((root), (verbose), __func__)
253#else 253#else
254#define rsv_window_dump(root, verbose) do {} while (0) 254#define rsv_window_dump(root, verbose) do {} while (0)
255#endif 255#endif
@@ -548,7 +548,7 @@ do_more:
548 for (i = 0, group_freed = 0; i < count; i++) { 548 for (i = 0, group_freed = 0; i < count; i++) {
549 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 549 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
550 bit + i, bitmap_bh->b_data)) { 550 bit + i, bitmap_bh->b_data)) {
551 ext2_error(sb, __FUNCTION__, 551 ext2_error(sb, __func__,
552 "bit already cleared for block %lu", block + i); 552 "bit already cleared for block %lu", block + i);
553 } else { 553 } else {
554 group_freed++; 554 group_freed++;
@@ -1381,7 +1381,12 @@ allocated:
1381 "Allocating block in system zone - " 1381 "Allocating block in system zone - "
1382 "blocks from "E2FSBLK", length %lu", 1382 "blocks from "E2FSBLK", length %lu",
1383 ret_block, num); 1383 ret_block, num);
1384 goto out; 1384 /*
1385 * ext2_try_to_allocate marked the blocks we allocated as in
1386 * use. So we may want to selectively mark some of the blocks
1387 * as free
1388 */
1389 goto retry_alloc;
1385 } 1390 }
1386 1391
1387 performed_allocation = 1; 1392 performed_allocation = 1;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 8dededd80fe2..a78c6b4af060 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -41,8 +41,8 @@ static inline __le16 ext2_rec_len_to_disk(unsigned len)
41{ 41{
42 if (len == (1 << 16)) 42 if (len == (1 << 16))
43 return cpu_to_le16(EXT2_MAX_REC_LEN); 43 return cpu_to_le16(EXT2_MAX_REC_LEN);
44 else if (len > (1 << 16)) 44 else
45 BUG(); 45 BUG_ON(len > (1 << 16));
46 return cpu_to_le16(len); 46 return cpu_to_le16(len);
47} 47}
48 48
@@ -295,11 +295,11 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
295 struct page *page = ext2_get_page(inode, n); 295 struct page *page = ext2_get_page(inode, n);
296 296
297 if (IS_ERR(page)) { 297 if (IS_ERR(page)) {
298 ext2_error(sb, __FUNCTION__, 298 ext2_error(sb, __func__,
299 "bad page in #%lu", 299 "bad page in #%lu",
300 inode->i_ino); 300 inode->i_ino);
301 filp->f_pos += PAGE_CACHE_SIZE - offset; 301 filp->f_pos += PAGE_CACHE_SIZE - offset;
302 return -EIO; 302 return PTR_ERR(page);
303 } 303 }
304 kaddr = page_address(page); 304 kaddr = page_address(page);
305 if (unlikely(need_revalidate)) { 305 if (unlikely(need_revalidate)) {
@@ -314,7 +314,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
314 limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); 314 limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1);
315 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { 315 for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
316 if (de->rec_len == 0) { 316 if (de->rec_len == 0) {
317 ext2_error(sb, __FUNCTION__, 317 ext2_error(sb, __func__,
318 "zero-length directory entry"); 318 "zero-length directory entry");
319 ext2_put_page(page); 319 ext2_put_page(page);
320 return -EIO; 320 return -EIO;
@@ -381,7 +381,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
381 kaddr += ext2_last_byte(dir, n) - reclen; 381 kaddr += ext2_last_byte(dir, n) - reclen;
382 while ((char *) de <= kaddr) { 382 while ((char *) de <= kaddr) {
383 if (de->rec_len == 0) { 383 if (de->rec_len == 0) {
384 ext2_error(dir->i_sb, __FUNCTION__, 384 ext2_error(dir->i_sb, __func__,
385 "zero-length directory entry"); 385 "zero-length directory entry");
386 ext2_put_page(page); 386 ext2_put_page(page);
387 goto out; 387 goto out;
@@ -396,7 +396,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
396 n = 0; 396 n = 0;
397 /* next page is past the blocks we've got */ 397 /* next page is past the blocks we've got */
398 if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { 398 if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
399 ext2_error(dir->i_sb, __FUNCTION__, 399 ext2_error(dir->i_sb, __func__,
400 "dir %lu size %lld exceeds block count %llu", 400 "dir %lu size %lld exceeds block count %llu",
401 dir->i_ino, dir->i_size, 401 dir->i_ino, dir->i_size,
402 (unsigned long long)dir->i_blocks); 402 (unsigned long long)dir->i_blocks);
@@ -506,7 +506,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
506 goto got_it; 506 goto got_it;
507 } 507 }
508 if (de->rec_len == 0) { 508 if (de->rec_len == 0) {
509 ext2_error(dir->i_sb, __FUNCTION__, 509 ext2_error(dir->i_sb, __func__,
510 "zero-length directory entry"); 510 "zero-length directory entry");
511 err = -EIO; 511 err = -EIO;
512 goto out_unlock; 512 goto out_unlock;
@@ -578,7 +578,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
578 578
579 while ((char*)de < (char*)dir) { 579 while ((char*)de < (char*)dir) {
580 if (de->rec_len == 0) { 580 if (de->rec_len == 0) {
581 ext2_error(inode->i_sb, __FUNCTION__, 581 ext2_error(inode->i_sb, __func__,
582 "zero-length directory entry"); 582 "zero-length directory entry");
583 err = -EIO; 583 err = -EIO;
584 goto out; 584 goto out;
@@ -670,7 +670,7 @@ int ext2_empty_dir (struct inode * inode)
670 670
671 while ((char *)de <= kaddr) { 671 while ((char *)de <= kaddr) {
672 if (de->rec_len == 0) { 672 if (de->rec_len == 0) {
673 ext2_error(inode->i_sb, __FUNCTION__, 673 ext2_error(inode->i_sb, __func__,
674 "zero-length directory entry"); 674 "zero-length directory entry");
675 printk("kaddr=%p, de=%p\n", kaddr, de); 675 printk("kaddr=%p, de=%p\n", kaddr, de);
676 goto not_empty; 676 goto not_empty;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 08f647d8188d..f59741346760 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -75,11 +75,9 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
75 } 75 }
76 76
77 spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); 77 spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
78 desc->bg_free_inodes_count = 78 le16_add_cpu(&desc->bg_free_inodes_count, 1);
79 cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
80 if (dir) 79 if (dir)
81 desc->bg_used_dirs_count = 80 le16_add_cpu(&desc->bg_used_dirs_count, -1);
82 cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
83 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); 81 spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
84 if (dir) 82 if (dir)
85 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); 83 percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
@@ -539,13 +537,11 @@ got:
539 percpu_counter_inc(&sbi->s_dirs_counter); 537 percpu_counter_inc(&sbi->s_dirs_counter);
540 538
541 spin_lock(sb_bgl_lock(sbi, group)); 539 spin_lock(sb_bgl_lock(sbi, group));
542 gdp->bg_free_inodes_count = 540 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
543 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
544 if (S_ISDIR(mode)) { 541 if (S_ISDIR(mode)) {
545 if (sbi->s_debts[group] < 255) 542 if (sbi->s_debts[group] < 255)
546 sbi->s_debts[group]++; 543 sbi->s_debts[group]++;
547 gdp->bg_used_dirs_count = 544 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
548 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
549 } else { 545 } else {
550 if (sbi->s_debts[group]) 546 if (sbi->s_debts[group])
551 sbi->s_debts[group]--; 547 sbi->s_debts[group]--;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b8a2990bab83..384fc0d1dd74 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -254,13 +254,13 @@ no_block:
254 * Caller must make sure that @ind is valid and will stay that way. 254 * Caller must make sure that @ind is valid and will stay that way.
255 */ 255 */
256 256
257static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) 257static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind)
258{ 258{
259 struct ext2_inode_info *ei = EXT2_I(inode); 259 struct ext2_inode_info *ei = EXT2_I(inode);
260 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; 260 __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
261 __le32 *p; 261 __le32 *p;
262 unsigned long bg_start; 262 ext2_fsblk_t bg_start;
263 unsigned long colour; 263 ext2_fsblk_t colour;
264 264
265 /* Try to find previous block */ 265 /* Try to find previous block */
266 for (p = ind->p - 1; p >= start; p--) 266 for (p = ind->p - 1; p >= start; p--)
@@ -275,8 +275,7 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
275 * It is going to be refered from inode itself? OK, just put it into 275 * It is going to be refered from inode itself? OK, just put it into
276 * the same cylinder group then. 276 * the same cylinder group then.
277 */ 277 */
278 bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + 278 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group);
279 le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block);
280 colour = (current->pid % 16) * 279 colour = (current->pid % 16) *
281 (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); 280 (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16);
282 return bg_start + colour; 281 return bg_start + colour;
@@ -291,8 +290,8 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
291 * Returns preferred place for a block (the goal). 290 * Returns preferred place for a block (the goal).
292 */ 291 */
293 292
294static inline int ext2_find_goal(struct inode *inode, long block, 293static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block,
295 Indirect *partial) 294 Indirect *partial)
296{ 295{
297 struct ext2_block_alloc_info *block_i; 296 struct ext2_block_alloc_info *block_i;
298 297
@@ -796,7 +795,7 @@ const struct address_space_operations ext2_aops = {
796 795
797const struct address_space_operations ext2_aops_xip = { 796const struct address_space_operations ext2_aops_xip = {
798 .bmap = ext2_bmap, 797 .bmap = ext2_bmap,
799 .get_xip_page = ext2_get_xip_page, 798 .get_xip_mem = ext2_get_xip_mem,
800}; 799};
801 800
802const struct address_space_operations ext2_nobh_aops = { 801const struct address_space_operations ext2_nobh_aops = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 088b011bb97e..ef50cbc792db 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -51,8 +51,7 @@ void ext2_error (struct super_block * sb, const char * function,
51 51
52 if (!(sb->s_flags & MS_RDONLY)) { 52 if (!(sb->s_flags & MS_RDONLY)) {
53 sbi->s_mount_state |= EXT2_ERROR_FS; 53 sbi->s_mount_state |= EXT2_ERROR_FS;
54 es->s_state = 54 es->s_state |= cpu_to_le16(EXT2_ERROR_FS);
55 cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS);
56 ext2_sync_super(sb, es); 55 ext2_sync_super(sb, es);
57 } 56 }
58 57
@@ -90,7 +89,7 @@ void ext2_update_dynamic_rev(struct super_block *sb)
90 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) 89 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
91 return; 90 return;
92 91
93 ext2_warning(sb, __FUNCTION__, 92 ext2_warning(sb, __func__,
94 "updating to rev %d because of new feature flag, " 93 "updating to rev %d because of new feature flag, "
95 "running e2fsck is recommended", 94 "running e2fsck is recommended",
96 EXT2_DYNAMIC_REV); 95 EXT2_DYNAMIC_REV);
@@ -604,7 +603,7 @@ static int ext2_setup_super (struct super_block * sb,
604 "running e2fsck is recommended\n"); 603 "running e2fsck is recommended\n");
605 if (!le16_to_cpu(es->s_max_mnt_count)) 604 if (!le16_to_cpu(es->s_max_mnt_count))
606 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); 605 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
607 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 606 le16_add_cpu(&es->s_mnt_count, 1);
608 ext2_write_super(sb); 607 ext2_write_super(sb);
609 if (test_opt (sb, DEBUG)) 608 if (test_opt (sb, DEBUG))
610 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " 609 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, "
@@ -622,13 +621,13 @@ static int ext2_check_descriptors(struct super_block *sb)
622{ 621{
623 int i; 622 int i;
624 struct ext2_sb_info *sbi = EXT2_SB(sb); 623 struct ext2_sb_info *sbi = EXT2_SB(sb);
625 unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
626 unsigned long last_block;
627 624
628 ext2_debug ("Checking group descriptors"); 625 ext2_debug ("Checking group descriptors");
629 626
630 for (i = 0; i < sbi->s_groups_count; i++) { 627 for (i = 0; i < sbi->s_groups_count; i++) {
631 struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL); 628 struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL);
629 ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i);
630 ext2_fsblk_t last_block;
632 631
633 if (i == sbi->s_groups_count - 1) 632 if (i == sbi->s_groups_count - 1)
634 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 633 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
@@ -664,7 +663,6 @@ static int ext2_check_descriptors(struct super_block *sb)
664 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); 663 i, (unsigned long) le32_to_cpu(gdp->bg_inode_table));
665 return 0; 664 return 0;
666 } 665 }
667 first_block += EXT2_BLOCKS_PER_GROUP(sb);
668 } 666 }
669 return 1; 667 return 1;
670} 668}
@@ -721,10 +719,9 @@ static unsigned long descriptor_loc(struct super_block *sb,
721 int nr) 719 int nr)
722{ 720{
723 struct ext2_sb_info *sbi = EXT2_SB(sb); 721 struct ext2_sb_info *sbi = EXT2_SB(sb);
724 unsigned long bg, first_data_block, first_meta_bg; 722 unsigned long bg, first_meta_bg;
725 int has_super = 0; 723 int has_super = 0;
726 724
727 first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block);
728 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); 725 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
729 726
730 if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) || 727 if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) ||
@@ -733,7 +730,8 @@ static unsigned long descriptor_loc(struct super_block *sb,
733 bg = sbi->s_desc_per_block * nr; 730 bg = sbi->s_desc_per_block * nr;
734 if (ext2_bg_has_super(sb, bg)) 731 if (ext2_bg_has_super(sb, bg))
735 has_super = 1; 732 has_super = 1;
736 return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); 733
734 return ext2_group_first_block_no(sb, bg) + has_super;
737} 735}
738 736
739static int ext2_fill_super(struct super_block *sb, void *data, int silent) 737static int ext2_fill_super(struct super_block *sb, void *data, int silent)
@@ -1062,7 +1060,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1062 goto failed_mount3; 1060 goto failed_mount3;
1063 } 1061 }
1064 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 1062 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1065 ext2_warning(sb, __FUNCTION__, 1063 ext2_warning(sb, __func__,
1066 "mounting ext3 filesystem as ext2"); 1064 "mounting ext3 filesystem as ext2");
1067 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1065 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
1068 return 0; 1066 return 0;
@@ -1126,10 +1124,9 @@ void ext2_write_super (struct super_block * sb)
1126 if (!(sb->s_flags & MS_RDONLY)) { 1124 if (!(sb->s_flags & MS_RDONLY)) {
1127 es = EXT2_SB(sb)->s_es; 1125 es = EXT2_SB(sb)->s_es;
1128 1126
1129 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) { 1127 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1130 ext2_debug ("setting valid to 0\n"); 1128 ext2_debug ("setting valid to 0\n");
1131 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & 1129 es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
1132 ~EXT2_VALID_FS);
1133 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1130 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1134 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1131 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1135 es->s_mtime = cpu_to_le32(get_seconds()); 1132 es->s_mtime = cpu_to_le32(get_seconds());
@@ -1180,7 +1177,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1180 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1177 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
1181 (old_mount_opt & EXT2_MOUNT_XIP)) && 1178 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1182 invalidate_inodes(sb)) 1179 invalidate_inodes(sb))
1183 ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\ 1180 ext2_warning(sb, __func__, "busy inodes while remounting "\
1184 "xip remain in cache (no functional problem)"); 1181 "xip remain in cache (no functional problem)");
1185 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1182 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1186 return 0; 1183 return 0;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index a99d46f3b26e..987a5261cc2e 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -646,8 +646,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
646 unlock_buffer(new_bh); 646 unlock_buffer(new_bh);
647 goto cleanup; 647 goto cleanup;
648 } 648 }
649 HDR(new_bh)->h_refcount = cpu_to_le32(1 + 649 le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
650 le32_to_cpu(HDR(new_bh)->h_refcount));
651 ea_bdebug(new_bh, "refcount now=%d", 650 ea_bdebug(new_bh, "refcount now=%d",
652 le32_to_cpu(HDR(new_bh)->h_refcount)); 651 le32_to_cpu(HDR(new_bh)->h_refcount));
653 } 652 }
@@ -660,10 +659,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
660 ext2_xattr_cache_insert(new_bh); 659 ext2_xattr_cache_insert(new_bh);
661 } else { 660 } else {
662 /* We need to allocate a new block */ 661 /* We need to allocate a new block */
663 int goal = le32_to_cpu(EXT2_SB(sb)->s_es-> 662 ext2_fsblk_t goal = ext2_group_first_block_no(sb,
664 s_first_data_block) + 663 EXT2_I(inode)->i_block_group);
665 EXT2_I(inode)->i_block_group *
666 EXT2_BLOCKS_PER_GROUP(sb);
667 int block = ext2_new_block(inode, goal, &error); 664 int block = ext2_new_block(inode, goal, &error);
668 if (error) 665 if (error)
669 goto cleanup; 666 goto cleanup;
@@ -731,8 +728,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
731 bforget(old_bh); 728 bforget(old_bh);
732 } else { 729 } else {
733 /* Decrement the refcount only. */ 730 /* Decrement the refcount only. */
734 HDR(old_bh)->h_refcount = cpu_to_le32( 731 le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
735 le32_to_cpu(HDR(old_bh)->h_refcount) - 1);
736 if (ce) 732 if (ce)
737 mb_cache_entry_release(ce); 733 mb_cache_entry_release(ce);
738 DQUOT_FREE_BLOCK(inode, 1); 734 DQUOT_FREE_BLOCK(inode, 1);
@@ -789,8 +785,7 @@ ext2_xattr_delete_inode(struct inode *inode)
789 bforget(bh); 785 bforget(bh);
790 unlock_buffer(bh); 786 unlock_buffer(bh);
791 } else { 787 } else {
792 HDR(bh)->h_refcount = cpu_to_le32( 788 le32_add_cpu(&HDR(bh)->h_refcount, -1);
793 le32_to_cpu(HDR(bh)->h_refcount) - 1);
794 if (ce) 789 if (ce)
795 mb_cache_entry_release(ce); 790 mb_cache_entry_release(ce);
796 ea_bdebug(bh, "refcount now=%d", 791 ea_bdebug(bh, "refcount now=%d",
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index ca7f00312388..4fb94c20041b 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -15,24 +15,28 @@
15#include "xip.h" 15#include "xip.h"
16 16
17static inline int 17static inline int
18__inode_direct_access(struct inode *inode, sector_t sector, 18__inode_direct_access(struct inode *inode, sector_t block,
19 unsigned long *data) 19 void **kaddr, unsigned long *pfn)
20{ 20{
21 BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access); 21 struct block_device *bdev = inode->i_sb->s_bdev;
22 return inode->i_sb->s_bdev->bd_disk->fops 22 struct block_device_operations *ops = bdev->bd_disk->fops;
23 ->direct_access(inode->i_sb->s_bdev,sector,data); 23 sector_t sector;
24
25 sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */
26
27 BUG_ON(!ops->direct_access);
28 return ops->direct_access(bdev, sector, kaddr, pfn);
24} 29}
25 30
26static inline int 31static inline int
27__ext2_get_sector(struct inode *inode, sector_t offset, int create, 32__ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
28 sector_t *result) 33 sector_t *result)
29{ 34{
30 struct buffer_head tmp; 35 struct buffer_head tmp;
31 int rc; 36 int rc;
32 37
33 memset(&tmp, 0, sizeof(struct buffer_head)); 38 memset(&tmp, 0, sizeof(struct buffer_head));
34 rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp, 39 rc = ext2_get_block(inode, pgoff, &tmp, create);
35 create);
36 *result = tmp.b_blocknr; 40 *result = tmp.b_blocknr;
37 41
38 /* did we get a sparse block (hole in the file)? */ 42 /* did we get a sparse block (hole in the file)? */
@@ -45,15 +49,15 @@ __ext2_get_sector(struct inode *inode, sector_t offset, int create,
45} 49}
46 50
47int 51int
48ext2_clear_xip_target(struct inode *inode, int block) 52ext2_clear_xip_target(struct inode *inode, sector_t block)
49{ 53{
50 sector_t sector = block * (PAGE_SIZE/512); 54 void *kaddr;
51 unsigned long data; 55 unsigned long pfn;
52 int rc; 56 int rc;
53 57
54 rc = __inode_direct_access(inode, sector, &data); 58 rc = __inode_direct_access(inode, block, &kaddr, &pfn);
55 if (!rc) 59 if (!rc)
56 clear_page((void*)data); 60 clear_page(kaddr);
57 return rc; 61 return rc;
58} 62}
59 63
@@ -64,30 +68,23 @@ void ext2_xip_verify_sb(struct super_block *sb)
64 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && 68 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
65 !sb->s_bdev->bd_disk->fops->direct_access) { 69 !sb->s_bdev->bd_disk->fops->direct_access) {
66 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); 70 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
67 ext2_warning(sb, __FUNCTION__, 71 ext2_warning(sb, __func__,
68 "ignoring xip option - not supported by bdev"); 72 "ignoring xip option - not supported by bdev");
69 } 73 }
70} 74}
71 75
72struct page * 76int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create,
73ext2_get_xip_page(struct address_space *mapping, sector_t offset, 77 void **kmem, unsigned long *pfn)
74 int create)
75{ 78{
76 int rc; 79 int rc;
77 unsigned long data; 80 sector_t block;
78 sector_t sector;
79 81
80 /* first, retrieve the sector number */ 82 /* first, retrieve the sector number */
81 rc = __ext2_get_sector(mapping->host, offset, create, &sector); 83 rc = __ext2_get_block(mapping->host, pgoff, create, &block);
82 if (rc) 84 if (rc)
83 goto error; 85 return rc;
84 86
85 /* retrieve address of the target data */ 87 /* retrieve address of the target data */
86 rc = __inode_direct_access 88 rc = __inode_direct_access(mapping->host, block, kmem, pfn);
87 (mapping->host, sector * (PAGE_SIZE/512), &data); 89 return rc;
88 if (!rc)
89 return virt_to_page(data);
90
91 error:
92 return ERR_PTR(rc);
93} 90}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
index aa85331d6c56..18b34d2f31b3 100644
--- a/fs/ext2/xip.h
+++ b/fs/ext2/xip.h
@@ -7,19 +7,20 @@
7 7
8#ifdef CONFIG_EXT2_FS_XIP 8#ifdef CONFIG_EXT2_FS_XIP
9extern void ext2_xip_verify_sb (struct super_block *); 9extern void ext2_xip_verify_sb (struct super_block *);
10extern int ext2_clear_xip_target (struct inode *, int); 10extern int ext2_clear_xip_target (struct inode *, sector_t);
11 11
12static inline int ext2_use_xip (struct super_block *sb) 12static inline int ext2_use_xip (struct super_block *sb)
13{ 13{
14 struct ext2_sb_info *sbi = EXT2_SB(sb); 14 struct ext2_sb_info *sbi = EXT2_SB(sb);
15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP); 15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
16} 16}
17struct page* ext2_get_xip_page (struct address_space *, sector_t, int); 17int ext2_get_xip_mem(struct address_space *, pgoff_t, int,
18#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page) 18 void **, unsigned long *);
19#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem)
19#else 20#else
20#define mapping_is_xip(map) 0 21#define mapping_is_xip(map) 0
21#define ext2_xip_verify_sb(sb) do { } while (0) 22#define ext2_xip_verify_sb(sb) do { } while (0)
22#define ext2_use_xip(sb) 0 23#define ext2_use_xip(sb) 0
23#define ext2_clear_xip_target(inode, chain) 0 24#define ext2_clear_xip_target(inode, chain) 0
24#define ext2_get_xip_page NULL 25#define ext2_get_xip_mem NULL
25#endif 26#endif
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index da0cb2c0e437..92fd0338a6eb 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -117,7 +117,7 @@ static int ext3_valid_block_bitmap(struct super_block *sb,
117 return 1; 117 return 1;
118 118
119err_out: 119err_out:
120 ext3_error(sb, __FUNCTION__, 120 ext3_error(sb, __func__,
121 "Invalid block bitmap - " 121 "Invalid block bitmap - "
122 "block_group = %d, block = %lu", 122 "block_group = %d, block = %lu",
123 block_group, bitmap_blk); 123 block_group, bitmap_blk);
@@ -147,7 +147,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
147 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); 147 bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
148 bh = sb_getblk(sb, bitmap_blk); 148 bh = sb_getblk(sb, bitmap_blk);
149 if (unlikely(!bh)) { 149 if (unlikely(!bh)) {
150 ext3_error(sb, __FUNCTION__, 150 ext3_error(sb, __func__,
151 "Cannot read block bitmap - " 151 "Cannot read block bitmap - "
152 "block_group = %d, block_bitmap = %u", 152 "block_group = %d, block_bitmap = %u",
153 block_group, le32_to_cpu(desc->bg_block_bitmap)); 153 block_group, le32_to_cpu(desc->bg_block_bitmap));
@@ -158,16 +158,17 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
158 158
159 if (bh_submit_read(bh) < 0) { 159 if (bh_submit_read(bh) < 0) {
160 brelse(bh); 160 brelse(bh);
161 ext3_error(sb, __FUNCTION__, 161 ext3_error(sb, __func__,
162 "Cannot read block bitmap - " 162 "Cannot read block bitmap - "
163 "block_group = %d, block_bitmap = %u", 163 "block_group = %d, block_bitmap = %u",
164 block_group, le32_to_cpu(desc->bg_block_bitmap)); 164 block_group, le32_to_cpu(desc->bg_block_bitmap));
165 return NULL; 165 return NULL;
166 } 166 }
167 if (!ext3_valid_block_bitmap(sb, desc, block_group, bh)) { 167 ext3_valid_block_bitmap(sb, desc, block_group, bh);
168 brelse(bh); 168 /*
169 return NULL; 169 * file system mounted not to panic on error, continue with corrupt
170 } 170 * bitmap
171 */
171 return bh; 172 return bh;
172} 173}
173/* 174/*
@@ -232,11 +233,10 @@ restart:
232 prev = rsv; 233 prev = rsv;
233 } 234 }
234 printk("Window map complete.\n"); 235 printk("Window map complete.\n");
235 if (bad) 236 BUG_ON(bad);
236 BUG();
237} 237}
238#define rsv_window_dump(root, verbose) \ 238#define rsv_window_dump(root, verbose) \
239 __rsv_window_dump((root), (verbose), __FUNCTION__) 239 __rsv_window_dump((root), (verbose), __func__)
240#else 240#else
241#define rsv_window_dump(root, verbose) do {} while (0) 241#define rsv_window_dump(root, verbose) do {} while (0)
242#endif 242#endif
@@ -618,7 +618,7 @@ do_more:
618 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 618 if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
619 bit + i, bitmap_bh->b_data)) { 619 bit + i, bitmap_bh->b_data)) {
620 jbd_unlock_bh_state(bitmap_bh); 620 jbd_unlock_bh_state(bitmap_bh);
621 ext3_error(sb, __FUNCTION__, 621 ext3_error(sb, __func__,
622 "bit already cleared for block "E3FSBLK, 622 "bit already cleared for block "E3FSBLK,
623 block + i); 623 block + i);
624 jbd_lock_bh_state(bitmap_bh); 624 jbd_lock_bh_state(bitmap_bh);
@@ -1642,7 +1642,11 @@ allocated:
1642 "Allocating block in system zone - " 1642 "Allocating block in system zone - "
1643 "blocks from "E3FSBLK", length %lu", 1643 "blocks from "E3FSBLK", length %lu",
1644 ret_block, num); 1644 ret_block, num);
1645 goto out; 1645 /*
1646 * claim_block() marked the blocks we allocated as in use. So we
1647 * may want to selectively mark some of the blocks as free.
1648 */
1649 goto retry_alloc;
1646 } 1650 }
1647 1651
1648 performed_allocation = 1; 1652 performed_allocation = 1;
@@ -1668,7 +1672,7 @@ allocated:
1668 if (ext3_test_bit(grp_alloc_blk+i, 1672 if (ext3_test_bit(grp_alloc_blk+i,
1669 bh2jh(bitmap_bh)->b_committed_data)) { 1673 bh2jh(bitmap_bh)->b_committed_data)) {
1670 printk("%s: block was unexpectedly set in " 1674 printk("%s: block was unexpectedly set in "
1671 "b_committed_data\n", __FUNCTION__); 1675 "b_committed_data\n", __func__);
1672 } 1676 }
1673 } 1677 }
1674 } 1678 }
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
index e1f91fd26a93..d401f148d74d 100644
--- a/fs/ext3/ext3_jbd.c
+++ b/fs/ext3/ext3_jbd.c
@@ -9,7 +9,7 @@ int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
9{ 9{
10 int err = journal_get_undo_access(handle, bh); 10 int err = journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext3_journal_abort_handle(where, __func__, bh, handle,err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext3_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = journal_get_write_access(handle, bh); 19 int err = journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext3_journal_abort_handle(where, __func__, bh, handle,err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext3_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = journal_forget(handle, bh); 28 int err = journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext3_journal_abort_handle(where, __func__, bh, handle,err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext3_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = journal_revoke(handle, blocknr, bh); 37 int err = journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext3_journal_abort_handle(where, __func__, bh, handle,err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext3_journal_get_create_access(const char *where,
45{ 45{
46 int err = journal_get_create_access(handle, bh); 46 int err = journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext3_journal_abort_handle(where, __func__, bh, handle,err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext3_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = journal_dirty_metadata(handle, bh); 55 int err = journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext3_journal_abort_handle(where, __func__, bh, handle,err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index a588e23841d4..d33634119e17 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -72,6 +72,9 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 96dd5573e49b..77126821b2e9 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -644,7 +644,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
644 644
645 /* Error cases - e2fsck has already cleaned up for us */ 645 /* Error cases - e2fsck has already cleaned up for us */
646 if (ino > max_ino) { 646 if (ino > max_ino) {
647 ext3_warning(sb, __FUNCTION__, 647 ext3_warning(sb, __func__,
648 "bad orphan ino %lu! e2fsck was run?", ino); 648 "bad orphan ino %lu! e2fsck was run?", ino);
649 goto error; 649 goto error;
650 } 650 }
@@ -653,7 +653,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
653 bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); 653 bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
654 bitmap_bh = read_inode_bitmap(sb, block_group); 654 bitmap_bh = read_inode_bitmap(sb, block_group);
655 if (!bitmap_bh) { 655 if (!bitmap_bh) {
656 ext3_warning(sb, __FUNCTION__, 656 ext3_warning(sb, __func__,
657 "inode bitmap error for orphan %lu", ino); 657 "inode bitmap error for orphan %lu", ino);
658 goto error; 658 goto error;
659 } 659 }
@@ -678,7 +678,7 @@ iget_failed:
678 err = PTR_ERR(inode); 678 err = PTR_ERR(inode);
679 inode = NULL; 679 inode = NULL;
680bad_orphan: 680bad_orphan:
681 ext3_warning(sb, __FUNCTION__, 681 ext3_warning(sb, __func__,
682 "bad orphan inode %lu! e2fsck was run?", ino); 682 "bad orphan inode %lu! e2fsck was run?", ino);
683 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", 683 printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
684 bit, (unsigned long long)bitmap_bh->b_blocknr, 684 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c683609b0e3a..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -95,7 +95,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
95 BUFFER_TRACE(bh, "call ext3_journal_revoke"); 95 BUFFER_TRACE(bh, "call ext3_journal_revoke");
96 err = ext3_journal_revoke(handle, blocknr, bh); 96 err = ext3_journal_revoke(handle, blocknr, bh);
97 if (err) 97 if (err)
98 ext3_abort(inode->i_sb, __FUNCTION__, 98 ext3_abort(inode->i_sb, __func__,
99 "error %d when attempting revoke", err); 99 "error %d when attempting revoke", err);
100 BUFFER_TRACE(bh, "exit"); 100 BUFFER_TRACE(bh, "exit");
101 return err; 101 return err;
@@ -1190,7 +1190,7 @@ int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1190{ 1190{
1191 int err = journal_dirty_data(handle, bh); 1191 int err = journal_dirty_data(handle, bh);
1192 if (err) 1192 if (err)
1193 ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1193 ext3_journal_abort_handle(__func__, __func__,
1194 bh, handle, err); 1194 bh, handle, err);
1195 return err; 1195 return err;
1196} 1196}
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
1261 new_i_size = pos + copied; 1261 new_i_size = pos + copied;
1262 if (new_i_size > EXT3_I(inode)->i_disksize) 1262 if (new_i_size > EXT3_I(inode)->i_disksize)
1263 EXT3_I(inode)->i_disksize = new_i_size; 1263 EXT3_I(inode)->i_disksize = new_i_size;
1264 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1264 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1265 page, fsdata); 1265 page, fsdata);
1266 if (copied < 0) 1266 copied = ret2;
1267 ret = copied; 1267 if (ret2 < 0)
1268 ret = ret2;
1268 } 1269 }
1269 ret2 = ext3_journal_stop(handle); 1270 ret2 = ext3_journal_stop(handle);
1270 if (!ret) 1271 if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
1289 if (new_i_size > EXT3_I(inode)->i_disksize) 1290 if (new_i_size > EXT3_I(inode)->i_disksize)
1290 EXT3_I(inode)->i_disksize = new_i_size; 1291 EXT3_I(inode)->i_disksize = new_i_size;
1291 1292
1292 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1293 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1293 page, fsdata); 1294 page, fsdata);
1294 if (copied < 0) 1295 copied = ret2;
1295 ret = copied; 1296 if (ret2 < 0)
1297 ret = ret2;
1296 1298
1297 ret2 = ext3_journal_stop(handle); 1299 ret2 = ext3_journal_stop(handle);
1298 if (!ret) 1300 if (!ret)
@@ -2454,11 +2456,10 @@ out_stop:
2454static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, 2456static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2455 unsigned long ino, struct ext3_iloc *iloc) 2457 unsigned long ino, struct ext3_iloc *iloc)
2456{ 2458{
2457 unsigned long desc, group_desc, block_group; 2459 unsigned long block_group;
2458 unsigned long offset; 2460 unsigned long offset;
2459 ext3_fsblk_t block; 2461 ext3_fsblk_t block;
2460 struct buffer_head *bh; 2462 struct ext3_group_desc *gdp;
2461 struct ext3_group_desc * gdp;
2462 2463
2463 if (!ext3_valid_inum(sb, ino)) { 2464 if (!ext3_valid_inum(sb, ino)) {
2464 /* 2465 /*
@@ -2470,27 +2471,15 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
2470 } 2471 }
2471 2472
2472 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); 2473 block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
2473 if (block_group >= EXT3_SB(sb)->s_groups_count) { 2474 gdp = ext3_get_group_desc(sb, block_group, NULL);
2474 ext3_error(sb,"ext3_get_inode_block","group >= groups count"); 2475 if (!gdp)
2475 return 0;
2476 }
2477 smp_rmb();
2478 group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
2479 desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
2480 bh = EXT3_SB(sb)->s_group_desc[group_desc];
2481 if (!bh) {
2482 ext3_error (sb, "ext3_get_inode_block",
2483 "Descriptor not loaded");
2484 return 0; 2476 return 0;
2485 }
2486
2487 gdp = (struct ext3_group_desc *)bh->b_data;
2488 /* 2477 /*
2489 * Figure out the offset within the block group inode table 2478 * Figure out the offset within the block group inode table
2490 */ 2479 */
2491 offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * 2480 offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) *
2492 EXT3_INODE_SIZE(sb); 2481 EXT3_INODE_SIZE(sb);
2493 block = le32_to_cpu(gdp[desc].bg_inode_table) + 2482 block = le32_to_cpu(gdp->bg_inode_table) +
2494 (offset >> EXT3_BLOCK_SIZE_BITS(sb)); 2483 (offset >> EXT3_BLOCK_SIZE_BITS(sb));
2495 2484
2496 iloc->block_group = block_group; 2485 iloc->block_group = block_group;
@@ -3214,7 +3203,7 @@ void ext3_dirty_inode(struct inode *inode)
3214 current_handle->h_transaction != handle->h_transaction) { 3203 current_handle->h_transaction != handle->h_transaction) {
3215 /* This task has a transaction open against a different fs */ 3204 /* This task has a transaction open against a different fs */
3216 printk(KERN_EMERG "%s: transactions do not match!\n", 3205 printk(KERN_EMERG "%s: transactions do not match!\n",
3217 __FUNCTION__); 3206 __func__);
3218 } else { 3207 } else {
3219 jbd_debug(5, "marking dirty. outer handle=%p\n", 3208 jbd_debug(5, "marking dirty. outer handle=%p\n",
3220 current_handle); 3209 current_handle);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index dec3e0d88ab1..0b8cf80154f1 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -57,10 +57,15 @@ static struct buffer_head *ext3_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext3_bread(handle, inode, *block, 1, err))) { 60 bh = ext3_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT3_I(inode)->i_disksize = inode->i_size; 63 EXT3_I(inode)->i_disksize = inode->i_size;
63 ext3_journal_get_write_access(handle,bh); 64 *err = ext3_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -356,7 +361,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
356 if (root->info.hash_version != DX_HASH_TEA && 361 if (root->info.hash_version != DX_HASH_TEA &&
357 root->info.hash_version != DX_HASH_HALF_MD4 && 362 root->info.hash_version != DX_HASH_HALF_MD4 &&
358 root->info.hash_version != DX_HASH_LEGACY) { 363 root->info.hash_version != DX_HASH_LEGACY) {
359 ext3_warning(dir->i_sb, __FUNCTION__, 364 ext3_warning(dir->i_sb, __func__,
360 "Unrecognised inode hash code %d", 365 "Unrecognised inode hash code %d",
361 root->info.hash_version); 366 root->info.hash_version);
362 brelse(bh); 367 brelse(bh);
@@ -370,7 +375,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
370 hash = hinfo->hash; 375 hash = hinfo->hash;
371 376
372 if (root->info.unused_flags & 1) { 377 if (root->info.unused_flags & 1) {
373 ext3_warning(dir->i_sb, __FUNCTION__, 378 ext3_warning(dir->i_sb, __func__,
374 "Unimplemented inode hash flags: %#06x", 379 "Unimplemented inode hash flags: %#06x",
375 root->info.unused_flags); 380 root->info.unused_flags);
376 brelse(bh); 381 brelse(bh);
@@ -379,7 +384,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
379 } 384 }
380 385
381 if ((indirect = root->info.indirect_levels) > 1) { 386 if ((indirect = root->info.indirect_levels) > 1) {
382 ext3_warning(dir->i_sb, __FUNCTION__, 387 ext3_warning(dir->i_sb, __func__,
383 "Unimplemented inode hash depth: %#06x", 388 "Unimplemented inode hash depth: %#06x",
384 root->info.indirect_levels); 389 root->info.indirect_levels);
385 brelse(bh); 390 brelse(bh);
@@ -392,7 +397,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
392 397
393 if (dx_get_limit(entries) != dx_root_limit(dir, 398 if (dx_get_limit(entries) != dx_root_limit(dir,
394 root->info.info_length)) { 399 root->info.info_length)) {
395 ext3_warning(dir->i_sb, __FUNCTION__, 400 ext3_warning(dir->i_sb, __func__,
396 "dx entry: limit != root limit"); 401 "dx entry: limit != root limit");
397 brelse(bh); 402 brelse(bh);
398 *err = ERR_BAD_DX_DIR; 403 *err = ERR_BAD_DX_DIR;
@@ -404,7 +409,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
404 { 409 {
405 count = dx_get_count(entries); 410 count = dx_get_count(entries);
406 if (!count || count > dx_get_limit(entries)) { 411 if (!count || count > dx_get_limit(entries)) {
407 ext3_warning(dir->i_sb, __FUNCTION__, 412 ext3_warning(dir->i_sb, __func__,
408 "dx entry: no count or count > limit"); 413 "dx entry: no count or count > limit");
409 brelse(bh); 414 brelse(bh);
410 *err = ERR_BAD_DX_DIR; 415 *err = ERR_BAD_DX_DIR;
@@ -449,7 +454,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
449 goto fail2; 454 goto fail2;
450 at = entries = ((struct dx_node *) bh->b_data)->entries; 455 at = entries = ((struct dx_node *) bh->b_data)->entries;
451 if (dx_get_limit(entries) != dx_node_limit (dir)) { 456 if (dx_get_limit(entries) != dx_node_limit (dir)) {
452 ext3_warning(dir->i_sb, __FUNCTION__, 457 ext3_warning(dir->i_sb, __func__,
453 "dx entry: limit != node limit"); 458 "dx entry: limit != node limit");
454 brelse(bh); 459 brelse(bh);
455 *err = ERR_BAD_DX_DIR; 460 *err = ERR_BAD_DX_DIR;
@@ -465,7 +470,7 @@ fail2:
465 } 470 }
466fail: 471fail:
467 if (*err == ERR_BAD_DX_DIR) 472 if (*err == ERR_BAD_DX_DIR)
468 ext3_warning(dir->i_sb, __FUNCTION__, 473 ext3_warning(dir->i_sb, __func__,
469 "Corrupt dir inode %ld, running e2fsck is " 474 "Corrupt dir inode %ld, running e2fsck is "
470 "recommended.", dir->i_ino); 475 "recommended.", dir->i_ino);
471 return NULL; 476 return NULL;
@@ -913,7 +918,7 @@ restart:
913 wait_on_buffer(bh); 918 wait_on_buffer(bh);
914 if (!buffer_uptodate(bh)) { 919 if (!buffer_uptodate(bh)) {
915 /* read error, skip block & hope for the best */ 920 /* read error, skip block & hope for the best */
916 ext3_error(sb, __FUNCTION__, "reading directory #%lu " 921 ext3_error(sb, __func__, "reading directory #%lu "
917 "offset %lu", dir->i_ino, block); 922 "offset %lu", dir->i_ino, block);
918 brelse(bh); 923 brelse(bh);
919 goto next; 924 goto next;
@@ -1005,7 +1010,7 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
1005 retval = ext3_htree_next_block(dir, hash, frame, 1010 retval = ext3_htree_next_block(dir, hash, frame,
1006 frames, NULL); 1011 frames, NULL);
1007 if (retval < 0) { 1012 if (retval < 0) {
1008 ext3_warning(sb, __FUNCTION__, 1013 ext3_warning(sb, __func__,
1009 "error reading index page in directory #%lu", 1014 "error reading index page in directory #%lu",
1010 dir->i_ino); 1015 dir->i_ino);
1011 *err = retval; 1016 *err = retval;
@@ -1530,7 +1535,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1530 1535
1531 if (levels && (dx_get_count(frames->entries) == 1536 if (levels && (dx_get_count(frames->entries) ==
1532 dx_get_limit(frames->entries))) { 1537 dx_get_limit(frames->entries))) {
1533 ext3_warning(sb, __FUNCTION__, 1538 ext3_warning(sb, __func__,
1534 "Directory index full!"); 1539 "Directory index full!");
1535 err = -ENOSPC; 1540 err = -ENOSPC;
1536 goto cleanup; 1541 goto cleanup;
@@ -1832,11 +1837,11 @@ static int empty_dir (struct inode * inode)
1832 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || 1837 if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
1833 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { 1838 !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
1834 if (err) 1839 if (err)
1835 ext3_error(inode->i_sb, __FUNCTION__, 1840 ext3_error(inode->i_sb, __func__,
1836 "error %d reading directory #%lu offset 0", 1841 "error %d reading directory #%lu offset 0",
1837 err, inode->i_ino); 1842 err, inode->i_ino);
1838 else 1843 else
1839 ext3_warning(inode->i_sb, __FUNCTION__, 1844 ext3_warning(inode->i_sb, __func__,
1840 "bad directory (dir #%lu) - no data block", 1845 "bad directory (dir #%lu) - no data block",
1841 inode->i_ino); 1846 inode->i_ino);
1842 return 1; 1847 return 1;
@@ -1865,7 +1870,7 @@ static int empty_dir (struct inode * inode)
1865 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err); 1870 offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
1866 if (!bh) { 1871 if (!bh) {
1867 if (err) 1872 if (err)
1868 ext3_error(sb, __FUNCTION__, 1873 ext3_error(sb, __func__,
1869 "error %d reading directory" 1874 "error %d reading directory"
1870 " #%lu offset %lu", 1875 " #%lu offset %lu",
1871 err, inode->i_ino, offset); 1876 err, inode->i_ino, offset);
@@ -2318,6 +2323,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2318 EXT3_FEATURE_INCOMPAT_FILETYPE)) 2323 EXT3_FEATURE_INCOMPAT_FILETYPE))
2319 new_de->file_type = old_de->file_type; 2324 new_de->file_type = old_de->file_type;
2320 new_dir->i_version++; 2325 new_dir->i_version++;
2326 new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC;
2327 ext3_mark_inode_dirty(handle, new_dir);
2321 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); 2328 BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
2322 ext3_journal_dirty_metadata(handle, new_bh); 2329 ext3_journal_dirty_metadata(handle, new_bh);
2323 brelse(new_bh); 2330 brelse(new_bh);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0e97b6e07cb0..28cfd0b40527 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -48,60 +48,60 @@ static int verify_group_input(struct super_block *sb,
48 free_blocks_count, input->reserved_blocks); 48 free_blocks_count, input->reserved_blocks);
49 49
50 if (group != sbi->s_groups_count) 50 if (group != sbi->s_groups_count)
51 ext3_warning(sb, __FUNCTION__, 51 ext3_warning(sb, __func__,
52 "Cannot add at group %u (only %lu groups)", 52 "Cannot add at group %u (only %lu groups)",
53 input->group, sbi->s_groups_count); 53 input->group, sbi->s_groups_count);
54 else if ((start - le32_to_cpu(es->s_first_data_block)) % 54 else if ((start - le32_to_cpu(es->s_first_data_block)) %
55 EXT3_BLOCKS_PER_GROUP(sb)) 55 EXT3_BLOCKS_PER_GROUP(sb))
56 ext3_warning(sb, __FUNCTION__, "Last group not full"); 56 ext3_warning(sb, __func__, "Last group not full");
57 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
58 ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext3_warning(sb, __func__, "Reserved blocks too high (%u)",
59 input->reserved_blocks); 59 input->reserved_blocks);
60 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
61 ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext3_warning(sb, __func__, "Bad blocks count %u",
62 input->blocks_count); 62 input->blocks_count);
63 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
64 ext3_warning(sb, __FUNCTION__, 64 ext3_warning(sb, __func__,
65 "Cannot read last block ("E3FSBLK")", 65 "Cannot read last block ("E3FSBLK")",
66 end - 1); 66 end - 1);
67 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
68 ext3_warning(sb, __FUNCTION__, 68 ext3_warning(sb, __func__,
69 "Block bitmap not in group (block %u)", 69 "Block bitmap not in group (block %u)",
70 input->block_bitmap); 70 input->block_bitmap);
71 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
72 ext3_warning(sb, __FUNCTION__, 72 ext3_warning(sb, __func__,
73 "Inode bitmap not in group (block %u)", 73 "Inode bitmap not in group (block %u)",
74 input->inode_bitmap); 74 input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
77 ext3_warning(sb, __FUNCTION__, 77 ext3_warning(sb, __func__,
78 "Inode table not in group (blocks %u-"E3FSBLK")", 78 "Inode table not in group (blocks %u-"E3FSBLK")",
79 input->inode_table, itend - 1); 79 input->inode_table, itend - 1);
80 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
81 ext3_warning(sb, __FUNCTION__, 81 ext3_warning(sb, __func__,
82 "Block bitmap same as inode bitmap (%u)", 82 "Block bitmap same as inode bitmap (%u)",
83 input->block_bitmap); 83 input->block_bitmap);
84 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
85 ext3_warning(sb, __FUNCTION__, 85 ext3_warning(sb, __func__,
86 "Block bitmap (%u) in inode table (%u-"E3FSBLK")", 86 "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
87 input->block_bitmap, input->inode_table, itend-1); 87 input->block_bitmap, input->inode_table, itend-1);
88 else if (inside(input->inode_bitmap, input->inode_table, itend)) 88 else if (inside(input->inode_bitmap, input->inode_table, itend))
89 ext3_warning(sb, __FUNCTION__, 89 ext3_warning(sb, __func__,
90 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", 90 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
91 input->inode_bitmap, input->inode_table, itend-1); 91 input->inode_bitmap, input->inode_table, itend-1);
92 else if (inside(input->block_bitmap, start, metaend)) 92 else if (inside(input->block_bitmap, start, metaend))
93 ext3_warning(sb, __FUNCTION__, 93 ext3_warning(sb, __func__,
94 "Block bitmap (%u) in GDT table" 94 "Block bitmap (%u) in GDT table"
95 " ("E3FSBLK"-"E3FSBLK")", 95 " ("E3FSBLK"-"E3FSBLK")",
96 input->block_bitmap, start, metaend - 1); 96 input->block_bitmap, start, metaend - 1);
97 else if (inside(input->inode_bitmap, start, metaend)) 97 else if (inside(input->inode_bitmap, start, metaend))
98 ext3_warning(sb, __FUNCTION__, 98 ext3_warning(sb, __func__,
99 "Inode bitmap (%u) in GDT table" 99 "Inode bitmap (%u) in GDT table"
100 " ("E3FSBLK"-"E3FSBLK")", 100 " ("E3FSBLK"-"E3FSBLK")",
101 input->inode_bitmap, start, metaend - 1); 101 input->inode_bitmap, start, metaend - 1);
102 else if (inside(input->inode_table, start, metaend) || 102 else if (inside(input->inode_table, start, metaend) ||
103 inside(itend - 1, start, metaend)) 103 inside(itend - 1, start, metaend))
104 ext3_warning(sb, __FUNCTION__, 104 ext3_warning(sb, __func__,
105 "Inode table (%u-"E3FSBLK") overlaps" 105 "Inode table (%u-"E3FSBLK") overlaps"
106 "GDT table ("E3FSBLK"-"E3FSBLK")", 106 "GDT table ("E3FSBLK"-"E3FSBLK")",
107 input->inode_table, itend - 1, start, metaend - 1); 107 input->inode_table, itend - 1, start, metaend - 1);
@@ -386,7 +386,7 @@ static int verify_reserved_gdb(struct super_block *sb,
386 386
387 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { 387 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
388 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ 388 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
389 ext3_warning(sb, __FUNCTION__, 389 ext3_warning(sb, __func__,
390 "reserved GDT "E3FSBLK 390 "reserved GDT "E3FSBLK
391 " missing grp %d ("E3FSBLK")", 391 " missing grp %d ("E3FSBLK")",
392 blk, grp, 392 blk, grp,
@@ -440,7 +440,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
440 */ 440 */
441 if (EXT3_SB(sb)->s_sbh->b_blocknr != 441 if (EXT3_SB(sb)->s_sbh->b_blocknr !=
442 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { 442 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
443 ext3_warning(sb, __FUNCTION__, 443 ext3_warning(sb, __func__,
444 "won't resize using backup superblock at %llu", 444 "won't resize using backup superblock at %llu",
445 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); 445 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
446 return -EPERM; 446 return -EPERM;
@@ -464,7 +464,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
464 464
465 data = (__le32 *)dind->b_data; 465 data = (__le32 *)dind->b_data;
466 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { 466 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
467 ext3_warning(sb, __FUNCTION__, 467 ext3_warning(sb, __func__,
468 "new group %u GDT block "E3FSBLK" not reserved", 468 "new group %u GDT block "E3FSBLK" not reserved",
469 input->group, gdblock); 469 input->group, gdblock);
470 err = -EINVAL; 470 err = -EINVAL;
@@ -488,7 +488,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
488 GFP_NOFS); 488 GFP_NOFS);
489 if (!n_group_desc) { 489 if (!n_group_desc) {
490 err = -ENOMEM; 490 err = -ENOMEM;
491 ext3_warning (sb, __FUNCTION__, 491 ext3_warning (sb, __func__,
492 "not enough memory for %lu groups", gdb_num + 1); 492 "not enough memory for %lu groups", gdb_num + 1);
493 goto exit_inode; 493 goto exit_inode;
494 } 494 }
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
586 /* Get each reserved primary GDT block and verify it holds backups */ 586 /* Get each reserved primary GDT block and verify it holds backups */
587 for (res = 0; res < reserved_gdb; res++, blk++) { 587 for (res = 0; res < reserved_gdb; res++, blk++) {
588 if (le32_to_cpu(*data) != blk) { 588 if (le32_to_cpu(*data) != blk) {
589 ext3_warning(sb, __FUNCTION__, 589 ext3_warning(sb, __func__,
590 "reserved block "E3FSBLK 590 "reserved block "E3FSBLK
591 " not at offset %ld", 591 " not at offset %ld",
592 blk, 592 blk,
@@ -730,7 +730,7 @@ static void update_backups(struct super_block *sb,
730 */ 730 */
731exit_err: 731exit_err:
732 if (err) { 732 if (err) {
733 ext3_warning(sb, __FUNCTION__, 733 ext3_warning(sb, __func__,
734 "can't update backup for group %d (err %d), " 734 "can't update backup for group %d (err %d), "
735 "forcing fsck on next reboot", group, err); 735 "forcing fsck on next reboot", group, err);
736 sbi->s_mount_state &= ~EXT3_VALID_FS; 736 sbi->s_mount_state &= ~EXT3_VALID_FS;
@@ -770,33 +770,33 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
770 770
771 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, 771 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
772 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 772 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
773 ext3_warning(sb, __FUNCTION__, 773 ext3_warning(sb, __func__,
774 "Can't resize non-sparse filesystem further"); 774 "Can't resize non-sparse filesystem further");
775 return -EPERM; 775 return -EPERM;
776 } 776 }
777 777
778 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < 778 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
779 le32_to_cpu(es->s_blocks_count)) { 779 le32_to_cpu(es->s_blocks_count)) {
780 ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 780 ext3_warning(sb, __func__, "blocks_count overflow\n");
781 return -EINVAL; 781 return -EINVAL;
782 } 782 }
783 783
784 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < 784 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
785 le32_to_cpu(es->s_inodes_count)) { 785 le32_to_cpu(es->s_inodes_count)) {
786 ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 786 ext3_warning(sb, __func__, "inodes_count overflow\n");
787 return -EINVAL; 787 return -EINVAL;
788 } 788 }
789 789
790 if (reserved_gdb || gdb_off == 0) { 790 if (reserved_gdb || gdb_off == 0) {
791 if (!EXT3_HAS_COMPAT_FEATURE(sb, 791 if (!EXT3_HAS_COMPAT_FEATURE(sb,
792 EXT3_FEATURE_COMPAT_RESIZE_INODE)){ 792 EXT3_FEATURE_COMPAT_RESIZE_INODE)){
793 ext3_warning(sb, __FUNCTION__, 793 ext3_warning(sb, __func__,
794 "No reserved GDT blocks, can't resize"); 794 "No reserved GDT blocks, can't resize");
795 return -EPERM; 795 return -EPERM;
796 } 796 }
797 inode = ext3_iget(sb, EXT3_RESIZE_INO); 797 inode = ext3_iget(sb, EXT3_RESIZE_INO);
798 if (IS_ERR(inode)) { 798 if (IS_ERR(inode)) {
799 ext3_warning(sb, __FUNCTION__, 799 ext3_warning(sb, __func__,
800 "Error opening resize inode"); 800 "Error opening resize inode");
801 return PTR_ERR(inode); 801 return PTR_ERR(inode);
802 } 802 }
@@ -825,7 +825,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
825 825
826 lock_super(sb); 826 lock_super(sb);
827 if (input->group != sbi->s_groups_count) { 827 if (input->group != sbi->s_groups_count) {
828 ext3_warning(sb, __FUNCTION__, 828 ext3_warning(sb, __func__,
829 "multiple resizers run on filesystem!"); 829 "multiple resizers run on filesystem!");
830 err = -EBUSY; 830 err = -EBUSY;
831 goto exit_journal; 831 goto exit_journal;
@@ -988,13 +988,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
988 " too large to resize to %lu blocks safely\n", 988 " too large to resize to %lu blocks safely\n",
989 sb->s_id, n_blocks_count); 989 sb->s_id, n_blocks_count);
990 if (sizeof(sector_t) < 8) 990 if (sizeof(sector_t) < 8)
991 ext3_warning(sb, __FUNCTION__, 991 ext3_warning(sb, __func__,
992 "CONFIG_LBD not enabled\n"); 992 "CONFIG_LBD not enabled\n");
993 return -EINVAL; 993 return -EINVAL;
994 } 994 }
995 995
996 if (n_blocks_count < o_blocks_count) { 996 if (n_blocks_count < o_blocks_count) {
997 ext3_warning(sb, __FUNCTION__, 997 ext3_warning(sb, __func__,
998 "can't shrink FS - resize aborted"); 998 "can't shrink FS - resize aborted");
999 return -EBUSY; 999 return -EBUSY;
1000 } 1000 }
@@ -1004,7 +1004,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1004 EXT3_BLOCKS_PER_GROUP(sb); 1004 EXT3_BLOCKS_PER_GROUP(sb);
1005 1005
1006 if (last == 0) { 1006 if (last == 0) {
1007 ext3_warning(sb, __FUNCTION__, 1007 ext3_warning(sb, __func__,
1008 "need to use ext2online to resize further"); 1008 "need to use ext2online to resize further");
1009 return -EPERM; 1009 return -EPERM;
1010 } 1010 }
@@ -1012,7 +1012,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1012 add = EXT3_BLOCKS_PER_GROUP(sb) - last; 1012 add = EXT3_BLOCKS_PER_GROUP(sb) - last;
1013 1013
1014 if (o_blocks_count + add < o_blocks_count) { 1014 if (o_blocks_count + add < o_blocks_count) {
1015 ext3_warning(sb, __FUNCTION__, "blocks_count overflow"); 1015 ext3_warning(sb, __func__, "blocks_count overflow");
1016 return -EINVAL; 1016 return -EINVAL;
1017 } 1017 }
1018 1018
@@ -1020,7 +1020,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1020 add = n_blocks_count - o_blocks_count; 1020 add = n_blocks_count - o_blocks_count;
1021 1021
1022 if (o_blocks_count + add < n_blocks_count) 1022 if (o_blocks_count + add < n_blocks_count)
1023 ext3_warning(sb, __FUNCTION__, 1023 ext3_warning(sb, __func__,
1024 "will only finish group ("E3FSBLK 1024 "will only finish group ("E3FSBLK
1025 " blocks, %u new)", 1025 " blocks, %u new)",
1026 o_blocks_count + add, add); 1026 o_blocks_count + add, add);
@@ -1028,7 +1028,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1028 /* See if the device is actually as big as what was requested */ 1028 /* See if the device is actually as big as what was requested */
1029 bh = sb_bread(sb, o_blocks_count + add -1); 1029 bh = sb_bread(sb, o_blocks_count + add -1);
1030 if (!bh) { 1030 if (!bh) {
1031 ext3_warning(sb, __FUNCTION__, 1031 ext3_warning(sb, __func__,
1032 "can't read last block, resize aborted"); 1032 "can't read last block, resize aborted");
1033 return -ENOSPC; 1033 return -ENOSPC;
1034 } 1034 }
@@ -1040,22 +1040,23 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1040 handle = ext3_journal_start_sb(sb, 3); 1040 handle = ext3_journal_start_sb(sb, 3);
1041 if (IS_ERR(handle)) { 1041 if (IS_ERR(handle)) {
1042 err = PTR_ERR(handle); 1042 err = PTR_ERR(handle);
1043 ext3_warning(sb, __FUNCTION__, "error %d on journal start",err); 1043 ext3_warning(sb, __func__, "error %d on journal start",err);
1044 goto exit_put; 1044 goto exit_put;
1045 } 1045 }
1046 1046
1047 lock_super(sb); 1047 lock_super(sb);
1048 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { 1048 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1049 ext3_warning(sb, __FUNCTION__, 1049 ext3_warning(sb, __func__,
1050 "multiple resizers run on filesystem!"); 1050 "multiple resizers run on filesystem!");
1051 unlock_super(sb); 1051 unlock_super(sb);
1052 ext3_journal_stop(handle);
1052 err = -EBUSY; 1053 err = -EBUSY;
1053 goto exit_put; 1054 goto exit_put;
1054 } 1055 }
1055 1056
1056 if ((err = ext3_journal_get_write_access(handle, 1057 if ((err = ext3_journal_get_write_access(handle,
1057 EXT3_SB(sb)->s_sbh))) { 1058 EXT3_SB(sb)->s_sbh))) {
1058 ext3_warning(sb, __FUNCTION__, 1059 ext3_warning(sb, __func__,
1059 "error %d on journal write access", err); 1060 "error %d on journal write access", err);
1060 unlock_super(sb); 1061 unlock_super(sb);
1061 ext3_journal_stop(handle); 1062 ext3_journal_stop(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ad5360664082..fe3119a71ada 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -84,7 +84,7 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
84 * take the FS itself readonly cleanly. */ 84 * take the FS itself readonly cleanly. */
85 journal = EXT3_SB(sb)->s_journal; 85 journal = EXT3_SB(sb)->s_journal;
86 if (is_journal_aborted(journal)) { 86 if (is_journal_aborted(journal)) {
87 ext3_abort(sb, __FUNCTION__, 87 ext3_abort(sb, __func__,
88 "Detected aborted journal"); 88 "Detected aborted journal");
89 return ERR_PTR(-EROFS); 89 return ERR_PTR(-EROFS);
90 } 90 }
@@ -304,7 +304,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
304 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 304 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
305 return; 305 return;
306 306
307 ext3_warning(sb, __FUNCTION__, 307 ext3_warning(sb, __func__,
308 "updating to rev %d because of new feature flag, " 308 "updating to rev %d because of new feature flag, "
309 "running e2fsck is recommended", 309 "running e2fsck is recommended",
310 EXT3_DYNAMIC_REV); 310 EXT3_DYNAMIC_REV);
@@ -685,7 +685,8 @@ static int ext3_acquire_dquot(struct dquot *dquot);
685static int ext3_release_dquot(struct dquot *dquot); 685static int ext3_release_dquot(struct dquot *dquot);
686static int ext3_mark_dquot_dirty(struct dquot *dquot); 686static int ext3_mark_dquot_dirty(struct dquot *dquot);
687static int ext3_write_info(struct super_block *sb, int type); 687static int ext3_write_info(struct super_block *sb, int type);
688static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); 688static int ext3_quota_on(struct super_block *sb, int type, int format_id,
689 char *path, int remount);
689static int ext3_quota_on_mount(struct super_block *sb, int type); 690static int ext3_quota_on_mount(struct super_block *sb, int type);
690static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 691static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
691 size_t len, loff_t off); 692 size_t len, loff_t off);
@@ -1096,6 +1097,9 @@ clear_qf_name:
1096 case Opt_quota: 1097 case Opt_quota:
1097 case Opt_usrquota: 1098 case Opt_usrquota:
1098 case Opt_grpquota: 1099 case Opt_grpquota:
1100 printk(KERN_ERR
1101 "EXT3-fs: quota options not supported.\n");
1102 break;
1099 case Opt_usrjquota: 1103 case Opt_usrjquota:
1100 case Opt_grpjquota: 1104 case Opt_grpjquota:
1101 case Opt_offusrjquota: 1105 case Opt_offusrjquota:
@@ -1103,7 +1107,7 @@ clear_qf_name:
1103 case Opt_jqfmt_vfsold: 1107 case Opt_jqfmt_vfsold:
1104 case Opt_jqfmt_vfsv0: 1108 case Opt_jqfmt_vfsv0:
1105 printk(KERN_ERR 1109 printk(KERN_ERR
1106 "EXT3-fs: journalled quota options not " 1110 "EXT3-fs: journaled quota options not "
1107 "supported.\n"); 1111 "supported.\n");
1108 break; 1112 break;
1109 case Opt_noquota: 1113 case Opt_noquota:
@@ -1218,7 +1222,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1218 inconsistencies, to force a fsck at reboot. But for 1222 inconsistencies, to force a fsck at reboot. But for
1219 a plain journaled filesystem we can keep it set as 1223 a plain journaled filesystem we can keep it set as
1220 valid forever! :) */ 1224 valid forever! :) */
1221 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS); 1225 es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1222#endif 1226#endif
1223 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1227 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1224 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); 1228 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
@@ -1253,14 +1257,14 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1253static int ext3_check_descriptors(struct super_block *sb) 1257static int ext3_check_descriptors(struct super_block *sb)
1254{ 1258{
1255 struct ext3_sb_info *sbi = EXT3_SB(sb); 1259 struct ext3_sb_info *sbi = EXT3_SB(sb);
1256 ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1257 ext3_fsblk_t last_block;
1258 int i; 1260 int i;
1259 1261
1260 ext3_debug ("Checking group descriptors"); 1262 ext3_debug ("Checking group descriptors");
1261 1263
1262 for (i = 0; i < sbi->s_groups_count; i++) { 1264 for (i = 0; i < sbi->s_groups_count; i++) {
1263 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); 1265 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1266 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1267 ext3_fsblk_t last_block;
1264 1268
1265 if (i == sbi->s_groups_count - 1) 1269 if (i == sbi->s_groups_count - 1)
1266 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; 1270 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
@@ -1299,7 +1303,6 @@ static int ext3_check_descriptors(struct super_block *sb)
1299 le32_to_cpu(gdp->bg_inode_table)); 1303 le32_to_cpu(gdp->bg_inode_table));
1300 return 0; 1304 return 0;
1301 } 1305 }
1302 first_block += EXT3_BLOCKS_PER_GROUP(sb);
1303 } 1306 }
1304 1307
1305 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); 1308 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
@@ -1387,7 +1390,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1387 if (inode->i_nlink) { 1390 if (inode->i_nlink) {
1388 printk(KERN_DEBUG 1391 printk(KERN_DEBUG
1389 "%s: truncating inode %lu to %Ld bytes\n", 1392 "%s: truncating inode %lu to %Ld bytes\n",
1390 __FUNCTION__, inode->i_ino, inode->i_size); 1393 __func__, inode->i_ino, inode->i_size);
1391 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1394 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1392 inode->i_ino, inode->i_size); 1395 inode->i_ino, inode->i_size);
1393 ext3_truncate(inode); 1396 ext3_truncate(inode);
@@ -1395,7 +1398,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1395 } else { 1398 } else {
1396 printk(KERN_DEBUG 1399 printk(KERN_DEBUG
1397 "%s: deleting unreferenced inode %lu\n", 1400 "%s: deleting unreferenced inode %lu\n",
1398 __FUNCTION__, inode->i_ino); 1401 __func__, inode->i_ino);
1399 jbd_debug(2, "deleting unreferenced inode %lu\n", 1402 jbd_debug(2, "deleting unreferenced inode %lu\n",
1400 inode->i_ino); 1403 inode->i_ino);
1401 nr_orphans++; 1404 nr_orphans++;
@@ -1415,7 +1418,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1415 /* Turn quotas off */ 1418 /* Turn quotas off */
1416 for (i = 0; i < MAXQUOTAS; i++) { 1419 for (i = 0; i < MAXQUOTAS; i++) {
1417 if (sb_dqopt(sb)->files[i]) 1420 if (sb_dqopt(sb)->files[i])
1418 vfs_quota_off(sb, i); 1421 vfs_quota_off(sb, i, 0);
1419 } 1422 }
1420#endif 1423#endif
1421 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1424 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2298,9 +2301,9 @@ static void ext3_clear_journal_err(struct super_block * sb,
2298 char nbuf[16]; 2301 char nbuf[16];
2299 2302
2300 errstr = ext3_decode_error(sb, j_errno, nbuf); 2303 errstr = ext3_decode_error(sb, j_errno, nbuf);
2301 ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " 2304 ext3_warning(sb, __func__, "Filesystem error recorded "
2302 "from previous mount: %s", errstr); 2305 "from previous mount: %s", errstr);
2303 ext3_warning(sb, __FUNCTION__, "Marking fs in need of " 2306 ext3_warning(sb, __func__, "Marking fs in need of "
2304 "filesystem check."); 2307 "filesystem check.");
2305 2308
2306 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 2309 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
@@ -2427,7 +2430,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2427 } 2430 }
2428 2431
2429 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) 2432 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
2430 ext3_abort(sb, __FUNCTION__, "Abort forced by user"); 2433 ext3_abort(sb, __func__, "Abort forced by user");
2431 2434
2432 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2435 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2433 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2436 ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -2639,8 +2642,14 @@ static int ext3_dquot_drop(struct inode *inode)
2639 2642
2640 /* We may delete quota structure so we need to reserve enough blocks */ 2643 /* We may delete quota structure so we need to reserve enough blocks */
2641 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); 2644 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
2642 if (IS_ERR(handle)) 2645 if (IS_ERR(handle)) {
2646 /*
2647 * We call dquot_drop() anyway to at least release references
2648 * to quota structures so that umount does not hang.
2649 */
2650 dquot_drop(inode);
2643 return PTR_ERR(handle); 2651 return PTR_ERR(handle);
2652 }
2644 ret = dquot_drop(inode); 2653 ret = dquot_drop(inode);
2645 err = ext3_journal_stop(handle); 2654 err = ext3_journal_stop(handle);
2646 if (!ret) 2655 if (!ret)
@@ -2743,17 +2752,17 @@ static int ext3_quota_on_mount(struct super_block *sb, int type)
2743 * Standard function to be called on quota_on 2752 * Standard function to be called on quota_on
2744 */ 2753 */
2745static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2754static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2746 char *path) 2755 char *path, int remount)
2747{ 2756{
2748 int err; 2757 int err;
2749 struct nameidata nd; 2758 struct nameidata nd;
2750 2759
2751 if (!test_opt(sb, QUOTA)) 2760 if (!test_opt(sb, QUOTA))
2752 return -EINVAL; 2761 return -EINVAL;
2753 /* Not journalling quota? */ 2762 /* Not journalling quota or remount? */
2754 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2763 if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
2755 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) 2764 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
2756 return vfs_quota_on(sb, type, format_id, path); 2765 return vfs_quota_on(sb, type, format_id, path, remount);
2757 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2766 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2758 if (err) 2767 if (err)
2759 return err; 2768 return err;
@@ -2762,13 +2771,13 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2762 path_put(&nd.path); 2771 path_put(&nd.path);
2763 return -EXDEV; 2772 return -EXDEV;
2764 } 2773 }
2765 /* Quotafile not of fs root? */ 2774 /* Quotafile not in fs root? */
2766 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2775 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2767 printk(KERN_WARNING 2776 printk(KERN_WARNING
2768 "EXT3-fs: Quota file not on filesystem root. " 2777 "EXT3-fs: Quota file not on filesystem root. "
2769 "Journalled quota will not work.\n"); 2778 "Journalled quota will not work.\n");
2770 path_put(&nd.path); 2779 path_put(&nd.path);
2771 return vfs_quota_on(sb, type, format_id, path); 2780 return vfs_quota_on(sb, type, format_id, path, remount);
2772} 2781}
2773 2782
2774/* Read data from quotafile - avoid pagecache and such because we cannot afford 2783/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 42856541e9a5..d4a4f0e9ff69 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,6 +99,8 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
99 struct mb_cache_entry **); 99 struct mb_cache_entry **);
100static void ext3_xattr_rehash(struct ext3_xattr_header *, 100static void ext3_xattr_rehash(struct ext3_xattr_header *,
101 struct ext3_xattr_entry *); 101 struct ext3_xattr_entry *);
102static int ext3_xattr_list(struct inode *inode, char *buffer,
103 size_t buffer_size);
102 104
103static struct mb_cache *ext3_xattr_cache; 105static struct mb_cache *ext3_xattr_cache;
104 106
@@ -232,7 +234,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
232 ea_bdebug(bh, "b_count=%d, refcount=%d", 234 ea_bdebug(bh, "b_count=%d, refcount=%d",
233 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 235 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
234 if (ext3_xattr_check_block(bh)) { 236 if (ext3_xattr_check_block(bh)) {
235bad_block: ext3_error(inode->i_sb, __FUNCTION__, 237bad_block: ext3_error(inode->i_sb, __func__,
236 "inode %lu: bad block "E3FSBLK, inode->i_ino, 238 "inode %lu: bad block "E3FSBLK, inode->i_ino,
237 EXT3_I(inode)->i_file_acl); 239 EXT3_I(inode)->i_file_acl);
238 error = -EIO; 240 error = -EIO;
@@ -374,7 +376,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
374 ea_bdebug(bh, "b_count=%d, refcount=%d", 376 ea_bdebug(bh, "b_count=%d, refcount=%d",
375 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 377 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
376 if (ext3_xattr_check_block(bh)) { 378 if (ext3_xattr_check_block(bh)) {
377 ext3_error(inode->i_sb, __FUNCTION__, 379 ext3_error(inode->i_sb, __func__,
378 "inode %lu: bad block "E3FSBLK, inode->i_ino, 380 "inode %lu: bad block "E3FSBLK, inode->i_ino,
379 EXT3_I(inode)->i_file_acl); 381 EXT3_I(inode)->i_file_acl);
380 error = -EIO; 382 error = -EIO;
@@ -427,7 +429,7 @@ cleanup:
427 * Returns a negative error number on failure, or the number of bytes 429 * Returns a negative error number on failure, or the number of bytes
428 * used / required on success. 430 * used / required on success.
429 */ 431 */
430int 432static int
431ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 433ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
432{ 434{
433 int i_error, b_error; 435 int i_error, b_error;
@@ -649,7 +651,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
649 atomic_read(&(bs->bh->b_count)), 651 atomic_read(&(bs->bh->b_count)),
650 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 652 le32_to_cpu(BHDR(bs->bh)->h_refcount));
651 if (ext3_xattr_check_block(bs->bh)) { 653 if (ext3_xattr_check_block(bs->bh)) {
652 ext3_error(sb, __FUNCTION__, 654 ext3_error(sb, __func__,
653 "inode %lu: bad block "E3FSBLK, inode->i_ino, 655 "inode %lu: bad block "E3FSBLK, inode->i_ino,
654 EXT3_I(inode)->i_file_acl); 656 EXT3_I(inode)->i_file_acl);
655 error = -EIO; 657 error = -EIO;
@@ -797,10 +799,8 @@ inserted:
797 get_bh(new_bh); 799 get_bh(new_bh);
798 } else { 800 } else {
799 /* We need to allocate a new block */ 801 /* We need to allocate a new block */
800 ext3_fsblk_t goal = le32_to_cpu( 802 ext3_fsblk_t goal = ext3_group_first_block_no(sb,
801 EXT3_SB(sb)->s_es->s_first_data_block) + 803 EXT3_I(inode)->i_block_group);
802 (ext3_fsblk_t)EXT3_I(inode)->i_block_group *
803 EXT3_BLOCKS_PER_GROUP(sb);
804 ext3_fsblk_t block = ext3_new_block(handle, inode, 804 ext3_fsblk_t block = ext3_new_block(handle, inode,
805 goal, &error); 805 goal, &error);
806 if (error) 806 if (error)
@@ -852,7 +852,7 @@ cleanup_dquot:
852 goto cleanup; 852 goto cleanup;
853 853
854bad_block: 854bad_block:
855 ext3_error(inode->i_sb, __FUNCTION__, 855 ext3_error(inode->i_sb, __func__,
856 "inode %lu: bad block "E3FSBLK, inode->i_ino, 856 "inode %lu: bad block "E3FSBLK, inode->i_ino,
857 EXT3_I(inode)->i_file_acl); 857 EXT3_I(inode)->i_file_acl);
858 goto cleanup; 858 goto cleanup;
@@ -1081,14 +1081,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
1081 goto cleanup; 1081 goto cleanup;
1082 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); 1082 bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
1083 if (!bh) { 1083 if (!bh) {
1084 ext3_error(inode->i_sb, __FUNCTION__, 1084 ext3_error(inode->i_sb, __func__,
1085 "inode %lu: block "E3FSBLK" read error", inode->i_ino, 1085 "inode %lu: block "E3FSBLK" read error", inode->i_ino,
1086 EXT3_I(inode)->i_file_acl); 1086 EXT3_I(inode)->i_file_acl);
1087 goto cleanup; 1087 goto cleanup;
1088 } 1088 }
1089 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || 1089 if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
1090 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1090 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1091 ext3_error(inode->i_sb, __FUNCTION__, 1091 ext3_error(inode->i_sb, __func__,
1092 "inode %lu: bad block "E3FSBLK, inode->i_ino, 1092 "inode %lu: bad block "E3FSBLK, inode->i_ino,
1093 EXT3_I(inode)->i_file_acl); 1093 EXT3_I(inode)->i_file_acl);
1094 goto cleanup; 1094 goto cleanup;
@@ -1215,7 +1215,7 @@ again:
1215 } 1215 }
1216 bh = sb_bread(inode->i_sb, ce->e_block); 1216 bh = sb_bread(inode->i_sb, ce->e_block);
1217 if (!bh) { 1217 if (!bh) {
1218 ext3_error(inode->i_sb, __FUNCTION__, 1218 ext3_error(inode->i_sb, __func__,
1219 "inode %lu: block %lu read error", 1219 "inode %lu: block %lu read error",
1220 inode->i_ino, (unsigned long) ce->e_block); 1220 inode->i_ino, (unsigned long) ce->e_block);
1221 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1221 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 6b1ae1c6182c..148a4dfc82ab 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -67,7 +67,6 @@ extern struct xattr_handler ext3_xattr_security_handler;
67extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); 67extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
68 68
69extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); 69extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
70extern int ext3_xattr_list(struct inode *, char *, size_t);
71extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 70extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
72extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 71extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
73 72
@@ -89,12 +88,6 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
89} 88}
90 89
91static inline int 90static inline int
92ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
93{
94 return -EOPNOTSUPP;
95}
96
97static inline int
98ext3_xattr_set(struct inode *inode, int name_index, const char *name, 91ext3_xattr_set(struct inode *inode, int name_index, const char *name,
99 const void *value, size_t size, int flags) 92 const void *value, size_t size, int flags)
100{ 93{
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15#include "acl.h" 15#include "acl.h"
16 16
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext4_acl_size(acl->a_count); 92 *size = ext4_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
94 sizeof(ext4_acl_entry), GFP_KERNEL); 94 sizeof(ext4_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext4_xattr_get(inode, name_index, "", value, retval); 193 retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..da994374ec3b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/quotaops.h> 18#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
22 20#include "ext4.h"
21#include "ext4_jbd2.h"
23#include "group.h" 22#include "group.h"
23
24/* 24/*
25 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
26 */ 26 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 ext4_group_t block_group, struct ext4_group_desc *gdp) 49 ext4_group_t block_group, struct ext4_group_desc *gdp)
50{ 50{
51 unsigned long start;
52 int bit, bit_max; 51 int bit, bit_max;
53 unsigned free_blocks, group_blocks; 52 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb); 53 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
59 /* If checksum is bad mark all blocks used to prevent allocation 58 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */ 59 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 60 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__, 61 ext4_error(sb, __func__,
63 "Checksum bad for group %lu\n", block_group); 62 "Checksum bad for group %lu\n", block_group);
64 gdp->bg_free_blocks_count = 0; 63 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0; 64 gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
106 free_blocks = group_blocks - bit_max; 105 free_blocks = group_blocks - bit_max;
107 106
108 if (bh) { 107 if (bh) {
108 ext4_fsblk_t start;
109
109 for (bit = 0; bit < bit_max; bit++) 110 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data); 111 ext4_set_bit(bit, bh->b_data);
111 112
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + 113 start = ext4_group_first_block_no(sb, block_group);
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114 114
115 /* Set bits for block and inode bitmaps, and inode table */ 115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
235 return 1; 235 return 1;
236 236
237err_out: 237err_out:
238 ext4_error(sb, __FUNCTION__, 238 ext4_error(sb, __func__,
239 "Invalid block bitmap - " 239 "Invalid block bitmap - "
240 "block_group = %d, block = %llu", 240 "block_group = %d, block = %llu",
241 block_group, bitmap_blk); 241 block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
264 bitmap_blk = ext4_block_bitmap(sb, desc); 264 bitmap_blk = ext4_block_bitmap(sb, desc);
265 bh = sb_getblk(sb, bitmap_blk); 265 bh = sb_getblk(sb, bitmap_blk);
266 if (unlikely(!bh)) { 266 if (unlikely(!bh)) {
267 ext4_error(sb, __FUNCTION__, 267 ext4_error(sb, __func__,
268 "Cannot read block bitmap - " 268 "Cannot read block bitmap - "
269 "block_group = %d, block_bitmap = %llu", 269 "block_group = %d, block_bitmap = %llu",
270 (int)block_group, (unsigned long long)bitmap_blk); 270 (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
281 } 281 }
282 if (bh_submit_read(bh) < 0) { 282 if (bh_submit_read(bh) < 0) {
283 put_bh(bh); 283 put_bh(bh);
284 ext4_error(sb, __FUNCTION__, 284 ext4_error(sb, __func__,
285 "Cannot read block bitmap - " 285 "Cannot read block bitmap - "
286 "block_group = %d, block_bitmap = %llu", 286 "block_group = %d, block_bitmap = %llu",
287 (int)block_group, (unsigned long long)bitmap_blk); 287 (int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ restart:
360 BUG(); 360 BUG();
361} 361}
362#define rsv_window_dump(root, verbose) \ 362#define rsv_window_dump(root, verbose) \
363 __rsv_window_dump((root), (verbose), __FUNCTION__) 363 __rsv_window_dump((root), (verbose), __func__)
364#else 364#else
365#define rsv_window_dump(root, verbose) do {} while (0) 365#define rsv_window_dump(root, verbose) do {} while (0)
366#endif 366#endif
@@ -740,7 +740,7 @@ do_more:
740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
741 bit + i, bitmap_bh->b_data)) { 741 bit + i, bitmap_bh->b_data)) {
742 jbd_unlock_bh_state(bitmap_bh); 742 jbd_unlock_bh_state(bitmap_bh);
743 ext4_error(sb, __FUNCTION__, 743 ext4_error(sb, __func__,
744 "bit already cleared for block %llu", 744 "bit already cleared for block %llu",
745 (ext4_fsblk_t)(block + i)); 745 (ext4_fsblk_t)(block + i));
746 jbd_lock_bh_state(bitmap_bh); 746 jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
752 jbd_unlock_bh_state(bitmap_bh); 752 jbd_unlock_bh_state(bitmap_bh);
753 753
754 spin_lock(sb_bgl_lock(sbi, block_group)); 754 spin_lock(sb_bgl_lock(sbi, block_group));
755 desc->bg_free_blocks_count = 755 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
756 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
757 group_freed);
758 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 756 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
759 spin_unlock(sb_bgl_lock(sbi, block_group)); 757 spin_unlock(sb_bgl_lock(sbi, block_group));
760 percpu_counter_add(&sbi->s_freeblocks_counter, count); 758 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1798,7 +1796,7 @@ allocated:
1798 if (ext4_test_bit(grp_alloc_blk+i, 1796 if (ext4_test_bit(grp_alloc_blk+i,
1799 bh2jh(bitmap_bh)->b_committed_data)) { 1797 bh2jh(bitmap_bh)->b_committed_data)) {
1800 printk("%s: block was unexpectedly set in " 1798 printk("%s: block was unexpectedly set in "
1801 "b_committed_data\n", __FUNCTION__); 1799 "b_committed_data\n", __func__);
1802 } 1800 }
1803 } 1801 }
1804 } 1802 }
@@ -1823,8 +1821,7 @@ allocated:
1823 spin_lock(sb_bgl_lock(sbi, group_no)); 1821 spin_lock(sb_bgl_lock(sbi, group_no));
1824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1822 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1823 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1826 gdp->bg_free_blocks_count = 1824 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1827 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1828 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1825 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 spin_unlock(sb_bgl_lock(sbi, group_no)); 1826 spin_unlock(sb_bgl_lock(sbi, group_no));
1830 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1827 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13 13
14#ifdef EXT4FS_DEBUG 14#ifdef EXT4FS_DEBUG
15 15
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h"
30 30
31static unsigned char ext4_filetype_table[] = { 31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .ioctl = ext4_ioctl, /* BKL held */ 45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
new file mode 100644
index 000000000000..8158083f7ac0
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +1,1205 @@
1/*
2 * ext4.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_H
17#define _EXT4_H
18
19#include <linux/types.h>
20#include <linux/blkdev.h>
21#include <linux/magic.h>
22#include "ext4_i.h"
23
24/*
25 * The second extended filesystem constants/structures
26 */
27
28/*
29 * Define EXT4FS_DEBUG to produce debug messages
30 */
31#undef EXT4FS_DEBUG
32
33/*
34 * Define EXT4_RESERVATION to reserve data blocks for expanding files
35 */
36#define EXT4_DEFAULT_RESERVE_BLOCKS 8
37/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
38#define EXT4_MAX_RESERVE_BLOCKS 1027
39#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
40
41/*
42 * Debug code
43 */
44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \
46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __FUNCTION__); \
49 printk (KERN_DEBUG f, ## a); \
50 } while (0)
51#else
52#define ext4_debug(f, a...) do {} while (0)
53#endif
54
55#define EXT4_MULTIBLOCK_ALLOCATOR 1
56
57/* prefer goal again. length */
58#define EXT4_MB_HINT_MERGE 1
59/* blocks already reserved */
60#define EXT4_MB_HINT_RESERVED 2
61/* metadata is being allocated */
62#define EXT4_MB_HINT_METADATA 4
63/* first blocks in the file */
64#define EXT4_MB_HINT_FIRST 8
65/* search for the best chunk */
66#define EXT4_MB_HINT_BEST 16
67/* data is being allocated */
68#define EXT4_MB_HINT_DATA 32
69/* don't preallocate (for tails) */
70#define EXT4_MB_HINT_NOPREALLOC 64
71/* allocate for locality group */
72#define EXT4_MB_HINT_GROUP_ALLOC 128
73/* allocate goal blocks or none */
74#define EXT4_MB_HINT_GOAL_ONLY 256
75/* goal is meaningful */
76#define EXT4_MB_HINT_TRY_GOAL 512
77
78struct ext4_allocation_request {
79 /* target inode for block we're allocating */
80 struct inode *inode;
81 /* logical block in target inode */
82 ext4_lblk_t logical;
83 /* phys. target (a hint) */
84 ext4_fsblk_t goal;
85 /* the closest logical allocated block to the left */
86 ext4_lblk_t lleft;
87 /* phys. block for ^^^ */
88 ext4_fsblk_t pleft;
89 /* the closest logical allocated block to the right */
90 ext4_lblk_t lright;
91 /* phys. block for ^^^ */
92 ext4_fsblk_t pright;
93 /* how many blocks we want to allocate */
94 unsigned long len;
95 /* flags. see above EXT4_MB_HINT_* */
96 unsigned long flags;
97};
98
99/*
100 * Special inodes numbers
101 */
102#define EXT4_BAD_INO 1 /* Bad blocks inode */
103#define EXT4_ROOT_INO 2 /* Root inode */
104#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
105#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
106#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
107#define EXT4_JOURNAL_INO 8 /* Journal inode */
108
109/* First non-reserved inode for old ext4 filesystems */
110#define EXT4_GOOD_OLD_FIRST_INO 11
111
112/*
113 * Maximal count of links to a file
114 */
115#define EXT4_LINK_MAX 65000
116
117/*
118 * Macro-instructions used to manage several block sizes
119 */
120#define EXT4_MIN_BLOCK_SIZE 1024
121#define EXT4_MAX_BLOCK_SIZE 65536
122#define EXT4_MIN_BLOCK_LOG_SIZE 10
123#ifdef __KERNEL__
124# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
125#else
126# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
127#endif
128#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
129#ifdef __KERNEL__
130# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
131#else
132# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
133#endif
134#ifdef __KERNEL__
135#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
136#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
137#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
138#else
139#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
140 EXT4_GOOD_OLD_INODE_SIZE : \
141 (s)->s_inode_size)
142#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
143 EXT4_GOOD_OLD_FIRST_INO : \
144 (s)->s_first_ino)
145#endif
146#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
147
148/*
149 * Structure of a blocks group descriptor
150 */
151struct ext4_group_desc
152{
153 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
154 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
155 __le32 bg_inode_table_lo; /* Inodes table block */
156 __le16 bg_free_blocks_count; /* Free blocks count */
157 __le16 bg_free_inodes_count; /* Free inodes count */
158 __le16 bg_used_dirs_count; /* Directories count */
159 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
160 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
161 __le16 bg_itable_unused; /* Unused inodes count */
162 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
163 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
164 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
165 __le32 bg_inode_table_hi; /* Inodes table block MSB */
166 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
167 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
168 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
169 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
170 __u32 bg_reserved2[3];
171};
172
173#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
174#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
176
177#ifdef __KERNEL__
178#include "ext4_sb.h"
179#endif
180/*
181 * Macro-instructions used to manage group descriptors
182 */
183#define EXT4_MIN_DESC_SIZE 32
184#define EXT4_MIN_DESC_SIZE_64BIT 64
185#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
186#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
187#ifdef __KERNEL__
188# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
189# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
190# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
191# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
192#else
193# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
194# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
195# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
196#endif
197
198/*
199 * Constants relative to the data blocks
200 */
201#define EXT4_NDIR_BLOCKS 12
202#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
203#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
204#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
205#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
206
207/*
208 * Inode flags
209 */
210#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
211#define EXT4_UNRM_FL 0x00000002 /* Undelete */
212#define EXT4_COMPR_FL 0x00000004 /* Compress file */
213#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
214#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
215#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
216#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
217#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
218/* Reserved for compression usage... */
219#define EXT4_DIRTY_FL 0x00000100
220#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
221#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
222#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
223/* End compression flags --- maybe not all used */
224#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
225#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
226#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
227#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
228#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
229#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
230#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
231#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
232#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
233#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
234
235#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
236#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
237
238/*
239 * Inode dynamic state flags
240 */
241#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
242#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
243#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
244#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
245
246/* Used to pass group descriptor data when online resize is done */
247struct ext4_new_group_input {
248 __u32 group; /* Group number for this data */
249 __u64 block_bitmap; /* Absolute block number of block bitmap */
250 __u64 inode_bitmap; /* Absolute block number of inode bitmap */
251 __u64 inode_table; /* Absolute block number of inode table start */
252 __u32 blocks_count; /* Total number of blocks in this group */
253 __u16 reserved_blocks; /* Number of reserved blocks in this group */
254 __u16 unused;
255};
256
257/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
258struct ext4_new_group_data {
259 __u32 group;
260 __u64 block_bitmap;
261 __u64 inode_bitmap;
262 __u64 inode_table;
263 __u32 blocks_count;
264 __u16 reserved_blocks;
265 __u16 unused;
266 __u32 free_blocks_count;
267};
268
269/*
270 * Following is used by preallocation code to tell get_blocks() that we
271 * want uninitialzed extents.
272 */
273#define EXT4_CREATE_UNINITIALIZED_EXT 2
274
275/*
276 * ioctl commands
277 */
278#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
279#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
280#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
281#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
282#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
283#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
284#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
285#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
286#ifdef CONFIG_JBD2_DEBUG
287#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
288#endif
289#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
290#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
291#define EXT4_IOC_MIGRATE _IO('f', 7)
292
293/*
294 * ioctl commands in 32 bit emulation
295 */
296#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
297#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
298#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
299#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
300#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
301#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
302#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
303#ifdef CONFIG_JBD2_DEBUG
304#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
305#endif
306#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
307#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
308
309
310/*
311 * Mount options
312 */
313struct ext4_mount_options {
314 unsigned long s_mount_opt;
315 uid_t s_resuid;
316 gid_t s_resgid;
317 unsigned long s_commit_interval;
318#ifdef CONFIG_QUOTA
319 int s_jquota_fmt;
320 char *s_qf_names[MAXQUOTAS];
321#endif
322};
323
324/*
325 * Structure of an inode on the disk
326 */
327struct ext4_inode {
328 __le16 i_mode; /* File mode */
329 __le16 i_uid; /* Low 16 bits of Owner Uid */
330 __le32 i_size_lo; /* Size in bytes */
331 __le32 i_atime; /* Access time */
332 __le32 i_ctime; /* Inode Change time */
333 __le32 i_mtime; /* Modification time */
334 __le32 i_dtime; /* Deletion Time */
335 __le16 i_gid; /* Low 16 bits of Group Id */
336 __le16 i_links_count; /* Links count */
337 __le32 i_blocks_lo; /* Blocks count */
338 __le32 i_flags; /* File flags */
339 union {
340 struct {
341 __le32 l_i_version;
342 } linux1;
343 struct {
344 __u32 h_i_translator;
345 } hurd1;
346 struct {
347 __u32 m_i_reserved1;
348 } masix1;
349 } osd1; /* OS dependent 1 */
350 __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
351 __le32 i_generation; /* File version (for NFS) */
352 __le32 i_file_acl_lo; /* File ACL */
353 __le32 i_size_high;
354 __le32 i_obso_faddr; /* Obsoleted fragment address */
355 union {
356 struct {
357 __le16 l_i_blocks_high; /* were l_i_reserved1 */
358 __le16 l_i_file_acl_high;
359 __le16 l_i_uid_high; /* these 2 fields */
360 __le16 l_i_gid_high; /* were reserved2[0] */
361 __u32 l_i_reserved2;
362 } linux2;
363 struct {
364 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
365 __u16 h_i_mode_high;
366 __u16 h_i_uid_high;
367 __u16 h_i_gid_high;
368 __u32 h_i_author;
369 } hurd2;
370 struct {
371 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
372 __le16 m_i_file_acl_high;
373 __u32 m_i_reserved2[2];
374 } masix2;
375 } osd2; /* OS dependent 2 */
376 __le16 i_extra_isize;
377 __le16 i_pad1;
378 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
379 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
380 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
381 __le32 i_crtime; /* File Creation time */
382 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
383 __le32 i_version_hi; /* high 32 bits for 64-bit version */
384};
385
386
387#define EXT4_EPOCH_BITS 2
388#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
389#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
390
391/*
392 * Extended fields will fit into an inode if the filesystem was formatted
393 * with large inodes (-I 256 or larger) and there are not currently any EAs
394 * consuming all of the available space. For new inodes we always reserve
395 * enough space for the kernel's known extended fields, but for inodes
396 * created with an old kernel this might not have been the case. None of
397 * the extended inode fields is critical for correct filesystem operation.
398 * This macro checks if a certain field fits in the inode. Note that
399 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
400 */
401#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
402 ((offsetof(typeof(*ext4_inode), field) + \
403 sizeof((ext4_inode)->field)) \
404 <= (EXT4_GOOD_OLD_INODE_SIZE + \
405 (einode)->i_extra_isize)) \
406
407static inline __le32 ext4_encode_extra_time(struct timespec *time)
408{
409 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
410 time->tv_sec >> 32 : 0) |
411 ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
412}
413
414static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
415{
416 if (sizeof(time->tv_sec) > 4)
417 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
418 << 32;
419 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
420}
421
422#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
423do { \
424 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
425 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
426 (raw_inode)->xtime ## _extra = \
427 ext4_encode_extra_time(&(inode)->xtime); \
428} while (0)
429
430#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
431do { \
432 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
433 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
434 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
435 (raw_inode)->xtime ## _extra = \
436 ext4_encode_extra_time(&(einode)->xtime); \
437} while (0)
438
439#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
440do { \
441 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
442 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
443 ext4_decode_extra_time(&(inode)->xtime, \
444 raw_inode->xtime ## _extra); \
445} while (0)
446
447#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
448do { \
449 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
450 (einode)->xtime.tv_sec = \
451 (signed)le32_to_cpu((raw_inode)->xtime); \
452 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
453 ext4_decode_extra_time(&(einode)->xtime, \
454 raw_inode->xtime ## _extra); \
455} while (0)
456
457#define i_disk_version osd1.linux1.l_i_version
458
459#if defined(__KERNEL__) || defined(__linux__)
460#define i_reserved1 osd1.linux1.l_i_reserved1
461#define i_file_acl_high osd2.linux2.l_i_file_acl_high
462#define i_blocks_high osd2.linux2.l_i_blocks_high
463#define i_uid_low i_uid
464#define i_gid_low i_gid
465#define i_uid_high osd2.linux2.l_i_uid_high
466#define i_gid_high osd2.linux2.l_i_gid_high
467#define i_reserved2 osd2.linux2.l_i_reserved2
468
469#elif defined(__GNU__)
470
471#define i_translator osd1.hurd1.h_i_translator
472#define i_uid_high osd2.hurd2.h_i_uid_high
473#define i_gid_high osd2.hurd2.h_i_gid_high
474#define i_author osd2.hurd2.h_i_author
475
476#elif defined(__masix__)
477
478#define i_reserved1 osd1.masix1.m_i_reserved1
479#define i_file_acl_high osd2.masix2.m_i_file_acl_high
480#define i_reserved2 osd2.masix2.m_i_reserved2
481
482#endif /* defined(__KERNEL__) || defined(__linux__) */
483
484/*
485 * File system states
486 */
487#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
488#define EXT4_ERROR_FS 0x0002 /* Errors detected */
489#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
490
491/*
492 * Misc. filesystem flags
493 */
494#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
495#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
496#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
497
498/*
499 * Mount flags
500 */
501#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
502#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
503#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
504#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
505#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
506#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
507#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
508#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
509#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
510#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
511#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
512#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
513#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
514#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
515#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
516#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
517#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
518#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
519#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
520#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
521#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
522#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
523#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
524#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
525#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
526#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
527#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
528#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
529#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
530/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
531#ifndef _LINUX_EXT2_FS_H
532#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
533#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
534#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
535 EXT4_MOUNT_##opt)
536#else
537#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
538#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
539#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
540#endif
541
542#define ext4_set_bit ext2_set_bit
543#define ext4_set_bit_atomic ext2_set_bit_atomic
544#define ext4_clear_bit ext2_clear_bit
545#define ext4_clear_bit_atomic ext2_clear_bit_atomic
546#define ext4_test_bit ext2_test_bit
547#define ext4_find_first_zero_bit ext2_find_first_zero_bit
548#define ext4_find_next_zero_bit ext2_find_next_zero_bit
549#define ext4_find_next_bit ext2_find_next_bit
550
551/*
552 * Maximal mount counts between two filesystem checks
553 */
554#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
555#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
556
557/*
558 * Behaviour when detecting errors
559 */
560#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
561#define EXT4_ERRORS_RO 2 /* Remount fs read-only */
562#define EXT4_ERRORS_PANIC 3 /* Panic */
563#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
564
565/*
566 * Structure of the super block
567 */
568struct ext4_super_block {
569/*00*/ __le32 s_inodes_count; /* Inodes count */
570 __le32 s_blocks_count_lo; /* Blocks count */
571 __le32 s_r_blocks_count_lo; /* Reserved blocks count */
572 __le32 s_free_blocks_count_lo; /* Free blocks count */
573/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
574 __le32 s_first_data_block; /* First Data Block */
575 __le32 s_log_block_size; /* Block size */
576 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
577/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
578 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
579 __le32 s_inodes_per_group; /* # Inodes per group */
580 __le32 s_mtime; /* Mount time */
581/*30*/ __le32 s_wtime; /* Write time */
582 __le16 s_mnt_count; /* Mount count */
583 __le16 s_max_mnt_count; /* Maximal mount count */
584 __le16 s_magic; /* Magic signature */
585 __le16 s_state; /* File system state */
586 __le16 s_errors; /* Behaviour when detecting errors */
587 __le16 s_minor_rev_level; /* minor revision level */
588/*40*/ __le32 s_lastcheck; /* time of last check */
589 __le32 s_checkinterval; /* max. time between checks */
590 __le32 s_creator_os; /* OS */
591 __le32 s_rev_level; /* Revision level */
592/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
593 __le16 s_def_resgid; /* Default gid for reserved blocks */
594 /*
595 * These fields are for EXT4_DYNAMIC_REV superblocks only.
596 *
597 * Note: the difference between the compatible feature set and
598 * the incompatible feature set is that if there is a bit set
599 * in the incompatible feature set that the kernel doesn't
600 * know about, it should refuse to mount the filesystem.
601 *
602 * e2fsck's requirements are more strict; if it doesn't know
603 * about a feature in either the compatible or incompatible
604 * feature set, it must abort and not try to meddle with
605 * things it doesn't understand...
606 */
607 __le32 s_first_ino; /* First non-reserved inode */
608 __le16 s_inode_size; /* size of inode structure */
609 __le16 s_block_group_nr; /* block group # of this superblock */
610 __le32 s_feature_compat; /* compatible feature set */
611/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
612 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
613/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
614/*78*/ char s_volume_name[16]; /* volume name */
615/*88*/ char s_last_mounted[64]; /* directory where last mounted */
616/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
617 /*
618 * Performance hints. Directory preallocation should only
619 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
620 */
621 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
622 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
623 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
624 /*
625 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
626 */
627/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
628/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
629 __le32 s_journal_dev; /* device number of journal file */
630 __le32 s_last_orphan; /* start of list of inodes to delete */
631 __le32 s_hash_seed[4]; /* HTREE hash seed */
632 __u8 s_def_hash_version; /* Default hash version to use */
633 __u8 s_reserved_char_pad;
634 __le16 s_desc_size; /* size of group descriptor */
635/*100*/ __le32 s_default_mount_opts;
636 __le32 s_first_meta_bg; /* First metablock block group */
637 __le32 s_mkfs_time; /* When the filesystem was created */
638 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
639 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
640/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
641 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
642 __le32 s_free_blocks_count_hi; /* Free blocks count */
643 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
644 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
645 __le32 s_flags; /* Miscellaneous flags */
646 __le16 s_raid_stride; /* RAID stride */
647 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
648 __le64 s_mmp_block; /* Block for multi-mount protection */
649 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
650 __u32 s_reserved[163]; /* Padding to the end of the block */
651};
652
653#ifdef __KERNEL__
654static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
655{
656 return sb->s_fs_info;
657}
658static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
659{
660 return container_of(inode, struct ext4_inode_info, vfs_inode);
661}
662
663static inline struct timespec ext4_current_time(struct inode *inode)
664{
665 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
666 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
667}
668
669
670static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
671{
672 return ino == EXT4_ROOT_INO ||
673 ino == EXT4_JOURNAL_INO ||
674 ino == EXT4_RESIZE_INO ||
675 (ino >= EXT4_FIRST_INO(sb) &&
676 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
677}
678#else
679/* Assume that user mode programs are passing in an ext4fs superblock, not
680 * a kernel struct super_block. This will allow us to call the feature-test
681 * macros from user land. */
682#define EXT4_SB(sb) (sb)
683#endif
684
685#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
686
687/*
688 * Codes for operating systems
689 */
690#define EXT4_OS_LINUX 0
691#define EXT4_OS_HURD 1
692#define EXT4_OS_MASIX 2
693#define EXT4_OS_FREEBSD 3
694#define EXT4_OS_LITES 4
695
696/*
697 * Revision levels
698 */
699#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
700#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
701
702#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
703#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
704
705#define EXT4_GOOD_OLD_INODE_SIZE 128
706
707/*
708 * Feature set definitions
709 */
710
711#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
712 ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
713#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
714 ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
715#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
716 ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
717#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
718 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
719#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
720 EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
721#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
722 EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
723#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
724 EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
725#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
726 EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
727#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
728 EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
729
730#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
731#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
732#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
733#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
734#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
735#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
736
737#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
738#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
739#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
740#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
741#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
742#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
743#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
744
745#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
746#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
747#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
748#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
749#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
750#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
751#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
752#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
753#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
754
755#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
756#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
757 EXT4_FEATURE_INCOMPAT_RECOVER| \
758 EXT4_FEATURE_INCOMPAT_META_BG| \
759 EXT4_FEATURE_INCOMPAT_EXTENTS| \
760 EXT4_FEATURE_INCOMPAT_64BIT| \
761 EXT4_FEATURE_INCOMPAT_FLEX_BG)
762#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
763 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
764 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
765 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
766 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
767 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
768 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
769
770/*
771 * Default values for user and/or group using reserved blocks
772 */
773#define EXT4_DEF_RESUID 0
774#define EXT4_DEF_RESGID 0
775
776/*
777 * Default mount options
778 */
779#define EXT4_DEFM_DEBUG 0x0001
780#define EXT4_DEFM_BSDGROUPS 0x0002
781#define EXT4_DEFM_XATTR_USER 0x0004
782#define EXT4_DEFM_ACL 0x0008
783#define EXT4_DEFM_UID16 0x0010
784#define EXT4_DEFM_JMODE 0x0060
785#define EXT4_DEFM_JMODE_DATA 0x0020
786#define EXT4_DEFM_JMODE_ORDERED 0x0040
787#define EXT4_DEFM_JMODE_WBACK 0x0060
788
789/*
790 * Structure of a directory entry
791 */
792#define EXT4_NAME_LEN 255
793
794struct ext4_dir_entry {
795 __le32 inode; /* Inode number */
796 __le16 rec_len; /* Directory entry length */
797 __le16 name_len; /* Name length */
798 char name[EXT4_NAME_LEN]; /* File name */
799};
800
801/*
802 * The new version of the directory entry. Since EXT4 structures are
803 * stored in intel byte order, and the name_len field could never be
804 * bigger than 255 chars, it's safe to reclaim the extra byte for the
805 * file_type field.
806 */
807struct ext4_dir_entry_2 {
808 __le32 inode; /* Inode number */
809 __le16 rec_len; /* Directory entry length */
810 __u8 name_len; /* Name length */
811 __u8 file_type;
812 char name[EXT4_NAME_LEN]; /* File name */
813};
814
815/*
816 * Ext4 directory file types. Only the low 3 bits are used. The
817 * other bits are reserved for now.
818 */
819#define EXT4_FT_UNKNOWN 0
820#define EXT4_FT_REG_FILE 1
821#define EXT4_FT_DIR 2
822#define EXT4_FT_CHRDEV 3
823#define EXT4_FT_BLKDEV 4
824#define EXT4_FT_FIFO 5
825#define EXT4_FT_SOCK 6
826#define EXT4_FT_SYMLINK 7
827
828#define EXT4_FT_MAX 8
829
830/*
831 * EXT4_DIR_PAD defines the directory entries boundaries
832 *
833 * NOTE: It must be a multiple of 4
834 */
835#define EXT4_DIR_PAD 4
836#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
837#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
838 ~EXT4_DIR_ROUND)
839#define EXT4_MAX_REC_LEN ((1<<16)-1)
840
841static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
842{
843 unsigned len = le16_to_cpu(dlen);
844
845 if (len == EXT4_MAX_REC_LEN)
846 return 1 << 16;
847 return len;
848}
849
850static inline __le16 ext4_rec_len_to_disk(unsigned len)
851{
852 if (len == (1 << 16))
853 return cpu_to_le16(EXT4_MAX_REC_LEN);
854 else if (len > (1 << 16))
855 BUG();
856 return cpu_to_le16(len);
857}
858
859/*
860 * Hash Tree Directory indexing
861 * (c) Daniel Phillips, 2001
862 */
863
864#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
865 EXT4_FEATURE_COMPAT_DIR_INDEX) && \
866 (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
867#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
868#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
869
870/* Legal values for the dx_root hash_version field: */
871
872#define DX_HASH_LEGACY 0
873#define DX_HASH_HALF_MD4 1
874#define DX_HASH_TEA 2
875
876#ifdef __KERNEL__
877
878/* hash info structure used by the directory hash */
879struct dx_hash_info
880{
881 u32 hash;
882 u32 minor_hash;
883 int hash_version;
884 u32 *seed;
885};
886
887#define EXT4_HTREE_EOF 0x7fffffff
888
889/*
890 * Control parameters used by ext4_htree_next_block
891 */
892#define HASH_NB_ALWAYS 1
893
894
895/*
896 * Describe an inode's exact location on disk and in memory
897 */
898struct ext4_iloc
899{
900 struct buffer_head *bh;
901 unsigned long offset;
902 ext4_group_t block_group;
903};
904
905static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
906{
907 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
908}
909
910/*
911 * This structure is stuffed into the struct file's private_data field
912 * for directories. It is where we put information so that we can do
913 * readdir operations in hash tree order.
914 */
915struct dir_private_info {
916 struct rb_root root;
917 struct rb_node *curr_node;
918 struct fname *extra_fname;
919 loff_t last_pos;
920 __u32 curr_hash;
921 __u32 curr_minor_hash;
922 __u32 next_hash;
923};
924
925/* calculate the first block number of the group */
926static inline ext4_fsblk_t
927ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
928{
929 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
930 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
931}
932
933/*
934 * Special error return code only used by dx_probe() and its callers.
935 */
936#define ERR_BAD_DX_DIR -75000
937
938void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
939 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
940
941/*
942 * Function prototypes
943 */
944
945/*
946 * Ok, these declarations are also in <linux/kernel.h> but none of the
947 * ext4 source programs needs to include it so they are duplicated here.
948 */
949# define NORET_TYPE /**/
950# define ATTRIB_NORET __attribute__((noreturn))
951# define NORET_AND noreturn,
952
953/* balloc.c */
954extern unsigned int ext4_block_group(struct super_block *sb,
955 ext4_fsblk_t blocknr);
956extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
957 ext4_fsblk_t blocknr);
958extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
959extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
960 ext4_group_t group);
961extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
962 ext4_fsblk_t goal, int *errp);
963extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
964 ext4_fsblk_t goal, unsigned long *count, int *errp);
965extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
966 ext4_fsblk_t goal, unsigned long *count, int *errp);
967extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
968 ext4_fsblk_t block, unsigned long count, int metadata);
969extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
970 ext4_fsblk_t block, unsigned long count,
971 unsigned long *pdquot_freed_blocks);
972extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
973extern void ext4_check_blocks_bitmap (struct super_block *);
974extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
975 ext4_group_t block_group,
976 struct buffer_head ** bh);
977extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
978extern void ext4_init_block_alloc_info(struct inode *);
979extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
980
981/* dir.c */
982extern int ext4_check_dir_entry(const char *, struct inode *,
983 struct ext4_dir_entry_2 *,
984 struct buffer_head *, unsigned long);
985extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
986 __u32 minor_hash,
987 struct ext4_dir_entry_2 *dirent);
988extern void ext4_htree_free_dir_info(struct dir_private_info *p);
989
990/* fsync.c */
991extern int ext4_sync_file (struct file *, struct dentry *, int);
992
993/* hash.c */
994extern int ext4fs_dirhash(const char *name, int len, struct
995 dx_hash_info *hinfo);
996
997/* ialloc.c */
998extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
999extern void ext4_free_inode (handle_t *, struct inode *);
1000extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
1001extern unsigned long ext4_count_free_inodes (struct super_block *);
1002extern unsigned long ext4_count_dirs (struct super_block *);
1003extern void ext4_check_inodes_bitmap (struct super_block *);
1004extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
1005
1006/* mballoc.c */
1007extern long ext4_mb_stats;
1008extern long ext4_mb_max_to_scan;
1009extern int ext4_mb_init(struct super_block *, int);
1010extern int ext4_mb_release(struct super_block *);
1011extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1012 struct ext4_allocation_request *, int *);
1013extern int ext4_mb_reserve_blocks(struct super_block *, int);
1014extern void ext4_mb_discard_inode_preallocations(struct inode *);
1015extern int __init init_ext4_mballoc(void);
1016extern void exit_ext4_mballoc(void);
1017extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1018 unsigned long, unsigned long, int, unsigned long *);
1019
1020
1021/* inode.c */
1022int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1023 struct buffer_head *bh, ext4_fsblk_t blocknr);
1024struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1025 ext4_lblk_t, int, int *);
1026struct buffer_head *ext4_bread(handle_t *, struct inode *,
1027 ext4_lblk_t, int, int *);
1028int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1029 ext4_lblk_t iblock, unsigned long maxblocks,
1030 struct buffer_head *bh_result,
1031 int create, int extend_disksize);
1032
1033extern struct inode *ext4_iget(struct super_block *, unsigned long);
1034extern int ext4_write_inode (struct inode *, int);
1035extern int ext4_setattr (struct dentry *, struct iattr *);
1036extern void ext4_delete_inode (struct inode *);
1037extern int ext4_sync_inode (handle_t *, struct inode *);
1038extern void ext4_discard_reservation (struct inode *);
1039extern void ext4_dirty_inode(struct inode *);
1040extern int ext4_change_inode_journal_flag(struct inode *, int);
1041extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1042extern void ext4_truncate (struct inode *);
1043extern void ext4_set_inode_flags(struct inode *);
1044extern void ext4_get_inode_flags(struct ext4_inode_info *);
1045extern void ext4_set_aops(struct inode *inode);
1046extern int ext4_writepage_trans_blocks(struct inode *);
1047extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
1048 struct address_space *mapping, loff_t from);
1049
1050/* ioctl.c */
1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1052extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
1053
1054/* migrate.c */
1055extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
1056 unsigned long);
1057/* namei.c */
1058extern int ext4_orphan_add(handle_t *, struct inode *);
1059extern int ext4_orphan_del(handle_t *, struct inode *);
1060extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1061 __u32 start_minor_hash, __u32 *next_hash);
1062
1063/* resize.c */
1064extern int ext4_group_add(struct super_block *sb,
1065 struct ext4_new_group_data *input);
1066extern int ext4_group_extend(struct super_block *sb,
1067 struct ext4_super_block *es,
1068 ext4_fsblk_t n_blocks_count);
1069
1070/* super.c */
1071extern void ext4_error (struct super_block *, const char *, const char *, ...)
1072 __attribute__ ((format (printf, 3, 4)));
1073extern void __ext4_std_error (struct super_block *, const char *, int);
1074extern void ext4_abort (struct super_block *, const char *, const char *, ...)
1075 __attribute__ ((format (printf, 3, 4)));
1076extern void ext4_warning (struct super_block *, const char *, const char *, ...)
1077 __attribute__ ((format (printf, 3, 4)));
1078extern void ext4_update_dynamic_rev (struct super_block *sb);
1079extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1080 __u32 compat);
1081extern int ext4_update_rocompat_feature(handle_t *handle,
1082 struct super_block *sb, __u32 rocompat);
1083extern int ext4_update_incompat_feature(handle_t *handle,
1084 struct super_block *sb, __u32 incompat);
1085extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
1086 struct ext4_group_desc *bg);
1087extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1088 struct ext4_group_desc *bg);
1089extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1090 struct ext4_group_desc *bg);
1091extern void ext4_block_bitmap_set(struct super_block *sb,
1092 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1093extern void ext4_inode_bitmap_set(struct super_block *sb,
1094 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1095extern void ext4_inode_table_set(struct super_block *sb,
1096 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1097
1098static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1099{
1100 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
1101 le32_to_cpu(es->s_blocks_count_lo);
1102}
1103
1104static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
1105{
1106 return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
1107 le32_to_cpu(es->s_r_blocks_count_lo);
1108}
1109
1110static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
1111{
1112 return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
1113 le32_to_cpu(es->s_free_blocks_count_lo);
1114}
1115
1116static inline void ext4_blocks_count_set(struct ext4_super_block *es,
1117 ext4_fsblk_t blk)
1118{
1119 es->s_blocks_count_lo = cpu_to_le32((u32)blk);
1120 es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
1121}
1122
1123static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
1124 ext4_fsblk_t blk)
1125{
1126 es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
1127 es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
1128}
1129
1130static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
1131 ext4_fsblk_t blk)
1132{
1133 es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
1134 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
1135}
1136
1137static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
1138{
1139 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
1140 le32_to_cpu(raw_inode->i_size_lo);
1141}
1142
1143static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1144{
1145 raw_inode->i_size_lo = cpu_to_le32(i_size);
1146 raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
1147}
1148
1149static inline
1150struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1151 ext4_group_t group)
1152{
1153 struct ext4_group_info ***grp_info;
1154 long indexv, indexh;
1155 grp_info = EXT4_SB(sb)->s_group_info;
1156 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
1157 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
1158 return grp_info[indexv][indexh];
1159}
1160
1161
1162#define ext4_std_error(sb, errno) \
1163do { \
1164 if ((errno)) \
1165 __ext4_std_error((sb), __FUNCTION__, (errno)); \
1166} while (0)
1167
1168/*
1169 * Inodes and files operations
1170 */
1171
1172/* dir.c */
1173extern const struct file_operations ext4_dir_operations;
1174
1175/* file.c */
1176extern const struct inode_operations ext4_file_inode_operations;
1177extern const struct file_operations ext4_file_operations;
1178
1179/* namei.c */
1180extern const struct inode_operations ext4_dir_inode_operations;
1181extern const struct inode_operations ext4_special_inode_operations;
1182
1183/* symlink.c */
1184extern const struct inode_operations ext4_symlink_inode_operations;
1185extern const struct inode_operations ext4_fast_symlink_inode_operations;
1186
1187/* extents.c */
1188extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1189extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1190extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1191 ext4_lblk_t iblock,
1192 unsigned long max_blocks, struct buffer_head *bh_result,
1193 int create, int extend_disksize);
1194extern void ext4_ext_truncate(struct inode *, struct page *);
1195extern void ext4_ext_init(struct super_block *);
1196extern void ext4_ext_release(struct super_block *);
1197extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1198 loff_t len);
1199extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1200 sector_t block, unsigned long max_blocks,
1201 struct buffer_head *bh, int create,
1202 int extend_disksize);
1203#endif /* __KERNEL__ */
1204
1205#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
new file mode 100644
index 000000000000..75333b595fab
--- /dev/null
+++ b/fs/ext4/ext4_extents.h
@@ -0,0 +1,232 @@
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public Licens
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 */
18
19#ifndef _EXT4_EXTENTS
20#define _EXT4_EXTENTS
21
22#include "ext4.h"
23
24/*
25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
26 * becomes very small, so index split, in-depth growing and
27 * other hard changes happen much more often.
28 * This is for debug purposes only.
29 */
30#define AGGRESSIVE_TEST_
31
32/*
33 * With EXTENTS_STATS defined, the number of blocks and extents
34 * are collected in the truncate path. They'll be shown at
35 * umount time.
36 */
37#define EXTENTS_STATS__
38
39/*
40 * If CHECK_BINSEARCH is defined, then the results of the binary search
41 * will also be checked by linear search.
42 */
43#define CHECK_BINSEARCH__
44
45/*
46 * If EXT_DEBUG is defined you can use the 'extdebug' mount option
47 * to get lots of info about what's going on.
48 */
49#define EXT_DEBUG__
50#ifdef EXT_DEBUG
51#define ext_debug(a...) printk(a)
52#else
53#define ext_debug(a...)
54#endif
55
56/*
57 * If EXT_STATS is defined then stats numbers are collected.
58 * These number will be displayed at umount time.
59 */
60#define EXT_STATS_
61
62
63/*
64 * ext4_inode has i_block array (60 bytes total).
65 * The first 12 bytes store ext4_extent_header;
66 * the remainder stores an array of ext4_extent.
67 */
68
69/*
70 * This is the extent on-disk structure.
71 * It's used at the bottom of the tree.
72 */
73struct ext4_extent {
74 __le32 ee_block; /* first logical block extent covers */
75 __le16 ee_len; /* number of blocks covered by extent */
76 __le16 ee_start_hi; /* high 16 bits of physical block */
77 __le32 ee_start_lo; /* low 32 bits of physical block */
78};
79
80/*
81 * This is index on-disk structure.
82 * It's used at all the levels except the bottom.
83 */
84struct ext4_extent_idx {
85 __le32 ei_block; /* index covers logical blocks from 'block' */
86 __le32 ei_leaf_lo; /* pointer to the physical block of the next *
87 * level. leaf or next index could be there */
88 __le16 ei_leaf_hi; /* high 16 bits of physical block */
89 __u16 ei_unused;
90};
91
92/*
93 * Each block (leaves and indexes), even inode-stored has header.
94 */
95struct ext4_extent_header {
96 __le16 eh_magic; /* probably will support different formats */
97 __le16 eh_entries; /* number of valid entries */
98 __le16 eh_max; /* capacity of store in entries */
99 __le16 eh_depth; /* has tree real underlying blocks? */
100 __le32 eh_generation; /* generation of the tree */
101};
102
103#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
104
105/*
106 * Array of ext4_ext_path contains path to some extent.
107 * Creation/lookup routines use it for traversal/splitting/etc.
108 * Truncate uses it to simulate recursive walking.
109 */
110struct ext4_ext_path {
111 ext4_fsblk_t p_block;
112 __u16 p_depth;
113 struct ext4_extent *p_ext;
114 struct ext4_extent_idx *p_idx;
115 struct ext4_extent_header *p_hdr;
116 struct buffer_head *p_bh;
117};
118
119/*
120 * structure for external API
121 */
122
123#define EXT4_EXT_CACHE_NO 0
124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2
126
127
128#define EXT_MAX_BLOCK 0xffffffff
129
130/*
131 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
132 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
133 * MSB of ee_len field in the extent datastructure to signify if this
134 * particular extent is an initialized extent or an uninitialized (i.e.
135 * preallocated).
136 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
137 * uninitialized extent.
138 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
139 * uninitialized one. In other words, if MSB of ee_len is set, it is an
140 * uninitialized extent with only one special scenario when ee_len = 0x8000.
141 * In this case we can not have an uninitialized extent of zero length and
142 * thus we make it as a special case of initialized extent with 0x8000 length.
143 * This way we get better extent-to-group alignment for initialized extents.
144 * Hence, the maximum number of blocks we can have in an *initialized*
145 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
146 */
147#define EXT_INIT_MAX_LEN (1UL << 15)
148#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
149
150
151#define EXT_FIRST_EXTENT(__hdr__) \
152 ((struct ext4_extent *) (((char *) (__hdr__)) + \
153 sizeof(struct ext4_extent_header)))
154#define EXT_FIRST_INDEX(__hdr__) \
155 ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
156 sizeof(struct ext4_extent_header)))
157#define EXT_HAS_FREE_INDEX(__path__) \
158 (le16_to_cpu((__path__)->p_hdr->eh_entries) \
159 < le16_to_cpu((__path__)->p_hdr->eh_max))
160#define EXT_LAST_EXTENT(__hdr__) \
161 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
162#define EXT_LAST_INDEX(__hdr__) \
163 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
164#define EXT_MAX_EXTENT(__hdr__) \
165 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
166#define EXT_MAX_INDEX(__hdr__) \
167 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
168
169static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
170{
171 return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
172}
173
174static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
175{
176 return (struct ext4_extent_header *) bh->b_data;
177}
178
179static inline unsigned short ext_depth(struct inode *inode)
180{
181 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
182}
183
184static inline void ext4_ext_tree_changed(struct inode *inode)
185{
186 EXT4_I(inode)->i_ext_generation++;
187}
188
189static inline void
190ext4_ext_invalidate_cache(struct inode *inode)
191{
192 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
193}
194
195static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
196{
197 /* We can not have an uninitialized extent of zero length! */
198 BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
199 ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
200}
201
202static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
203{
204 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
205 return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
206}
207
208static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
209{
210 return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
211 le16_to_cpu(ext->ee_len) :
212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
213}
214
215extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
216extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
217extern int ext4_extent_tree_init(handle_t *, struct inode *);
218extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
219extern int ext4_ext_try_to_merge(struct inode *inode,
220 struct ext4_ext_path *path,
221 struct ext4_extent *);
222extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
223extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
224extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
225 struct ext4_ext_path *);
226extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
227 ext4_lblk_t *, ext4_fsblk_t *);
228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
229 ext4_lblk_t *, ext4_fsblk_t *);
230extern void ext4_ext_drop_refs(struct ext4_ext_path *);
231#endif /* _EXT4_EXTENTS */
232
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
new file mode 100644
index 000000000000..26a4ae255d79
--- /dev/null
+++ b/fs/ext4/ext4_i.h
@@ -0,0 +1,167 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned long ext4_group_t;
35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * it the physical block number of the block which was most-recentl
62 * allocated to this file. This give us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end
70
71/*
72 * storage for cached extent
73 */
74struct ext4_ext_cache {
75 ext4_fsblk_t ec_start;
76 ext4_lblk_t ec_block;
77 __u32 ec_len; /* must be 32bit to return holes */
78 __u32 ec_type;
79};
80
81/*
82 * third extended file system inode data in memory
83 */
84struct ext4_inode_info {
85 __le32 i_data[15]; /* unconverted */
86 __u32 i_flags;
87 ext4_fsblk_t i_file_acl;
88 __u32 i_dtime;
89
90 /*
91 * i_block_group is the number of the block group which contains
92 * this file's inode. Constant across the lifetime of the inode,
93 * it is ued for making block allocation decisions - we try to
94 * place a file's data blocks near its inode block, and new inodes
95 * near to their parent directory's inode.
96 */
97 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */
99
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR
105 /*
106 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention
108 * between readers of EAs and writers of regular file data, so
109 * instead we synchronize on xattr_sem when reading or changing
110 * EAs.
111 */
112 struct rw_semaphore xattr_sem;
113#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
115 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl;
117#endif
118
119 struct list_head i_orphan; /* unlinked but open inodes */
120
121 /*
122 * i_disksize keeps track of what the inode size is ON DISK, not
123 * in memory. During truncate, i_size is set to the new size by
124 * the VFS prior to calling ext4_truncate(), but the filesystem won't
125 * set i_disksize to 0 until the truncate is actually under way.
126 *
127 * The intent is that i_disksize always represents the blocks which
128 * are used by this file. This allows recovery to restart truncate
129 * on orphans if we crash during truncate. We actually write i_disksize
130 * into the on-disk inode when writing inodes out, instead of i_size.
131 *
132 * The only time when i_disksize and i_size may be different is when
133 * a truncate is in progress. The only things which change i_disksize
134 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
135 */
136 loff_t i_disksize;
137
138 /* on-disk additional length */
139 __u16 i_extra_isize;
140
141 /*
142 * i_data_sem is for serialising ext4_truncate() against
143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
144 * data tree are chopped off during truncate. We can't do that in
145 * ext4 because whenever we perform intermediate commits during
146 * truncate, the inode and all the metadata blocks *must* be in a
147 * consistent state which allows truncation of the orphans to restart
148 * during recovery. Hence we must fix the get_block-vs-truncate race
149 * by other means, so we have i_data_sem.
150 */
151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode;
153
154 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent;
156 /*
157 * File creation time. Its function is same as that of
158 * struct timespec i_{a,c,m}time in the generic inode.
159 */
160 struct timespec i_crtime;
161
162 /* mballoc */
163 struct list_head i_prealloc_list;
164 spinlock_t i_prealloc_lock;
165};
166
167#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
2 * Interface between ext4 and JBD 2 * Interface between ext4 and JBD
3 */ 3 */
4 4
5#include <linux/ext4_jbd2.h> 5#include "ext4_jbd2.h"
6 6
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = jbd2_journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext4_journal_abort_handle(where, __func__, bh, handle, err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = jbd2_journal_get_write_access(handle, bh); 19 int err = jbd2_journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext4_journal_abort_handle(where, __func__, bh, handle, err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = jbd2_journal_forget(handle, bh); 28 int err = jbd2_journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext4_journal_abort_handle(where, __func__, bh, handle, err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 37 int err = jbd2_journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext4_journal_abort_handle(where, __func__, bh, handle, err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
45{ 45{
46 int err = jbd2_journal_get_create_access(handle, bh); 46 int err = jbd2_journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext4_journal_abort_handle(where, __func__, bh, handle, err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 55 int err = jbd2_journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext4_journal_abort_handle(where, __func__, bh, handle, err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
new file mode 100644
index 000000000000..9255a7d28b24
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.h
@@ -0,0 +1,231 @@
1/*
2 * ext4_jbd2.h
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext4-specific journaling extensions.
13 */
14
15#ifndef _EXT4_JBD2_H
16#define _EXT4_JBD2_H
17
18#include <linux/fs.h>
19#include <linux/jbd2.h>
20#include "ext4.h"
21
22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
23
24/* Define the number of blocks we need to account to a transaction to
25 * modify one block of data.
26 *
27 * We may have to touch one inode, one bitmap buffer, up to three
28 * indirection blocks, the group and superblock summaries, and the data
29 * block to complete the transaction.
30 *
31 * For extents-enabled fs we may have to allocate and modify up to
32 * 5 levels of tree + root which are stored in the inode. */
33
34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
36 || test_opt(sb, EXTENTS) ? 27U : 8U)
37
38/* Extended attribute operations touch at most two data buffers,
39 * two bitmap buffers, and two group summaries, in addition to the inode
40 * and the superblock, which are already accounted for. */
41
42#define EXT4_XATTR_TRANS_BLOCKS 6U
43
44/* Define the minimum size for a transaction which modifies data. This
45 * needs to take into account the fact that we may end up modifying two
46 * quota files too (one for the group, one for the user quota). The
47 * superblock only gets updated once, of course, so don't bother
48 * counting that again for the quota updates. */
49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53
54/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */
57
58#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
59
60/* Define an arbitrary limit for the amount of data we will anticipate
61 * writing to any given transaction. For unbounded transactions such as
62 * write(2) and truncate(2) we can write more than this, but we always
63 * start off at the maximum transaction size and grow the transaction
64 * optimistically as we go. */
65
66#define EXT4_MAX_TRANS_DATA 64U
67
68/* We break up a large truncate or write transaction once the handle's
69 * buffer credits gets this low, we need either to extend the
70 * transaction or to start a new one. Reserve enough space here for
71 * inode, bitmap, superblock, group and indirection updates for at least
72 * one block, plus two quota updates. Quota allocations are not
73 * needed. */
74
75#define EXT4_RESERVE_TRANS_BLOCKS 12U
76
77#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
78
79#ifdef CONFIG_QUOTA
80/* Amount of blocks needed for quota update - we know that the structure was
81 * allocated so we need to update only inode+data */
82#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
83/* Amount of blocks needed for quota insert/delete - we do some block writes
84 * but inode, sb and group updates are done only once */
85#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
86 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
87#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
88 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
89#else
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
91#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
92#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
93#endif
94
95int
96ext4_mark_iloc_dirty(handle_t *handle,
97 struct inode *inode,
98 struct ext4_iloc *iloc);
99
100/*
101 * On success, We end up with an outstanding reference count against
102 * iloc->bh. This _must_ be cleaned up later.
103 */
104
105int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
106 struct ext4_iloc *iloc);
107
108int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
109
110/*
111 * Wrapper functions with which ext4 calls into JBD. The intent here is
112 * to allow these to be turned into appropriate stubs so ext4 can control
113 * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
114 * been done yet.
115 */
116
117static inline void ext4_journal_release_buffer(handle_t *handle,
118 struct buffer_head *bh)
119{
120 jbd2_journal_release_buffer(handle, bh);
121}
122
123void ext4_journal_abort_handle(const char *caller, const char *err_fn,
124 struct buffer_head *bh, handle_t *handle, int err);
125
126int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
127 struct buffer_head *bh);
128
129int __ext4_journal_get_write_access(const char *where, handle_t *handle,
130 struct buffer_head *bh);
131
132int __ext4_journal_forget(const char *where, handle_t *handle,
133 struct buffer_head *bh);
134
135int __ext4_journal_revoke(const char *where, handle_t *handle,
136 ext4_fsblk_t blocknr, struct buffer_head *bh);
137
138int __ext4_journal_get_create_access(const char *where,
139 handle_t *handle, struct buffer_head *bh);
140
141int __ext4_journal_dirty_metadata(const char *where,
142 handle_t *handle, struct buffer_head *bh);
143
144#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
148#define ext4_journal_revoke(handle, blocknr, bh) \
149 __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
150#define ext4_journal_get_create_access(handle, bh) \
151 __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
152#define ext4_journal_dirty_metadata(handle, bh) \
153 __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__FUNCTION__, (handle), (bh))
156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle);
161
162static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
163{
164 return ext4_journal_start_sb(inode->i_sb, nblocks);
165}
166
167#define ext4_journal_stop(handle) \
168 __ext4_journal_stop(__FUNCTION__, (handle))
169
170static inline handle_t *ext4_journal_current_handle(void)
171{
172 return journal_current_handle();
173}
174
175static inline int ext4_journal_extend(handle_t *handle, int nblocks)
176{
177 return jbd2_journal_extend(handle, nblocks);
178}
179
180static inline int ext4_journal_restart(handle_t *handle, int nblocks)
181{
182 return jbd2_journal_restart(handle, nblocks);
183}
184
185static inline int ext4_journal_blocks_per_page(struct inode *inode)
186{
187 return jbd2_journal_blocks_per_page(inode);
188}
189
190static inline int ext4_journal_force_commit(journal_t *journal)
191{
192 return jbd2_journal_force_commit(journal);
193}
194
195/* super.c */
196int ext4_force_commit(struct super_block *sb);
197
198static inline int ext4_should_journal_data(struct inode *inode)
199{
200 if (!S_ISREG(inode->i_mode))
201 return 1;
202 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
203 return 1;
204 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
205 return 1;
206 return 0;
207}
208
209static inline int ext4_should_order_data(struct inode *inode)
210{
211 if (!S_ISREG(inode->i_mode))
212 return 0;
213 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
214 return 0;
215 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
216 return 1;
217 return 0;
218}
219
220static inline int ext4_should_writeback_data(struct inode *inode)
221{
222 if (!S_ISREG(inode->i_mode))
223 return 0;
224 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
225 return 0;
226 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
227 return 1;
228 return 0;
229}
230
231#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
new file mode 100644
index 000000000000..5802e69f2191
--- /dev/null
+++ b/fs/ext4/ext4_sb.h
@@ -0,0 +1,148 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * third extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 spinlock_t s_next_gen_lock;
56 u32 s_next_generation;
57 u32 s_hash_seed[4];
58 int s_def_hash_version;
59 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter;
62 struct blockgroup_lock s_blockgroup_lock;
63
64 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68
69 /* Journaling */
70 struct inode * s_journal_inode;
71 struct journal_s * s_journal;
72 struct list_head s_orphan;
73 unsigned long s_commit_interval;
74 struct block_device *journal_bdev;
75#ifdef CONFIG_JBD2_DEBUG
76 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
77 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
78#endif
79#ifdef CONFIG_QUOTA
80 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
81 int s_jquota_fmt; /* Format of quota to use */
82#endif
83 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84
85#ifdef EXTENTS_STATS
86 /* ext4 extents stats */
87 unsigned long s_ext_min;
88 unsigned long s_ext_max;
89 unsigned long s_depth_max;
90 spinlock_t s_ext_stats_lock;
91 unsigned long s_ext_blocks;
92 unsigned long s_ext_extents;
93#endif
94
95 /* for buddy allocator */
96 struct ext4_group_info ***s_group_info;
97 struct inode *s_buddy_cache;
98 long s_blocks_reserved;
99 spinlock_t s_reserve_lock;
100 struct list_head s_active_transaction;
101 struct list_head s_closed_transaction;
102 struct list_head s_committed_transaction;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets, *s_mb_maxs;
106
107 /* tunables */
108 unsigned long s_stripe;
109 unsigned long s_mb_stream_request;
110 unsigned long s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan;
112 unsigned long s_mb_stats;
113 unsigned long s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start;
118
119 /* history to debug policy */
120 struct ext4_mb_history *s_mb_history;
121 int s_mb_history_cur;
122 int s_mb_history_max;
123 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146};
147
148#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h>
36#include <linux/jbd2.h> 35#include <linux/jbd2.h>
37#include <linux/highuid.h> 36#include <linux/highuid.h>
38#include <linux/pagemap.h> 37#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
42#include <linux/falloc.h> 41#include <linux/falloc.h>
43#include <linux/ext4_fs_extents.h>
44#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45 45
46 46
47/* 47/*
@@ -308,7 +308,7 @@ corrupted:
308} 308}
309 309
310#define ext4_ext_check_header(inode, eh, depth) \ 310#define ext4_ext_check_header(inode, eh, depth) \
311 __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) 311 __ext4_ext_check_header(__func__, inode, eh, depth)
312 312
313#ifdef EXT_DEBUG 313#ifdef EXT_DEBUG
314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
614 614
615 ix->ei_block = cpu_to_le32(logical); 615 ix->ei_block = cpu_to_le32(logical);
616 ext4_idx_store_pblock(ix, ptr); 616 ext4_idx_store_pblock(ix, ptr);
617 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 617 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
618 618
619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
620 > le16_to_cpu(curp->p_hdr->eh_max)); 620 > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
736 } 736 }
737 if (m) { 737 if (m) {
738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
739 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 739 le16_add_cpu(&neh->eh_entries, m);
740 } 740 }
741 741
742 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
753 err = ext4_ext_get_access(handle, inode, path + depth); 753 err = ext4_ext_get_access(handle, inode, path + depth);
754 if (err) 754 if (err)
755 goto cleanup; 755 goto cleanup;
756 path[depth].p_hdr->eh_entries = 756 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
757 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758 err = ext4_ext_dirty(handle, inode, path + depth); 757 err = ext4_ext_dirty(handle, inode, path + depth);
759 if (err) 758 if (err)
760 goto cleanup; 759 goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
817 if (m) { 816 if (m) {
818 memmove(++fidx, path[i].p_idx - m, 817 memmove(++fidx, path[i].p_idx - m,
819 sizeof(struct ext4_extent_idx) * m); 818 sizeof(struct ext4_extent_idx) * m);
820 neh->eh_entries = 819 le16_add_cpu(&neh->eh_entries, m);
821 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
822 } 820 }
823 set_buffer_uptodate(bh); 821 set_buffer_uptodate(bh);
824 unlock_buffer(bh); 822 unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
834 err = ext4_ext_get_access(handle, inode, path + i); 832 err = ext4_ext_get_access(handle, inode, path + i);
835 if (err) 833 if (err)
836 goto cleanup; 834 goto cleanup;
837 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 835 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
838 err = ext4_ext_dirty(handle, inode, path + i); 836 err = ext4_ext_dirty(handle, inode, path + i);
839 if (err) 837 if (err)
840 goto cleanup; 838 goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
1369 * sizeof(struct ext4_extent); 1367 * sizeof(struct ext4_extent);
1370 memmove(ex + 1, ex + 2, len); 1368 memmove(ex + 1, ex + 2, len);
1371 } 1369 }
1372 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); 1370 le16_add_cpu(&eh->eh_entries, -1);
1373 merge_done = 1; 1371 merge_done = 1;
1374 WARN_ON(eh->eh_entries == 0); 1372 WARN_ON(eh->eh_entries == 0);
1375 if (!eh->eh_entries) 1373 if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
1560 path[depth].p_ext = nearex; 1558 path[depth].p_ext = nearex;
1561 } 1559 }
1562 1560
1563 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1561 le16_add_cpu(&eh->eh_entries, 1);
1564 nearex = path[depth].p_ext; 1562 nearex = path[depth].p_ext;
1565 nearex->ee_block = newext->ee_block; 1563 nearex->ee_block = newext->ee_block;
1566 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1564 ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1699 err = ext4_ext_get_access(handle, inode, path); 1697 err = ext4_ext_get_access(handle, inode, path);
1700 if (err) 1698 if (err)
1701 return err; 1699 return err;
1702 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1700 le16_add_cpu(&path->p_hdr->eh_entries, -1);
1703 err = ext4_ext_dirty(handle, inode, path); 1701 err = ext4_ext_dirty(handle, inode, path);
1704 if (err) 1702 if (err)
1705 return err; 1703 return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1902 if (num == 0) { 1900 if (num == 0) {
1903 /* this extent is removed; mark slot entirely unused */ 1901 /* this extent is removed; mark slot entirely unused */
1904 ext4_ext_store_pblock(ex, 0); 1902 ext4_ext_store_pblock(ex, 0);
1905 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1903 le16_add_cpu(&eh->eh_entries, -1);
1906 } 1904 }
1907 1905
1908 ex->ee_block = cpu_to_le32(block); 1906 ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
1979 * We start scanning from right side, freeing all the blocks 1977 * We start scanning from right side, freeing all the blocks
1980 * after i_size and walking into the tree depth-wise. 1978 * after i_size and walking into the tree depth-wise.
1981 */ 1979 */
1982 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1980 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
1983 if (path == NULL) { 1981 if (path == NULL) {
1984 ext4_journal_stop(handle); 1982 ext4_journal_stop(handle);
1985 return -ENOMEM; 1983 return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
2138#endif 2136#endif
2139} 2137}
2140 2138
2139static void bi_complete(struct bio *bio, int error)
2140{
2141 complete((struct completion *)bio->bi_private);
2142}
2143
2144/* FIXME!! we need to try to merge to left or right after zero-out */
2145static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2146{
2147 int ret = -EIO;
2148 struct bio *bio;
2149 int blkbits, blocksize;
2150 sector_t ee_pblock;
2151 struct completion event;
2152 unsigned int ee_len, len, done, offset;
2153
2154
2155 blkbits = inode->i_blkbits;
2156 blocksize = inode->i_sb->s_blocksize;
2157 ee_len = ext4_ext_get_actual_len(ex);
2158 ee_pblock = ext_pblock(ex);
2159
2160 /* convert ee_pblock to 512 byte sectors */
2161 ee_pblock = ee_pblock << (blkbits - 9);
2162
2163 while (ee_len > 0) {
2164
2165 if (ee_len > BIO_MAX_PAGES)
2166 len = BIO_MAX_PAGES;
2167 else
2168 len = ee_len;
2169
2170 bio = bio_alloc(GFP_NOIO, len);
2171 if (!bio)
2172 return -ENOMEM;
2173 bio->bi_sector = ee_pblock;
2174 bio->bi_bdev = inode->i_sb->s_bdev;
2175
2176 done = 0;
2177 offset = 0;
2178 while (done < len) {
2179 ret = bio_add_page(bio, ZERO_PAGE(0),
2180 blocksize, offset);
2181 if (ret != blocksize) {
2182 /*
2183 * We can't add any more pages because of
2184 * hardware limitations. Start a new bio.
2185 */
2186 break;
2187 }
2188 done++;
2189 offset += blocksize;
2190 if (offset >= PAGE_CACHE_SIZE)
2191 offset = 0;
2192 }
2193
2194 init_completion(&event);
2195 bio->bi_private = &event;
2196 bio->bi_end_io = bi_complete;
2197 submit_bio(WRITE, bio);
2198 wait_for_completion(&event);
2199
2200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2201 ret = 0;
2202 else {
2203 ret = -EIO;
2204 break;
2205 }
2206 bio_put(bio);
2207 ee_len -= done;
2208 ee_pblock += done << (blkbits - 9);
2209 }
2210 return ret;
2211}
2212
2213#define EXT4_EXT_ZERO_LEN 7
2214
2141/* 2215/*
2142 * This function is called by ext4_ext_get_blocks() if someone tries to write 2216 * This function is called by ext4_ext_get_blocks() if someone tries to write
2143 * to an uninitialized extent. It may result in splitting the uninitialized 2217 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2154 ext4_lblk_t iblock, 2228 ext4_lblk_t iblock,
2155 unsigned long max_blocks) 2229 unsigned long max_blocks)
2156{ 2230{
2157 struct ext4_extent *ex, newex; 2231 struct ext4_extent *ex, newex, orig_ex;
2158 struct ext4_extent *ex1 = NULL; 2232 struct ext4_extent *ex1 = NULL;
2159 struct ext4_extent *ex2 = NULL; 2233 struct ext4_extent *ex2 = NULL;
2160 struct ext4_extent *ex3 = NULL; 2234 struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2173 allocated = ee_len - (iblock - ee_block); 2247 allocated = ee_len - (iblock - ee_block);
2174 newblock = iblock - ee_block + ext_pblock(ex); 2248 newblock = iblock - ee_block + ext_pblock(ex);
2175 ex2 = ex; 2249 ex2 = ex;
2250 orig_ex.ee_block = ex->ee_block;
2251 orig_ex.ee_len = cpu_to_le16(ee_len);
2252 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2176 2253
2177 err = ext4_ext_get_access(handle, inode, path + depth); 2254 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err) 2255 if (err)
2179 goto out; 2256 goto out;
2257 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
2258 if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2259 err = ext4_ext_zeroout(inode, &orig_ex);
2260 if (err)
2261 goto fix_extent_len;
2262 /* update the extent length and mark as initialized */
2263 ex->ee_block = orig_ex.ee_block;
2264 ex->ee_len = orig_ex.ee_len;
2265 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2266 ext4_ext_dirty(handle, inode, path + depth);
2267 /* zeroed the full extent */
2268 return allocated;
2269 }
2180 2270
2181 /* ex1: ee_block to iblock - 1 : uninitialized */ 2271 /* ex1: ee_block to iblock - 1 : uninitialized */
2182 if (iblock > ee_block) { 2272 if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2195 /* ex3: to ee_block + ee_len : uninitialised */ 2285 /* ex3: to ee_block + ee_len : uninitialised */
2196 if (allocated > max_blocks) { 2286 if (allocated > max_blocks) {
2197 unsigned int newdepth; 2287 unsigned int newdepth;
2288 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2289 if (allocated <= EXT4_EXT_ZERO_LEN) {
2290 /* Mark first half uninitialized.
2291 * Mark second half initialized and zero out the
2292 * initialized extent
2293 */
2294 ex->ee_block = orig_ex.ee_block;
2295 ex->ee_len = cpu_to_le16(ee_len - allocated);
2296 ext4_ext_mark_uninitialized(ex);
2297 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2298 ext4_ext_dirty(handle, inode, path + depth);
2299
2300 ex3 = &newex;
2301 ex3->ee_block = cpu_to_le32(iblock);
2302 ext4_ext_store_pblock(ex3, newblock);
2303 ex3->ee_len = cpu_to_le16(allocated);
2304 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2305 if (err == -ENOSPC) {
2306 err = ext4_ext_zeroout(inode, &orig_ex);
2307 if (err)
2308 goto fix_extent_len;
2309 ex->ee_block = orig_ex.ee_block;
2310 ex->ee_len = orig_ex.ee_len;
2311 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2312 ext4_ext_dirty(handle, inode, path + depth);
2313 /* zeroed the full extent */
2314 return allocated;
2315
2316 } else if (err)
2317 goto fix_extent_len;
2318
2319 /*
2320 * We need to zero out the second half because
2321 * an fallocate request can update file size and
2322 * converting the second half to initialized extent
2323 * implies that we can leak some junk data to user
2324 * space.
2325 */
2326 err = ext4_ext_zeroout(inode, ex3);
2327 if (err) {
2328 /*
2329 * We should actually mark the
2330 * second half as uninit and return error
2331 * Insert would have changed the extent
2332 */
2333 depth = ext_depth(inode);
2334 ext4_ext_drop_refs(path);
2335 path = ext4_ext_find_extent(inode,
2336 iblock, path);
2337 if (IS_ERR(path)) {
2338 err = PTR_ERR(path);
2339 return err;
2340 }
2341 ex = path[depth].p_ext;
2342 err = ext4_ext_get_access(handle, inode,
2343 path + depth);
2344 if (err)
2345 return err;
2346 ext4_ext_mark_uninitialized(ex);
2347 ext4_ext_dirty(handle, inode, path + depth);
2348 return err;
2349 }
2350
2351 /* zeroed the second half */
2352 return allocated;
2353 }
2198 ex3 = &newex; 2354 ex3 = &newex;
2199 ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2355 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2200 ext4_ext_store_pblock(ex3, newblock + max_blocks); 2356 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2201 ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2357 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2202 ext4_ext_mark_uninitialized(ex3); 2358 ext4_ext_mark_uninitialized(ex3);
2203 err = ext4_ext_insert_extent(handle, inode, path, ex3); 2359 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2204 if (err) 2360 if (err == -ENOSPC) {
2205 goto out; 2361 err = ext4_ext_zeroout(inode, &orig_ex);
2362 if (err)
2363 goto fix_extent_len;
2364 /* update the extent length and mark as initialized */
2365 ex->ee_block = orig_ex.ee_block;
2366 ex->ee_len = orig_ex.ee_len;
2367 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2368 ext4_ext_dirty(handle, inode, path + depth);
2369 /* zeroed the full extent */
2370 return allocated;
2371
2372 } else if (err)
2373 goto fix_extent_len;
2206 /* 2374 /*
2207 * The depth, and hence eh & ex might change 2375 * The depth, and hence eh & ex might change
2208 * as part of the insert above. 2376 * as part of the insert above.
2209 */ 2377 */
2210 newdepth = ext_depth(inode); 2378 newdepth = ext_depth(inode);
2379 /*
2380 * update the extent length after successfull insert of the
2381 * split extent
2382 */
2383 orig_ex.ee_len = cpu_to_le16(ee_len -
2384 ext4_ext_get_actual_len(ex3));
2211 if (newdepth != depth) { 2385 if (newdepth != depth) {
2212 depth = newdepth; 2386 depth = newdepth;
2213 ext4_ext_drop_refs(path); 2387 ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2226 goto out; 2400 goto out;
2227 } 2401 }
2228 allocated = max_blocks; 2402 allocated = max_blocks;
2403
2404 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
2405 * to insert a extent in the middle zerout directly
2406 * otherwise give the extent a chance to merge to left
2407 */
2408 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2409 iblock != ee_block) {
2410 err = ext4_ext_zeroout(inode, &orig_ex);
2411 if (err)
2412 goto fix_extent_len;
2413 /* update the extent length and mark as initialized */
2414 ex->ee_block = orig_ex.ee_block;
2415 ex->ee_len = orig_ex.ee_len;
2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417 ext4_ext_dirty(handle, inode, path + depth);
2418 /* zero out the first half */
2419 return allocated;
2420 }
2229 } 2421 }
2230 /* 2422 /*
2231 * If there was a change of depth as part of the 2423 * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2282 goto out; 2474 goto out;
2283insert: 2475insert:
2284 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2476 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2477 if (err == -ENOSPC) {
2478 err = ext4_ext_zeroout(inode, &orig_ex);
2479 if (err)
2480 goto fix_extent_len;
2481 /* update the extent length and mark as initialized */
2482 ex->ee_block = orig_ex.ee_block;
2483 ex->ee_len = orig_ex.ee_len;
2484 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2485 ext4_ext_dirty(handle, inode, path + depth);
2486 /* zero out the first half */
2487 return allocated;
2488 } else if (err)
2489 goto fix_extent_len;
2285out: 2490out:
2286 return err ? err : allocated; 2491 return err ? err : allocated;
2492
2493fix_extent_len:
2494 ex->ee_block = orig_ex.ee_block;
2495 ex->ee_len = orig_ex.ee_len;
2496 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2497 ext4_ext_mark_uninitialized(ex);
2498 ext4_ext_dirty(handle, inode, path + depth);
2499 return err;
2287} 2500}
2288 2501
2289/* 2502/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2393 } 2606 }
2394 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2607 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2395 goto out; 2608 goto out;
2396 if (!create) 2609 if (!create) {
2610 /*
2611 * We have blocks reserved already. We
2612 * return allocated blocks so that delalloc
2613 * won't do block reservation for us. But
2614 * the buffer head will be unmapped so that
2615 * a read from the block returns 0s.
2616 */
2617 if (allocated > max_blocks)
2618 allocated = max_blocks;
2619 /* mark the buffer unwritten */
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2397 goto out2; 2621 goto out2;
2622 }
2398 2623
2399 ret = ext4_ext_convert_to_initialized(handle, inode, 2624 ret = ext4_ext_convert_to_initialized(handle, inode,
2400 path, iblock, 2625 path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
2584 ext4_orphan_del(handle, inode); 2809 ext4_orphan_del(handle, inode);
2585 2810
2586 up_write(&EXT4_I(inode)->i_data_sem); 2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode);
2587 ext4_journal_stop(handle); 2814 ext4_journal_stop(handle);
2588} 2815}
2589 2816
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2608 return needed; 2835 return needed;
2609} 2836}
2610 2837
2838static void ext4_falloc_update_inode(struct inode *inode,
2839 int mode, loff_t new_size, int update_ctime)
2840{
2841 struct timespec now;
2842
2843 if (update_ctime) {
2844 now = current_fs_time(inode->i_sb);
2845 if (!timespec_equal(&inode->i_ctime, &now))
2846 inode->i_ctime = now;
2847 }
2848 /*
2849 * Update only when preallocation was requested beyond
2850 * the file size.
2851 */
2852 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2853 new_size > i_size_read(inode)) {
2854 i_size_write(inode, new_size);
2855 EXT4_I(inode)->i_disksize = new_size;
2856 }
2857
2858}
2859
2611/* 2860/*
2612 * preallocate space for a file. This implements ext4's fallocate inode 2861 * preallocate space for a file. This implements ext4's fallocate inode
2613 * operation, which gets called from sys_fallocate system call. 2862 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2619{ 2868{
2620 handle_t *handle; 2869 handle_t *handle;
2621 ext4_lblk_t block; 2870 ext4_lblk_t block;
2871 loff_t new_size;
2622 unsigned long max_blocks; 2872 unsigned long max_blocks;
2623 ext4_fsblk_t nblocks = 0;
2624 int ret = 0; 2873 int ret = 0;
2625 int ret2 = 0; 2874 int ret2 = 0;
2626 int retries = 0; 2875 int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2639 return -ENODEV; 2888 return -ENODEV;
2640 2889
2641 block = offset >> blkbits; 2890 block = offset >> blkbits;
2891 /*
2892 * We can't just convert len to max_blocks because
2893 * If blocksize = 4096 offset = 3072 and len = 2048
2894 */
2642 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2895 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2643 - block; 2896 - block;
2644
2645 /* 2897 /*
2646 * credits to insert 1 extent into extent tree + buffers to be able to 2898 * credits to insert 1 extent into extent tree + buffers to be able to
2647 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2899 * modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2657,7 +2909,6 @@ retry:
2657 ret = PTR_ERR(handle); 2909 ret = PTR_ERR(handle);
2658 break; 2910 break;
2659 } 2911 }
2660
2661 ret = ext4_get_blocks_wrap(handle, inode, block, 2912 ret = ext4_get_blocks_wrap(handle, inode, block,
2662 max_blocks, &map_bh, 2913 max_blocks, &map_bh,
2663 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2914 EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
2673 ret2 = ext4_journal_stop(handle); 2924 ret2 = ext4_journal_stop(handle);
2674 break; 2925 break;
2675 } 2926 }
2676 if (ret > 0) { 2927 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
2677 /* check wrap through sign-bit/zero here */ 2928 blkbits) >> blkbits))
2678 if ((block + ret) < 0 || (block + ret) < block) { 2929 new_size = offset + len;
2679 ret = -EIO; 2930 else
2680 ext4_mark_inode_dirty(handle, inode); 2931 new_size = (block + ret) << blkbits;
2681 ret2 = ext4_journal_stop(handle);
2682 break;
2683 }
2684 if (buffer_new(&map_bh) && ((block + ret) >
2685 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2686 >> blkbits)))
2687 nblocks = nblocks + ret;
2688 }
2689
2690 /* Update ctime if new blocks get allocated */
2691 if (nblocks) {
2692 struct timespec now;
2693
2694 now = current_fs_time(inode->i_sb);
2695 if (!timespec_equal(&inode->i_ctime, &now))
2696 inode->i_ctime = now;
2697 }
2698 2932
2933 ext4_falloc_update_inode(inode, mode, new_size,
2934 buffer_new(&map_bh));
2699 ext4_mark_inode_dirty(handle, inode); 2935 ext4_mark_inode_dirty(handle, inode);
2700 ret2 = ext4_journal_stop(handle); 2936 ret2 = ext4_journal_stop(handle);
2701 if (ret2) 2937 if (ret2)
2702 break; 2938 break;
2703 } 2939 }
2704 2940 if (ret == -ENOSPC &&
2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2941 ext4_should_retry_alloc(inode->i_sb, &retries)) {
2942 ret = 0;
2706 goto retry; 2943 goto retry;
2707
2708 /*
2709 * Time to update the file size.
2710 * Update only when preallocation was requested beyond the file size.
2711 */
2712 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2713 (offset + len) > i_size_read(inode)) {
2714 if (ret > 0) {
2715 /*
2716 * if no error, we assume preallocation succeeded
2717 * completely
2718 */
2719 i_size_write(inode, offset + len);
2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2721 } else if (ret < 0 && nblocks) {
2722 /* Handle partial allocation scenario */
2723 loff_t newsize;
2724
2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2728 }
2729 } 2944 }
2730
2731 mutex_unlock(&inode->i_mutex); 2945 mutex_unlock(&inode->i_mutex);
2732 return ret > 0 ? ret2 : ret; 2946 return ret > 0 ? ret2 : ret;
2733} 2947}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h> 24#include "ext4.h"
25#include <linux/ext4_jbd2.h> 25#include "ext4_jbd2.h"
26#include "xattr.h" 26#include "xattr.h"
27#include "acl.h" 27#include "acl.h"
28 28
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
129 .write = do_sync_write, 129 .write = do_sync_write,
130 .aio_read = generic_file_aio_read, 130 .aio_read = generic_file_aio_read,
131 .aio_write = ext4_file_write, 131 .aio_write = ext4_file_write,
132 .ioctl = ext4_ioctl, 132 .unlocked_ioctl = ext4_ioctl,
133#ifdef CONFIG_COMPAT 133#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 134 .compat_ioctl = ext4_compat_ioctl,
135#endif 135#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/ext4_fs.h> 30#include "ext4.h"
31#include <linux/ext4_jbd2.h> 31#include "ext4_jbd2.h"
32 32
33/* 33/*
34 * akpm: A new design for ext4_sync_file(). 34 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h> 13#include <linux/jbd2.h>
14#include <linux/ext4_fs.h>
15#include <linux/cryptohash.h> 14#include <linux/cryptohash.h>
15#include "ext4.h"
16 16
17#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
18 18
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/stat.h> 18#include <linux/stat.h>
21#include <linux/string.h> 19#include <linux/string.h>
22#include <linux/quotaops.h> 20#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
25#include <linux/bitops.h> 23#include <linux/bitops.h>
26#include <linux/blkdev.h> 24#include <linux/blkdev.h>
27#include <asm/byteorder.h> 25#include <asm/byteorder.h>
28 26#include "ext4.h"
27#include "ext4_jbd2.h"
29#include "xattr.h" 28#include "xattr.h"
30#include "acl.h" 29#include "acl.h"
31#include "group.h" 30#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
75 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %lu\n",
79 block_group); 78 block_group);
80 gdp->bg_free_blocks_count = 0; 79 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0; 80 gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
223 222
224 if (gdp) { 223 if (gdp) {
225 spin_lock(sb_bgl_lock(sbi, block_group)); 224 spin_lock(sb_bgl_lock(sbi, block_group));
226 gdp->bg_free_inodes_count = cpu_to_le16( 225 le16_add_cpu(&gdp->bg_free_inodes_count, 1);
227 le16_to_cpu(gdp->bg_free_inodes_count) + 1);
228 if (is_directory) 226 if (is_directory)
229 gdp->bg_used_dirs_count = cpu_to_le16( 227 le16_add_cpu(&gdp->bg_used_dirs_count, -1);
230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi, 228 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp); 229 block_group, gdp);
233 spin_unlock(sb_bgl_lock(sbi, block_group)); 230 spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -588,7 +585,7 @@ got:
588 ino++; 585 ino++;
589 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 586 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
590 ino > EXT4_INODES_PER_GROUP(sb)) { 587 ino > EXT4_INODES_PER_GROUP(sb)) {
591 ext4_error(sb, __FUNCTION__, 588 ext4_error(sb, __func__,
592 "reserved inode or inode > inodes count - " 589 "reserved inode or inode > inodes count - "
593 "block_group = %lu, inode=%lu", group, 590 "block_group = %lu, inode=%lu", group,
594 ino + group * EXT4_INODES_PER_GROUP(sb)); 591 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
664 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); 661 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
665 } 662 }
666 663
667 gdp->bg_free_inodes_count = 664 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
668 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
669 if (S_ISDIR(mode)) { 665 if (S_ISDIR(mode)) {
670 gdp->bg_used_dirs_count = 666 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
671 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
672 } 667 }
673 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 668 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
674 spin_unlock(sb_bgl_lock(sbi, group)); 669 spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
744 if (err) 739 if (err)
745 goto fail_free_drop; 740 goto fail_free_drop;
746 741
747 err = ext4_mark_inode_dirty(handle, inode);
748 if (err) {
749 ext4_std_error(sb, err);
750 goto fail_free_drop;
751 }
752 if (test_opt(sb, EXTENTS)) { 742 if (test_opt(sb, EXTENTS)) {
753 /* set extent flag only for directory and file */ 743 /* set extent flag only for diretory, file and normal symlink*/
754 if (S_ISDIR(mode) || S_ISREG(mode)) { 744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
756 ext4_ext_tree_init(handle, inode); 746 ext4_ext_tree_init(handle, inode);
757 err = ext4_update_incompat_feature(handle, sb, 747 err = ext4_update_incompat_feature(handle, sb,
758 EXT4_FEATURE_INCOMPAT_EXTENTS); 748 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err) 749 if (err)
760 goto fail; 750 goto fail_free_drop;
761 } 751 }
762 } 752 }
763 753
754 err = ext4_mark_inode_dirty(handle, inode);
755 if (err) {
756 ext4_std_error(sb, err);
757 goto fail_free_drop;
758 }
759
764 ext4_debug("allocating inode %lu\n", inode->i_ino); 760 ext4_debug("allocating inode %lu\n", inode->i_ino);
765 goto really_out; 761 goto really_out;
766fail: 762fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
796 792
797 /* Error cases - e2fsck has already cleaned up for us */ 793 /* Error cases - e2fsck has already cleaned up for us */
798 if (ino > max_ino) { 794 if (ino > max_ino) {
799 ext4_warning(sb, __FUNCTION__, 795 ext4_warning(sb, __func__,
800 "bad orphan ino %lu! e2fsck was run?", ino); 796 "bad orphan ino %lu! e2fsck was run?", ino);
801 goto error; 797 goto error;
802 } 798 }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
805 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 801 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
806 bitmap_bh = read_inode_bitmap(sb, block_group); 802 bitmap_bh = read_inode_bitmap(sb, block_group);
807 if (!bitmap_bh) { 803 if (!bitmap_bh) {
808 ext4_warning(sb, __FUNCTION__, 804 ext4_warning(sb, __func__,
809 "inode bitmap error for orphan %lu", ino); 805 "inode bitmap error for orphan %lu", ino);
810 goto error; 806 goto error;
811 } 807 }
@@ -830,7 +826,7 @@ iget_failed:
830 err = PTR_ERR(inode); 826 err = PTR_ERR(inode);
831 inode = NULL; 827 inode = NULL;
832bad_orphan: 828bad_orphan:
833 ext4_warning(sb, __FUNCTION__, 829 ext4_warning(sb, __func__,
834 "bad orphan inode %lu! e2fsck was run?", ino); 830 "bad orphan inode %lu! e2fsck was run?", ino);
835 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 831 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
836 bit, (unsigned long long)bitmap_bh->b_blocknr, 832 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 28#include <linux/jbd2.h>
30#include <linux/highuid.h> 29#include <linux/highuid.h>
31#include <linux/pagemap.h> 30#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/uio.h> 36#include <linux/uio.h>
38#include <linux/bio.h> 37#include <linux/bio.h>
38#include "ext4_jbd2.h"
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
93 BUFFER_TRACE(bh, "call ext4_journal_revoke"); 93 BUFFER_TRACE(bh, "call ext4_journal_revoke");
94 err = ext4_journal_revoke(handle, blocknr, bh); 94 err = ext4_journal_revoke(handle, blocknr, bh);
95 if (err) 95 if (err)
96 ext4_abort(inode->i_sb, __FUNCTION__, 96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err); 97 "error %d when attempting revoke", err);
98 BUFFER_TRACE(bh, "exit"); 98 BUFFER_TRACE(bh, "exit");
99 return err; 99 return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
985 } else { 985 } else {
986 retval = ext4_get_blocks_handle(handle, inode, block, 986 retval = ext4_get_blocks_handle(handle, inode, block,
987 max_blocks, bh, create, extend_disksize); 987 max_blocks, bh, create, extend_disksize);
988
989 if (retval > 0 && buffer_new(bh)) {
990 /*
991 * We allocated new blocks which will result in
992 * i_data's format changing. Force the migrate
993 * to fail by clearing migrate flags
994 */
995 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
996 ~EXT4_EXT_MIGRATE;
997 }
988 } 998 }
989 up_write((&EXT4_I(inode)->i_data_sem)); 999 up_write((&EXT4_I(inode)->i_data_sem));
990 return retval; 1000 return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1230{ 1240{
1231 int err = jbd2_journal_dirty_data(handle, bh); 1241 int err = jbd2_journal_dirty_data(handle, bh);
1232 if (err) 1242 if (err)
1233 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1243 ext4_journal_abort_handle(__func__, __func__,
1234 bh, handle, err); 1244 bh, handle, err);
1235 return err; 1245 return err;
1236} 1246}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
1301 new_i_size = pos + copied; 1311 new_i_size = pos + copied;
1302 if (new_i_size > EXT4_I(inode)->i_disksize) 1312 if (new_i_size > EXT4_I(inode)->i_disksize)
1303 EXT4_I(inode)->i_disksize = new_i_size; 1313 EXT4_I(inode)->i_disksize = new_i_size;
1304 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1305 page, fsdata); 1315 page, fsdata);
1306 if (copied < 0) 1316 copied = ret2;
1307 ret = copied; 1317 if (ret2 < 0)
1318 ret = ret2;
1308 } 1319 }
1309 ret2 = ext4_journal_stop(handle); 1320 ret2 = ext4_journal_stop(handle);
1310 if (!ret) 1321 if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
1329 if (new_i_size > EXT4_I(inode)->i_disksize) 1340 if (new_i_size > EXT4_I(inode)->i_disksize)
1330 EXT4_I(inode)->i_disksize = new_i_size; 1341 EXT4_I(inode)->i_disksize = new_i_size;
1331 1342
1332 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1333 page, fsdata); 1344 page, fsdata);
1334 if (copied < 0) 1345 copied = ret2;
1335 ret = copied; 1346 if (ret2 < 0)
1347 ret = ret2;
1336 1348
1337 ret2 = ext4_journal_stop(handle); 1349 ret2 = ext4_journal_stop(handle);
1338 if (!ret) 1350 if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
2501static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, 2513static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2502 unsigned long ino, struct ext4_iloc *iloc) 2514 unsigned long ino, struct ext4_iloc *iloc)
2503{ 2515{
2504 unsigned long desc, group_desc;
2505 ext4_group_t block_group; 2516 ext4_group_t block_group;
2506 unsigned long offset; 2517 unsigned long offset;
2507 ext4_fsblk_t block; 2518 ext4_fsblk_t block;
2508 struct buffer_head *bh; 2519 struct ext4_group_desc *gdp;
2509 struct ext4_group_desc * gdp;
2510 2520
2511 if (!ext4_valid_inum(sb, ino)) { 2521 if (!ext4_valid_inum(sb, ino)) {
2512 /* 2522 /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2518 } 2528 }
2519 2529
2520 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 2530 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
2521 if (block_group >= EXT4_SB(sb)->s_groups_count) { 2531 gdp = ext4_get_group_desc(sb, block_group, NULL);
2522 ext4_error(sb,"ext4_get_inode_block","group >= groups count"); 2532 if (!gdp)
2523 return 0; 2533 return 0;
2524 }
2525 smp_rmb();
2526 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2527 desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2528 bh = EXT4_SB(sb)->s_group_desc[group_desc];
2529 if (!bh) {
2530 ext4_error (sb, "ext4_get_inode_block",
2531 "Descriptor not loaded");
2532 return 0;
2533 }
2534 2534
2535 gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
2536 desc * EXT4_DESC_SIZE(sb));
2537 /* 2535 /*
2538 * Figure out the offset within the block group inode table 2536 * Figure out the offset within the block group inode table
2539 */ 2537 */
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
2976 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 2974 if (ext4_inode_blocks_set(handle, raw_inode, ei))
2977 goto out_brelse; 2975 goto out_brelse;
2978 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2976 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2979 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2977 /* clear the migrate flag in the raw_inode */
2978 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
2980 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2979 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2981 cpu_to_le32(EXT4_OS_HURD)) 2980 cpu_to_le32(EXT4_OS_HURD))
2982 raw_inode->i_file_acl_high = 2981 raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3373 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3375 if (mnt_count != 3374 if (mnt_count !=
3376 le16_to_cpu(sbi->s_es->s_mnt_count)) { 3375 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3377 ext4_warning(inode->i_sb, __FUNCTION__, 3376 ext4_warning(inode->i_sb, __func__,
3378 "Unable to expand inode %lu. Delete" 3377 "Unable to expand inode %lu. Delete"
3379 " some EAs or run e2fsck.", 3378 " some EAs or run e2fsck.",
3380 inode->i_ino); 3379 inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
3415 current_handle->h_transaction != handle->h_transaction) { 3414 current_handle->h_transaction != handle->h_transaction) {
3416 /* This task has a transaction open against a different fs */ 3415 /* This task has a transaction open against a different fs */
3417 printk(KERN_EMERG "%s: transactions do not match!\n", 3416 printk(KERN_EMERG "%s: transactions do not match!\n",
3418 __FUNCTION__); 3417 __func__);
3419 } else { 3418 } else {
3420 jbd_debug(5, "marking dirty. outer handle=%p\n", 3419 jbd_debug(5, "marking dirty. outer handle=%p\n",
3421 current_handle); 3420 current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext4_fs.h>
14#include <linux/ext4_jbd2.h>
15#include <linux/time.h> 13#include <linux/time.h>
16#include <linux/compat.h> 14#include <linux/compat.h>
17#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h"
19#include "ext4.h"
20 20
21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22 unsigned long arg)
23{ 22{
23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
277#ifdef CONFIG_COMPAT 277#ifdef CONFIG_COMPAT
278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
279{ 279{
280 struct inode *inode = file->f_path.dentry->d_inode;
281 int ret;
282
283 /* These are just misnamed, they actually get/put from/to user an int */ 280 /* These are just misnamed, they actually get/put from/to user an int */
284 switch (cmd) { 281 switch (cmd) {
285 case EXT4_IOC32_GETFLAGS: 282 case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
319 default: 316 default:
320 return -ENOIOCTLCMD; 317 return -ENOIOCTLCMD;
321 } 318 }
322 lock_kernel(); 319 return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
323 ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
324 unlock_kernel();
325 return ret;
326} 320}
327#endif 321#endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ef97f19c2f9d..fbec2ef93797 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2449,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2449 int i; 2155 int i;
2450 2156
2451 if (sbi->s_mb_proc != NULL) { 2157 if (sbi->s_mb_proc != NULL) {
2452 struct proc_dir_entry *p; 2158 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
2453 p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); 2159 &ext4_mb_seq_history_fops, sb);
2454 if (p) { 2160 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
2455 p->proc_fops = &ext4_mb_seq_history_fops; 2161 &ext4_mb_seq_groups_fops, sb);
2456 p->data = sb;
2457 }
2458 p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
2459 if (p) {
2460 p->proc_fops = &ext4_mb_seq_groups_fops;
2461 p->data = sb;
2462 }
2463 } 2162 }
2464 2163
2465 sbi->s_mb_history_max = 1000; 2164 sbi->s_mb_history_max = 1000;
@@ -2472,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2472 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2473} 2172}
2474 2173
2475static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2476{ 2176{
2477 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2478 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2572,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2572 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2573 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2574 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2575 i--;
2576 goto err_freebuddy; 2275 goto err_freebuddy;
2577 } 2276 }
2578 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2579 if (desc == NULL) { 2278 if (desc == NULL) {
2580 printk(KERN_ERR 2279 printk(KERN_ERR
2581 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2582 goto err_freebuddy; 2282 goto err_freebuddy;
2583 } 2283 }
2584 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2618,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2618 return 0; 2318 return 0;
2619 2319
2620err_freebuddy: 2320err_freebuddy:
2621 while (i >= 0) { 2321 while (i-- > 0)
2622 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2623 i--;
2624 }
2625 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2626err_freemeta: 2324err_freemeta:
2627 while (--i >= 0) 2325 while (i-- > 0)
2628 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2629 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2630err_freesgi: 2328err_freesgi:
@@ -2808,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2808 return 0; 2506 return 0;
2809} 2507}
2810 2508
2811static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2812{ 2511{
2813 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2814 int err; 2513 int err;
@@ -2867,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
2867 mb_debug("freed %u blocks in %u structures\n", count, count2); 2566 mb_debug("freed %u blocks in %u structures\n", count, count2);
2868} 2567}
2869 2568
2870#define EXT4_ROOT "ext4"
2871#define EXT4_MB_STATS_NAME "stats" 2569#define EXT4_MB_STATS_NAME "stats"
2872#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" 2570#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2873#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" 2571#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
@@ -3007,9 +2705,9 @@ int __init init_ext4_mballoc(void)
3007 return -ENOMEM; 2705 return -ENOMEM;
3008 } 2706 }
3009#ifdef CONFIG_PROC_FS 2707#ifdef CONFIG_PROC_FS
3010 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2708 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
3011 if (proc_root_ext4 == NULL) 2709 if (proc_root_ext4 == NULL)
3012 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2710 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
3013#endif 2711#endif
3014 return 0; 2712 return 0;
3015} 2713}
@@ -3020,7 +2718,7 @@ void exit_ext4_mballoc(void)
3020 kmem_cache_destroy(ext4_pspace_cachep); 2718 kmem_cache_destroy(ext4_pspace_cachep);
3021 kmem_cache_destroy(ext4_ac_cachep); 2719 kmem_cache_destroy(ext4_ac_cachep);
3022#ifdef CONFIG_PROC_FS 2720#ifdef CONFIG_PROC_FS
3023 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2721 remove_proc_entry("fs/ext4", NULL);
3024#endif 2722#endif
3025} 2723}
3026 2724
@@ -3029,7 +2727,8 @@ void exit_ext4_mballoc(void)
3029 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2727 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3030 * Returns 0 if success or error code 2728 * Returns 0 if success or error code
3031 */ 2729 */
3032static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2730static noinline_for_stack int
2731ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3033 handle_t *handle) 2732 handle_t *handle)
3034{ 2733{
3035 struct buffer_head *bitmap_bh = NULL; 2734 struct buffer_head *bitmap_bh = NULL;
@@ -3078,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3078 in_range(block, ext4_inode_table(sb, gdp), 2777 in_range(block, ext4_inode_table(sb, gdp),
3079 EXT4_SB(sb)->s_itb_per_group)) { 2778 EXT4_SB(sb)->s_itb_per_group)) {
3080 2779
3081 ext4_error(sb, __FUNCTION__, 2780 ext4_error(sb, __func__,
3082 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3083 block); 2782 block);
3084 } 2783 }
@@ -3102,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3102 ac->ac_b_ex.fe_group, 2801 ac->ac_b_ex.fe_group,
3103 gdp)); 2802 gdp));
3104 } 2803 }
3105 gdp->bg_free_blocks_count = 2804 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3106 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3107 - ac->ac_b_ex.fe_len);
3108 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2805 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3109 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2806 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3110 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2807 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3138,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2835 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3139 else 2836 else
3140 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2837 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3141 mb_debug("#%u: goal %lu blocks for locality group\n", 2838 mb_debug("#%u: goal %u blocks for locality group\n",
3142 current->pid, ac->ac_g_ex.fe_len); 2839 current->pid, ac->ac_g_ex.fe_len);
3143} 2840}
3144 2841
@@ -3146,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3146 * Normalization means making request better in terms of 2843 * Normalization means making request better in terms of
3147 * size and alignment 2844 * size and alignment
3148 */ 2845 */
3149static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2846static noinline_for_stack void
2847ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3150 struct ext4_allocation_request *ar) 2848 struct ext4_allocation_request *ar)
3151{ 2849{
3152 int bsbits, max; 2850 int bsbits, max;
3153 ext4_lblk_t end; 2851 ext4_lblk_t end;
3154 struct list_head *cur;
3155 loff_t size, orig_size, start_off; 2852 loff_t size, orig_size, start_off;
3156 ext4_lblk_t start, orig_start; 2853 ext4_lblk_t start, orig_start;
3157 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2854 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2855 struct ext4_prealloc_space *pa;
3158 2856
3159 /* do normalize only data requests, metadata requests 2857 /* do normalize only data requests, metadata requests
3160 do not need preallocation */ 2858 do not need preallocation */
@@ -3240,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3240 2938
3241 /* check we don't cross already preallocated blocks */ 2939 /* check we don't cross already preallocated blocks */
3242 rcu_read_lock(); 2940 rcu_read_lock();
3243 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2941 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3244 struct ext4_prealloc_space *pa;
3245 unsigned long pa_end; 2942 unsigned long pa_end;
3246 2943
3247 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3248
3249 if (pa->pa_deleted) 2944 if (pa->pa_deleted)
3250 continue; 2945 continue;
3251 spin_lock(&pa->pa_lock); 2946 spin_lock(&pa->pa_lock);
@@ -3287,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3287 2982
3288 /* XXX: extra loop to check we really don't overlap preallocations */ 2983 /* XXX: extra loop to check we really don't overlap preallocations */
3289 rcu_read_lock(); 2984 rcu_read_lock();
3290 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2985 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3291 struct ext4_prealloc_space *pa;
3292 unsigned long pa_end; 2986 unsigned long pa_end;
3293 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3294 spin_lock(&pa->pa_lock); 2987 spin_lock(&pa->pa_lock);
3295 if (pa->pa_deleted == 0) { 2988 if (pa->pa_deleted == 0) {
3296 pa_end = pa->pa_lstart + pa->pa_len; 2989 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3382,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3382 BUG_ON(pa->pa_free < len); 3075 BUG_ON(pa->pa_free < len);
3383 pa->pa_free -= len; 3076 pa->pa_free -= len;
3384 3077
3385 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3078 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3386} 3079}
3387 3080
3388/* 3081/*
@@ -3412,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3412/* 3105/*
3413 * search goal blocks in preallocated space 3106 * search goal blocks in preallocated space
3414 */ 3107 */
3415static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3108static noinline_for_stack int
3109ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3416{ 3110{
3417 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3111 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3418 struct ext4_locality_group *lg; 3112 struct ext4_locality_group *lg;
3419 struct ext4_prealloc_space *pa; 3113 struct ext4_prealloc_space *pa;
3420 struct list_head *cur;
3421 3114
3422 /* only data can be preallocated */ 3115 /* only data can be preallocated */
3423 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3116 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3425,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3425 3118
3426 /* first, try per-file preallocation */ 3119 /* first, try per-file preallocation */
3427 rcu_read_lock(); 3120 rcu_read_lock();
3428 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3121 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3429 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3430 3122
3431 /* all fields in this condition don't change, 3123 /* all fields in this condition don't change,
3432 * so we can skip locking for them */ 3124 * so we can skip locking for them */
@@ -3458,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3458 return 0; 3150 return 0;
3459 3151
3460 rcu_read_lock(); 3152 rcu_read_lock();
3461 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3153 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3462 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3463 spin_lock(&pa->pa_lock); 3154 spin_lock(&pa->pa_lock);
3464 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3155 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3465 atomic_inc(&pa->pa_count); 3156 atomic_inc(&pa->pa_count);
@@ -3579,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3579/* 3270/*
3580 * creates new preallocated space for given inode 3271 * creates new preallocated space for given inode
3581 */ 3272 */
3582static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3273static noinline_for_stack int
3274ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3583{ 3275{
3584 struct super_block *sb = ac->ac_sb; 3276 struct super_block *sb = ac->ac_sb;
3585 struct ext4_prealloc_space *pa; 3277 struct ext4_prealloc_space *pa;
@@ -3666,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3666/* 3358/*
3667 * creates new preallocated space for locality group inodes belongs to 3359 * creates new preallocated space for locality group inodes belongs to
3668 */ 3360 */
3669static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3361static noinline_for_stack int
3362ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3670{ 3363{
3671 struct super_block *sb = ac->ac_sb; 3364 struct super_block *sb = ac->ac_sb;
3672 struct ext4_locality_group *lg; 3365 struct ext4_locality_group *lg;
@@ -3739,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3739 * the caller MUST hold group/inode locks. 3432 * the caller MUST hold group/inode locks.
3740 * TODO: optimize the case when there are no in-core structures yet 3433 * TODO: optimize the case when there are no in-core structures yet
3741 */ 3434 */
3742static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3435static noinline_for_stack int
3743 struct buffer_head *bitmap_bh, 3436ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3744 struct ext4_prealloc_space *pa) 3437 struct ext4_prealloc_space *pa,
3438 struct ext4_allocation_context *ac)
3745{ 3439{
3746 struct ext4_allocation_context *ac;
3747 struct super_block *sb = e4b->bd_sb; 3440 struct super_block *sb = e4b->bd_sb;
3748 struct ext4_sb_info *sbi = EXT4_SB(sb); 3441 struct ext4_sb_info *sbi = EXT4_SB(sb);
3749 unsigned long end; 3442 unsigned long end;
@@ -3759,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3759 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3452 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3760 end = bit + pa->pa_len; 3453 end = bit + pa->pa_len;
3761 3454
3762 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3763
3764 if (ac) { 3455 if (ac) {
3765 ac->ac_sb = sb; 3456 ac->ac_sb = sb;
3766 ac->ac_inode = pa->pa_inode; 3457 ac->ac_inode = pa->pa_inode;
@@ -3797,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 pa, (unsigned long) pa->pa_lstart, 3488 pa, (unsigned long) pa->pa_lstart,
3798 (unsigned long) pa->pa_pstart, 3489 (unsigned long) pa->pa_pstart,
3799 (unsigned long) pa->pa_len); 3490 (unsigned long) pa->pa_len);
3800 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3491 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3801 free, pa->pa_free); 3492 free, pa->pa_free);
3802 /* 3493 /*
3803 * pa is already deleted so we use the value obtained 3494 * pa is already deleted so we use the value obtained
@@ -3805,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3805 */ 3496 */
3806 } 3497 }
3807 atomic_add(free, &sbi->s_mb_discarded); 3498 atomic_add(free, &sbi->s_mb_discarded);
3808 if (ac)
3809 kmem_cache_free(ext4_ac_cachep, ac);
3810 3499
3811 return err; 3500 return err;
3812} 3501}
3813 3502
3814static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3503static noinline_for_stack int
3815 struct ext4_prealloc_space *pa) 3504ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3505 struct ext4_prealloc_space *pa,
3506 struct ext4_allocation_context *ac)
3816{ 3507{
3817 struct ext4_allocation_context *ac;
3818 struct super_block *sb = e4b->bd_sb; 3508 struct super_block *sb = e4b->bd_sb;
3819 ext4_group_t group; 3509 ext4_group_t group;
3820 ext4_grpblk_t bit; 3510 ext4_grpblk_t bit;
3821 3511
3822 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3823
3824 if (ac) 3512 if (ac)
3825 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3513 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3826 3514
@@ -3838,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3838 ac->ac_b_ex.fe_len = pa->pa_len; 3526 ac->ac_b_ex.fe_len = pa->pa_len;
3839 ac->ac_b_ex.fe_logical = 0; 3527 ac->ac_b_ex.fe_logical = 0;
3840 ext4_mb_store_history(ac); 3528 ext4_mb_store_history(ac);
3841 kmem_cache_free(ext4_ac_cachep, ac);
3842 } 3529 }
3843 3530
3844 return 0; 3531 return 0;
@@ -3853,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3853 * - how many do we discard 3540 * - how many do we discard
3854 * 1) how many requested 3541 * 1) how many requested
3855 */ 3542 */
3856static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3543static noinline_for_stack int
3544ext4_mb_discard_group_preallocations(struct super_block *sb,
3857 ext4_group_t group, int needed) 3545 ext4_group_t group, int needed)
3858{ 3546{
3859 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3547 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3860 struct buffer_head *bitmap_bh = NULL; 3548 struct buffer_head *bitmap_bh = NULL;
3861 struct ext4_prealloc_space *pa, *tmp; 3549 struct ext4_prealloc_space *pa, *tmp;
3550 struct ext4_allocation_context *ac;
3862 struct list_head list; 3551 struct list_head list;
3863 struct ext4_buddy e4b; 3552 struct ext4_buddy e4b;
3864 int err; 3553 int err;
@@ -3886,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3886 grp = ext4_get_group_info(sb, group); 3575 grp = ext4_get_group_info(sb, group);
3887 INIT_LIST_HEAD(&list); 3576 INIT_LIST_HEAD(&list);
3888 3577
3578 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3889repeat: 3579repeat:
3890 ext4_lock_group(sb, group); 3580 ext4_lock_group(sb, group);
3891 list_for_each_entry_safe(pa, tmp, 3581 list_for_each_entry_safe(pa, tmp,
@@ -3940,9 +3630,9 @@ repeat:
3940 spin_unlock(pa->pa_obj_lock); 3630 spin_unlock(pa->pa_obj_lock);
3941 3631
3942 if (pa->pa_linear) 3632 if (pa->pa_linear)
3943 ext4_mb_release_group_pa(&e4b, pa); 3633 ext4_mb_release_group_pa(&e4b, pa, ac);
3944 else 3634 else
3945 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3635 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3946 3636
3947 list_del(&pa->u.pa_tmp_list); 3637 list_del(&pa->u.pa_tmp_list);
3948 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3638 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3950,6 +3640,8 @@ repeat:
3950 3640
3951out: 3641out:
3952 ext4_unlock_group(sb, group); 3642 ext4_unlock_group(sb, group);
3643 if (ac)
3644 kmem_cache_free(ext4_ac_cachep, ac);
3953 ext4_mb_release_desc(&e4b); 3645 ext4_mb_release_desc(&e4b);
3954 put_bh(bitmap_bh); 3646 put_bh(bitmap_bh);
3955 return free; 3647 return free;
@@ -3970,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3970 struct super_block *sb = inode->i_sb; 3662 struct super_block *sb = inode->i_sb;
3971 struct buffer_head *bitmap_bh = NULL; 3663 struct buffer_head *bitmap_bh = NULL;
3972 struct ext4_prealloc_space *pa, *tmp; 3664 struct ext4_prealloc_space *pa, *tmp;
3665 struct ext4_allocation_context *ac;
3973 ext4_group_t group = 0; 3666 ext4_group_t group = 0;
3974 struct list_head list; 3667 struct list_head list;
3975 struct ext4_buddy e4b; 3668 struct ext4_buddy e4b;
@@ -3984,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3984 3677
3985 INIT_LIST_HEAD(&list); 3678 INIT_LIST_HEAD(&list);
3986 3679
3680 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3987repeat: 3681repeat:
3988 /* first, collect all pa's in the inode */ 3682 /* first, collect all pa's in the inode */
3989 spin_lock(&ei->i_prealloc_lock); 3683 spin_lock(&ei->i_prealloc_lock);
@@ -4048,7 +3742,7 @@ repeat:
4048 3742
4049 ext4_lock_group(sb, group); 3743 ext4_lock_group(sb, group);
4050 list_del(&pa->pa_group_list); 3744 list_del(&pa->pa_group_list);
4051 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3745 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4052 ext4_unlock_group(sb, group); 3746 ext4_unlock_group(sb, group);
4053 3747
4054 ext4_mb_release_desc(&e4b); 3748 ext4_mb_release_desc(&e4b);
@@ -4057,6 +3751,8 @@ repeat:
4057 list_del(&pa->u.pa_tmp_list); 3751 list_del(&pa->u.pa_tmp_list);
4058 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3752 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4059 } 3753 }
3754 if (ac)
3755 kmem_cache_free(ext4_ac_cachep, ac);
4060} 3756}
4061 3757
4062/* 3758/*
@@ -4116,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4116 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3812 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4117 start, pa->pa_len); 3813 start, pa->pa_len);
4118 } 3814 }
4119 ext4_lock_group(sb, i); 3815 ext4_unlock_group(sb, i);
4120 3816
4121 if (grp->bb_free == 0) 3817 if (grp->bb_free == 0)
4122 continue; 3818 continue;
@@ -4175,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4175 mutex_lock(&ac->ac_lg->lg_mutex); 3871 mutex_lock(&ac->ac_lg->lg_mutex);
4176} 3872}
4177 3873
4178static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3874static noinline_for_stack int
3875ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4179 struct ext4_allocation_request *ar) 3876 struct ext4_allocation_request *ar)
4180{ 3877{
4181 struct super_block *sb = ar->inode->i_sb; 3878 struct super_block *sb = ar->inode->i_sb;
@@ -4406,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4406 ext4_mb_free_committed_blocks(sb); 4103 ext4_mb_free_committed_blocks(sb);
4407} 4104}
4408 4105
4409static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4106static noinline_for_stack int
4107ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4410 ext4_group_t group, ext4_grpblk_t block, int count) 4108 ext4_group_t group, ext4_grpblk_t block, int count)
4411{ 4109{
4412 struct ext4_group_info *db = e4b->bd_info; 4110 struct ext4_group_info *db = e4b->bd_info;
@@ -4497,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4497 if (block < le32_to_cpu(es->s_first_data_block) || 4195 if (block < le32_to_cpu(es->s_first_data_block) ||
4498 block + count < block || 4196 block + count < block ||
4499 block + count > ext4_blocks_count(es)) { 4197 block + count > ext4_blocks_count(es)) {
4500 ext4_error(sb, __FUNCTION__, 4198 ext4_error(sb, __func__,
4501 "Freeing blocks not in datazone - " 4199 "Freeing blocks not in datazone - "
4502 "block = %lu, count = %lu", block, count); 4200 "block = %lu, count = %lu", block, count);
4503 goto error_return; 4201 goto error_return;
@@ -4538,7 +4236,7 @@ do_more:
4538 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4236 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4539 EXT4_SB(sb)->s_itb_per_group)) { 4237 EXT4_SB(sb)->s_itb_per_group)) {
4540 4238
4541 ext4_error(sb, __FUNCTION__, 4239 ext4_error(sb, __func__,
4542 "Freeing blocks in system zone - " 4240 "Freeing blocks in system zone - "
4543 "Block = %lu, count = %lu", block, count); 4241 "Block = %lu, count = %lu", block, count);
4544 } 4242 }
@@ -4596,8 +4294,7 @@ do_more:
4596 } 4294 }
4597 4295
4598 spin_lock(sb_bgl_lock(sbi, block_group)); 4296 spin_lock(sb_bgl_lock(sbi, block_group));
4599 gdp->bg_free_blocks_count = 4297 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4600 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4601 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4298 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4602 spin_unlock(sb_bgl_lock(sbi, block_group)); 4299 spin_unlock(sb_bgl_lock(sbi, block_group));
4603 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4300 percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
1/*
2 * fs/ext4/mballoc.h
3 *
4 * Written by: Alex Tomas <alex@clusterfs.com>
5 *
6 */
7#ifndef _EXT4_MBALLOC_H
8#define _EXT4_MBALLOC_H
9
10#include <linux/time.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/swap.h>
17#include <linux/proc_fs.h>
18#include <linux/pagemap.h>
19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include "ext4_jbd2.h"
22#include "ext4.h"
23#include "group.h"
24
25/*
26 * with AGGRESSIVE_CHECK allocator runs consistency checks over
27 * structures. these checks slow things down a lot
28 */
29#define AGGRESSIVE_CHECK__
30
31/*
32 * with DOUBLE_CHECK defined mballoc creates persistent in-core
33 * bitmaps, maintains and uses them to check for double allocations
34 */
35#define DOUBLE_CHECK__
36
37/*
38 */
39#define MB_DEBUG__
40#ifdef MB_DEBUG
41#define mb_debug(fmt, a...) printk(fmt, ##a)
42#else
43#define mb_debug(fmt, a...)
44#endif
45
46/*
47 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
48 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
49 */
50#define EXT4_MB_HISTORY
51#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
52#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
53#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
54#define EXT4_MB_HISTORY_FREE 8 /* free */
55
56#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
57 EXT4_MB_HISTORY_PREALLOC)
58
59/*
60 * How long mballoc can look for a best extent (in found extents)
61 */
62#define MB_DEFAULT_MAX_TO_SCAN 200
63
64/*
65 * How long mballoc must look for a best extent
66 */
67#define MB_DEFAULT_MIN_TO_SCAN 10
68
69/*
70 * How many groups mballoc will scan looking for the best chunk
71 */
72#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
73
74/*
75 * with 'ext4_mb_stats' allocator will collect stats that will be
76 * shown at umount. The collecting costs though!
77 */
78#define MB_DEFAULT_STATS 1
79
80/*
81 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
82 * by the stream allocator, which purpose is to pack requests
83 * as close each to other as possible to produce smooth I/O traffic
84 * We use locality group prealloc space for stream request.
85 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
86 */
87#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
88
89/*
90 * for which requests use 2^N search using buddies
91 */
92#define MB_DEFAULT_ORDER2_REQS 2
93
94/*
95 * default group prealloc size 512 blocks
96 */
97#define MB_DEFAULT_GROUP_PREALLOC 512
98
99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep;
101
102#ifdef EXT4_BB_MAX_BLOCKS
103#undef EXT4_BB_MAX_BLOCKS
104#endif
105#define EXT4_BB_MAX_BLOCKS 30
106
107struct ext4_free_metadata {
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list;
112};
113
114struct ext4_group_info {
115 unsigned long bb_state;
116 unsigned long bb_tid;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free;
119 unsigned short bb_free;
120 unsigned short bb_fragments;
121 struct list_head bb_prealloc_list;
122#ifdef DOUBLE_CHECK
123 void *bb_bitmap;
124#endif
125 unsigned short bb_counters[];
126};
127
128#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
129#define EXT4_GROUP_INFO_LOCKED_BIT 1
130
131#define EXT4_MB_GRP_NEED_INIT(grp) \
132 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
133
134
135struct ext4_prealloc_space {
136 struct list_head pa_inode_list;
137 struct list_head pa_group_list;
138 union {
139 struct list_head pa_tmp_list;
140 struct rcu_head pa_rcu;
141 } u;
142 spinlock_t pa_lock;
143 atomic_t pa_count;
144 unsigned pa_deleted;
145 ext4_fsblk_t pa_pstart; /* phys. block */
146 ext4_lblk_t pa_lstart; /* log. block */
147 unsigned short pa_len; /* len of preallocated chunk */
148 unsigned short pa_free; /* how many blocks are free */
149 unsigned short pa_linear; /* consumed in one direction
150 * strictly, for grp prealloc */
151 spinlock_t *pa_obj_lock;
152 struct inode *pa_inode; /* hack, for history only */
153};
154
155
156struct ext4_free_extent {
157 ext4_lblk_t fe_logical;
158 ext4_grpblk_t fe_start;
159 ext4_group_t fe_group;
160 int fe_len;
161};
162
163/*
164 * Locality group:
165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well
167 */
168struct ext4_locality_group {
169 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */
172 spinlock_t lg_prealloc_lock;
173};
174
175struct ext4_allocation_context {
176 struct inode *ac_inode;
177 struct super_block *ac_sb;
178
179 /* original request */
180 struct ext4_free_extent ac_o_ex;
181
182 /* goal request (after normalization) */
183 struct ext4_free_extent ac_g_ex;
184
185 /* the best found extent */
186 struct ext4_free_extent ac_b_ex;
187
188 /* copy of the bext found extent taken before preallocation efforts */
189 struct ext4_free_extent ac_f_ex;
190
191 /* number of iterations done. we have to track to limit searching */
192 unsigned long ac_ex_scanned;
193 __u16 ac_groups_scanned;
194 __u16 ac_found;
195 __u16 ac_tail;
196 __u16 ac_buddy;
197 __u16 ac_flags; /* allocation hints */
198 __u8 ac_status;
199 __u8 ac_criteria;
200 __u8 ac_repeats;
201 __u8 ac_2order; /* if request is to allocate 2^N blocks and
202 * N > 0, the field stores N, otherwise 0 */
203 __u8 ac_op; /* operation, for history only */
204 struct page *ac_bitmap_page;
205 struct page *ac_buddy_page;
206 struct ext4_prealloc_space *ac_pa;
207 struct ext4_locality_group *ac_lg;
208};
209
210#define AC_STATUS_CONTINUE 1
211#define AC_STATUS_FOUND 2
212#define AC_STATUS_BREAK 3
213
214struct ext4_mb_history {
215 struct ext4_free_extent orig; /* orig allocation */
216 struct ext4_free_extent goal; /* goal allocation */
217 struct ext4_free_extent result; /* result allocation */
218 unsigned pid;
219 unsigned ino;
220 __u16 found; /* how many extents have been found */
221 __u16 groups; /* how many groups have been scanned */
222 __u16 tail; /* what tail broke some buddy */
223 __u16 buddy; /* buddy the tail ^^^ broke */
224 __u16 flags;
225 __u8 cr:3; /* which phase the result extent was found at */
226 __u8 op:4;
227 __u8 merged:1;
228};
229
230struct ext4_buddy {
231 struct page *bd_buddy_page;
232 void *bd_buddy;
233 struct page *bd_bitmap_page;
234 void *bd_bitmap;
235 struct ext4_group_info *bd_info;
236 struct super_block *bd_sb;
237 __u16 bd_blkbits;
238 ext4_group_t bd_group;
239};
240#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
241#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
242
243#ifndef EXT4_MB_HISTORY
244static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
245{
246 return;
247}
248#else
249static void ext4_mb_store_history(struct ext4_allocation_context *ac);
250#endif
251
252#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
253
254static struct proc_dir_entry *proc_root_ext4;
255struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
256
257static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
258 ext4_group_t group);
259static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
260static void ext4_mb_free_committed_blocks(struct super_block *);
261static void ext4_mb_return_to_preallocation(struct inode *inode,
262 struct ext4_buddy *e4b, sector_t block,
263 int count);
264static void ext4_mb_put_pa(struct ext4_allocation_context *,
265 struct super_block *, struct ext4_prealloc_space *pa);
266static int ext4_mb_init_per_dev_proc(struct super_block *sb);
267static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
268
269
270static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
271{
272 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
273
274 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
275}
276
277static inline void ext4_unlock_group(struct super_block *sb,
278 ext4_group_t group)
279{
280 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
281
282 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
283}
284
285static inline int ext4_is_group_locked(struct super_block *sb,
286 ext4_group_t group)
287{
288 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
289
290 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
291 &(grinfo->bb_state));
292}
293
294static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
295 struct ext4_free_extent *fex)
296{
297 ext4_fsblk_t block;
298
299 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
300 + fex->fe_start
301 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
302 return block;
303}
304#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/ext4_jbd2.h> 16#include "ext4_jbd2.h"
17#include <linux/ext4_fs_extents.h> 17#include "ext4_extents.h"
18 18
19/* 19/*
20 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
327} 327}
328 328
329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
330 struct inode *tmp_inode) 330 struct inode *tmp_inode)
331{ 331{
332 int retval; 332 int retval;
333 __le32 i_data[3]; 333 __le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
339 * i_data field of the original inode 339 * i_data field of the original inode
340 */ 340 */
341 retval = ext4_journal_extend(handle, 1); 341 retval = ext4_journal_extend(handle, 1);
342 if (retval != 0) { 342 if (retval) {
343 retval = ext4_journal_restart(handle, 1); 343 retval = ext4_journal_restart(handle, 1);
344 if (retval) 344 if (retval)
345 goto err_out; 345 goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
351 351
352 down_write(&EXT4_I(inode)->i_data_sem); 352 down_write(&EXT4_I(inode)->i_data_sem);
353 /* 353 /*
354 * if EXT4_EXT_MIGRATE is cleared a block allocation
355 * happened after we started the migrate. We need to
356 * fail the migrate
357 */
358 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
359 retval = -EAGAIN;
360 up_write(&EXT4_I(inode)->i_data_sem);
361 goto err_out;
362 } else
363 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
364 ~EXT4_EXT_MIGRATE;
365 /*
354 * We have the extent map build with the tmp inode. 366 * We have the extent map build with the tmp inode.
355 * Now copy the i_data across 367 * Now copy the i_data across
356 */ 368 */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
508 * switch the inode format to prevent read. 520 * switch the inode format to prevent read.
509 */ 521 */
510 mutex_lock(&(inode->i_mutex)); 522 mutex_lock(&(inode->i_mutex));
523 /*
524 * Even though we take i_mutex we can still cause block allocation
525 * via mmap write to holes. If we have allocated new blocks we fail
526 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
527 * The flag is updated with i_data_sem held to prevent racing with
528 * block allocation.
529 */
530 down_read((&EXT4_I(inode)->i_data_sem));
531 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
532 up_read((&EXT4_I(inode)->i_data_sem));
533
511 handle = ext4_journal_start(inode, 1); 534 handle = ext4_journal_start(inode, 1);
512 535
513 ei = EXT4_I(inode); 536 ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
559 * tmp_inode 582 * tmp_inode
560 */ 583 */
561 free_ext_block(handle, tmp_inode); 584 free_ext_block(handle, tmp_inode);
562 else 585 else {
563 retval = ext4_ext_swap_inode_data(handle, inode, 586 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
564 tmp_inode); 587 if (retval)
588 /*
589 * if we fail to swap inode data free the extent
590 * details of the tmp inode
591 */
592 free_ext_block(handle, tmp_inode);
593 }
565 594
566 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 595 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
567 if (ext4_journal_extend(handle, 1) != 0) 596 if (ext4_journal_extend(handle, 1) != 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/ext4_fs.h>
32#include <linux/ext4_jbd2.h>
33#include <linux/fcntl.h> 31#include <linux/fcntl.h>
34#include <linux/stat.h> 32#include <linux/stat.h>
35#include <linux/string.h> 33#include <linux/string.h>
36#include <linux/quotaops.h> 34#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
38#include <linux/bio.h> 36#include <linux/bio.h>
37#include "ext4.h"
38#include "ext4_jbd2.h"
39 39
40#include "namei.h" 40#include "namei.h"
41#include "xattr.h" 41#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext4_bread(handle, inode, *block, 1, err))) { 60 bh = ext4_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT4_I(inode)->i_disksize = inode->i_size; 63 EXT4_I(inode)->i_disksize = inode->i_size;
63 ext4_journal_get_write_access(handle,bh); 64 *err = ext4_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
348 if (root->info.hash_version != DX_HASH_TEA && 353 if (root->info.hash_version != DX_HASH_TEA &&
349 root->info.hash_version != DX_HASH_HALF_MD4 && 354 root->info.hash_version != DX_HASH_HALF_MD4 &&
350 root->info.hash_version != DX_HASH_LEGACY) { 355 root->info.hash_version != DX_HASH_LEGACY) {
351 ext4_warning(dir->i_sb, __FUNCTION__, 356 ext4_warning(dir->i_sb, __func__,
352 "Unrecognised inode hash code %d", 357 "Unrecognised inode hash code %d",
353 root->info.hash_version); 358 root->info.hash_version);
354 brelse(bh); 359 brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
362 hash = hinfo->hash; 367 hash = hinfo->hash;
363 368
364 if (root->info.unused_flags & 1) { 369 if (root->info.unused_flags & 1) {
365 ext4_warning(dir->i_sb, __FUNCTION__, 370 ext4_warning(dir->i_sb, __func__,
366 "Unimplemented inode hash flags: %#06x", 371 "Unimplemented inode hash flags: %#06x",
367 root->info.unused_flags); 372 root->info.unused_flags);
368 brelse(bh); 373 brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
371 } 376 }
372 377
373 if ((indirect = root->info.indirect_levels) > 1) { 378 if ((indirect = root->info.indirect_levels) > 1) {
374 ext4_warning(dir->i_sb, __FUNCTION__, 379 ext4_warning(dir->i_sb, __func__,
375 "Unimplemented inode hash depth: %#06x", 380 "Unimplemented inode hash depth: %#06x",
376 root->info.indirect_levels); 381 root->info.indirect_levels);
377 brelse(bh); 382 brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
384 389
385 if (dx_get_limit(entries) != dx_root_limit(dir, 390 if (dx_get_limit(entries) != dx_root_limit(dir,
386 root->info.info_length)) { 391 root->info.info_length)) {
387 ext4_warning(dir->i_sb, __FUNCTION__, 392 ext4_warning(dir->i_sb, __func__,
388 "dx entry: limit != root limit"); 393 "dx entry: limit != root limit");
389 brelse(bh); 394 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 395 *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
396 { 401 {
397 count = dx_get_count(entries); 402 count = dx_get_count(entries);
398 if (!count || count > dx_get_limit(entries)) { 403 if (!count || count > dx_get_limit(entries)) {
399 ext4_warning(dir->i_sb, __FUNCTION__, 404 ext4_warning(dir->i_sb, __func__,
400 "dx entry: no count or count > limit"); 405 "dx entry: no count or count > limit");
401 brelse(bh); 406 brelse(bh);
402 *err = ERR_BAD_DX_DIR; 407 *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
441 goto fail2; 446 goto fail2;
442 at = entries = ((struct dx_node *) bh->b_data)->entries; 447 at = entries = ((struct dx_node *) bh->b_data)->entries;
443 if (dx_get_limit(entries) != dx_node_limit (dir)) { 448 if (dx_get_limit(entries) != dx_node_limit (dir)) {
444 ext4_warning(dir->i_sb, __FUNCTION__, 449 ext4_warning(dir->i_sb, __func__,
445 "dx entry: limit != node limit"); 450 "dx entry: limit != node limit");
446 brelse(bh); 451 brelse(bh);
447 *err = ERR_BAD_DX_DIR; 452 *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
457 } 462 }
458fail: 463fail:
459 if (*err == ERR_BAD_DX_DIR) 464 if (*err == ERR_BAD_DX_DIR)
460 ext4_warning(dir->i_sb, __FUNCTION__, 465 ext4_warning(dir->i_sb, __func__,
461 "Corrupt dir inode %ld, running e2fsck is " 466 "Corrupt dir inode %ld, running e2fsck is "
462 "recommended.", dir->i_ino); 467 "recommended.", dir->i_ino);
463 return NULL; 468 return NULL;
@@ -914,7 +919,7 @@ restart:
914 wait_on_buffer(bh); 919 wait_on_buffer(bh);
915 if (!buffer_uptodate(bh)) { 920 if (!buffer_uptodate(bh)) {
916 /* read error, skip block & hope for the best */ 921 /* read error, skip block & hope for the best */
917 ext4_error(sb, __FUNCTION__, "reading directory #%lu " 922 ext4_error(sb, __func__, "reading directory #%lu "
918 "offset %lu", dir->i_ino, 923 "offset %lu", dir->i_ino,
919 (unsigned long)block); 924 (unsigned long)block);
920 brelse(bh); 925 brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1007 retval = ext4_htree_next_block(dir, hash, frame, 1012 retval = ext4_htree_next_block(dir, hash, frame,
1008 frames, NULL); 1013 frames, NULL);
1009 if (retval < 0) { 1014 if (retval < 0) {
1010 ext4_warning(sb, __FUNCTION__, 1015 ext4_warning(sb, __func__,
1011 "error reading index page in directory #%lu", 1016 "error reading index page in directory #%lu",
1012 dir->i_ino); 1017 dir->i_ino);
1013 *err = retval; 1018 *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1532 1537
1533 if (levels && (dx_get_count(frames->entries) == 1538 if (levels && (dx_get_count(frames->entries) ==
1534 dx_get_limit(frames->entries))) { 1539 dx_get_limit(frames->entries))) {
1535 ext4_warning(sb, __FUNCTION__, 1540 ext4_warning(sb, __func__,
1536 "Directory index full!"); 1541 "Directory index full!");
1537 err = -ENOSPC; 1542 err = -ENOSPC;
1538 goto cleanup; 1543 goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
1860 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1865 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1861 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1866 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
1862 if (err) 1867 if (err)
1863 ext4_error(inode->i_sb, __FUNCTION__, 1868 ext4_error(inode->i_sb, __func__,
1864 "error %d reading directory #%lu offset 0", 1869 "error %d reading directory #%lu offset 0",
1865 err, inode->i_ino); 1870 err, inode->i_ino);
1866 else 1871 else
1867 ext4_warning(inode->i_sb, __FUNCTION__, 1872 ext4_warning(inode->i_sb, __func__,
1868 "bad directory (dir #%lu) - no data block", 1873 "bad directory (dir #%lu) - no data block",
1869 inode->i_ino); 1874 inode->i_ino);
1870 return 1; 1875 return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
1893 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1898 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1894 if (!bh) { 1899 if (!bh) {
1895 if (err) 1900 if (err)
1896 ext4_error(sb, __FUNCTION__, 1901 ext4_error(sb, __func__,
1897 "error %d reading directory" 1902 "error %d reading directory"
1898 " #%lu offset %lu", 1903 " #%lu offset %lu",
1899 err, inode->i_ino, offset); 1904 err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
2217 goto out_stop; 2222 goto out_stop;
2218 } 2223 }
2219 } else { 2224 } else {
2225 /* clear the extent format for fast symlink */
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2220 inode->i_op = &ext4_fast_symlink_inode_operations; 2227 inode->i_op = &ext4_fast_symlink_inode_operations;
2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2228 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2222 inode->i_size = l-1; 2229 inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2347 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2354 EXT4_FEATURE_INCOMPAT_FILETYPE))
2348 new_de->file_type = old_de->file_type; 2355 new_de->file_type = old_de->file_type;
2349 new_dir->i_version++; 2356 new_dir->i_version++;
2357 new_dir->i_ctime = new_dir->i_mtime =
2358 ext4_current_time(new_dir);
2359 ext4_mark_inode_dirty(handle, new_dir);
2350 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2360 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
2351 ext4_journal_dirty_metadata(handle, new_bh); 2361 ext4_journal_dirty_metadata(handle, new_bh);
2352 brelse(new_bh); 2362 brelse(new_bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/ext4_jbd2.h>
15
16#include <linux/errno.h> 14#include <linux/errno.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18 16
17#include "ext4_jbd2.h"
19#include "group.h" 18#include "group.h"
20 19
21#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
50 49
51 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
52 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
53 ext4_warning(sb, __FUNCTION__, 52 ext4_warning(sb, __func__,
54 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %lu groups)",
55 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
56 else if (offset != 0) 55 else if (offset != 0)
57 ext4_warning(sb, __FUNCTION__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
58 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
59 ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
60 input->reserved_blocks); 59 input->reserved_blocks);
61 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
62 ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext4_warning(sb, __func__, "Bad blocks count %u",
63 input->blocks_count); 62 input->blocks_count);
64 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
65 ext4_warning(sb, __FUNCTION__, 64 ext4_warning(sb, __func__,
66 "Cannot read last block (%llu)", 65 "Cannot read last block (%llu)",
67 end - 1); 66 end - 1);
68 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
69 ext4_warning(sb, __FUNCTION__, 68 ext4_warning(sb, __func__,
70 "Block bitmap not in group (block %llu)", 69 "Block bitmap not in group (block %llu)",
71 (unsigned long long)input->block_bitmap); 70 (unsigned long long)input->block_bitmap);
72 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
73 ext4_warning(sb, __FUNCTION__, 72 ext4_warning(sb, __func__,
74 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
75 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
76 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
77 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
78 ext4_warning(sb, __FUNCTION__, 77 ext4_warning(sb, __func__,
79 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
80 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
81 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
82 ext4_warning(sb, __FUNCTION__, 81 ext4_warning(sb, __func__,
83 "Block bitmap same as inode bitmap (%llu)", 82 "Block bitmap same as inode bitmap (%llu)",
84 (unsigned long long)input->block_bitmap); 83 (unsigned long long)input->block_bitmap);
85 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
86 ext4_warning(sb, __FUNCTION__, 85 ext4_warning(sb, __func__,
87 "Block bitmap (%llu) in inode table (%llu-%llu)", 86 "Block bitmap (%llu) in inode table (%llu-%llu)",
88 (unsigned long long)input->block_bitmap, 87 (unsigned long long)input->block_bitmap,
89 (unsigned long long)input->inode_table, itend - 1); 88 (unsigned long long)input->inode_table, itend - 1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext4_warning(sb, __FUNCTION__, 90 ext4_warning(sb, __func__,
92 "Inode bitmap (%llu) in inode table (%llu-%llu)", 91 "Inode bitmap (%llu) in inode table (%llu-%llu)",
93 (unsigned long long)input->inode_bitmap, 92 (unsigned long long)input->inode_bitmap,
94 (unsigned long long)input->inode_table, itend - 1); 93 (unsigned long long)input->inode_table, itend - 1);
95 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
96 ext4_warning(sb, __FUNCTION__, 95 ext4_warning(sb, __func__,
97 "Block bitmap (%llu) in GDT table" 96 "Block bitmap (%llu) in GDT table"
98 " (%llu-%llu)", 97 " (%llu-%llu)",
99 (unsigned long long)input->block_bitmap, 98 (unsigned long long)input->block_bitmap,
100 start, metaend - 1); 99 start, metaend - 1);
101 else if (inside(input->inode_bitmap, start, metaend)) 100 else if (inside(input->inode_bitmap, start, metaend))
102 ext4_warning(sb, __FUNCTION__, 101 ext4_warning(sb, __func__,
103 "Inode bitmap (%llu) in GDT table" 102 "Inode bitmap (%llu) in GDT table"
104 " (%llu-%llu)", 103 " (%llu-%llu)",
105 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
106 start, metaend - 1); 105 start, metaend - 1);
107 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
108 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
109 ext4_warning(sb, __FUNCTION__, 108 ext4_warning(sb, __func__,
110 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
111 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
112 (unsigned long long)input->inode_table, 111 (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
368 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 367 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
369 if (le32_to_cpu(*p++) != 368 if (le32_to_cpu(*p++) !=
370 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 369 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
371 ext4_warning(sb, __FUNCTION__, 370 ext4_warning(sb, __func__,
372 "reserved GDT %llu" 371 "reserved GDT %llu"
373 " missing grp %d (%llu)", 372 " missing grp %d (%llu)",
374 blk, grp, 373 blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
424 */ 423 */
425 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
426 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
427 ext4_warning(sb, __FUNCTION__, 426 ext4_warning(sb, __func__,
428 "won't resize using backup superblock at %llu", 427 "won't resize using backup superblock at %llu",
429 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 428 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
430 return -EPERM; 429 return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
448 447
449 data = (__le32 *)dind->b_data; 448 data = (__le32 *)dind->b_data;
450 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 449 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
451 ext4_warning(sb, __FUNCTION__, 450 ext4_warning(sb, __func__,
452 "new group %u GDT block %llu not reserved", 451 "new group %u GDT block %llu not reserved",
453 input->group, gdblock); 452 input->group, gdblock);
454 err = -EINVAL; 453 err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
469 goto exit_dindj; 468 goto exit_dindj;
470 469
471 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
472 GFP_KERNEL); 471 GFP_NOFS);
473 if (!n_group_desc) { 472 if (!n_group_desc) {
474 err = -ENOMEM; 473 err = -ENOMEM;
475 ext4_warning (sb, __FUNCTION__, 474 ext4_warning(sb, __func__,
476 "not enough memory for %lu groups", gdb_num + 1); 475 "not enough memory for %lu groups", gdb_num + 1);
477 goto exit_inode; 476 goto exit_inode;
478 } 477 }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 EXT4_SB(sb)->s_gdb_count++; 501 EXT4_SB(sb)->s_gdb_count++;
503 kfree(o_group_desc); 502 kfree(o_group_desc);
504 503
505 es->s_reserved_gdt_blocks = 504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
506 cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
507 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
508 506
509 return 0; 507 return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
553 int res, i; 551 int res, i;
554 int err; 552 int err;
555 553
556 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 554 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
557 if (!primary) 555 if (!primary)
558 return -ENOMEM; 556 return -ENOMEM;
559 557
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
571 /* Get each reserved primary GDT block and verify it holds backups */ 569 /* Get each reserved primary GDT block and verify it holds backups */
572 for (res = 0; res < reserved_gdb; res++, blk++) { 570 for (res = 0; res < reserved_gdb; res++, blk++) {
573 if (le32_to_cpu(*data) != blk) { 571 if (le32_to_cpu(*data) != blk) {
574 ext4_warning(sb, __FUNCTION__, 572 ext4_warning(sb, __func__,
575 "reserved block %llu" 573 "reserved block %llu"
576 " not at offset %ld", 574 " not at offset %ld",
577 blk, 575 blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
715 */ 713 */
716exit_err: 714exit_err:
717 if (err) { 715 if (err) {
718 ext4_warning(sb, __FUNCTION__, 716 ext4_warning(sb, __func__,
719 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %lu (err %d), "
720 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
721 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
755 753
756 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
757 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
758 ext4_warning(sb, __FUNCTION__, 756 ext4_warning(sb, __func__,
759 "Can't resize non-sparse filesystem further"); 757 "Can't resize non-sparse filesystem further");
760 return -EPERM; 758 return -EPERM;
761 } 759 }
762 760
763 if (ext4_blocks_count(es) + input->blocks_count < 761 if (ext4_blocks_count(es) + input->blocks_count <
764 ext4_blocks_count(es)) { 762 ext4_blocks_count(es)) {
765 ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 763 ext4_warning(sb, __func__, "blocks_count overflow\n");
766 return -EINVAL; 764 return -EINVAL;
767 } 765 }
768 766
769 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
770 le32_to_cpu(es->s_inodes_count)) { 768 le32_to_cpu(es->s_inodes_count)) {
771 ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 769 ext4_warning(sb, __func__, "inodes_count overflow\n");
772 return -EINVAL; 770 return -EINVAL;
773 } 771 }
774 772
775 if (reserved_gdb || gdb_off == 0) { 773 if (reserved_gdb || gdb_off == 0) {
776 if (!EXT4_HAS_COMPAT_FEATURE(sb, 774 if (!EXT4_HAS_COMPAT_FEATURE(sb,
777 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 775 EXT4_FEATURE_COMPAT_RESIZE_INODE)){
778 ext4_warning(sb, __FUNCTION__, 776 ext4_warning(sb, __func__,
779 "No reserved GDT blocks, can't resize"); 777 "No reserved GDT blocks, can't resize");
780 return -EPERM; 778 return -EPERM;
781 } 779 }
782 inode = ext4_iget(sb, EXT4_RESIZE_INO); 780 inode = ext4_iget(sb, EXT4_RESIZE_INO);
783 if (IS_ERR(inode)) { 781 if (IS_ERR(inode)) {
784 ext4_warning(sb, __FUNCTION__, 782 ext4_warning(sb, __func__,
785 "Error opening resize inode"); 783 "Error opening resize inode");
786 return PTR_ERR(inode); 784 return PTR_ERR(inode);
787 } 785 }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 808
811 lock_super(sb); 809 lock_super(sb);
812 if (input->group != sbi->s_groups_count) { 810 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __FUNCTION__, 811 ext4_warning(sb, __func__,
814 "multiple resizers run on filesystem!"); 812 "multiple resizers run on filesystem!");
815 err = -EBUSY; 813 err = -EBUSY;
816 goto exit_journal; 814 goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
877 */ 875 */
878 ext4_blocks_count_set(es, ext4_blocks_count(es) + 876 ext4_blocks_count_set(es, ext4_blocks_count(es) +
879 input->blocks_count); 877 input->blocks_count);
880 es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + 878 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
881 EXT4_INODES_PER_GROUP(sb));
882 879
883 /* 880 /*
884 * We need to protect s_groups_count against other CPUs seeing 881 * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 " too large to resize to %llu blocks safely\n", 974 " too large to resize to %llu blocks safely\n",
978 sb->s_id, n_blocks_count); 975 sb->s_id, n_blocks_count);
979 if (sizeof(sector_t) < 8) 976 if (sizeof(sector_t) < 8)
980 ext4_warning(sb, __FUNCTION__, 977 ext4_warning(sb, __func__,
981 "CONFIG_LBD not enabled\n"); 978 "CONFIG_LBD not enabled\n");
982 return -EINVAL; 979 return -EINVAL;
983 } 980 }
984 981
985 if (n_blocks_count < o_blocks_count) { 982 if (n_blocks_count < o_blocks_count) {
986 ext4_warning(sb, __FUNCTION__, 983 ext4_warning(sb, __func__,
987 "can't shrink FS - resize aborted"); 984 "can't shrink FS - resize aborted");
988 return -EBUSY; 985 return -EBUSY;
989 } 986 }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
992 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 989 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
993 990
994 if (last == 0) { 991 if (last == 0) {
995 ext4_warning(sb, __FUNCTION__, 992 ext4_warning(sb, __func__,
996 "need to use ext2online to resize further"); 993 "need to use ext2online to resize further");
997 return -EPERM; 994 return -EPERM;
998 } 995 }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1000 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 997 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1001 998
1002 if (o_blocks_count + add < o_blocks_count) { 999 if (o_blocks_count + add < o_blocks_count) {
1003 ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); 1000 ext4_warning(sb, __func__, "blocks_count overflow");
1004 return -EINVAL; 1001 return -EINVAL;
1005 } 1002 }
1006 1003
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1008 add = n_blocks_count - o_blocks_count; 1005 add = n_blocks_count - o_blocks_count;
1009 1006
1010 if (o_blocks_count + add < n_blocks_count) 1007 if (o_blocks_count + add < n_blocks_count)
1011 ext4_warning(sb, __FUNCTION__, 1008 ext4_warning(sb, __func__,
1012 "will only finish group (%llu" 1009 "will only finish group (%llu"
1013 " blocks, %u new)", 1010 " blocks, %u new)",
1014 o_blocks_count + add, add); 1011 o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1016 /* See if the device is actually as big as what was requested */ 1013 /* See if the device is actually as big as what was requested */
1017 bh = sb_bread(sb, o_blocks_count + add -1); 1014 bh = sb_bread(sb, o_blocks_count + add -1);
1018 if (!bh) { 1015 if (!bh) {
1019 ext4_warning(sb, __FUNCTION__, 1016 ext4_warning(sb, __func__,
1020 "can't read last block, resize aborted"); 1017 "can't read last block, resize aborted");
1021 return -ENOSPC; 1018 return -ENOSPC;
1022 } 1019 }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1028 handle = ext4_journal_start_sb(sb, 3); 1025 handle = ext4_journal_start_sb(sb, 3);
1029 if (IS_ERR(handle)) { 1026 if (IS_ERR(handle)) {
1030 err = PTR_ERR(handle); 1027 err = PTR_ERR(handle);
1031 ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); 1028 ext4_warning(sb, __func__, "error %d on journal start", err);
1032 goto exit_put; 1029 goto exit_put;
1033 } 1030 }
1034 1031
1035 lock_super(sb); 1032 lock_super(sb);
1036 if (o_blocks_count != ext4_blocks_count(es)) { 1033 if (o_blocks_count != ext4_blocks_count(es)) {
1037 ext4_warning(sb, __FUNCTION__, 1034 ext4_warning(sb, __func__,
1038 "multiple resizers run on filesystem!"); 1035 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1036 unlock_super(sb);
1040 ext4_journal_stop(handle); 1037 ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1044 1041
1045 if ((err = ext4_journal_get_write_access(handle, 1042 if ((err = ext4_journal_get_write_access(handle,
1046 EXT4_SB(sb)->s_sbh))) { 1043 EXT4_SB(sb)->s_sbh))) {
1047 ext4_warning(sb, __FUNCTION__, 1044 ext4_warning(sb, __func__,
1048 "error %d on journal write access", err); 1045 "error %d on journal write access", err);
1049 unlock_super(sb); 1046 unlock_super(sb);
1050 ext4_journal_stop(handle); 1047 ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 13383ba18f1d..52dd0679a4e2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h>
25#include <linux/ext4_jbd2.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/init.h> 25#include <linux/init.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
38#include <linux/seq_file.h> 36#include <linux/seq_file.h>
39#include <linux/log2.h> 37#include <linux/log2.h>
40#include <linux/crc16.h> 38#include <linux/crc16.h>
41
42#include <asm/uaccess.h> 39#include <asm/uaccess.h>
43 40
41#include "ext4.h"
42#include "ext4_jbd2.h"
44#include "xattr.h" 43#include "xattr.h"
45#include "acl.h" 44#include "acl.h"
46#include "namei.h" 45#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
135 * take the FS itself readonly cleanly. */ 134 * take the FS itself readonly cleanly. */
136 journal = EXT4_SB(sb)->s_journal; 135 journal = EXT4_SB(sb)->s_journal;
137 if (is_journal_aborted(journal)) { 136 if (is_journal_aborted(journal)) {
138 ext4_abort(sb, __FUNCTION__, 137 ext4_abort(sb, __func__,
139 "Detected aborted journal"); 138 "Detected aborted journal");
140 return ERR_PTR(-EROFS); 139 return ERR_PTR(-EROFS);
141 } 140 }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
355 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 354 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
356 return; 355 return;
357 356
358 ext4_warning(sb, __FUNCTION__, 357 ext4_warning(sb, __func__,
359 "updating to rev %d because of new feature flag, " 358 "updating to rev %d because of new feature flag, "
360 "running e2fsck is recommended", 359 "running e2fsck is recommended",
361 EXT4_DYNAMIC_REV); 360 EXT4_DYNAMIC_REV);
@@ -813,7 +812,8 @@ static int ext4_acquire_dquot(struct dquot *dquot);
813static int ext4_release_dquot(struct dquot *dquot); 812static int ext4_release_dquot(struct dquot *dquot);
814static int ext4_mark_dquot_dirty(struct dquot *dquot); 813static int ext4_mark_dquot_dirty(struct dquot *dquot);
815static int ext4_write_info(struct super_block *sb, int type); 814static int ext4_write_info(struct super_block *sb, int type);
816static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); 815static int ext4_quota_on(struct super_block *sb, int type, int format_id,
816 char *path, int remount);
817static int ext4_quota_on_mount(struct super_block *sb, int type); 817static int ext4_quota_on_mount(struct super_block *sb, int type);
818static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 818static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
819 size_t len, loff_t off); 819 size_t len, loff_t off);
@@ -944,8 +944,8 @@ static match_table_t tokens = {
944 {Opt_mballoc, "mballoc"}, 944 {Opt_mballoc, "mballoc"},
945 {Opt_nomballoc, "nomballoc"}, 945 {Opt_nomballoc, "nomballoc"},
946 {Opt_stripe, "stripe=%u"}, 946 {Opt_stripe, "stripe=%u"},
947 {Opt_err, NULL},
948 {Opt_resize, "resize"}, 947 {Opt_resize, "resize"},
948 {Opt_err, NULL},
949}; 949};
950 950
951static ext4_fsblk_t get_sb_block(void **data) 951static ext4_fsblk_t get_sb_block(void **data)
@@ -1387,11 +1387,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1387 * a plain journaled filesystem we can keep it set as 1387 * a plain journaled filesystem we can keep it set as
1388 * valid forever! :) 1388 * valid forever! :)
1389 */ 1389 */
1390 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1390 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1391#endif 1391#endif
1392 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1392 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1393 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1393 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1394 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1394 le16_add_cpu(&es->s_mnt_count, 1);
1395 es->s_mtime = cpu_to_le32(get_seconds()); 1395 es->s_mtime = cpu_to_le32(get_seconds());
1396 ext4_update_dynamic_rev(sb); 1396 ext4_update_dynamic_rev(sb);
1397 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1397 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1484,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb)
1484 block_bitmap = ext4_block_bitmap(sb, gdp); 1484 block_bitmap = ext4_block_bitmap(sb, gdp);
1485 if (block_bitmap < first_block || block_bitmap > last_block) 1485 if (block_bitmap < first_block || block_bitmap > last_block)
1486 { 1486 {
1487 ext4_error (sb, "ext4_check_descriptors", 1487 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1488 "Block bitmap for group %lu" 1488 "Block bitmap for group %lu not in group "
1489 " not in group (block %llu)!", 1489 "(block %llu)!", i, block_bitmap);
1490 i, block_bitmap);
1491 return 0; 1490 return 0;
1492 } 1491 }
1493 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1492 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1494 if (inode_bitmap < first_block || inode_bitmap > last_block) 1493 if (inode_bitmap < first_block || inode_bitmap > last_block)
1495 { 1494 {
1496 ext4_error (sb, "ext4_check_descriptors", 1495 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1497 "Inode bitmap for group %lu" 1496 "Inode bitmap for group %lu not in group "
1498 " not in group (block %llu)!", 1497 "(block %llu)!", i, inode_bitmap);
1499 i, inode_bitmap);
1500 return 0; 1498 return 0;
1501 } 1499 }
1502 inode_table = ext4_inode_table(sb, gdp); 1500 inode_table = ext4_inode_table(sb, gdp);
1503 if (inode_table < first_block || 1501 if (inode_table < first_block ||
1504 inode_table + sbi->s_itb_per_group - 1 > last_block) 1502 inode_table + sbi->s_itb_per_group - 1 > last_block)
1505 { 1503 {
1506 ext4_error (sb, "ext4_check_descriptors", 1504 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1507 "Inode table for group %lu" 1505 "Inode table for group %lu not in group "
1508 " not in group (block %llu)!", 1506 "(block %llu)!", i, inode_table);
1509 i, inode_table);
1510 return 0; 1507 return 0;
1511 } 1508 }
1512 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1509 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1513 ext4_error(sb, __FUNCTION__, 1510 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1514 "Checksum for group %lu failed (%u!=%u)\n", 1511 "Checksum for group %lu failed (%u!=%u)\n",
1515 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1512 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1516 gdp)), le16_to_cpu(gdp->bg_checksum)); 1513 gdp)), le16_to_cpu(gdp->bg_checksum));
1517 return 0; 1514 return 0;
1518 } 1515 }
1519 if (!flexbg_flag) 1516 if (!flexbg_flag)
@@ -1593,8 +1590,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1593 while (es->s_last_orphan) { 1590 while (es->s_last_orphan) {
1594 struct inode *inode; 1591 struct inode *inode;
1595 1592
1596 if (!(inode = 1593 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1597 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1594 if (IS_ERR(inode)) {
1598 es->s_last_orphan = 0; 1595 es->s_last_orphan = 0;
1599 break; 1596 break;
1600 } 1597 }
@@ -1604,7 +1601,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1604 if (inode->i_nlink) { 1601 if (inode->i_nlink) {
1605 printk(KERN_DEBUG 1602 printk(KERN_DEBUG
1606 "%s: truncating inode %lu to %Ld bytes\n", 1603 "%s: truncating inode %lu to %Ld bytes\n",
1607 __FUNCTION__, inode->i_ino, inode->i_size); 1604 __func__, inode->i_ino, inode->i_size);
1608 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1605 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1609 inode->i_ino, inode->i_size); 1606 inode->i_ino, inode->i_size);
1610 ext4_truncate(inode); 1607 ext4_truncate(inode);
@@ -1612,7 +1609,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1612 } else { 1609 } else {
1613 printk(KERN_DEBUG 1610 printk(KERN_DEBUG
1614 "%s: deleting unreferenced inode %lu\n", 1611 "%s: deleting unreferenced inode %lu\n",
1615 __FUNCTION__, inode->i_ino); 1612 __func__, inode->i_ino);
1616 jbd_debug(2, "deleting unreferenced inode %lu\n", 1613 jbd_debug(2, "deleting unreferenced inode %lu\n",
1617 inode->i_ino); 1614 inode->i_ino);
1618 nr_orphans++; 1615 nr_orphans++;
@@ -1632,7 +1629,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1632 /* Turn quotas off */ 1629 /* Turn quotas off */
1633 for (i = 0; i < MAXQUOTAS; i++) { 1630 for (i = 0; i < MAXQUOTAS; i++) {
1634 if (sb_dqopt(sb)->files[i]) 1631 if (sb_dqopt(sb)->files[i])
1635 vfs_quota_off(sb, i); 1632 vfs_quota_off(sb, i, 0);
1636 } 1633 }
1637#endif 1634#endif
1638 sb->s_flags = s_flags; /* Restore MS_RDONLY status */ 1635 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2698,9 +2695,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
2698 char nbuf[16]; 2695 char nbuf[16];
2699 2696
2700 errstr = ext4_decode_error(sb, j_errno, nbuf); 2697 errstr = ext4_decode_error(sb, j_errno, nbuf);
2701 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2698 ext4_warning(sb, __func__, "Filesystem error recorded "
2702 "from previous mount: %s", errstr); 2699 "from previous mount: %s", errstr);
2703 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2700 ext4_warning(sb, __func__, "Marking fs in need of "
2704 "filesystem check."); 2701 "filesystem check.");
2705 2702
2706 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2703 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2827,7 +2824,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2827 } 2824 }
2828 2825
2829 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2826 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
2830 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2827 ext4_abort(sb, __func__, "Abort forced by user");
2831 2828
2832 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2829 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2833 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2830 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3039,8 +3036,14 @@ static int ext4_dquot_drop(struct inode *inode)
3039 3036
3040 /* We may delete quota structure so we need to reserve enough blocks */ 3037 /* We may delete quota structure so we need to reserve enough blocks */
3041 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3038 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3042 if (IS_ERR(handle)) 3039 if (IS_ERR(handle)) {
3040 /*
3041 * We call dquot_drop() anyway to at least release references
3042 * to quota structures so that umount does not hang.
3043 */
3044 dquot_drop(inode);
3043 return PTR_ERR(handle); 3045 return PTR_ERR(handle);
3046 }
3044 ret = dquot_drop(inode); 3047 ret = dquot_drop(inode);
3045 err = ext4_journal_stop(handle); 3048 err = ext4_journal_stop(handle);
3046 if (!ret) 3049 if (!ret)
@@ -3143,7 +3146,7 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
3143 * Standard function to be called on quota_on 3146 * Standard function to be called on quota_on
3144 */ 3147 */
3145static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3148static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3146 char *path) 3149 char *path, int remount)
3147{ 3150{
3148 int err; 3151 int err;
3149 struct nameidata nd; 3152 struct nameidata nd;
@@ -3151,9 +3154,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3151 if (!test_opt(sb, QUOTA)) 3154 if (!test_opt(sb, QUOTA))
3152 return -EINVAL; 3155 return -EINVAL;
3153 /* Not journalling quota? */ 3156 /* Not journalling quota? */
3154 if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && 3157 if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] &&
3155 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) 3158 !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
3156 return vfs_quota_on(sb, type, format_id, path); 3159 return vfs_quota_on(sb, type, format_id, path, remount);
3157 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 3160 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
3158 if (err) 3161 if (err)
3159 return err; 3162 return err;
@@ -3168,7 +3171,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3168 "EXT4-fs: Quota file not on filesystem root. " 3171 "EXT4-fs: Quota file not on filesystem root. "
3169 "Journalled quota will not work.\n"); 3172 "Journalled quota will not work.\n");
3170 path_put(&nd.path); 3173 path_put(&nd.path);
3171 return vfs_quota_on(sb, type, format_id, path); 3174 return vfs_quota_on(sb, type, format_id, path, remount);
3172} 3175}
3173 3176
3174/* Read data from quotafile - avoid pagecache and such because we cannot afford 3177/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/ext4_fs.h>
23#include <linux/namei.h> 22#include <linux/namei.h>
23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..3fbc2c6c3d0e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/ext4_jbd2.h>
57#include <linux/ext4_fs.h>
58#include <linux/mbcache.h> 56#include <linux/mbcache.h>
59#include <linux/quotaops.h> 57#include <linux/quotaops.h>
60#include <linux/rwsem.h> 58#include <linux/rwsem.h>
59#include "ext4_jbd2.h"
60#include "ext4.h"
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer,
96 size_t buffer_size);
95 97
96static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
97 99
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
228bad_block: ext4_error(inode->i_sb, __FUNCTION__, 230bad_block: ext4_error(inode->i_sb, __func__,
229 "inode %lu: bad block %llu", inode->i_ino, 231 "inode %lu: bad block %llu", inode->i_ino,
230 EXT4_I(inode)->i_file_acl); 232 EXT4_I(inode)->i_file_acl);
231 error = -EIO; 233 error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
367 ea_bdebug(bh, "b_count=%d, refcount=%d", 369 ea_bdebug(bh, "b_count=%d, refcount=%d",
368 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
369 if (ext4_xattr_check_block(bh)) { 371 if (ext4_xattr_check_block(bh)) {
370 ext4_error(inode->i_sb, __FUNCTION__, 372 ext4_error(inode->i_sb, __func__,
371 "inode %lu: bad block %llu", inode->i_ino, 373 "inode %lu: bad block %llu", inode->i_ino,
372 EXT4_I(inode)->i_file_acl); 374 EXT4_I(inode)->i_file_acl);
373 error = -EIO; 375 error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
420 * Returns a negative error number on failure, or the number of bytes 422 * Returns a negative error number on failure, or the number of bytes
421 * used / required on success. 423 * used / required on success.
422 */ 424 */
423int 425static int
424ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
425{ 427{
426 int i_error, b_error; 428 int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
484 get_bh(bh); 486 get_bh(bh);
485 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
486 } else { 488 } else {
487 BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
488 le32_to_cpu(BHDR(bh)->h_refcount) - 1);
489 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_journal_dirty_metadata(handle, bh);
490 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
491 handle->h_sync = 1; 492 handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
660 atomic_read(&(bs->bh->b_count)), 661 atomic_read(&(bs->bh->b_count)),
661 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 662 le32_to_cpu(BHDR(bs->bh)->h_refcount));
662 if (ext4_xattr_check_block(bs->bh)) { 663 if (ext4_xattr_check_block(bs->bh)) {
663 ext4_error(sb, __FUNCTION__, 664 ext4_error(sb, __func__,
664 "inode %lu: bad block %llu", inode->i_ino, 665 "inode %lu: bad block %llu", inode->i_ino,
665 EXT4_I(inode)->i_file_acl); 666 EXT4_I(inode)->i_file_acl);
666 error = -EIO; 667 error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
738 ce = NULL; 739 ce = NULL;
739 } 740 }
740 ea_bdebug(bs->bh, "cloning"); 741 ea_bdebug(bs->bh, "cloning");
741 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 742 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
742 error = -ENOMEM; 743 error = -ENOMEM;
743 if (s->base == NULL) 744 if (s->base == NULL)
744 goto cleanup; 745 goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 751 }
751 } else { 752 } else {
752 /* Allocate a buffer where we construct the new block. */ 753 /* Allocate a buffer where we construct the new block. */
753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 754 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
754 /* assert(header == s->base) */ 755 /* assert(header == s->base) */
755 error = -ENOMEM; 756 error = -ENOMEM;
756 if (s->base == NULL) 757 if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
789 if (error) 790 if (error)
790 goto cleanup_dquot; 791 goto cleanup_dquot;
791 lock_buffer(new_bh); 792 lock_buffer(new_bh);
792 BHDR(new_bh)->h_refcount = cpu_to_le32(1 + 793 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
793 le32_to_cpu(BHDR(new_bh)->h_refcount));
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 794 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 795 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 796 unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
808 get_bh(new_bh); 808 get_bh(new_bh);
809 } else { 809 } else {
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = le32_to_cpu( 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_SB(sb)->s_es->s_first_data_block) + 812 EXT4_I(inode)->i_block_group);
813 (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
814 EXT4_BLOCKS_PER_GROUP(sb);
815 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_block(handle, inode,
816 goal, &error); 814 goal, &error);
817 if (error) 815 if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
863 goto cleanup; 861 goto cleanup;
864 862
865bad_block: 863bad_block:
866 ext4_error(inode->i_sb, __FUNCTION__, 864 ext4_error(inode->i_sb, __func__,
867 "inode %lu: bad block %llu", inode->i_ino, 865 "inode %lu: bad block %llu", inode->i_ino,
868 EXT4_I(inode)->i_file_acl); 866 EXT4_I(inode)->i_file_acl);
869 goto cleanup; 867 goto cleanup;
@@ -1166,7 +1164,7 @@ retry:
1166 if (!bh) 1164 if (!bh)
1167 goto cleanup; 1165 goto cleanup;
1168 if (ext4_xattr_check_block(bh)) { 1166 if (ext4_xattr_check_block(bh)) {
1169 ext4_error(inode->i_sb, __FUNCTION__, 1167 ext4_error(inode->i_sb, __func__,
1170 "inode %lu: bad block %llu", inode->i_ino, 1168 "inode %lu: bad block %llu", inode->i_ino,
1171 EXT4_I(inode)->i_file_acl); 1169 EXT4_I(inode)->i_file_acl);
1172 error = -EIO; 1170 error = -EIO;
@@ -1341,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1341 goto cleanup; 1339 goto cleanup;
1342 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1340 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1343 if (!bh) { 1341 if (!bh) {
1344 ext4_error(inode->i_sb, __FUNCTION__, 1342 ext4_error(inode->i_sb, __func__,
1345 "inode %lu: block %llu read error", inode->i_ino, 1343 "inode %lu: block %llu read error", inode->i_ino,
1346 EXT4_I(inode)->i_file_acl); 1344 EXT4_I(inode)->i_file_acl);
1347 goto cleanup; 1345 goto cleanup;
1348 } 1346 }
1349 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1347 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1350 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1348 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1351 ext4_error(inode->i_sb, __FUNCTION__, 1349 ext4_error(inode->i_sb, __func__,
1352 "inode %lu: bad block %llu", inode->i_ino, 1350 "inode %lu: bad block %llu", inode->i_ino,
1353 EXT4_I(inode)->i_file_acl); 1351 EXT4_I(inode)->i_file_acl);
1354 goto cleanup; 1352 goto cleanup;
@@ -1475,7 +1473,7 @@ again:
1475 } 1473 }
1476 bh = sb_bread(inode->i_sb, ce->e_block); 1474 bh = sb_bread(inode->i_sb, ce->e_block);
1477 if (!bh) { 1475 if (!bh) {
1478 ext4_error(inode->i_sb, __FUNCTION__, 1476 ext4_error(inode->i_sb, __func__,
1479 "inode %lu: block %lu read error", 1477 "inode %lu: block %lu read error",
1480 inode->i_ino, (unsigned long) ce->e_block); 1478 inode->i_ino, (unsigned long) ce->e_block);
1481 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1479 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
77extern int ext4_xattr_list(struct inode *, char *, size_t);
78extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 77extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
79extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 78extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
80 79
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
99} 98}
100 99
101static inline int 100static inline int
102ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
103{
104 return -EOPNOTSUPP;
105}
106
107static inline int
108ext4_xattr_set(struct inode *inode, int name_index, const char *name, 101ext4_xattr_set(struct inode *inode, int name_index, const char *name,
109 const void *value, size_t size, int flags) 102 const void *value, size_t size, int flags)
110{ 103{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext4_jbd2.h>
10#include <linux/ext4_fs.h>
11#include <linux/security.h> 9#include <linux/security.h>
10#include "ext4_jbd2.h"
11#include "ext4.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted." 16#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/ext4_jbd2.h> 11#include "ext4_jbd2.h"
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user." 15#define XATTR_USER_PREFIX "user."
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 639b3b4f86d1..fda25479af26 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
242 /* prevent the infinite loop of cluster chain */ 242 /* prevent the infinite loop of cluster chain */
243 if (*fclus > limit) { 243 if (*fclus > limit) {
244 fat_fs_panic(sb, "%s: detected the cluster chain loop" 244 fat_fs_panic(sb, "%s: detected the cluster chain loop"
245 " (i_pos %lld)", __FUNCTION__, 245 " (i_pos %lld)", __func__,
246 MSDOS_I(inode)->i_pos); 246 MSDOS_I(inode)->i_pos);
247 nr = -EIO; 247 nr = -EIO;
248 goto out; 248 goto out;
@@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
253 goto out; 253 goto out;
254 else if (nr == FAT_ENT_FREE) { 254 else if (nr == FAT_ENT_FREE) {
255 fat_fs_panic(sb, "%s: invalid cluster chain" 255 fat_fs_panic(sb, "%s: invalid cluster chain"
256 " (i_pos %lld)", __FUNCTION__, 256 " (i_pos %lld)", __func__,
257 MSDOS_I(inode)->i_pos); 257 MSDOS_I(inode)->i_pos);
258 nr = -EIO; 258 nr = -EIO;
259 goto out; 259 goto out;
@@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
286 return ret; 286 return ret;
287 else if (ret == FAT_ENT_EOF) { 287 else if (ret == FAT_ENT_EOF) {
288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", 288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
289 __FUNCTION__, MSDOS_I(inode)->i_pos); 289 __func__, MSDOS_I(inode)->i_pos);
290 return -EIO; 290 return -EIO;
291 } 291 }
292 return dclus; 292 return dclus;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 72cbcd61bd95..486725ee99ae 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -124,8 +124,8 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
124 * but ignore that right now. 124 * but ignore that right now.
125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
126 */ 126 */
127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int uni_xlate, 127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
128 struct nls_table *nls) 128 int uni_xlate, struct nls_table *nls)
129{ 129{
130 wchar_t *ip, ec; 130 wchar_t *ip, ec;
131 unsigned char *op, nc; 131 unsigned char *op, nc;
@@ -135,10 +135,11 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int uni_xlate,
135 ip = uni; 135 ip = uni;
136 op = ascii; 136 op = ascii;
137 137
138 while (*ip) { 138 while (*ip && ((len - NLS_MAX_CHARSET_SIZE) > 0)) {
139 ec = *ip++; 139 ec = *ip++;
140 if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) { 140 if ( (charlen = nls->uni2char(ec, op, NLS_MAX_CHARSET_SIZE)) > 0) {
141 op += charlen; 141 op += charlen;
142 len -= charlen;
142 } else { 143 } else {
143 if (uni_xlate == 1) { 144 if (uni_xlate == 1) {
144 *op = ':'; 145 *op = ':';
@@ -149,16 +150,19 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int uni_xlate,
149 ec >>= 4; 150 ec >>= 4;
150 } 151 }
151 op += 5; 152 op += 5;
153 len -= 5;
152 } else { 154 } else {
153 *op++ = '?'; 155 *op++ = '?';
156 len--;
154 } 157 }
155 } 158 }
156 /* We have some slack there, so it's OK */
157 if (op>ascii+256) {
158 op = ascii + 256;
159 break;
160 }
161 } 159 }
160
161 if (unlikely(*ip)) {
162 printk(KERN_WARNING "FAT: filename was truncated while "
163 "converting.");
164 }
165
162 *op = 0; 166 *op = 0;
163 return (op - ascii); 167 return (op - ascii);
164} 168}
@@ -243,7 +247,7 @@ static int fat_parse_long(struct inode *dir, loff_t *pos,
243 unsigned char id, slot, slots, alias_checksum; 247 unsigned char id, slot, slots, alias_checksum;
244 248
245 if (!*unicode) { 249 if (!*unicode) {
246 *unicode = (wchar_t *)__get_free_page(GFP_KERNEL); 250 *unicode = __getname();
247 if (!*unicode) { 251 if (!*unicode) {
248 brelse(*bh); 252 brelse(*bh);
249 return -ENOMEM; 253 return -ENOMEM;
@@ -311,9 +315,11 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
311 struct nls_table *nls_io = sbi->nls_io; 315 struct nls_table *nls_io = sbi->nls_io;
312 struct nls_table *nls_disk = sbi->nls_disk; 316 struct nls_table *nls_disk = sbi->nls_disk;
313 wchar_t bufuname[14]; 317 wchar_t bufuname[14];
314 unsigned char xlate_len, nr_slots; 318 unsigned char nr_slots;
319 int xlate_len;
315 wchar_t *unicode = NULL; 320 wchar_t *unicode = NULL;
316 unsigned char work[MSDOS_NAME], bufname[260]; /* 256 + 4 */ 321 unsigned char work[MSDOS_NAME];
322 unsigned char *bufname = NULL;
317 int uni_xlate = sbi->options.unicode_xlate; 323 int uni_xlate = sbi->options.unicode_xlate;
318 int utf8 = sbi->options.utf8; 324 int utf8 = sbi->options.utf8;
319 int anycase = (sbi->options.name_check != 's'); 325 int anycase = (sbi->options.name_check != 's');
@@ -321,6 +327,10 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
321 loff_t cpos = 0; 327 loff_t cpos = 0;
322 int chl, i, j, last_u, err; 328 int chl, i, j, last_u, err;
323 329
330 bufname = __getname();
331 if (!bufname)
332 return -ENOMEM;
333
324 err = -ENOENT; 334 err = -ENOENT;
325 while(1) { 335 while(1) {
326 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 336 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
@@ -386,8 +396,8 @@ parse_record:
386 396
387 bufuname[last_u] = 0x0000; 397 bufuname[last_u] = 0x0000;
388 xlate_len = utf8 398 xlate_len = utf8
389 ?utf8_wcstombs(bufname, bufuname, sizeof(bufname)) 399 ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
390 :uni16_to_x8(bufname, bufuname, uni_xlate, nls_io); 400 :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
391 if (xlate_len == name_len) 401 if (xlate_len == name_len)
392 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 402 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
393 (anycase && !nls_strnicmp(nls_io, name, bufname, 403 (anycase && !nls_strnicmp(nls_io, name, bufname,
@@ -396,8 +406,8 @@ parse_record:
396 406
397 if (nr_slots) { 407 if (nr_slots) {
398 xlate_len = utf8 408 xlate_len = utf8
399 ?utf8_wcstombs(bufname, unicode, sizeof(bufname)) 409 ?utf8_wcstombs(bufname, unicode, PATH_MAX)
400 :uni16_to_x8(bufname, unicode, uni_xlate, nls_io); 410 :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
401 if (xlate_len != name_len) 411 if (xlate_len != name_len)
402 continue; 412 continue;
403 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 413 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
@@ -416,8 +426,10 @@ Found:
416 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); 426 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
417 err = 0; 427 err = 0;
418EODir: 428EODir:
429 if (bufname)
430 __putname(bufname);
419 if (unicode) 431 if (unicode)
420 free_page((unsigned long)unicode); 432 __putname(unicode);
421 433
422 return err; 434 return err;
423} 435}
@@ -598,7 +610,7 @@ parse_record:
598 if (isvfat) { 610 if (isvfat) {
599 bufuname[j] = 0x0000; 611 bufuname[j] = 0x0000;
600 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname)) 612 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
601 : uni16_to_x8(bufname, bufuname, uni_xlate, nls_io); 613 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
602 } 614 }
603 615
604 fill_name = bufname; 616 fill_name = bufname;
@@ -607,10 +619,10 @@ parse_record:
607 /* convert the unicode long name. 261 is maximum size 619 /* convert the unicode long name. 261 is maximum size
608 * of unicode buffer. (13 * slots + nul) */ 620 * of unicode buffer. (13 * slots + nul) */
609 void *longname = unicode + 261; 621 void *longname = unicode + 261;
610 int buf_size = PAGE_SIZE - (261 * sizeof(unicode[0])); 622 int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
611 int long_len = utf8 623 int long_len = utf8
612 ? utf8_wcstombs(longname, unicode, buf_size) 624 ? utf8_wcstombs(longname, unicode, buf_size)
613 : uni16_to_x8(longname, unicode, uni_xlate, nls_io); 625 : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
614 626
615 if (!both) { 627 if (!both) {
616 fill_name = longname; 628 fill_name = longname;
@@ -640,7 +652,7 @@ EODir:
640FillFailed: 652FillFailed:
641 brelse(bh); 653 brelse(bh);
642 if (unicode) 654 if (unicode)
643 free_page((unsigned long)unicode); 655 __putname(unicode);
644out: 656out:
645 unlock_kernel(); 657 unlock_kernel();
646 return ret; 658 return ret;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 5fb366992b73..302e95c4af7e 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -450,7 +450,8 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
450 BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */ 450 BUG_ON(nr_cluster > (MAX_BUF_PER_PAGE / 2)); /* fixed limit */
451 451
452 lock_fat(sbi); 452 lock_fat(sbi);
453 if (sbi->free_clusters != -1 && sbi->free_clusters < nr_cluster) { 453 if (sbi->free_clusters != -1 && sbi->free_clus_valid &&
454 sbi->free_clusters < nr_cluster) {
454 unlock_fat(sbi); 455 unlock_fat(sbi);
455 return -ENOSPC; 456 return -ENOSPC;
456 } 457 }
@@ -504,6 +505,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
504 505
505 /* Couldn't allocate the free entries */ 506 /* Couldn't allocate the free entries */
506 sbi->free_clusters = 0; 507 sbi->free_clusters = 0;
508 sbi->free_clus_valid = 1;
507 sb->s_dirt = 1; 509 sb->s_dirt = 1;
508 err = -ENOSPC; 510 err = -ENOSPC;
509 511
@@ -544,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
544 goto error; 546 goto error;
545 } else if (cluster == FAT_ENT_FREE) { 547 } else if (cluster == FAT_ENT_FREE) {
546 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", 548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
547 __FUNCTION__); 549 __func__);
548 err = -EIO; 550 err = -EIO;
549 goto error; 551 goto error;
550 } 552 }
@@ -583,8 +585,6 @@ error:
583 brelse(bhs[i]); 585 brelse(bhs[i]);
584 unlock_fat(sbi); 586 unlock_fat(sbi);
585 587
586 fat_clusters_flush(sb);
587
588 return err; 588 return err;
589} 589}
590 590
@@ -615,7 +615,7 @@ int fat_count_free_clusters(struct super_block *sb)
615 int err = 0, free; 615 int err = 0, free;
616 616
617 lock_fat(sbi); 617 lock_fat(sbi);
618 if (sbi->free_clusters != -1) 618 if (sbi->free_clusters != -1 && sbi->free_clus_valid)
619 goto out; 619 goto out;
620 620
621 reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits; 621 reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
@@ -643,6 +643,7 @@ int fat_count_free_clusters(struct super_block *sb)
643 } while (fat_ent_next(sbi, &fatent)); 643 } while (fat_ent_next(sbi, &fatent));
644 } 644 }
645 sbi->free_clusters = free; 645 sbi->free_clusters = free;
646 sbi->free_clus_valid = 1;
646 sb->s_dirt = 1; 647 sb->s_dirt = 1;
647 fatent_brelse(&fatent); 648 fatent_brelse(&fatent);
648out: 649out:
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 2a3bed967041..27cc1164ec36 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -157,104 +157,6 @@ out:
157 return err; 157 return err;
158} 158}
159 159
160static int check_mode(const struct msdos_sb_info *sbi, mode_t mode)
161{
162 mode_t req = mode & ~S_IFMT;
163
164 /*
165 * Of the r and x bits, all (subject to umask) must be present. Of the
166 * w bits, either all (subject to umask) or none must be present.
167 */
168
169 if (S_ISREG(mode)) {
170 req &= ~sbi->options.fs_fmask;
171
172 if ((req & (S_IRUGO | S_IXUGO)) !=
173 ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_fmask))
174 return -EPERM;
175
176 if ((req & S_IWUGO) != 0 &&
177 (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_fmask))
178 return -EPERM;
179 } else if (S_ISDIR(mode)) {
180 req &= ~sbi->options.fs_dmask;
181
182 if ((req & (S_IRUGO | S_IXUGO)) !=
183 ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_dmask))
184 return -EPERM;
185
186 if ((req & S_IWUGO) != 0 &&
187 (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_dmask))
188 return -EPERM;
189 } else {
190 return -EPERM;
191 }
192
193 return 0;
194}
195
196int fat_notify_change(struct dentry *dentry, struct iattr *attr)
197{
198 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
199 struct inode *inode = dentry->d_inode;
200 int mask, error = 0;
201
202 lock_kernel();
203
204 /*
205 * Expand the file. Since inode_setattr() updates ->i_size
206 * before calling the ->truncate(), but FAT needs to fill the
207 * hole before it.
208 */
209 if (attr->ia_valid & ATTR_SIZE) {
210 if (attr->ia_size > inode->i_size) {
211 error = fat_cont_expand(inode, attr->ia_size);
212 if (error || attr->ia_valid == ATTR_SIZE)
213 goto out;
214 attr->ia_valid &= ~ATTR_SIZE;
215 }
216 }
217
218 error = inode_change_ok(inode, attr);
219 if (error) {
220 if (sbi->options.quiet)
221 error = 0;
222 goto out;
223 }
224 if (((attr->ia_valid & ATTR_UID) &&
225 (attr->ia_uid != sbi->options.fs_uid)) ||
226 ((attr->ia_valid & ATTR_GID) &&
227 (attr->ia_gid != sbi->options.fs_gid)))
228 error = -EPERM;
229
230 if (error) {
231 if (sbi->options.quiet)
232 error = 0;
233 goto out;
234 }
235
236 if (attr->ia_valid & ATTR_MODE) {
237 error = check_mode(sbi, attr->ia_mode);
238 if (error != 0 && !sbi->options.quiet)
239 goto out;
240 }
241
242 error = inode_setattr(inode, attr);
243 if (error)
244 goto out;
245
246 if (S_ISDIR(inode->i_mode))
247 mask = sbi->options.fs_dmask;
248 else
249 mask = sbi->options.fs_fmask;
250 inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
251out:
252 unlock_kernel();
253 return error;
254}
255
256EXPORT_SYMBOL_GPL(fat_notify_change);
257
258/* Free all clusters after the skip'th cluster. */ 160/* Free all clusters after the skip'th cluster. */
259static int fat_free(struct inode *inode, int skip) 161static int fat_free(struct inode *inode, int skip)
260{ 162{
@@ -306,7 +208,7 @@ static int fat_free(struct inode *inode, int skip)
306 } else if (ret == FAT_ENT_FREE) { 208 } else if (ret == FAT_ENT_FREE) {
307 fat_fs_panic(sb, 209 fat_fs_panic(sb,
308 "%s: invalid cluster chain (i_pos %lld)", 210 "%s: invalid cluster chain (i_pos %lld)",
309 __FUNCTION__, MSDOS_I(inode)->i_pos); 211 __func__, MSDOS_I(inode)->i_pos);
310 ret = -EIO; 212 ret = -EIO;
311 } else if (ret > 0) { 213 } else if (ret > 0) {
312 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); 214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
@@ -355,8 +257,112 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
355} 257}
356EXPORT_SYMBOL_GPL(fat_getattr); 258EXPORT_SYMBOL_GPL(fat_getattr);
357 259
260static int fat_check_mode(const struct msdos_sb_info *sbi, struct inode *inode,
261 mode_t mode)
262{
263 mode_t mask, req = mode & ~S_IFMT;
264
265 if (S_ISREG(mode))
266 mask = sbi->options.fs_fmask;
267 else
268 mask = sbi->options.fs_dmask;
269
270 /*
271 * Of the r and x bits, all (subject to umask) must be present. Of the
272 * w bits, either all (subject to umask) or none must be present.
273 */
274 req &= ~mask;
275 if ((req & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
276 return -EPERM;
277 if ((req & S_IWUGO) && ((req & S_IWUGO) != (S_IWUGO & ~mask)))
278 return -EPERM;
279
280 return 0;
281}
282
283static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
284{
285 mode_t allow_utime = sbi->options.allow_utime;
286
287 if (current->fsuid != inode->i_uid) {
288 if (in_group_p(inode->i_gid))
289 allow_utime >>= 3;
290 if (allow_utime & MAY_WRITE)
291 return 1;
292 }
293
294 /* use a default check */
295 return 0;
296}
297
298int fat_setattr(struct dentry *dentry, struct iattr *attr)
299{
300 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
301 struct inode *inode = dentry->d_inode;
302 int mask, error = 0;
303 unsigned int ia_valid;
304
305 lock_kernel();
306
307 /*
308 * Expand the file. Since inode_setattr() updates ->i_size
309 * before calling the ->truncate(), but FAT needs to fill the
310 * hole before it.
311 */
312 if (attr->ia_valid & ATTR_SIZE) {
313 if (attr->ia_size > inode->i_size) {
314 error = fat_cont_expand(inode, attr->ia_size);
315 if (error || attr->ia_valid == ATTR_SIZE)
316 goto out;
317 attr->ia_valid &= ~ATTR_SIZE;
318 }
319 }
320
321 /* Check for setting the inode time. */
322 ia_valid = attr->ia_valid;
323 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) {
324 if (fat_allow_set_time(sbi, inode))
325 attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET);
326 }
327
328 error = inode_change_ok(inode, attr);
329 attr->ia_valid = ia_valid;
330 if (error) {
331 if (sbi->options.quiet)
332 error = 0;
333 goto out;
334 }
335 if (((attr->ia_valid & ATTR_UID) &&
336 (attr->ia_uid != sbi->options.fs_uid)) ||
337 ((attr->ia_valid & ATTR_GID) &&
338 (attr->ia_gid != sbi->options.fs_gid)) ||
339 ((attr->ia_valid & ATTR_MODE) &&
340 fat_check_mode(sbi, inode, attr->ia_mode) < 0))
341 error = -EPERM;
342
343 if (error) {
344 if (sbi->options.quiet)
345 error = 0;
346 goto out;
347 }
348
349 error = inode_setattr(inode, attr);
350 if (error)
351 goto out;
352
353 if (S_ISDIR(inode->i_mode))
354 mask = sbi->options.fs_dmask;
355 else
356 mask = sbi->options.fs_fmask;
357 inode->i_mode &= S_IFMT | (S_IRWXUGO & ~mask);
358out:
359 unlock_kernel();
360 return error;
361}
362EXPORT_SYMBOL_GPL(fat_setattr);
363
358const struct inode_operations fat_file_inode_operations = { 364const struct inode_operations fat_file_inode_operations = {
359 .truncate = fat_truncate, 365 .truncate = fat_truncate,
360 .setattr = fat_notify_change, 366 .setattr = fat_setattr,
361 .getattr = fat_getattr, 367 .getattr = fat_getattr,
362}; 368};
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 53f3cf62b7c1..4e0a3dd9d677 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -433,11 +433,8 @@ EXPORT_SYMBOL_GPL(fat_build_inode);
433static void fat_delete_inode(struct inode *inode) 433static void fat_delete_inode(struct inode *inode)
434{ 434{
435 truncate_inode_pages(&inode->i_data, 0); 435 truncate_inode_pages(&inode->i_data, 0);
436 436 inode->i_size = 0;
437 if (!is_bad_inode(inode)) { 437 fat_truncate(inode);
438 inode->i_size = 0;
439 fat_truncate(inode);
440 }
441 clear_inode(inode); 438 clear_inode(inode);
442} 439}
443 440
@@ -445,8 +442,6 @@ static void fat_clear_inode(struct inode *inode)
445{ 442{
446 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 443 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
447 444
448 if (is_bad_inode(inode))
449 return;
450 lock_kernel(); 445 lock_kernel();
451 spin_lock(&sbi->inode_hash_lock); 446 spin_lock(&sbi->inode_hash_lock);
452 fat_cache_inval_inode(inode); 447 fat_cache_inval_inode(inode);
@@ -542,7 +537,7 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
542 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 537 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
543 538
544 /* If the count of free cluster is still unknown, counts it here. */ 539 /* If the count of free cluster is still unknown, counts it here. */
545 if (sbi->free_clusters == -1) { 540 if (sbi->free_clusters == -1 || !sbi->free_clus_valid) {
546 int err = fat_count_free_clusters(dentry->d_sb); 541 int err = fat_count_free_clusters(dentry->d_sb);
547 if (err) 542 if (err)
548 return err; 543 return err;
@@ -790,6 +785,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
790 seq_printf(m, ",gid=%u", opts->fs_gid); 785 seq_printf(m, ",gid=%u", opts->fs_gid);
791 seq_printf(m, ",fmask=%04o", opts->fs_fmask); 786 seq_printf(m, ",fmask=%04o", opts->fs_fmask);
792 seq_printf(m, ",dmask=%04o", opts->fs_dmask); 787 seq_printf(m, ",dmask=%04o", opts->fs_dmask);
788 if (opts->allow_utime)
789 seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
793 if (sbi->nls_disk) 790 if (sbi->nls_disk)
794 seq_printf(m, ",codepage=%s", sbi->nls_disk->charset); 791 seq_printf(m, ",codepage=%s", sbi->nls_disk->charset);
795 if (isvfat) { 792 if (isvfat) {
@@ -845,9 +842,9 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
845 842
846enum { 843enum {
847 Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid, 844 Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
848 Opt_umask, Opt_dmask, Opt_fmask, Opt_codepage, Opt_usefree, Opt_nocase, 845 Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
849 Opt_quiet, Opt_showexec, Opt_debug, Opt_immutable, 846 Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
850 Opt_dots, Opt_nodots, 847 Opt_immutable, Opt_dots, Opt_nodots,
851 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 848 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
852 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 849 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
853 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 850 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
@@ -866,6 +863,7 @@ static match_table_t fat_tokens = {
866 {Opt_umask, "umask=%o"}, 863 {Opt_umask, "umask=%o"},
867 {Opt_dmask, "dmask=%o"}, 864 {Opt_dmask, "dmask=%o"},
868 {Opt_fmask, "fmask=%o"}, 865 {Opt_fmask, "fmask=%o"},
866 {Opt_allow_utime, "allow_utime=%o"},
869 {Opt_codepage, "codepage=%u"}, 867 {Opt_codepage, "codepage=%u"},
870 {Opt_usefree, "usefree"}, 868 {Opt_usefree, "usefree"},
871 {Opt_nocase, "nocase"}, 869 {Opt_nocase, "nocase"},
@@ -937,6 +935,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
937 opts->fs_uid = current->uid; 935 opts->fs_uid = current->uid;
938 opts->fs_gid = current->gid; 936 opts->fs_gid = current->gid;
939 opts->fs_fmask = opts->fs_dmask = current->fs->umask; 937 opts->fs_fmask = opts->fs_dmask = current->fs->umask;
938 opts->allow_utime = -1;
940 opts->codepage = fat_default_codepage; 939 opts->codepage = fat_default_codepage;
941 opts->iocharset = fat_default_iocharset; 940 opts->iocharset = fat_default_iocharset;
942 if (is_vfat) 941 if (is_vfat)
@@ -1024,6 +1023,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1024 return 0; 1023 return 0;
1025 opts->fs_fmask = option; 1024 opts->fs_fmask = option;
1026 break; 1025 break;
1026 case Opt_allow_utime:
1027 if (match_octal(&args[0], &option))
1028 return 0;
1029 opts->allow_utime = option & (S_IWGRP | S_IWOTH);
1030 break;
1027 case Opt_codepage: 1031 case Opt_codepage:
1028 if (match_int(&args[0], &option)) 1032 if (match_int(&args[0], &option))
1029 return 0; 1033 return 0;
@@ -1106,6 +1110,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1106 " for FAT filesystems, filesystem will be case sensitive!\n"); 1110 " for FAT filesystems, filesystem will be case sensitive!\n");
1107 } 1111 }
1108 1112
1113 /* If user doesn't specify allow_utime, it's initialized from dmask. */
1114 if (opts->allow_utime == (unsigned short)-1)
1115 opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
1109 if (opts->unicode_xlate) 1116 if (opts->unicode_xlate)
1110 opts->utf8 = 0; 1117 opts->utf8 = 0;
1111 1118
@@ -1208,18 +1215,17 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1208 */ 1215 */
1209 1216
1210 media = b->media; 1217 media = b->media;
1211 if (!FAT_VALID_MEDIA(media)) { 1218 if (!fat_valid_media(media)) {
1212 if (!silent) 1219 if (!silent)
1213 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", 1220 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n",
1214 media); 1221 media);
1215 brelse(bh); 1222 brelse(bh);
1216 goto out_invalid; 1223 goto out_invalid;
1217 } 1224 }
1218 logical_sector_size = 1225 logical_sector_size = get_unaligned_le16(&b->sector_size);
1219 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
1220 if (!is_power_of_2(logical_sector_size) 1226 if (!is_power_of_2(logical_sector_size)
1221 || (logical_sector_size < 512) 1227 || (logical_sector_size < 512)
1222 || (PAGE_CACHE_SIZE < logical_sector_size)) { 1228 || (logical_sector_size > 4096)) {
1223 if (!silent) 1229 if (!silent)
1224 printk(KERN_ERR "FAT: bogus logical sector size %u\n", 1230 printk(KERN_ERR "FAT: bogus logical sector size %u\n",
1225 logical_sector_size); 1231 logical_sector_size);
@@ -1267,6 +1273,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1267 sbi->fat_length = le16_to_cpu(b->fat_length); 1273 sbi->fat_length = le16_to_cpu(b->fat_length);
1268 sbi->root_cluster = 0; 1274 sbi->root_cluster = 0;
1269 sbi->free_clusters = -1; /* Don't know yet */ 1275 sbi->free_clusters = -1; /* Don't know yet */
1276 sbi->free_clus_valid = 0;
1270 sbi->prev_free = FAT_START_ENT; 1277 sbi->prev_free = FAT_START_ENT;
1271 1278
1272 if (!sbi->fat_length && b->fat32_length) { 1279 if (!sbi->fat_length && b->fat32_length) {
@@ -1302,8 +1309,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1302 sbi->fsinfo_sector); 1309 sbi->fsinfo_sector);
1303 } else { 1310 } else {
1304 if (sbi->options.usefree) 1311 if (sbi->options.usefree)
1305 sbi->free_clusters = 1312 sbi->free_clus_valid = 1;
1306 le32_to_cpu(fsinfo->free_clusters); 1313 sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters);
1307 sbi->prev_free = le32_to_cpu(fsinfo->next_cluster); 1314 sbi->prev_free = le32_to_cpu(fsinfo->next_cluster);
1308 } 1315 }
1309 1316
@@ -1314,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1314 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; 1321 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
1315 1322
1316 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; 1323 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
1317 sbi->dir_entries = 1324 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1318 le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries));
1319 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1325 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1320 if (!silent) 1326 if (!silent)
1321 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1327 printk(KERN_ERR "FAT: bogus directroy-entries per block"
@@ -1327,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1327 rootdir_sectors = sbi->dir_entries 1333 rootdir_sectors = sbi->dir_entries
1328 * sizeof(struct msdos_dir_entry) / sb->s_blocksize; 1334 * sizeof(struct msdos_dir_entry) / sb->s_blocksize;
1329 sbi->data_start = sbi->dir_start + rootdir_sectors; 1335 sbi->data_start = sbi->dir_start + rootdir_sectors;
1330 total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); 1336 total_sectors = get_unaligned_le16(&b->sectors);
1331 if (total_sectors == 0) 1337 if (total_sectors == 0)
1332 total_sectors = le32_to_cpu(b->total_sect); 1338 total_sectors = le32_to_cpu(b->total_sect);
1333 1339
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 2b46064f66b2..50ab5eecb99b 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
50/* vxfs_fshead.c */ 50/* vxfs_fshead.c */
51extern int vxfs_read_fshead(struct super_block *); 51extern int vxfs_read_fshead(struct super_block *);
52 52
53/* vxfs_immed.c */
54extern const struct inode_operations vxfs_immed_symlink_iops;
55
53/* vxfs_inode.c */ 56/* vxfs_inode.c */
57extern const struct address_space_operations vxfs_immed_aops;
54extern struct kmem_cache *vxfs_inode_cachep; 58extern struct kmem_cache *vxfs_inode_cachep;
55extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); 59extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t);
56extern struct inode * vxfs_get_fake_inode(struct super_block *, 60extern struct inode * vxfs_get_fake_inode(struct super_block *,
@@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations;
69extern int vxfs_read_olt(struct super_block *, u_long); 73extern int vxfs_read_olt(struct super_block *, u_long);
70 74
71/* vxfs_subr.c */ 75/* vxfs_subr.c */
76extern const struct address_space_operations vxfs_aops;
72extern struct page * vxfs_get_page(struct address_space *, u_long); 77extern struct page * vxfs_get_page(struct address_space *, u_long);
73extern void vxfs_put_page(struct page *); 78extern void vxfs_put_page(struct page *);
74extern struct buffer_head * vxfs_bread(struct inode *, int); 79extern struct buffer_head * vxfs_bread(struct inode *, int);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8a5959a61ba9..c36aeaf92e41 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36 36
37#include "vxfs.h" 37#include "vxfs.h"
38#include "vxfs_extern.h"
38#include "vxfs_inode.h" 39#include "vxfs_inode.h"
39 40
40 41
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ad88d2364bc2..9f3f2ceb73f0 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,11 +41,6 @@
41#include "vxfs_extern.h" 41#include "vxfs_extern.h"
42 42
43 43
44extern const struct address_space_operations vxfs_aops;
45extern const struct address_space_operations vxfs_immed_aops;
46
47extern const struct inode_operations vxfs_immed_symlink_iops;
48
49struct kmem_cache *vxfs_inode_cachep; 44struct kmem_cache *vxfs_inode_cachep;
50 45
51 46
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 06557679ca41..ae45f77765c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,45 @@
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include "internal.h" 26#include "internal.h"
27 27
28
29/**
30 * writeback_acquire - attempt to get exclusive writeback access to a device
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */
41static int writeback_acquire(struct backing_dev_info *bdi)
42{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state);
44}
45
46/**
47 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure.
49 *
50 * Determine whether there is writeback in progress against a backing device.
51 */
52int writeback_in_progress(struct backing_dev_info *bdi)
53{
54 return test_bit(BDI_pdflush, &bdi->state);
55}
56
57/**
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{
63 BUG_ON(!writeback_in_progress(bdi));
64 clear_bit(BDI_pdflush, &bdi->state);
65}
66
28/** 67/**
29 * __mark_inode_dirty - internal function 68 * __mark_inode_dirty - internal function
30 * @inode: inode to mark 69 * @inode: inode to mark
@@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
747 786
748 return err; 787 return err;
749} 788}
750
751EXPORT_SYMBOL(generic_osync_inode); 789EXPORT_SYMBOL(generic_osync_inode);
752
753/**
754 * writeback_acquire - attempt to get exclusive writeback access to a device
755 * @bdi: the device's backing_dev_info structure
756 *
757 * It is a waste of resources to have more than one pdflush thread blocked on
758 * a single request queue. Exclusion at the request_queue level is obtained
759 * via a flag in the request_queue's backing_dev_info.state.
760 *
761 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
762 * unless they implement their own. Which is somewhat inefficient, as this
763 * may prevent concurrent writeback against multiple devices.
764 */
765int writeback_acquire(struct backing_dev_info *bdi)
766{
767 return !test_and_set_bit(BDI_pdflush, &bdi->state);
768}
769
770/**
771 * writeback_in_progress - determine whether there is writeback in progress
772 * @bdi: the device's backing_dev_info structure.
773 *
774 * Determine whether there is writeback in progress against a backing device.
775 */
776int writeback_in_progress(struct backing_dev_info *bdi)
777{
778 return test_bit(BDI_pdflush, &bdi->state);
779}
780
781/**
782 * writeback_release - relinquish exclusive writeback access against a device.
783 * @bdi: the device's backing_dev_info structure
784 */
785void writeback_release(struct backing_dev_info *bdi)
786{
787 BUG_ON(!writeback_in_progress(bdi));
788 clear_bit(BDI_pdflush, &bdi->state);
789}
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 105d4a271e07..4f3cab321415 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
117 117
118 parent = fuse_control_sb->s_root; 118 parent = fuse_control_sb->s_root;
119 inc_nlink(parent->d_inode); 119 inc_nlink(parent->d_inode);
120 sprintf(name, "%llu", (unsigned long long) fc->id); 120 sprintf(name, "%u", fc->dev);
121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, 121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
122 &simple_dir_inode_operations, 122 &simple_dir_inode_operations,
123 &simple_dir_operations); 123 &simple_dir_operations);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index af639807524e..87250b6a8682 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void)
47 return req; 47 return req;
48} 48}
49 49
50struct fuse_req *fuse_request_alloc_nofs(void)
51{
52 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
53 if (req)
54 fuse_request_init(req);
55 return req;
56}
57
50void fuse_request_free(struct fuse_req *req) 58void fuse_request_free(struct fuse_req *req)
51{ 59{
52 kmem_cache_free(fuse_req_cachep, req); 60 kmem_cache_free(fuse_req_cachep, req);
@@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
291 299
292static void wait_answer_interruptible(struct fuse_conn *fc, 300static void wait_answer_interruptible(struct fuse_conn *fc,
293 struct fuse_req *req) 301 struct fuse_req *req)
302 __releases(fc->lock) __acquires(fc->lock)
294{ 303{
295 if (signal_pending(current)) 304 if (signal_pending(current))
296 return; 305 return;
@@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
307 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 316 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
308} 317}
309 318
310/* Called with fc->lock held. Releases, and then reacquires it. */
311static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
320 __releases(fc->lock) __acquires(fc->lock)
312{ 321{
313 if (!fc->no_interrupt) { 322 if (!fc->no_interrupt) {
314 /* Any signal may interrupt this */ 323 /* Any signal may interrupt this */
@@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
430} 439}
431 440
432/* 441/*
442 * Called under fc->lock
443 *
444 * fc->connected must have been checked previously
445 */
446void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
447{
448 req->isreply = 1;
449 request_send_nowait_locked(fc, req);
450}
451
452/*
433 * Lock the request. Up to the next unlock_request() there mustn't be 453 * Lock the request. Up to the next unlock_request() there mustn't be
434 * anything that could cause a page-fault. If the request was already 454 * anything that could cause a page-fault. If the request was already
435 * aborted bail out. 455 * aborted bail out.
@@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
968 * locked). 988 * locked).
969 */ 989 */
970static void end_io_requests(struct fuse_conn *fc) 990static void end_io_requests(struct fuse_conn *fc)
991 __releases(fc->lock) __acquires(fc->lock)
971{ 992{
972 while (!list_empty(&fc->io)) { 993 while (!list_empty(&fc->io)) {
973 struct fuse_req *req = 994 struct fuse_req *req =
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c4807b3fc8a3..2060bf06b906 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
132 req->out.args[0].value = outarg; 132 req->out.args[0].value = outarg;
133} 133}
134 134
135static u64 fuse_get_attr_version(struct fuse_conn *fc) 135u64 fuse_get_attr_version(struct fuse_conn *fc)
136{ 136{
137 u64 curr_version; 137 u64 curr_version;
138 138
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1107} 1107}
1108 1108
1109/* 1109/*
1110 * Prevent concurrent writepages on inode
1111 *
1112 * This is done by adding a negative bias to the inode write counter
1113 * and waiting for all pending writes to finish.
1114 */
1115void fuse_set_nowrite(struct inode *inode)
1116{
1117 struct fuse_conn *fc = get_fuse_conn(inode);
1118 struct fuse_inode *fi = get_fuse_inode(inode);
1119
1120 BUG_ON(!mutex_is_locked(&inode->i_mutex));
1121
1122 spin_lock(&fc->lock);
1123 BUG_ON(fi->writectr < 0);
1124 fi->writectr += FUSE_NOWRITE;
1125 spin_unlock(&fc->lock);
1126 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1127}
1128
1129/*
1130 * Allow writepages on inode
1131 *
1132 * Remove the bias from the writecounter and send any queued
1133 * writepages.
1134 */
1135static void __fuse_release_nowrite(struct inode *inode)
1136{
1137 struct fuse_inode *fi = get_fuse_inode(inode);
1138
1139 BUG_ON(fi->writectr != FUSE_NOWRITE);
1140 fi->writectr = 0;
1141 fuse_flush_writepages(inode);
1142}
1143
1144void fuse_release_nowrite(struct inode *inode)
1145{
1146 struct fuse_conn *fc = get_fuse_conn(inode);
1147
1148 spin_lock(&fc->lock);
1149 __fuse_release_nowrite(inode);
1150 spin_unlock(&fc->lock);
1151}
1152
1153/*
1110 * Set attributes, and at the same time refresh them. 1154 * Set attributes, and at the same time refresh them.
1111 * 1155 *
1112 * Truncation is slightly complicated, because the 'truncate' request 1156 * Truncation is slightly complicated, because the 'truncate' request
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1122 struct fuse_req *req; 1166 struct fuse_req *req;
1123 struct fuse_setattr_in inarg; 1167 struct fuse_setattr_in inarg;
1124 struct fuse_attr_out outarg; 1168 struct fuse_attr_out outarg;
1169 bool is_truncate = false;
1170 loff_t oldsize;
1125 int err; 1171 int err;
1126 1172
1127 if (!fuse_allow_task(fc, current)) 1173 if (!fuse_allow_task(fc, current))
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1145 send_sig(SIGXFSZ, current, 0); 1191 send_sig(SIGXFSZ, current, 0);
1146 return -EFBIG; 1192 return -EFBIG;
1147 } 1193 }
1194 is_truncate = true;
1148 } 1195 }
1149 1196
1150 req = fuse_get_req(fc); 1197 req = fuse_get_req(fc);
1151 if (IS_ERR(req)) 1198 if (IS_ERR(req))
1152 return PTR_ERR(req); 1199 return PTR_ERR(req);
1153 1200
1201 if (is_truncate)
1202 fuse_set_nowrite(inode);
1203
1154 memset(&inarg, 0, sizeof(inarg)); 1204 memset(&inarg, 0, sizeof(inarg));
1155 memset(&outarg, 0, sizeof(outarg)); 1205 memset(&outarg, 0, sizeof(outarg));
1156 iattr_to_fattr(attr, &inarg); 1206 iattr_to_fattr(attr, &inarg);
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1181 if (err) { 1231 if (err) {
1182 if (err == -EINTR) 1232 if (err == -EINTR)
1183 fuse_invalidate_attr(inode); 1233 fuse_invalidate_attr(inode);
1184 return err; 1234 goto error;
1185 } 1235 }
1186 1236
1187 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 1237 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1188 make_bad_inode(inode); 1238 make_bad_inode(inode);
1189 return -EIO; 1239 err = -EIO;
1240 goto error;
1241 }
1242
1243 spin_lock(&fc->lock);
1244 fuse_change_attributes_common(inode, &outarg.attr,
1245 attr_timeout(&outarg));
1246 oldsize = inode->i_size;
1247 i_size_write(inode, outarg.attr.size);
1248
1249 if (is_truncate) {
1250 /* NOTE: this may release/reacquire fc->lock */
1251 __fuse_release_nowrite(inode);
1252 }
1253 spin_unlock(&fc->lock);
1254
1255 /*
1256 * Only call invalidate_inode_pages2() after removing
1257 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1258 */
1259 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1260 if (outarg.attr.size < oldsize)
1261 fuse_truncate(inode->i_mapping, outarg.attr.size);
1262 invalidate_inode_pages2(inode->i_mapping);
1190 } 1263 }
1191 1264
1192 fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
1193 return 0; 1265 return 0;
1266
1267error:
1268 if (is_truncate)
1269 fuse_release_nowrite(inode);
1270
1271 return err;
1194} 1272}
1195 1273
1196static int fuse_setattr(struct dentry *entry, struct iattr *attr) 1274static int fuse_setattr(struct dentry *entry, struct iattr *attr)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 676b0bc8a86d..f28cf8b46f80 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
210 return (u64) v0 + ((u64) v1 << 32); 210 return (u64) v0 + ((u64) v1 << 32);
211} 211}
212 212
213/*
214 * Check if page is under writeback
215 *
216 * This is currently done by walking the list of writepage requests
217 * for the inode, which can be pretty inefficient.
218 */
219static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
220{
221 struct fuse_conn *fc = get_fuse_conn(inode);
222 struct fuse_inode *fi = get_fuse_inode(inode);
223 struct fuse_req *req;
224 bool found = false;
225
226 spin_lock(&fc->lock);
227 list_for_each_entry(req, &fi->writepages, writepages_entry) {
228 pgoff_t curr_index;
229
230 BUG_ON(req->inode != inode);
231 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
232 if (curr_index == index) {
233 found = true;
234 break;
235 }
236 }
237 spin_unlock(&fc->lock);
238
239 return found;
240}
241
242/*
243 * Wait for page writeback to be completed.
244 *
245 * Since fuse doesn't rely on the VM writeback tracking, this has to
246 * use some other means.
247 */
248static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
249{
250 struct fuse_inode *fi = get_fuse_inode(inode);
251
252 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
253 return 0;
254}
255
213static int fuse_flush(struct file *file, fl_owner_t id) 256static int fuse_flush(struct file *file, fl_owner_t id)
214{ 257{
215 struct inode *inode = file->f_path.dentry->d_inode; 258 struct inode *inode = file->f_path.dentry->d_inode;
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id)
245 return err; 288 return err;
246} 289}
247 290
291/*
292 * Wait for all pending writepages on the inode to finish.
293 *
294 * This is currently done by blocking further writes with FUSE_NOWRITE
295 * and waiting for all sent writes to complete.
296 *
297 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
298 * could conflict with truncation.
299 */
300static void fuse_sync_writes(struct inode *inode)
301{
302 fuse_set_nowrite(inode);
303 fuse_release_nowrite(inode);
304}
305
248int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, 306int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
249 int isdir) 307 int isdir)
250{ 308{
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
261 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 319 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
262 return 0; 320 return 0;
263 321
322 /*
323 * Start writeback against all dirty pages of the inode, then
324 * wait for all outstanding writes, before sending the FSYNC
325 * request.
326 */
327 err = write_inode_now(inode, 0);
328 if (err)
329 return err;
330
331 fuse_sync_writes(inode);
332
264 req = fuse_get_req(fc); 333 req = fuse_get_req(fc);
265 if (IS_ERR(req)) 334 if (IS_ERR(req))
266 return PTR_ERR(req); 335 return PTR_ERR(req);
@@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
294void fuse_read_fill(struct fuse_req *req, struct file *file, 363void fuse_read_fill(struct fuse_req *req, struct file *file,
295 struct inode *inode, loff_t pos, size_t count, int opcode) 364 struct inode *inode, loff_t pos, size_t count, int opcode)
296{ 365{
297 struct fuse_read_in *inarg = &req->misc.read_in; 366 struct fuse_read_in *inarg = &req->misc.read.in;
298 struct fuse_file *ff = file->private_data; 367 struct fuse_file *ff = file->private_data;
299 368
300 inarg->fh = ff->fh; 369 inarg->fh = ff->fh;
@@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
320 389
321 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 390 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
322 if (owner != NULL) { 391 if (owner != NULL) {
323 struct fuse_read_in *inarg = &req->misc.read_in; 392 struct fuse_read_in *inarg = &req->misc.read.in;
324 393
325 inarg->read_flags |= FUSE_READ_LOCKOWNER; 394 inarg->read_flags |= FUSE_READ_LOCKOWNER;
326 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 395 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
@@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
329 return req->out.args[0].size; 398 return req->out.args[0].size;
330} 399}
331 400
401static void fuse_read_update_size(struct inode *inode, loff_t size,
402 u64 attr_ver)
403{
404 struct fuse_conn *fc = get_fuse_conn(inode);
405 struct fuse_inode *fi = get_fuse_inode(inode);
406
407 spin_lock(&fc->lock);
408 if (attr_ver == fi->attr_version && size < inode->i_size) {
409 fi->attr_version = ++fc->attr_version;
410 i_size_write(inode, size);
411 }
412 spin_unlock(&fc->lock);
413}
414
332static int fuse_readpage(struct file *file, struct page *page) 415static int fuse_readpage(struct file *file, struct page *page)
333{ 416{
334 struct inode *inode = page->mapping->host; 417 struct inode *inode = page->mapping->host;
335 struct fuse_conn *fc = get_fuse_conn(inode); 418 struct fuse_conn *fc = get_fuse_conn(inode);
336 struct fuse_req *req; 419 struct fuse_req *req;
420 size_t num_read;
421 loff_t pos = page_offset(page);
422 size_t count = PAGE_CACHE_SIZE;
423 u64 attr_ver;
337 int err; 424 int err;
338 425
339 err = -EIO; 426 err = -EIO;
340 if (is_bad_inode(inode)) 427 if (is_bad_inode(inode))
341 goto out; 428 goto out;
342 429
430 /*
431 * Page writeback can extend beyond the liftime of the
432 * page-cache page, so make sure we read a properly synced
433 * page.
434 */
435 fuse_wait_on_page_writeback(inode, page->index);
436
343 req = fuse_get_req(fc); 437 req = fuse_get_req(fc);
344 err = PTR_ERR(req); 438 err = PTR_ERR(req);
345 if (IS_ERR(req)) 439 if (IS_ERR(req))
346 goto out; 440 goto out;
347 441
442 attr_ver = fuse_get_attr_version(fc);
443
348 req->out.page_zeroing = 1; 444 req->out.page_zeroing = 1;
349 req->num_pages = 1; 445 req->num_pages = 1;
350 req->pages[0] = page; 446 req->pages[0] = page;
351 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, 447 num_read = fuse_send_read(req, file, inode, pos, count, NULL);
352 NULL);
353 err = req->out.h.error; 448 err = req->out.h.error;
354 fuse_put_request(fc, req); 449 fuse_put_request(fc, req);
355 if (!err) 450
451 if (!err) {
452 /*
453 * Short read means EOF. If file size is larger, truncate it
454 */
455 if (num_read < count)
456 fuse_read_update_size(inode, pos + num_read, attr_ver);
457
356 SetPageUptodate(page); 458 SetPageUptodate(page);
459 }
460
357 fuse_invalidate_attr(inode); /* atime changed */ 461 fuse_invalidate_attr(inode); /* atime changed */
358 out: 462 out:
359 unlock_page(page); 463 unlock_page(page);
@@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page)
363static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) 467static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
364{ 468{
365 int i; 469 int i;
470 size_t count = req->misc.read.in.size;
471 size_t num_read = req->out.args[0].size;
472 struct inode *inode = req->pages[0]->mapping->host;
473
474 /*
475 * Short read means EOF. If file size is larger, truncate it
476 */
477 if (!req->out.h.error && num_read < count) {
478 loff_t pos = page_offset(req->pages[0]) + num_read;
479 fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
480 }
366 481
367 fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ 482 fuse_invalidate_attr(inode); /* atime changed */
368 483
369 for (i = 0; i < req->num_pages; i++) { 484 for (i = 0; i < req->num_pages; i++) {
370 struct page *page = req->pages[i]; 485 struct page *page = req->pages[i];
@@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
387 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 502 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
388 req->out.page_zeroing = 1; 503 req->out.page_zeroing = 1;
389 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 504 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
505 req->misc.read.attr_ver = fuse_get_attr_version(fc);
390 if (fc->async_read) { 506 if (fc->async_read) {
391 struct fuse_file *ff = file->private_data; 507 struct fuse_file *ff = file->private_data;
392 req->ff = fuse_file_get(ff); 508 req->ff = fuse_file_get(ff);
@@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page)
411 struct inode *inode = data->inode; 527 struct inode *inode = data->inode;
412 struct fuse_conn *fc = get_fuse_conn(inode); 528 struct fuse_conn *fc = get_fuse_conn(inode);
413 529
530 fuse_wait_on_page_writeback(inode, page->index);
531
414 if (req->num_pages && 532 if (req->num_pages &&
415 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 533 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
416 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 534 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
@@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
477} 595}
478 596
479static void fuse_write_fill(struct fuse_req *req, struct file *file, 597static void fuse_write_fill(struct fuse_req *req, struct file *file,
480 struct inode *inode, loff_t pos, size_t count, 598 struct fuse_file *ff, struct inode *inode,
481 int writepage) 599 loff_t pos, size_t count, int writepage)
482{ 600{
483 struct fuse_conn *fc = get_fuse_conn(inode); 601 struct fuse_conn *fc = get_fuse_conn(inode);
484 struct fuse_file *ff = file->private_data;
485 struct fuse_write_in *inarg = &req->misc.write.in; 602 struct fuse_write_in *inarg = &req->misc.write.in;
486 struct fuse_write_out *outarg = &req->misc.write.out; 603 struct fuse_write_out *outarg = &req->misc.write.out;
487 604
@@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
490 inarg->offset = pos; 607 inarg->offset = pos;
491 inarg->size = count; 608 inarg->size = count;
492 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; 609 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
493 inarg->flags = file->f_flags; 610 inarg->flags = file ? file->f_flags : 0;
494 req->in.h.opcode = FUSE_WRITE; 611 req->in.h.opcode = FUSE_WRITE;
495 req->in.h.nodeid = get_node_id(inode); 612 req->in.h.nodeid = get_node_id(inode);
496 req->in.argpages = 1; 613 req->in.argpages = 1;
@@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
511 fl_owner_t owner) 628 fl_owner_t owner)
512{ 629{
513 struct fuse_conn *fc = get_fuse_conn(inode); 630 struct fuse_conn *fc = get_fuse_conn(inode);
514 fuse_write_fill(req, file, inode, pos, count, 0); 631 fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
515 if (owner != NULL) { 632 if (owner != NULL) {
516 struct fuse_write_in *inarg = &req->misc.write.in; 633 struct fuse_write_in *inarg = &req->misc.write.in;
517 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 634 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
533 return 0; 650 return 0;
534} 651}
535 652
653static void fuse_write_update_size(struct inode *inode, loff_t pos)
654{
655 struct fuse_conn *fc = get_fuse_conn(inode);
656 struct fuse_inode *fi = get_fuse_inode(inode);
657
658 spin_lock(&fc->lock);
659 fi->attr_version = ++fc->attr_version;
660 if (pos > inode->i_size)
661 i_size_write(inode, pos);
662 spin_unlock(&fc->lock);
663}
664
536static int fuse_buffered_write(struct file *file, struct inode *inode, 665static int fuse_buffered_write(struct file *file, struct inode *inode,
537 loff_t pos, unsigned count, struct page *page) 666 loff_t pos, unsigned count, struct page *page)
538{ 667{
539 int err; 668 int err;
540 size_t nres; 669 size_t nres;
541 struct fuse_conn *fc = get_fuse_conn(inode); 670 struct fuse_conn *fc = get_fuse_conn(inode);
542 struct fuse_inode *fi = get_fuse_inode(inode);
543 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 671 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
544 struct fuse_req *req; 672 struct fuse_req *req;
545 673
546 if (is_bad_inode(inode)) 674 if (is_bad_inode(inode))
547 return -EIO; 675 return -EIO;
548 676
677 /*
678 * Make sure writepages on the same page are not mixed up with
679 * plain writes.
680 */
681 fuse_wait_on_page_writeback(inode, page->index);
682
549 req = fuse_get_req(fc); 683 req = fuse_get_req(fc);
550 if (IS_ERR(req)) 684 if (IS_ERR(req))
551 return PTR_ERR(req); 685 return PTR_ERR(req);
@@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
560 err = -EIO; 694 err = -EIO;
561 if (!err) { 695 if (!err) {
562 pos += nres; 696 pos += nres;
563 spin_lock(&fc->lock); 697 fuse_write_update_size(inode, pos);
564 fi->attr_version = ++fc->attr_version;
565 if (pos > inode->i_size)
566 i_size_write(inode, pos);
567 spin_unlock(&fc->lock);
568
569 if (count == PAGE_CACHE_SIZE) 698 if (count == PAGE_CACHE_SIZE)
570 SetPageUptodate(page); 699 SetPageUptodate(page);
571 } 700 }
@@ -588,6 +717,198 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
588 return res; 717 return res;
589} 718}
590 719
720static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
721 struct inode *inode, loff_t pos,
722 size_t count)
723{
724 size_t res;
725 unsigned offset;
726 unsigned i;
727
728 for (i = 0; i < req->num_pages; i++)
729 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
730
731 res = fuse_send_write(req, file, inode, pos, count, NULL);
732
733 offset = req->page_offset;
734 count = res;
735 for (i = 0; i < req->num_pages; i++) {
736 struct page *page = req->pages[i];
737
738 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
739 SetPageUptodate(page);
740
741 if (count > PAGE_CACHE_SIZE - offset)
742 count -= PAGE_CACHE_SIZE - offset;
743 else
744 count = 0;
745 offset = 0;
746
747 unlock_page(page);
748 page_cache_release(page);
749 }
750
751 return res;
752}
753
754static ssize_t fuse_fill_write_pages(struct fuse_req *req,
755 struct address_space *mapping,
756 struct iov_iter *ii, loff_t pos)
757{
758 struct fuse_conn *fc = get_fuse_conn(mapping->host);
759 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
760 size_t count = 0;
761 int err;
762
763 req->page_offset = offset;
764
765 do {
766 size_t tmp;
767 struct page *page;
768 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
769 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
770 iov_iter_count(ii));
771
772 bytes = min_t(size_t, bytes, fc->max_write - count);
773
774 again:
775 err = -EFAULT;
776 if (iov_iter_fault_in_readable(ii, bytes))
777 break;
778
779 err = -ENOMEM;
780 page = __grab_cache_page(mapping, index);
781 if (!page)
782 break;
783
784 pagefault_disable();
785 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
786 pagefault_enable();
787 flush_dcache_page(page);
788
789 if (!tmp) {
790 unlock_page(page);
791 page_cache_release(page);
792 bytes = min(bytes, iov_iter_single_seg_count(ii));
793 goto again;
794 }
795
796 err = 0;
797 req->pages[req->num_pages] = page;
798 req->num_pages++;
799
800 iov_iter_advance(ii, tmp);
801 count += tmp;
802 pos += tmp;
803 offset += tmp;
804 if (offset == PAGE_CACHE_SIZE)
805 offset = 0;
806
807 } while (iov_iter_count(ii) && count < fc->max_write &&
808 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
809
810 return count > 0 ? count : err;
811}
812
813static ssize_t fuse_perform_write(struct file *file,
814 struct address_space *mapping,
815 struct iov_iter *ii, loff_t pos)
816{
817 struct inode *inode = mapping->host;
818 struct fuse_conn *fc = get_fuse_conn(inode);
819 int err = 0;
820 ssize_t res = 0;
821
822 if (is_bad_inode(inode))
823 return -EIO;
824
825 do {
826 struct fuse_req *req;
827 ssize_t count;
828
829 req = fuse_get_req(fc);
830 if (IS_ERR(req)) {
831 err = PTR_ERR(req);
832 break;
833 }
834
835 count = fuse_fill_write_pages(req, mapping, ii, pos);
836 if (count <= 0) {
837 err = count;
838 } else {
839 size_t num_written;
840
841 num_written = fuse_send_write_pages(req, file, inode,
842 pos, count);
843 err = req->out.h.error;
844 if (!err) {
845 res += num_written;
846 pos += num_written;
847
848 /* break out of the loop on short write */
849 if (num_written != count)
850 err = -EIO;
851 }
852 }
853 fuse_put_request(fc, req);
854 } while (!err && iov_iter_count(ii));
855
856 if (res > 0)
857 fuse_write_update_size(inode, pos);
858
859 fuse_invalidate_attr(inode);
860
861 return res > 0 ? res : err;
862}
863
864static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
865 unsigned long nr_segs, loff_t pos)
866{
867 struct file *file = iocb->ki_filp;
868 struct address_space *mapping = file->f_mapping;
869 size_t count = 0;
870 ssize_t written = 0;
871 struct inode *inode = mapping->host;
872 ssize_t err;
873 struct iov_iter i;
874
875 WARN_ON(iocb->ki_pos != pos);
876
877 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
878 if (err)
879 return err;
880
881 mutex_lock(&inode->i_mutex);
882 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
883
884 /* We can write back this queue in page reclaim */
885 current->backing_dev_info = mapping->backing_dev_info;
886
887 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
888 if (err)
889 goto out;
890
891 if (count == 0)
892 goto out;
893
894 err = remove_suid(file->f_path.dentry);
895 if (err)
896 goto out;
897
898 file_update_time(file);
899
900 iov_iter_init(&i, iov, nr_segs, count, 0);
901 written = fuse_perform_write(file, mapping, &i, pos);
902 if (written >= 0)
903 iocb->ki_pos = pos + written;
904
905out:
906 current->backing_dev_info = NULL;
907 mutex_unlock(&inode->i_mutex);
908
909 return written ? written : err;
910}
911
591static void fuse_release_user_pages(struct fuse_req *req, int write) 912static void fuse_release_user_pages(struct fuse_req *req, int write)
592{ 913{
593 unsigned i; 914 unsigned i;
@@ -613,7 +934,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
613 934
614 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 935 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
615 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 936 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
616 npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); 937 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
617 down_read(&current->mm->mmap_sem); 938 down_read(&current->mm->mmap_sem);
618 npages = get_user_pages(current, current->mm, user_addr, npages, write, 939 npages = get_user_pages(current, current->mm, user_addr, npages, write,
619 0, req->pages, NULL); 940 0, req->pages, NULL);
@@ -645,14 +966,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
645 966
646 while (count) { 967 while (count) {
647 size_t nres; 968 size_t nres;
648 size_t nbytes = min(count, nmax); 969 size_t nbytes_limit = min(count, nmax);
649 int err = fuse_get_user_pages(req, buf, nbytes, !write); 970 size_t nbytes;
971 int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
650 if (err) { 972 if (err) {
651 res = err; 973 res = err;
652 break; 974 break;
653 } 975 }
654 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 976 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
655 nbytes = min(count, nbytes); 977 nbytes = min(nbytes_limit, nbytes);
656 if (write) 978 if (write)
657 nres = fuse_send_write(req, file, inode, pos, nbytes, 979 nres = fuse_send_write(req, file, inode, pos, nbytes,
658 current->files); 980 current->files);
@@ -683,12 +1005,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
683 } 1005 }
684 fuse_put_request(fc, req); 1006 fuse_put_request(fc, req);
685 if (res > 0) { 1007 if (res > 0) {
686 if (write) { 1008 if (write)
687 spin_lock(&fc->lock); 1009 fuse_write_update_size(inode, pos);
688 if (pos > inode->i_size)
689 i_size_write(inode, pos);
690 spin_unlock(&fc->lock);
691 }
692 *ppos = pos; 1010 *ppos = pos;
693 } 1011 }
694 fuse_invalidate_attr(inode); 1012 fuse_invalidate_attr(inode);
@@ -716,21 +1034,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
716 return res; 1034 return res;
717} 1035}
718 1036
719static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1037static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
720{ 1038{
721 if ((vma->vm_flags & VM_SHARED)) { 1039 __free_page(req->pages[0]);
722 if ((vma->vm_flags & VM_WRITE)) 1040 fuse_file_put(req->ff);
723 return -ENODEV; 1041 fuse_put_request(fc, req);
724 else 1042}
725 vma->vm_flags &= ~VM_MAYWRITE; 1043
1044static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1045{
1046 struct inode *inode = req->inode;
1047 struct fuse_inode *fi = get_fuse_inode(inode);
1048 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1049
1050 list_del(&req->writepages_entry);
1051 dec_bdi_stat(bdi, BDI_WRITEBACK);
1052 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
1053 bdi_writeout_inc(bdi);
1054 wake_up(&fi->page_waitq);
1055}
1056
1057/* Called under fc->lock, may release and reacquire it */
1058static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1059{
1060 struct fuse_inode *fi = get_fuse_inode(req->inode);
1061 loff_t size = i_size_read(req->inode);
1062 struct fuse_write_in *inarg = &req->misc.write.in;
1063
1064 if (!fc->connected)
1065 goto out_free;
1066
1067 if (inarg->offset + PAGE_CACHE_SIZE <= size) {
1068 inarg->size = PAGE_CACHE_SIZE;
1069 } else if (inarg->offset < size) {
1070 inarg->size = size & (PAGE_CACHE_SIZE - 1);
1071 } else {
1072 /* Got truncated off completely */
1073 goto out_free;
1074 }
1075
1076 req->in.args[1].size = inarg->size;
1077 fi->writectr++;
1078 request_send_background_locked(fc, req);
1079 return;
1080
1081 out_free:
1082 fuse_writepage_finish(fc, req);
1083 spin_unlock(&fc->lock);
1084 fuse_writepage_free(fc, req);
1085 spin_lock(&fc->lock);
1086}
1087
1088/*
1089 * If fi->writectr is positive (no truncate or fsync going on) send
1090 * all queued writepage requests.
1091 *
1092 * Called with fc->lock
1093 */
1094void fuse_flush_writepages(struct inode *inode)
1095{
1096 struct fuse_conn *fc = get_fuse_conn(inode);
1097 struct fuse_inode *fi = get_fuse_inode(inode);
1098 struct fuse_req *req;
1099
1100 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1101 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1102 list_del_init(&req->list);
1103 fuse_send_writepage(fc, req);
1104 }
1105}
1106
1107static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1108{
1109 struct inode *inode = req->inode;
1110 struct fuse_inode *fi = get_fuse_inode(inode);
1111
1112 mapping_set_error(inode->i_mapping, req->out.h.error);
1113 spin_lock(&fc->lock);
1114 fi->writectr--;
1115 fuse_writepage_finish(fc, req);
1116 spin_unlock(&fc->lock);
1117 fuse_writepage_free(fc, req);
1118}
1119
1120static int fuse_writepage_locked(struct page *page)
1121{
1122 struct address_space *mapping = page->mapping;
1123 struct inode *inode = mapping->host;
1124 struct fuse_conn *fc = get_fuse_conn(inode);
1125 struct fuse_inode *fi = get_fuse_inode(inode);
1126 struct fuse_req *req;
1127 struct fuse_file *ff;
1128 struct page *tmp_page;
1129
1130 set_page_writeback(page);
1131
1132 req = fuse_request_alloc_nofs();
1133 if (!req)
1134 goto err;
1135
1136 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1137 if (!tmp_page)
1138 goto err_free;
1139
1140 spin_lock(&fc->lock);
1141 BUG_ON(list_empty(&fi->write_files));
1142 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
1143 req->ff = fuse_file_get(ff);
1144 spin_unlock(&fc->lock);
1145
1146 fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
1147
1148 copy_highpage(tmp_page, page);
1149 req->num_pages = 1;
1150 req->pages[0] = tmp_page;
1151 req->page_offset = 0;
1152 req->end = fuse_writepage_end;
1153 req->inode = inode;
1154
1155 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
1156 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1157 end_page_writeback(page);
1158
1159 spin_lock(&fc->lock);
1160 list_add(&req->writepages_entry, &fi->writepages);
1161 list_add_tail(&req->list, &fi->queued_writes);
1162 fuse_flush_writepages(inode);
1163 spin_unlock(&fc->lock);
1164
1165 return 0;
1166
1167err_free:
1168 fuse_request_free(req);
1169err:
1170 end_page_writeback(page);
1171 return -ENOMEM;
1172}
1173
1174static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1175{
1176 int err;
1177
1178 err = fuse_writepage_locked(page);
1179 unlock_page(page);
1180
1181 return err;
1182}
1183
1184static int fuse_launder_page(struct page *page)
1185{
1186 int err = 0;
1187 if (clear_page_dirty_for_io(page)) {
1188 struct inode *inode = page->mapping->host;
1189 err = fuse_writepage_locked(page);
1190 if (!err)
1191 fuse_wait_on_page_writeback(inode, page->index);
726 } 1192 }
727 return generic_file_mmap(file, vma); 1193 return err;
728} 1194}
729 1195
730static int fuse_set_page_dirty(struct page *page) 1196/*
1197 * Write back dirty pages now, because there may not be any suitable
1198 * open files later
1199 */
1200static void fuse_vma_close(struct vm_area_struct *vma)
731{ 1201{
732 printk("fuse_set_page_dirty: should not happen\n"); 1202 filemap_write_and_wait(vma->vm_file->f_mapping);
733 dump_stack(); 1203}
1204
1205/*
1206 * Wait for writeback against this page to complete before allowing it
1207 * to be marked dirty again, and hence written back again, possibly
1208 * before the previous writepage completed.
1209 *
1210 * Block here, instead of in ->writepage(), so that the userspace fs
1211 * can only block processes actually operating on the filesystem.
1212 *
1213 * Otherwise unprivileged userspace fs would be able to block
1214 * unrelated:
1215 *
1216 * - page migration
1217 * - sync(2)
1218 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1219 */
1220static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1221{
1222 /*
1223 * Don't use page->mapping as it may become NULL from a
1224 * concurrent truncate.
1225 */
1226 struct inode *inode = vma->vm_file->f_mapping->host;
1227
1228 fuse_wait_on_page_writeback(inode, page->index);
1229 return 0;
1230}
1231
1232static struct vm_operations_struct fuse_file_vm_ops = {
1233 .close = fuse_vma_close,
1234 .fault = filemap_fault,
1235 .page_mkwrite = fuse_page_mkwrite,
1236};
1237
1238static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
1239{
1240 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1241 struct inode *inode = file->f_dentry->d_inode;
1242 struct fuse_conn *fc = get_fuse_conn(inode);
1243 struct fuse_inode *fi = get_fuse_inode(inode);
1244 struct fuse_file *ff = file->private_data;
1245 /*
1246 * file may be written through mmap, so chain it onto the
1247 * inodes's write_file list
1248 */
1249 spin_lock(&fc->lock);
1250 if (list_empty(&ff->write_entry))
1251 list_add(&ff->write_entry, &fi->write_files);
1252 spin_unlock(&fc->lock);
1253 }
1254 file_accessed(file);
1255 vma->vm_ops = &fuse_file_vm_ops;
734 return 0; 1256 return 0;
735} 1257}
736 1258
@@ -909,12 +1431,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
909 return err ? 0 : outarg.block; 1431 return err ? 0 : outarg.block;
910} 1432}
911 1433
1434static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1435{
1436 loff_t retval;
1437 struct inode *inode = file->f_path.dentry->d_inode;
1438
1439 mutex_lock(&inode->i_mutex);
1440 switch (origin) {
1441 case SEEK_END:
1442 offset += i_size_read(inode);
1443 break;
1444 case SEEK_CUR:
1445 offset += file->f_pos;
1446 }
1447 retval = -EINVAL;
1448 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
1449 if (offset != file->f_pos) {
1450 file->f_pos = offset;
1451 file->f_version = 0;
1452 }
1453 retval = offset;
1454 }
1455 mutex_unlock(&inode->i_mutex);
1456 return retval;
1457}
1458
912static const struct file_operations fuse_file_operations = { 1459static const struct file_operations fuse_file_operations = {
913 .llseek = generic_file_llseek, 1460 .llseek = fuse_file_llseek,
914 .read = do_sync_read, 1461 .read = do_sync_read,
915 .aio_read = fuse_file_aio_read, 1462 .aio_read = fuse_file_aio_read,
916 .write = do_sync_write, 1463 .write = do_sync_write,
917 .aio_write = generic_file_aio_write, 1464 .aio_write = fuse_file_aio_write,
918 .mmap = fuse_file_mmap, 1465 .mmap = fuse_file_mmap,
919 .open = fuse_open, 1466 .open = fuse_open,
920 .flush = fuse_flush, 1467 .flush = fuse_flush,
@@ -926,7 +1473,7 @@ static const struct file_operations fuse_file_operations = {
926}; 1473};
927 1474
928static const struct file_operations fuse_direct_io_file_operations = { 1475static const struct file_operations fuse_direct_io_file_operations = {
929 .llseek = generic_file_llseek, 1476 .llseek = fuse_file_llseek,
930 .read = fuse_direct_read, 1477 .read = fuse_direct_read,
931 .write = fuse_direct_write, 1478 .write = fuse_direct_write,
932 .open = fuse_open, 1479 .open = fuse_open,
@@ -940,10 +1487,12 @@ static const struct file_operations fuse_direct_io_file_operations = {
940 1487
941static const struct address_space_operations fuse_file_aops = { 1488static const struct address_space_operations fuse_file_aops = {
942 .readpage = fuse_readpage, 1489 .readpage = fuse_readpage,
1490 .writepage = fuse_writepage,
1491 .launder_page = fuse_launder_page,
943 .write_begin = fuse_write_begin, 1492 .write_begin = fuse_write_begin,
944 .write_end = fuse_write_end, 1493 .write_end = fuse_write_end,
945 .readpages = fuse_readpages, 1494 .readpages = fuse_readpages,
946 .set_page_dirty = fuse_set_page_dirty, 1495 .set_page_dirty = __set_page_dirty_nobuffers,
947 .bmap = fuse_bmap, 1496 .bmap = fuse_bmap,
948}; 1497};
949 1498
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 67aaf6ee38ea..dadffa21a206 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/rwsem.h>
18 19
19/** Max number of pages that can be used in a single read request */ 20/** Max number of pages that can be used in a single read request */
20#define FUSE_MAX_PAGES_PER_REQ 32 21#define FUSE_MAX_PAGES_PER_REQ 32
@@ -25,6 +26,9 @@
25/** Congestion starts at 75% of maximum */ 26/** Congestion starts at 75% of maximum */
26#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) 27#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
27 28
29/** Bias for fi->writectr, meaning new writepages must not be sent */
30#define FUSE_NOWRITE INT_MIN
31
28/** It could be as large as PATH_MAX, but would that have any uses? */ 32/** It could be as large as PATH_MAX, but would that have any uses? */
29#define FUSE_NAME_MAX 1024 33#define FUSE_NAME_MAX 1024
30 34
@@ -73,6 +77,19 @@ struct fuse_inode {
73 77
74 /** Files usable in writepage. Protected by fc->lock */ 78 /** Files usable in writepage. Protected by fc->lock */
75 struct list_head write_files; 79 struct list_head write_files;
80
81 /** Writepages pending on truncate or fsync */
82 struct list_head queued_writes;
83
84 /** Number of sent writes, a negative bias (FUSE_NOWRITE)
85 * means more writes are blocked */
86 int writectr;
87
88 /** Waitq for writepage completion */
89 wait_queue_head_t page_waitq;
90
91 /** List of writepage requestst (pending or sent) */
92 struct list_head writepages;
76}; 93};
77 94
78/** FUSE specific file data */ 95/** FUSE specific file data */
@@ -222,7 +239,10 @@ struct fuse_req {
222 } release; 239 } release;
223 struct fuse_init_in init_in; 240 struct fuse_init_in init_in;
224 struct fuse_init_out init_out; 241 struct fuse_init_out init_out;
225 struct fuse_read_in read_in; 242 struct {
243 struct fuse_read_in in;
244 u64 attr_ver;
245 } read;
226 struct { 246 struct {
227 struct fuse_write_in in; 247 struct fuse_write_in in;
228 struct fuse_write_out out; 248 struct fuse_write_out out;
@@ -242,6 +262,12 @@ struct fuse_req {
242 /** File used in the request (or NULL) */ 262 /** File used in the request (or NULL) */
243 struct fuse_file *ff; 263 struct fuse_file *ff;
244 264
265 /** Inode used in the request or NULL */
266 struct inode *inode;
267
268 /** Link on fi->writepages */
269 struct list_head writepages_entry;
270
245 /** Request completion callback */ 271 /** Request completion callback */
246 void (*end)(struct fuse_conn *, struct fuse_req *); 272 void (*end)(struct fuse_conn *, struct fuse_req *);
247 273
@@ -390,8 +416,8 @@ struct fuse_conn {
390 /** Entry on the fuse_conn_list */ 416 /** Entry on the fuse_conn_list */
391 struct list_head entry; 417 struct list_head entry;
392 418
393 /** Unique ID */ 419 /** Device ID from super block */
394 u64 id; 420 dev_t dev;
395 421
396 /** Dentries in the control filesystem */ 422 /** Dentries in the control filesystem */
397 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; 423 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
@@ -438,7 +464,7 @@ extern const struct file_operations fuse_dev_operations;
438/** 464/**
439 * Get a filled in inode 465 * Get a filled in inode
440 */ 466 */
441struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 467struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
442 int generation, struct fuse_attr *attr, 468 int generation, struct fuse_attr *attr,
443 u64 attr_valid, u64 attr_version); 469 u64 attr_valid, u64 attr_version);
444 470
@@ -446,7 +472,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
446 * Send FORGET command 472 * Send FORGET command
447 */ 473 */
448void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 474void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
449 unsigned long nodeid, u64 nlookup); 475 u64 nodeid, u64 nlookup);
450 476
451/** 477/**
452 * Initialize READ or READDIR request 478 * Initialize READ or READDIR request
@@ -504,6 +530,11 @@ void fuse_init_symlink(struct inode *inode);
504void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 530void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
505 u64 attr_valid, u64 attr_version); 531 u64 attr_valid, u64 attr_version);
506 532
533void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
534 u64 attr_valid);
535
536void fuse_truncate(struct address_space *mapping, loff_t offset);
537
507/** 538/**
508 * Initialize the client device 539 * Initialize the client device
509 */ 540 */
@@ -522,6 +553,8 @@ void fuse_ctl_cleanup(void);
522 */ 553 */
523struct fuse_req *fuse_request_alloc(void); 554struct fuse_req *fuse_request_alloc(void);
524 555
556struct fuse_req *fuse_request_alloc_nofs(void);
557
525/** 558/**
526 * Free a request 559 * Free a request
527 */ 560 */
@@ -558,6 +591,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
558 */ 591 */
559void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 592void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
560 593
594void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
595
561/* Abort all requests */ 596/* Abort all requests */
562void fuse_abort_conn(struct fuse_conn *fc); 597void fuse_abort_conn(struct fuse_conn *fc);
563 598
@@ -600,3 +635,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
600 635
601int fuse_update_attributes(struct inode *inode, struct kstat *stat, 636int fuse_update_attributes(struct inode *inode, struct kstat *stat,
602 struct file *file, bool *refreshed); 637 struct file *file, bool *refreshed);
638
639void fuse_flush_writepages(struct inode *inode);
640
641void fuse_set_nowrite(struct inode *inode);
642void fuse_release_nowrite(struct inode *inode);
643
644u64 fuse_get_attr_version(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4df34da2284a..79b615873838 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
59 fi->nodeid = 0; 59 fi->nodeid = 0;
60 fi->nlookup = 0; 60 fi->nlookup = 0;
61 fi->attr_version = 0; 61 fi->attr_version = 0;
62 fi->writectr = 0;
62 INIT_LIST_HEAD(&fi->write_files); 63 INIT_LIST_HEAD(&fi->write_files);
64 INIT_LIST_HEAD(&fi->queued_writes);
65 INIT_LIST_HEAD(&fi->writepages);
66 init_waitqueue_head(&fi->page_waitq);
63 fi->forget_req = fuse_request_alloc(); 67 fi->forget_req = fuse_request_alloc();
64 if (!fi->forget_req) { 68 if (!fi->forget_req) {
65 kmem_cache_free(fuse_inode_cachep, inode); 69 kmem_cache_free(fuse_inode_cachep, inode);
@@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode)
73{ 77{
74 struct fuse_inode *fi = get_fuse_inode(inode); 78 struct fuse_inode *fi = get_fuse_inode(inode);
75 BUG_ON(!list_empty(&fi->write_files)); 79 BUG_ON(!list_empty(&fi->write_files));
80 BUG_ON(!list_empty(&fi->queued_writes));
76 if (fi->forget_req) 81 if (fi->forget_req)
77 fuse_request_free(fi->forget_req); 82 fuse_request_free(fi->forget_req);
78 kmem_cache_free(fuse_inode_cachep, inode); 83 kmem_cache_free(fuse_inode_cachep, inode);
79} 84}
80 85
81void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 86void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
82 unsigned long nodeid, u64 nlookup) 87 u64 nodeid, u64 nlookup)
83{ 88{
84 struct fuse_forget_in *inarg = &req->misc.forget_in; 89 struct fuse_forget_in *inarg = &req->misc.forget_in;
85 inarg->nlookup = nlookup; 90 inarg->nlookup = nlookup;
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
109 return 0; 114 return 0;
110} 115}
111 116
112static void fuse_truncate(struct address_space *mapping, loff_t offset) 117void fuse_truncate(struct address_space *mapping, loff_t offset)
113{ 118{
114 /* See vmtruncate() */ 119 /* See vmtruncate() */
115 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 120 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
117 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 122 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
118} 123}
119 124
120 125void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
121void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 126 u64 attr_valid)
122 u64 attr_valid, u64 attr_version)
123{ 127{
124 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
125 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
126 loff_t oldsize;
127 130
128 spin_lock(&fc->lock);
129 if (attr_version != 0 && fi->attr_version > attr_version) {
130 spin_unlock(&fc->lock);
131 return;
132 }
133 fi->attr_version = ++fc->attr_version; 131 fi->attr_version = ++fc->attr_version;
134 fi->i_time = attr_valid; 132 fi->i_time = attr_valid;
135 133
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
159 fi->orig_i_mode = inode->i_mode; 157 fi->orig_i_mode = inode->i_mode;
160 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 158 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
161 inode->i_mode &= ~S_ISVTX; 159 inode->i_mode &= ~S_ISVTX;
160}
161
162void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
163 u64 attr_valid, u64 attr_version)
164{
165 struct fuse_conn *fc = get_fuse_conn(inode);
166 struct fuse_inode *fi = get_fuse_inode(inode);
167 loff_t oldsize;
168
169 spin_lock(&fc->lock);
170 if (attr_version != 0 && fi->attr_version > attr_version) {
171 spin_unlock(&fc->lock);
172 return;
173 }
174
175 fuse_change_attributes_common(inode, attr, attr_valid);
162 176
163 oldsize = inode->i_size; 177 oldsize = inode->i_size;
164 i_size_write(inode, attr->size); 178 i_size_write(inode, attr->size);
@@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
193 207
194static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 208static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
195{ 209{
196 unsigned long nodeid = *(unsigned long *) _nodeidp; 210 u64 nodeid = *(u64 *) _nodeidp;
197 if (get_node_id(inode) == nodeid) 211 if (get_node_id(inode) == nodeid)
198 return 1; 212 return 1;
199 else 213 else
@@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
202 216
203static int fuse_inode_set(struct inode *inode, void *_nodeidp) 217static int fuse_inode_set(struct inode *inode, void *_nodeidp)
204{ 218{
205 unsigned long nodeid = *(unsigned long *) _nodeidp; 219 u64 nodeid = *(u64 *) _nodeidp;
206 get_fuse_inode(inode)->nodeid = nodeid; 220 get_fuse_inode(inode)->nodeid = nodeid;
207 return 0; 221 return 0;
208} 222}
209 223
210struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 224struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
211 int generation, struct fuse_attr *attr, 225 int generation, struct fuse_attr *attr,
212 u64 attr_valid, u64 attr_version) 226 u64 attr_valid, u64 attr_version)
213{ 227{
@@ -447,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
447 return 0; 461 return 0;
448} 462}
449 463
450static struct fuse_conn *new_conn(void) 464static struct fuse_conn *new_conn(struct super_block *sb)
451{ 465{
452 struct fuse_conn *fc; 466 struct fuse_conn *fc;
453 int err; 467 int err;
@@ -468,19 +482,41 @@ static struct fuse_conn *new_conn(void)
468 atomic_set(&fc->num_waiting, 0); 482 atomic_set(&fc->num_waiting, 0);
469 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 483 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
470 fc->bdi.unplug_io_fn = default_unplug_io_fn; 484 fc->bdi.unplug_io_fn = default_unplug_io_fn;
485 /* fuse does it's own writeback accounting */
486 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
487 fc->dev = sb->s_dev;
471 err = bdi_init(&fc->bdi); 488 err = bdi_init(&fc->bdi);
472 if (err) { 489 if (err)
473 kfree(fc); 490 goto error_kfree;
474 fc = NULL; 491 err = bdi_register_dev(&fc->bdi, fc->dev);
475 goto out; 492 if (err)
476 } 493 goto error_bdi_destroy;
494 /*
495 * For a single fuse filesystem use max 1% of dirty +
496 * writeback threshold.
497 *
498 * This gives about 1M of write buffer for memory maps on a
499 * machine with 1G and 10% dirty_ratio, which should be more
500 * than enough.
501 *
502 * Privileged users can raise it by writing to
503 *
504 * /sys/class/bdi/<bdi>/max_ratio
505 */
506 bdi_set_max_ratio(&fc->bdi, 1);
477 fc->reqctr = 0; 507 fc->reqctr = 0;
478 fc->blocked = 1; 508 fc->blocked = 1;
479 fc->attr_version = 1; 509 fc->attr_version = 1;
480 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 510 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
481 } 511 }
482out:
483 return fc; 512 return fc;
513
514error_bdi_destroy:
515 bdi_destroy(&fc->bdi);
516error_kfree:
517 mutex_destroy(&fc->inst_mutex);
518 kfree(fc);
519 return NULL;
484} 520}
485 521
486void fuse_conn_put(struct fuse_conn *fc) 522void fuse_conn_put(struct fuse_conn *fc)
@@ -548,6 +584,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
548 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 584 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
549 fc->minor = arg->minor; 585 fc->minor = arg->minor;
550 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 586 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
587 fc->max_write = min_t(unsigned, 4096, fc->max_write);
551 fc->conn_init = 1; 588 fc->conn_init = 1;
552 } 589 }
553 fuse_put_request(fc, req); 590 fuse_put_request(fc, req);
@@ -578,12 +615,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
578 request_send_background(fc, req); 615 request_send_background(fc, req);
579} 616}
580 617
581static u64 conn_id(void)
582{
583 static u64 ctr = 1;
584 return ctr++;
585}
586
587static int fuse_fill_super(struct super_block *sb, void *data, int silent) 618static int fuse_fill_super(struct super_block *sb, void *data, int silent)
588{ 619{
589 struct fuse_conn *fc; 620 struct fuse_conn *fc;
@@ -621,14 +652,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
621 if (file->f_op != &fuse_dev_operations) 652 if (file->f_op != &fuse_dev_operations)
622 return -EINVAL; 653 return -EINVAL;
623 654
624 fc = new_conn(); 655 fc = new_conn(sb);
625 if (!fc) 656 if (!fc)
626 return -ENOMEM; 657 return -ENOMEM;
627 658
628 fc->flags = d.flags; 659 fc->flags = d.flags;
629 fc->user_id = d.user_id; 660 fc->user_id = d.user_id;
630 fc->group_id = d.group_id; 661 fc->group_id = d.group_id;
631 fc->max_read = d.max_read; 662 fc->max_read = min_t(unsigned, 4096, d.max_read);
632 663
633 /* Used by get_root_inode() */ 664 /* Used by get_root_inode() */
634 sb->s_fs_info = fc; 665 sb->s_fs_info = fc;
@@ -659,7 +690,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
659 if (file->private_data) 690 if (file->private_data)
660 goto err_unlock; 691 goto err_unlock;
661 692
662 fc->id = conn_id();
663 err = fuse_ctl_add_conn(fc); 693 err = fuse_ctl_add_conn(fc);
664 if (err) 694 if (err)
665 goto err_unlock; 695 goto err_unlock;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 8479da47049c..a4ff271df9ee 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -212,7 +212,7 @@ int gdlm_sysfs_init(void)
212{ 212{
213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); 213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
214 if (!gdlm_kset) { 214 if (!gdlm_kset) {
215 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 215 printk(KERN_WARNING "%s: can not create kset\n", __func__);
216 return -ENOMEM; 216 return -ENOMEM;
217 } 217 }
218 return 0; 218 return 0;
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 90a04a6e3789..f55394e57cb2 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -438,7 +438,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
438 int error; 438 int error;
439 439
440 /* 440 /*
441 * Due to the order of unstuffing files and ->nopage(), we can be 441 * Due to the order of unstuffing files and ->fault(), we can be
442 * asked for a zero page in the case of a stuffed file being extended, 442 * asked for a zero page in the case of a stuffed file being extended,
443 * so we need to supply one here. It doesn't happen often. 443 * so we need to supply one here. It doesn't happen often.
444 */ 444 */
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 509c5d60bd80..7f48576289c9 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
41 41
42#define gfs2_assert_withdraw(sdp, assertion) \ 42#define gfs2_assert_withdraw(sdp, assertion) \
43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ 43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
44 __FUNCTION__, __FILE__, __LINE__)) 44 __func__, __FILE__, __LINE__))
45 45
46 46
47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
@@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
49 49
50#define gfs2_assert_warn(sdp, assertion) \ 50#define gfs2_assert_warn(sdp, assertion) \
51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ 51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
52 __FUNCTION__, __FILE__, __LINE__)) 52 __func__, __FILE__, __LINE__))
53 53
54 54
55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, 55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
56 const char *function, char *file, unsigned int line); 56 const char *function, char *file, unsigned int line);
57 57
58#define gfs2_consist(sdp) \ 58#define gfs2_consist(sdp) \
59gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) 59gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
60 60
61 61
62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, 62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
63 const char *function, char *file, unsigned int line); 63 const char *function, char *file, unsigned int line);
64 64
65#define gfs2_consist_inode(ip) \ 65#define gfs2_consist_inode(ip) \
66gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) 66gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
67 67
68 68
69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, 69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
70 const char *function, char *file, unsigned int line); 70 const char *function, char *file, unsigned int line);
71 71
72#define gfs2_consist_rgrpd(rgd) \ 72#define gfs2_consist_rgrpd(rgd) \
73gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) 73gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
74 74
75 75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
91} 91}
92 92
93#define gfs2_meta_check(sdp, bh) \ 93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) 94gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
95 95
96 96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
118} 118}
119 119
120#define gfs2_metatype_check(sdp, bh, type) \ 120#define gfs2_metatype_check(sdp, bh, type) \
121gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) 121gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
122 122
123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, 123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
124 u16 format) 124 u16 format)
@@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
134 char *file, unsigned int line); 134 char *file, unsigned int line);
135 135
136#define gfs2_io_error(sdp) \ 136#define gfs2_io_error(sdp) \
137gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); 137gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
138 138
139 139
140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
141 const char *function, char *file, unsigned int line); 141 const char *function, char *file, unsigned int line);
142 142
143#define gfs2_io_error_bh(sdp, bh) \ 143#define gfs2_io_error_bh(sdp, bh) \
144gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); 144gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
145 145
146 146
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 24cf6fc43021..f6621a785202 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
208 struct hfs_bnode *node, *next_node; 208 struct hfs_bnode *node, *next_node;
209 struct page **pagep; 209 struct page **pagep;
210 u32 nidx, idx; 210 u32 nidx, idx;
211 u16 off, len; 211 unsigned off;
212 u16 off16;
213 u16 len;
212 u8 *data, byte, m; 214 u8 *data, byte, m;
213 int i; 215 int i;
214 216
@@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
235 node = hfs_bnode_find(tree, nidx); 237 node = hfs_bnode_find(tree, nidx);
236 if (IS_ERR(node)) 238 if (IS_ERR(node))
237 return node; 239 return node;
238 len = hfs_brec_lenoff(node, 2, &off); 240 len = hfs_brec_lenoff(node, 2, &off16);
241 off = off16;
239 242
240 off += node->page_offset; 243 off += node->page_offset;
241 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 244 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
280 return next_node; 283 return next_node;
281 node = next_node; 284 node = next_node;
282 285
283 len = hfs_brec_lenoff(node, 0, &off); 286 len = hfs_brec_lenoff(node, 0, &off16);
287 off = off16;
284 off += node->page_offset; 288 off += node->page_offset;
285 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 289 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
286 data = kmap(*pagep); 290 data = kmap(*pagep);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index b4651e128d7f..36ca2e1a4fa3 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb)
215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); 215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT);
216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); 216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT);
217 mdb->drAtrb = attrib; 217 mdb->drAtrb = attrib;
218 mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); 218 be32_add_cpu(&mdb->drWrCnt, 1);
219 mdb->drLsMod = hfs_mtime(); 219 mdb->drLsMod = hfs_mtime();
220 220
221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 32de44ed0021..8cf67974adf6 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
297 return 0; 297 return 0;
298 } 298 }
299 p = match_strdup(&args[0]); 299 p = match_strdup(&args[0]);
300 hsb->nls_disk = load_nls(p); 300 if (p)
301 hsb->nls_disk = load_nls(p);
301 if (!hsb->nls_disk) { 302 if (!hsb->nls_disk) {
302 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); 303 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
303 kfree(p); 304 kfree(p);
@@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
311 return 0; 312 return 0;
312 } 313 }
313 p = match_strdup(&args[0]); 314 p = match_strdup(&args[0]);
314 hsb->nls_io = load_nls(p); 315 if (p)
316 hsb->nls_io = load_nls(p);
315 if (!hsb->nls_io) { 317 if (!hsb->nls_io) {
316 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); 318 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
317 kfree(p); 319 kfree(p);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index bb5433608a42..e49fcee1e293 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
184 struct hfs_bnode *node, *next_node; 184 struct hfs_bnode *node, *next_node;
185 struct page **pagep; 185 struct page **pagep;
186 u32 nidx, idx; 186 u32 nidx, idx;
187 u16 off, len; 187 unsigned off;
188 u16 off16;
189 u16 len;
188 u8 *data, byte, m; 190 u8 *data, byte, m;
189 int i; 191 int i;
190 192
@@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
211 node = hfs_bnode_find(tree, nidx); 213 node = hfs_bnode_find(tree, nidx);
212 if (IS_ERR(node)) 214 if (IS_ERR(node))
213 return node; 215 return node;
214 len = hfs_brec_lenoff(node, 2, &off); 216 len = hfs_brec_lenoff(node, 2, &off16);
217 off = off16;
215 218
216 off += node->page_offset; 219 off += node->page_offset;
217 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 220 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
256 return next_node; 259 return next_node;
257 node = next_node; 260 node = next_node;
258 261
259 len = hfs_brec_lenoff(node, 0, &off); 262 len = hfs_brec_lenoff(node, 0, &off16);
263 off = off16;
260 off += node->page_offset; 264 off += node->page_offset;
261 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 265 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
262 data = kmap(*pagep); 266 data = kmap(*pagep);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d72d0a8b25aa..9e59537b43d5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
311int hfsplus_rename_cat(u32, struct inode *, struct qstr *, 311int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
312 struct inode *, struct qstr *); 312 struct inode *, struct qstr *);
313 313
314/* dir.c */
315extern const struct inode_operations hfsplus_dir_inode_operations;
316extern const struct file_operations hfsplus_dir_operations;
317
314/* extents.c */ 318/* extents.c */
315int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 319int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
316void hfsplus_ext_write_extent(struct inode *); 320void hfsplus_ext_write_extent(struct inode *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 37744cf3706a..d53b2af91c25 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -278,9 +278,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
278 return 0; 278 return 0;
279} 279}
280 280
281extern const struct inode_operations hfsplus_dir_inode_operations;
282extern struct file_operations hfsplus_dir_operations;
283
284static const struct inode_operations hfsplus_file_inode_operations = { 281static const struct inode_operations hfsplus_file_inode_operations = {
285 .lookup = hfsplus_file_lookup, 282 .lookup = hfsplus_file_lookup,
286 .truncate = hfsplus_file_truncate, 283 .truncate = hfsplus_file_truncate,
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index dc64fac00831..9997cbf8beb5 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
132 return 0; 132 return 0;
133 } 133 }
134 p = match_strdup(&args[0]); 134 p = match_strdup(&args[0]);
135 sbi->nls = load_nls(p); 135 if (p)
136 sbi->nls = load_nls(p);
136 if (!sbi->nls) { 137 if (!sbi->nls) {
137 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); 138 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
138 kfree(p); 139 kfree(p);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b0f9ad362d1d..ce97a54518d8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
358 sb->s_flags |= MS_RDONLY; 358 sb->s_flags |= MS_RDONLY;
359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
360 printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " 360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
361 "use the force option at your own risk, mounting read-only.\n"); 361 "use the force option at your own risk, mounting read-only.\n");
362 sb->s_flags |= MS_RDONLY; 362 sb->s_flags |= MS_RDONLY;
363 } 363 }
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
423 */ 423 */
424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
425 vhdr->modify_date = hfsp_now2mt(); 425 vhdr->modify_date = hfsp_now2mt();
426 vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); 426 be32_add_cpu(&vhdr->write_count, 1);
427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 72cab78f0509..175d08eacc86 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
47 return 0; 47 return 0;
48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); 48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
49 49
50 extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); 50 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
51 wd->embed_start = (extent >> 16) & 0xFFFF; 51 wd->embed_start = (extent >> 16) & 0xFFFF;
52 wd->embed_count = extent & 0xFFFF; 52 wd->embed_count = extent & 0xFFFF;
53 53
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6846785fe904..aeabf80f81a5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .ra_pages = 0, /* No readahead */ 47 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 49};
50 50
51int sysctl_hugetlb_shm_group; 51int sysctl_hugetlb_shm_group;
@@ -504,7 +504,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
504 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 504 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
505 INIT_LIST_HEAD(&inode->i_mapping->private_list); 505 INIT_LIST_HEAD(&inode->i_mapping->private_list);
506 info = HUGETLBFS_I(inode); 506 info = HUGETLBFS_I(inode);
507 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 507 mpol_shared_policy_init(&info->policy, NULL);
508 switch (mode & S_IFMT) { 508 switch (mode & S_IFMT) {
509 default: 509 default:
510 init_special_inode(inode, mode, dev); 510 init_special_inode(inode, mode, dev);
diff --git a/fs/inode.c b/fs/inode.c
index 27ee1af50d02..bf6478130424 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea
495 struct inode * inode = NULL; 495 struct inode * inode = NULL;
496 496
497repeat: 497repeat:
498 hlist_for_each (node, head) { 498 hlist_for_each_entry(inode, node, head, i_hash) {
499 inode = hlist_entry(node, struct inode, i_hash);
500 if (inode->i_sb != sb) 499 if (inode->i_sb != sb)
501 continue; 500 continue;
502 if (!test(inode, data)) 501 if (!test(inode, data))
@@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head
520 struct inode * inode = NULL; 519 struct inode * inode = NULL;
521 520
522repeat: 521repeat:
523 hlist_for_each (node, head) { 522 hlist_for_each_entry(inode, node, head, i_hash) {
524 inode = hlist_entry(node, struct inode, i_hash);
525 if (inode->i_ino != ino) 523 if (inode->i_ino != ino)
526 continue; 524 continue;
527 if (inode->i_sb != sb) 525 if (inode->i_sb != sb)
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 7b94a1e3c015..6676c06bb7c1 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void)
598 } 598 }
599 599
600 ih = inotify_init(&inotify_user_ops); 600 ih = inotify_init(&inotify_user_ops);
601 if (unlikely(IS_ERR(ih))) { 601 if (IS_ERR(ih)) {
602 ret = PTR_ERR(ih); 602 ret = PTR_ERR(ih);
603 goto out_free_dev; 603 goto out_free_dev;
604 } 604 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f32fbde2175e..7db32b3382d3 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -28,8 +28,8 @@
28 * 28 *
29 * Returns 0 on success, -errno on error. 29 * Returns 0 on success, -errno on error.
30 */ 30 */
31long vfs_ioctl(struct file *filp, unsigned int cmd, 31static long vfs_ioctl(struct file *filp, unsigned int cmd,
32 unsigned long arg) 32 unsigned long arg)
33{ 33{
34 int error = -ENOTTY; 34 int error = -ENOTTY;
35 35
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 1ba407c64df1..2f0dc5a14633 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
145 } 145 }
146 de = tmpde; 146 de = tmpde;
147 } 147 }
148 /* Basic sanity check, whether name doesn't exceed dir entry */
149 if (de_len < de->name_len[0] +
150 sizeof(struct iso_directory_record)) {
151 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
152 " in block %lu of inode %lu\n", block,
153 inode->i_ino);
154 return -EIO;
155 }
148 156
149 if (first_de) { 157 if (first_de) {
150 isofs_normalize_block_and_offset(de, 158 isofs_normalize_block_and_offset(de,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index d1bdf8adb351..ccbf72faf27a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -78,29 +78,29 @@ static inline int isonum_712(char *p)
78} 78}
79static inline unsigned int isonum_721(char *p) 79static inline unsigned int isonum_721(char *p)
80{ 80{
81 return le16_to_cpu(get_unaligned((__le16 *)p)); 81 return get_unaligned_le16(p);
82} 82}
83static inline unsigned int isonum_722(char *p) 83static inline unsigned int isonum_722(char *p)
84{ 84{
85 return be16_to_cpu(get_unaligned((__le16 *)p)); 85 return get_unaligned_be16(p);
86} 86}
87static inline unsigned int isonum_723(char *p) 87static inline unsigned int isonum_723(char *p)
88{ 88{
89 /* Ignore bigendian datum due to broken mastering programs */ 89 /* Ignore bigendian datum due to broken mastering programs */
90 return le16_to_cpu(get_unaligned((__le16 *)p)); 90 return get_unaligned_le16(p);
91} 91}
92static inline unsigned int isonum_731(char *p) 92static inline unsigned int isonum_731(char *p)
93{ 93{
94 return le32_to_cpu(get_unaligned((__le32 *)p)); 94 return get_unaligned_le32(p);
95} 95}
96static inline unsigned int isonum_732(char *p) 96static inline unsigned int isonum_732(char *p)
97{ 97{
98 return be32_to_cpu(get_unaligned((__le32 *)p)); 98 return get_unaligned_be32(p);
99} 99}
100static inline unsigned int isonum_733(char *p) 100static inline unsigned int isonum_733(char *p)
101{ 101{
102 /* Ignore bigendian datum due to broken mastering programs */ 102 /* Ignore bigendian datum due to broken mastering programs */
103 return le32_to_cpu(get_unaligned((__le32 *)p)); 103 return get_unaligned_le32(p);
104} 104}
105extern int iso_date(char *, int); 105extern int iso_date(char *, int);
106 106
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 344b247bc29a..8299889a835e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
111 111
112 dlen = de->name_len[0]; 112 dlen = de->name_len[0];
113 dpnt = de->name; 113 dpnt = de->name;
114 /* Basic sanity check, whether name doesn't exceed dir entry */
115 if (de_len < dlen + sizeof(struct iso_directory_record)) {
116 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
117 " in block %lu of inode %lu\n", block,
118 dir->i_ino);
119 return 0;
120 }
114 121
115 if (sbi->s_rock && 122 if (sbi->s_rock &&
116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) { 123 ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a38c7186c570..cd931ef1f000 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -407,22 +407,6 @@ void journal_commit_transaction(journal_t *journal)
407 jbd_debug (3, "JBD: commit phase 2\n"); 407 jbd_debug (3, "JBD: commit phase 2\n");
408 408
409 /* 409 /*
410 * First, drop modified flag: all accesses to the buffers
411 * will be tracked for a new trasaction only -bzzz
412 */
413 spin_lock(&journal->j_list_lock);
414 if (commit_transaction->t_buffers) {
415 new_jh = jh = commit_transaction->t_buffers->b_tnext;
416 do {
417 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
418 new_jh->b_modified == 0);
419 new_jh->b_modified = 0;
420 new_jh = new_jh->b_tnext;
421 } while (new_jh != jh);
422 }
423 spin_unlock(&journal->j_list_lock);
424
425 /*
426 * Now start flushing things to disk, in the order they appear 410 * Now start flushing things to disk, in the order they appear
427 * on the transaction lists. Data blocks go first. 411 * on the transaction lists. Data blocks go first.
428 */ 412 */
@@ -488,6 +472,9 @@ void journal_commit_transaction(journal_t *journal)
488 */ 472 */
489 commit_transaction->t_state = T_COMMIT; 473 commit_transaction->t_state = T_COMMIT;
490 474
475 J_ASSERT(commit_transaction->t_nr_buffers <=
476 commit_transaction->t_outstanding_credits);
477
491 descriptor = NULL; 478 descriptor = NULL;
492 bufs = 0; 479 bufs = 0;
493 while (commit_transaction->t_buffers) { 480 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 0e081d5f32e8..b99c3b3654c4 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -534,7 +534,7 @@ int log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -728,7 +728,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
728 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 728 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
729 if (!journal->j_wbuf) { 729 if (!journal->j_wbuf) {
730 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 730 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
731 __FUNCTION__); 731 __func__);
732 kfree(journal); 732 kfree(journal);
733 journal = NULL; 733 journal = NULL;
734 goto out; 734 goto out;
@@ -782,7 +782,7 @@ journal_t * journal_init_inode (struct inode *inode)
782 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 782 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
783 if (!journal->j_wbuf) { 783 if (!journal->j_wbuf) {
784 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 784 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
785 __FUNCTION__); 785 __func__);
786 kfree(journal); 786 kfree(journal);
787 return NULL; 787 return NULL;
788 } 788 }
@@ -791,7 +791,7 @@ journal_t * journal_init_inode (struct inode *inode)
791 /* If that failed, give up */ 791 /* If that failed, give up */
792 if (err) { 792 if (err) {
793 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 793 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
794 __FUNCTION__); 794 __func__);
795 kfree(journal); 795 kfree(journal);
796 return NULL; 796 return NULL;
797 } 797 }
@@ -877,7 +877,7 @@ int journal_create(journal_t *journal)
877 */ 877 */
878 printk(KERN_EMERG 878 printk(KERN_EMERG
879 "%s: creation of journal on external device!\n", 879 "%s: creation of journal on external device!\n",
880 __FUNCTION__); 880 __func__);
881 BUG(); 881 BUG();
882 } 882 }
883 883
@@ -1657,7 +1657,7 @@ static struct journal_head *journal_alloc_journal_head(void)
1657 jbd_debug(1, "out of memory for journal_head\n"); 1657 jbd_debug(1, "out of memory for journal_head\n");
1658 if (time_after(jiffies, last_warning + 5*HZ)) { 1658 if (time_after(jiffies, last_warning + 5*HZ)) {
1659 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 1659 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
1660 __FUNCTION__); 1660 __func__);
1661 last_warning = jiffies; 1661 last_warning = jiffies;
1662 } 1662 }
1663 while (ret == NULL) { 1663 while (ret == NULL) {
@@ -1794,13 +1794,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1794 if (jh->b_frozen_data) { 1794 if (jh->b_frozen_data) {
1795 printk(KERN_WARNING "%s: freeing " 1795 printk(KERN_WARNING "%s: freeing "
1796 "b_frozen_data\n", 1796 "b_frozen_data\n",
1797 __FUNCTION__); 1797 __func__);
1798 jbd_free(jh->b_frozen_data, bh->b_size); 1798 jbd_free(jh->b_frozen_data, bh->b_size);
1799 } 1799 }
1800 if (jh->b_committed_data) { 1800 if (jh->b_committed_data) {
1801 printk(KERN_WARNING "%s: freeing " 1801 printk(KERN_WARNING "%s: freeing "
1802 "b_committed_data\n", 1802 "b_committed_data\n",
1803 __FUNCTION__); 1803 __func__);
1804 jbd_free(jh->b_committed_data, bh->b_size); 1804 jbd_free(jh->b_committed_data, bh->b_size);
1805 } 1805 }
1806 bh->b_private = NULL; 1806 bh->b_private = NULL;
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d5f8eee7c88c..1bb43e987f4b 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -138,7 +138,7 @@ repeat:
138oom: 138oom:
139 if (!journal_oom_retry) 139 if (!journal_oom_retry)
140 return -ENOMEM; 140 return -ENOMEM;
141 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 141 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
142 yield(); 142 yield();
143 goto repeat; 143 goto repeat;
144} 144}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 2c9e8f5d13aa..67ff2024c23c 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -609,6 +609,12 @@ repeat:
609 goto done; 609 goto done;
610 610
611 /* 611 /*
612 * this is the first time this transaction is touching this buffer,
613 * reset the modified flag
614 */
615 jh->b_modified = 0;
616
617 /*
612 * If there is already a copy-out version of this buffer, then we don't 618 * If there is already a copy-out version of this buffer, then we don't
613 * need to make another one 619 * need to make another one
614 */ 620 */
@@ -681,7 +687,7 @@ repeat:
681 if (!frozen_buffer) { 687 if (!frozen_buffer) {
682 printk(KERN_EMERG 688 printk(KERN_EMERG
683 "%s: OOM for frozen_buffer\n", 689 "%s: OOM for frozen_buffer\n",
684 __FUNCTION__); 690 __func__);
685 JBUFFER_TRACE(jh, "oom!"); 691 JBUFFER_TRACE(jh, "oom!");
686 error = -ENOMEM; 692 error = -ENOMEM;
687 jbd_lock_bh_state(bh); 693 jbd_lock_bh_state(bh);
@@ -820,9 +826,16 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
820 826
821 if (jh->b_transaction == NULL) { 827 if (jh->b_transaction == NULL) {
822 jh->b_transaction = transaction; 828 jh->b_transaction = transaction;
829
830 /* first access by this transaction */
831 jh->b_modified = 0;
832
823 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 833 JBUFFER_TRACE(jh, "file as BJ_Reserved");
824 __journal_file_buffer(jh, transaction, BJ_Reserved); 834 __journal_file_buffer(jh, transaction, BJ_Reserved);
825 } else if (jh->b_transaction == journal->j_committing_transaction) { 835 } else if (jh->b_transaction == journal->j_committing_transaction) {
836 /* first access by this transaction */
837 jh->b_modified = 0;
838
826 JBUFFER_TRACE(jh, "set next transaction"); 839 JBUFFER_TRACE(jh, "set next transaction");
827 jh->b_next_transaction = transaction; 840 jh->b_next_transaction = transaction;
828 } 841 }
@@ -891,7 +904,7 @@ repeat:
891 committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); 904 committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS);
892 if (!committed_data) { 905 if (!committed_data) {
893 printk(KERN_EMERG "%s: No memory for committed data\n", 906 printk(KERN_EMERG "%s: No memory for committed data\n",
894 __FUNCTION__); 907 __func__);
895 err = -ENOMEM; 908 err = -ENOMEM;
896 goto out; 909 goto out;
897 } 910 }
@@ -1222,6 +1235,7 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1222 struct journal_head *jh; 1235 struct journal_head *jh;
1223 int drop_reserve = 0; 1236 int drop_reserve = 0;
1224 int err = 0; 1237 int err = 0;
1238 int was_modified = 0;
1225 1239
1226 BUFFER_TRACE(bh, "entry"); 1240 BUFFER_TRACE(bh, "entry");
1227 1241
@@ -1240,6 +1254,9 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1240 goto not_jbd; 1254 goto not_jbd;
1241 } 1255 }
1242 1256
1257 /* keep track of wether or not this transaction modified us */
1258 was_modified = jh->b_modified;
1259
1243 /* 1260 /*
1244 * The buffer's going from the transaction, we must drop 1261 * The buffer's going from the transaction, we must drop
1245 * all references -bzzz 1262 * all references -bzzz
@@ -1257,7 +1274,12 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1257 1274
1258 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1275 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1259 1276
1260 drop_reserve = 1; 1277 /*
1278 * we only want to drop a reference if this transaction
1279 * modified the buffer
1280 */
1281 if (was_modified)
1282 drop_reserve = 1;
1261 1283
1262 /* 1284 /*
1263 * We are no longer going to journal this buffer. 1285 * We are no longer going to journal this buffer.
@@ -1297,7 +1319,13 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1297 if (jh->b_next_transaction) { 1319 if (jh->b_next_transaction) {
1298 J_ASSERT(jh->b_next_transaction == transaction); 1320 J_ASSERT(jh->b_next_transaction == transaction);
1299 jh->b_next_transaction = NULL; 1321 jh->b_next_transaction = NULL;
1300 drop_reserve = 1; 1322
1323 /*
1324 * only drop a reference if this transaction modified
1325 * the buffer
1326 */
1327 if (was_modified)
1328 drop_reserve = 1;
1301 } 1329 }
1302 } 1330 }
1303 1331
@@ -2069,7 +2097,7 @@ void __journal_refile_buffer(struct journal_head *jh)
2069 jh->b_transaction = jh->b_next_transaction; 2097 jh->b_transaction = jh->b_next_transaction;
2070 jh->b_next_transaction = NULL; 2098 jh->b_next_transaction = NULL;
2071 __journal_file_buffer(jh, jh->b_transaction, 2099 __journal_file_buffer(jh, jh->b_transaction,
2072 was_dirty ? BJ_Metadata : BJ_Reserved); 2100 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2073 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2101 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2074 2102
2075 if (was_dirty) 2103 if (was_dirty)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..e0139786f717 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 jbd_debug (3, "JBD: commit phase 2\n"); 520 jbd_debug (3, "JBD: commit phase 2\n");
521 521
522 /* 522 /*
523 * First, drop modified flag: all accesses to the buffers
524 * will be tracked for a new trasaction only -bzzz
525 */
526 spin_lock(&journal->j_list_lock);
527 if (commit_transaction->t_buffers) {
528 new_jh = jh = commit_transaction->t_buffers->b_tnext;
529 do {
530 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
531 new_jh->b_modified == 0);
532 new_jh->b_modified = 0;
533 new_jh = new_jh->b_tnext;
534 } while (new_jh != jh);
535 }
536 spin_unlock(&journal->j_list_lock);
537
538 /*
539 * Now start flushing things to disk, in the order they appear 523 * Now start flushing things to disk, in the order they appear
540 * on the transaction lists. Data blocks go first. 524 * on the transaction lists. Data blocks go first.
541 */ 525 */
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
584 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; 568 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
585 stats.u.run.rs_blocks_logged = 0; 569 stats.u.run.rs_blocks_logged = 0;
586 570
571 J_ASSERT(commit_transaction->t_nr_buffers <=
572 commit_transaction->t_outstanding_credits);
573
587 descriptor = NULL; 574 descriptor = NULL;
588 bufs = 0; 575 bufs = 0;
589 while (commit_transaction->t_buffers) { 576 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 954cff001df6..53632e3e8457 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -904,19 +904,10 @@ static void jbd2_stats_proc_init(journal_t *journal)
904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); 905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
906 if (journal->j_proc_entry) { 906 if (journal->j_proc_entry) {
907 struct proc_dir_entry *p; 907 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
908 p = create_proc_entry("history", S_IRUGO, 908 &jbd2_seq_history_fops, journal);
909 journal->j_proc_entry); 909 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
910 if (p) { 910 &jbd2_seq_info_fops, journal);
911 p->proc_fops = &jbd2_seq_history_fops;
912 p->data = journal;
913 p = create_proc_entry("info", S_IRUGO,
914 journal->j_proc_entry);
915 if (p) {
916 p->proc_fops = &jbd2_seq_info_fops;
917 p->data = journal;
918 }
919 }
920 } 911 }
921} 912}
922 913
@@ -1006,13 +997,14 @@ fail:
1006 */ 997 */
1007 998
1008/** 999/**
1009 * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure 1000 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1010 * @bdev: Block device on which to create the journal 1001 * @bdev: Block device on which to create the journal
1011 * @fs_dev: Device which hold journalled filesystem for this journal. 1002 * @fs_dev: Device which hold journalled filesystem for this journal.
1012 * @start: Block nr Start of journal. 1003 * @start: Block nr Start of journal.
1013 * @len: Length of the journal in blocks. 1004 * @len: Length of the journal in blocks.
1014 * @blocksize: blocksize of journalling device 1005 * @blocksize: blocksize of journalling device
1015 * @returns: a newly created journal_t * 1006 *
1007 * Returns: a newly created journal_t *
1016 * 1008 *
1017 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1009 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1018 * range of blocks on an arbitrary block device. 1010 * range of blocks on an arbitrary block device.
@@ -1036,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1036 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1028 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1037 if (!journal->j_wbuf) { 1029 if (!journal->j_wbuf) {
1038 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1030 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1039 __FUNCTION__); 1031 __func__);
1040 kfree(journal); 1032 kfree(journal);
1041 journal = NULL; 1033 journal = NULL;
1042 goto out; 1034 goto out;
@@ -1092,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1092 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1084 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1093 if (!journal->j_wbuf) { 1085 if (!journal->j_wbuf) {
1094 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1086 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1095 __FUNCTION__); 1087 __func__);
1096 kfree(journal); 1088 kfree(journal);
1097 return NULL; 1089 return NULL;
1098 } 1090 }
@@ -1101,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1101 /* If that failed, give up */ 1093 /* If that failed, give up */
1102 if (err) { 1094 if (err) {
1103 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1095 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1104 __FUNCTION__); 1096 __func__);
1105 kfree(journal); 1097 kfree(journal);
1106 return NULL; 1098 return NULL;
1107 } 1099 }
@@ -1187,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
1187 */ 1179 */
1188 printk(KERN_EMERG 1180 printk(KERN_EMERG
1189 "%s: creation of journal on external device!\n", 1181 "%s: creation of journal on external device!\n",
1190 __FUNCTION__); 1182 __func__);
1191 BUG(); 1183 BUG();
1192 } 1184 }
1193 1185
@@ -1985,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
1985 1977
1986static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1978static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1987{ 1979{
1988 J_ASSERT(jbd2_journal_head_cache != NULL); 1980 if (jbd2_journal_head_cache) {
1989 kmem_cache_destroy(jbd2_journal_head_cache); 1981 kmem_cache_destroy(jbd2_journal_head_cache);
1990 jbd2_journal_head_cache = NULL; 1982 jbd2_journal_head_cache = NULL;
1983 }
1991} 1984}
1992 1985
1993/* 1986/*
@@ -2006,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
2006 jbd_debug(1, "out of memory for journal_head\n"); 1999 jbd_debug(1, "out of memory for journal_head\n");
2007 if (time_after(jiffies, last_warning + 5*HZ)) { 2000 if (time_after(jiffies, last_warning + 5*HZ)) {
2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2001 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2009 __FUNCTION__); 2002 __func__);
2010 last_warning = jiffies; 2003 last_warning = jiffies;
2011 } 2004 }
2012 while (!ret) { 2005 while (!ret) {
@@ -2143,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2143 if (jh->b_frozen_data) { 2136 if (jh->b_frozen_data) {
2144 printk(KERN_WARNING "%s: freeing " 2137 printk(KERN_WARNING "%s: freeing "
2145 "b_frozen_data\n", 2138 "b_frozen_data\n",
2146 __FUNCTION__); 2139 __func__);
2147 jbd2_free(jh->b_frozen_data, bh->b_size); 2140 jbd2_free(jh->b_frozen_data, bh->b_size);
2148 } 2141 }
2149 if (jh->b_committed_data) { 2142 if (jh->b_committed_data) {
2150 printk(KERN_WARNING "%s: freeing " 2143 printk(KERN_WARNING "%s: freeing "
2151 "b_committed_data\n", 2144 "b_committed_data\n",
2152 __FUNCTION__); 2145 __func__);
2153 jbd2_free(jh->b_committed_data, bh->b_size); 2146 jbd2_free(jh->b_committed_data, bh->b_size);
2154 } 2147 }
2155 bh->b_private = NULL; 2148 bh->b_private = NULL;
@@ -2314,10 +2307,12 @@ static int __init journal_init(void)
2314 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2307 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2315 2308
2316 ret = journal_init_caches(); 2309 ret = journal_init_caches();
2317 if (ret != 0) 2310 if (ret == 0) {
2311 jbd2_create_debugfs_entry();
2312 jbd2_create_jbd_stats_proc_entry();
2313 } else {
2318 jbd2_journal_destroy_caches(); 2314 jbd2_journal_destroy_caches();
2319 jbd2_create_debugfs_entry(); 2315 }
2320 jbd2_create_jbd_stats_proc_entry();
2321 return ret; 2316 return ret;
2322} 2317}
2323 2318
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
139oom: 139oom:
140 if (!journal_oom_retry) 140 if (!journal_oom_retry)
141 return -ENOMEM; 141 return -ENOMEM;
142 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 142 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
143 yield(); 143 yield();
144 goto repeat; 144 goto repeat;
145} 145}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
167 return NULL; 167 return NULL;
168} 168}
169 169
170void jbd2_journal_destroy_revoke_caches(void)
171{
172 if (jbd2_revoke_record_cache) {
173 kmem_cache_destroy(jbd2_revoke_record_cache);
174 jbd2_revoke_record_cache = NULL;
175 }
176 if (jbd2_revoke_table_cache) {
177 kmem_cache_destroy(jbd2_revoke_table_cache);
178 jbd2_revoke_table_cache = NULL;
179 }
180}
181
170int __init jbd2_journal_init_revoke_caches(void) 182int __init jbd2_journal_init_revoke_caches(void)
171{ 183{
184 J_ASSERT(!jbd2_revoke_record_cache);
185 J_ASSERT(!jbd2_revoke_table_cache);
186
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 187 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 188 sizeof(struct jbd2_revoke_record_s),
174 0, 189 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 190 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 191 NULL);
177 if (!jbd2_revoke_record_cache) 192 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 193 goto record_cache_failure;
179 194
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 195 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 196 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 197 0, SLAB_TEMPORARY, NULL);
183 if (!jbd2_revoke_table_cache) { 198 if (!jbd2_revoke_table_cache)
184 kmem_cache_destroy(jbd2_revoke_record_cache); 199 goto table_cache_failure;
185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM;
187 }
188 return 0; 200 return 0;
201table_cache_failure:
202 jbd2_journal_destroy_revoke_caches();
203record_cache_failure:
204 return -ENOMEM;
189} 205}
190 206
191void jbd2_journal_destroy_revoke_caches(void) 207static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
192{ 208{
193 kmem_cache_destroy(jbd2_revoke_record_cache); 209 int shift = 0;
194 jbd2_revoke_record_cache = NULL; 210 int tmp = hash_size;
195 kmem_cache_destroy(jbd2_revoke_table_cache); 211 struct jbd2_revoke_table_s *table;
196 jbd2_revoke_table_cache = NULL;
197}
198
199/* Initialise the revoke table for a given journal to a given size. */
200
201int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
202{
203 int shift, tmp;
204 212
205 J_ASSERT (journal->j_revoke_table[0] == NULL); 213 table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
214 if (!table)
215 goto out;
206 216
207 shift = 0;
208 tmp = hash_size;
209 while((tmp >>= 1UL) != 0UL) 217 while((tmp >>= 1UL) != 0UL)
210 shift++; 218 shift++;
211 219
212 journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 220 table->hash_size = hash_size;
213 if (!journal->j_revoke_table[0]) 221 table->hash_shift = shift;
214 return -ENOMEM; 222 table->hash_table =
215 journal->j_revoke = journal->j_revoke_table[0];
216
217 /* Check that the hash_size is a power of two */
218 J_ASSERT(is_power_of_2(hash_size));
219
220 journal->j_revoke->hash_size = hash_size;
221
222 journal->j_revoke->hash_shift = shift;
223
224 journal->j_revoke->hash_table =
225 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 223 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
226 if (!journal->j_revoke->hash_table) { 224 if (!table->hash_table) {
227 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 225 kmem_cache_free(jbd2_revoke_table_cache, table);
228 journal->j_revoke = NULL; 226 table = NULL;
229 return -ENOMEM; 227 goto out;
230 } 228 }
231 229
232 for (tmp = 0; tmp < hash_size; tmp++) 230 for (tmp = 0; tmp < hash_size; tmp++)
233 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 231 INIT_LIST_HEAD(&table->hash_table[tmp]);
234 232
235 journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 233out:
236 if (!journal->j_revoke_table[1]) { 234 return table;
237 kfree(journal->j_revoke_table[0]->hash_table); 235}
238 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 236
239 return -ENOMEM; 237static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
238{
239 int i;
240 struct list_head *hash_list;
241
242 for (i = 0; i < table->hash_size; i++) {
243 hash_list = &table->hash_table[i];
244 J_ASSERT(list_empty(hash_list));
240 } 245 }
241 246
242 journal->j_revoke = journal->j_revoke_table[1]; 247 kfree(table->hash_table);
248 kmem_cache_free(jbd2_revoke_table_cache, table);
249}
243 250
244 /* Check that the hash_size is a power of two */ 251/* Initialise the revoke table for a given journal to a given size. */
252int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
253{
254 J_ASSERT(journal->j_revoke_table[0] == NULL);
245 J_ASSERT(is_power_of_2(hash_size)); 255 J_ASSERT(is_power_of_2(hash_size));
246 256
247 journal->j_revoke->hash_size = hash_size; 257 journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
248 258 if (!journal->j_revoke_table[0])
249 journal->j_revoke->hash_shift = shift; 259 goto fail0;
250 260
251 journal->j_revoke->hash_table = 261 journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
252 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 262 if (!journal->j_revoke_table[1])
253 if (!journal->j_revoke->hash_table) { 263 goto fail1;
254 kfree(journal->j_revoke_table[0]->hash_table);
255 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
256 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
257 journal->j_revoke = NULL;
258 return -ENOMEM;
259 }
260 264
261 for (tmp = 0; tmp < hash_size; tmp++) 265 journal->j_revoke = journal->j_revoke_table[1];
262 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
263 266
264 spin_lock_init(&journal->j_revoke_lock); 267 spin_lock_init(&journal->j_revoke_lock);
265 268
266 return 0; 269 return 0;
267}
268 270
269/* Destoy a journal's revoke table. The table must already be empty! */ 271fail1:
272 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
273fail0:
274 return -ENOMEM;
275}
270 276
277/* Destroy a journal's revoke table. The table must already be empty! */
271void jbd2_journal_destroy_revoke(journal_t *journal) 278void jbd2_journal_destroy_revoke(journal_t *journal)
272{ 279{
273 struct jbd2_revoke_table_s *table;
274 struct list_head *hash_list;
275 int i;
276
277 table = journal->j_revoke_table[0];
278 if (!table)
279 return;
280
281 for (i=0; i<table->hash_size; i++) {
282 hash_list = &table->hash_table[i];
283 J_ASSERT (list_empty(hash_list));
284 }
285
286 kfree(table->hash_table);
287 kmem_cache_free(jbd2_revoke_table_cache, table);
288 journal->j_revoke = NULL;
289
290 table = journal->j_revoke_table[1];
291 if (!table)
292 return;
293
294 for (i=0; i<table->hash_size; i++) {
295 hash_list = &table->hash_table[i];
296 J_ASSERT (list_empty(hash_list));
297 }
298
299 kfree(table->hash_table);
300 kmem_cache_free(jbd2_revoke_table_cache, table);
301 journal->j_revoke = NULL; 280 journal->j_revoke = NULL;
281 if (journal->j_revoke_table[0])
282 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
283 if (journal->j_revoke_table[1])
284 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
302} 285}
303 286
304 287
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
618 goto done; 618 goto done;
619 619
620 /* 620 /*
621 * this is the first time this transaction is touching this buffer,
622 * reset the modified flag
623 */
624 jh->b_modified = 0;
625
626 /*
621 * If there is already a copy-out version of this buffer, then we don't 627 * If there is already a copy-out version of this buffer, then we don't
622 * need to make another one 628 * need to make another one
623 */ 629 */
@@ -690,7 +696,7 @@ repeat:
690 if (!frozen_buffer) { 696 if (!frozen_buffer) {
691 printk(KERN_EMERG 697 printk(KERN_EMERG
692 "%s: OOM for frozen_buffer\n", 698 "%s: OOM for frozen_buffer\n",
693 __FUNCTION__); 699 __func__);
694 JBUFFER_TRACE(jh, "oom!"); 700 JBUFFER_TRACE(jh, "oom!");
695 error = -ENOMEM; 701 error = -ENOMEM;
696 jbd_lock_bh_state(bh); 702 jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
829 835
830 if (jh->b_transaction == NULL) { 836 if (jh->b_transaction == NULL) {
831 jh->b_transaction = transaction; 837 jh->b_transaction = transaction;
838
839 /* first access by this transaction */
840 jh->b_modified = 0;
841
832 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 842 JBUFFER_TRACE(jh, "file as BJ_Reserved");
833 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 843 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
834 } else if (jh->b_transaction == journal->j_committing_transaction) { 844 } else if (jh->b_transaction == journal->j_committing_transaction) {
845 /* first access by this transaction */
846 jh->b_modified = 0;
847
835 JBUFFER_TRACE(jh, "set next transaction"); 848 JBUFFER_TRACE(jh, "set next transaction");
836 jh->b_next_transaction = transaction; 849 jh->b_next_transaction = transaction;
837 } 850 }
@@ -901,7 +914,7 @@ repeat:
901 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 914 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
902 if (!committed_data) { 915 if (!committed_data) {
903 printk(KERN_EMERG "%s: No memory for committed data\n", 916 printk(KERN_EMERG "%s: No memory for committed data\n",
904 __FUNCTION__); 917 __func__);
905 err = -ENOMEM; 918 err = -ENOMEM;
906 goto out; 919 goto out;
907 } 920 }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 struct journal_head *jh; 1243 struct journal_head *jh;
1231 int drop_reserve = 0; 1244 int drop_reserve = 0;
1232 int err = 0; 1245 int err = 0;
1246 int was_modified = 0;
1233 1247
1234 BUFFER_TRACE(bh, "entry"); 1248 BUFFER_TRACE(bh, "entry");
1235 1249
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1248 goto not_jbd; 1262 goto not_jbd;
1249 } 1263 }
1250 1264
1265 /* keep track of wether or not this transaction modified us */
1266 was_modified = jh->b_modified;
1267
1251 /* 1268 /*
1252 * The buffer's going from the transaction, we must drop 1269 * The buffer's going from the transaction, we must drop
1253 * all references -bzzz 1270 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1265 1282
1266 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1283 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1267 1284
1268 drop_reserve = 1; 1285 /*
1286 * we only want to drop a reference if this transaction
1287 * modified the buffer
1288 */
1289 if (was_modified)
1290 drop_reserve = 1;
1269 1291
1270 /* 1292 /*
1271 * We are no longer going to journal this buffer. 1293 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1305 if (jh->b_next_transaction) { 1327 if (jh->b_next_transaction) {
1306 J_ASSERT(jh->b_next_transaction == transaction); 1328 J_ASSERT(jh->b_next_transaction == transaction);
1307 jh->b_next_transaction = NULL; 1329 jh->b_next_transaction = NULL;
1308 drop_reserve = 1; 1330
1331 /*
1332 * only drop a reference if this transaction modified
1333 * the buffer
1334 */
1335 if (was_modified)
1336 drop_reserve = 1;
1309 } 1337 }
1310 } 1338 }
1311 1339
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
1434 return err; 1462 return err;
1435} 1463}
1436 1464
1437/**int jbd2_journal_force_commit() - force any uncommitted transactions 1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1438 * @journal: journal to force 1467 * @journal: journal to force
1439 * 1468 *
1440 * For synchronous operations: force any uncommitted transactions 1469 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2077 jh->b_transaction = jh->b_next_transaction; 2106 jh->b_transaction = jh->b_next_transaction;
2078 jh->b_next_transaction = NULL; 2107 jh->b_next_transaction = NULL;
2079 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2108 __jbd2_journal_file_buffer(jh, jh->b_transaction,
2080 was_dirty ? BJ_Metadata : BJ_Reserved); 2109 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2081 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2110 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2082 2111
2083 if (was_dirty) 2112 if (was_dirty)
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 9645275023e6..a113ecc3bafe 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -82,28 +82,28 @@
82 do { \ 82 do { \
83 printk(JFFS2_ERR_MSG_PREFIX \ 83 printk(JFFS2_ERR_MSG_PREFIX \
84 " (%d) %s: " fmt, task_pid_nr(current), \ 84 " (%d) %s: " fmt, task_pid_nr(current), \
85 __FUNCTION__ , ##__VA_ARGS__); \ 85 __func__ , ##__VA_ARGS__); \
86 } while(0) 86 } while(0)
87 87
88#define JFFS2_WARNING(fmt, ...) \ 88#define JFFS2_WARNING(fmt, ...) \
89 do { \ 89 do { \
90 printk(JFFS2_WARN_MSG_PREFIX \ 90 printk(JFFS2_WARN_MSG_PREFIX \
91 " (%d) %s: " fmt, task_pid_nr(current), \ 91 " (%d) %s: " fmt, task_pid_nr(current), \
92 __FUNCTION__ , ##__VA_ARGS__); \ 92 __func__ , ##__VA_ARGS__); \
93 } while(0) 93 } while(0)
94 94
95#define JFFS2_NOTICE(fmt, ...) \ 95#define JFFS2_NOTICE(fmt, ...) \
96 do { \ 96 do { \
97 printk(JFFS2_NOTICE_MSG_PREFIX \ 97 printk(JFFS2_NOTICE_MSG_PREFIX \
98 " (%d) %s: " fmt, task_pid_nr(current), \ 98 " (%d) %s: " fmt, task_pid_nr(current), \
99 __FUNCTION__ , ##__VA_ARGS__); \ 99 __func__ , ##__VA_ARGS__); \
100 } while(0) 100 } while(0)
101 101
102#define JFFS2_DEBUG(fmt, ...) \ 102#define JFFS2_DEBUG(fmt, ...) \
103 do { \ 103 do { \
104 printk(JFFS2_DBG_MSG_PREFIX \ 104 printk(JFFS2_DBG_MSG_PREFIX \
105 " (%d) %s: " fmt, task_pid_nr(current), \ 105 " (%d) %s: " fmt, task_pid_nr(current), \
106 __FUNCTION__ , ##__VA_ARGS__); \ 106 __func__ , ##__VA_ARGS__); \
107 } while(0) 107 } while(0)
108 108
109/* 109/*
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 05531f291bfa..082e844ab2db 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_
82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
83{ 83{
84 /* must be called under down_write(xattr_sem) */ 84 /* must be called under down_write(xattr_sem) */
85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); 85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version));
86 if (xd->xname) { 86 if (xd->xname) {
87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); 87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
88 kfree(xd->xname); 88 kfree(xd->xname);
@@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); 1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1253 if (rc) { 1253 if (rc) {
1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", 1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1255 __FUNCTION__, rc, totlen); 1255 __func__, rc, totlen);
1256 rc = rc ? rc : -EBADFD; 1256 rc = rc ? rc : -EBADFD;
1257 goto out; 1257 goto out;
1258 } 1258 }
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 887f5759e536..bf6ab19b86ee 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -89,7 +89,7 @@ void jfs_proc_init(void)
89{ 89{
90 int i; 90 int i;
91 91
92 if (!(base = proc_mkdir("jfs", proc_root_fs))) 92 if (!(base = proc_mkdir("fs/jfs", NULL)))
93 return; 93 return;
94 base->owner = THIS_MODULE; 94 base->owner = THIS_MODULE;
95 95
@@ -109,7 +109,7 @@ void jfs_proc_clean(void)
109 if (base) { 109 if (base) {
110 for (i = 0; i < NPROCENT; i++) 110 for (i = 0; i < NPROCENT; i++)
111 remove_proc_entry(Entries[i].name, base); 111 remove_proc_entry(Entries[i].name, base);
112 remove_proc_entry("jfs", proc_root_fs); 112 remove_proc_entry("fs/jfs", NULL);
113 } 113 }
114} 114}
115 115
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 40b16f23e49a..5df517b81f3f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -573,7 +573,7 @@ again:
573 /* Ensure the resulting lock will get added to granted list */ 573 /* Ensure the resulting lock will get added to granted list */
574 fl->fl_flags |= FL_SLEEP; 574 fl->fl_flags |= FL_SLEEP;
575 if (do_vfs_lock(fl) < 0) 575 if (do_vfs_lock(fl) < 0)
576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); 576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
577 up_read(&host->h_rwsem); 577 up_read(&host->h_rwsem);
578 fl->fl_flags = fl_flags; 578 fl->fl_flags = fl_flags;
579 status = 0; 579 status = 0;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4d81553d2948..81aca859bfde 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
752 return; 752 return;
753 default: 753 default:
754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
755 -error, __FUNCTION__); 755 -error, __func__);
756 nlmsvc_insert_block(block, 10 * HZ); 756 nlmsvc_insert_block(block, 10 * HZ);
757 nlmsvc_release_block(block); 757 nlmsvc_release_block(block);
758 return; 758 return;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 30f7d0ae2215..05ff4f1d7026 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -609,7 +609,7 @@ error_inode:
609 if (corrupt < 0) { 609 if (corrupt < 0) {
610 fat_fs_panic(new_dir->i_sb, 610 fat_fs_panic(new_dir->i_sb,
611 "%s: Filesystem corrupted (i_pos %lld)", 611 "%s: Filesystem corrupted (i_pos %lld)",
612 __FUNCTION__, sinfo.i_pos); 612 __func__, sinfo.i_pos);
613 } 613 }
614 goto out; 614 goto out;
615} 615}
@@ -653,7 +653,7 @@ static const struct inode_operations msdos_dir_inode_operations = {
653 .mkdir = msdos_mkdir, 653 .mkdir = msdos_mkdir,
654 .rmdir = msdos_rmdir, 654 .rmdir = msdos_rmdir,
655 .rename = msdos_rename, 655 .rename = msdos_rename,
656 .setattr = fat_notify_change, 656 .setattr = fat_setattr,
657 .getattr = fat_getattr, 657 .getattr = fat_getattr,
658}; 658};
659 659
diff --git a/fs/namei.c b/fs/namei.c
index e179f71bfcb0..32fd9655485b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h>
33#include <asm/namei.h> 34#include <asm/namei.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35 36
@@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
281 if (retval) 282 if (retval)
282 return retval; 283 return retval;
283 284
285 retval = devcgroup_inode_permission(inode, mask);
286 if (retval)
287 return retval;
288
284 return security_inode_permission(inode, mask, nd); 289 return security_inode_permission(inode, mask, nd);
285} 290}
286 291
@@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2028 if (!dir->i_op || !dir->i_op->mknod) 2033 if (!dir->i_op || !dir->i_op->mknod)
2029 return -EPERM; 2034 return -EPERM;
2030 2035
2036 error = devcgroup_inode_mknod(mode, dev);
2037 if (error)
2038 return error;
2039
2031 error = security_inode_mknod(dir, dentry, mode, dev); 2040 error = security_inode_mknod(dir, dentry, mode, dev);
2032 if (error) 2041 if (error)
2033 return error; 2042 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index f48f98110c30..4fc302c2a0e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,7 +14,6 @@
14#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/quotaops.h>
18#include <linux/acct.h> 17#include <linux/acct.h>
19#include <linux/capability.h> 18#include <linux/capability.h>
20#include <linux/cpumask.h> 19#include <linux/cpumask.h>
@@ -1084,7 +1083,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
1084 down_write(&sb->s_umount); 1083 down_write(&sb->s_umount);
1085 if (!(sb->s_flags & MS_RDONLY)) { 1084 if (!(sb->s_flags & MS_RDONLY)) {
1086 lock_kernel(); 1085 lock_kernel();
1087 DQUOT_OFF(sb);
1088 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); 1086 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
1089 unlock_kernel(); 1087 unlock_kernel();
1090 } 1088 }
@@ -1178,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd)
1178#endif 1176#endif
1179} 1177}
1180 1178
1181static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
1182{
1183 while (1) {
1184 if (d == dentry)
1185 return 1;
1186 if (d == NULL || d == d->d_parent)
1187 return 0;
1188 d = d->d_parent;
1189 }
1190}
1191
1192struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1179struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1193 int flag) 1180 int flag)
1194{ 1181{
@@ -1205,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1205 1192
1206 p = mnt; 1193 p = mnt;
1207 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1194 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1208 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) 1195 if (!is_subdir(r->mnt_mountpoint, dentry))
1209 continue; 1196 continue;
1210 1197
1211 for (s = r; s; s = next_mnt(s, r)) { 1198 for (s = r; s; s = next_mnt(s, r)) {
@@ -2342,10 +2329,10 @@ void __init mnt_init(void)
2342 err = sysfs_init(); 2329 err = sysfs_init();
2343 if (err) 2330 if (err)
2344 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2345 __FUNCTION__, err); 2332 __func__, err);
2346 fs_kobj = kobject_create_and_add("fs", NULL); 2333 fs_kobj = kobject_create_and_add("fs", NULL);
2347 if (!fs_kobj) 2334 if (!fs_kobj)
2348 printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); 2335 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2349 init_rootfs(); 2336 init_rootfs();
2350 init_mount_tree(); 2337 init_mount_tree();
2351} 2338}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index fbbb9f7afa1a..2e5ab1204dec 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -107,12 +107,6 @@ static const struct super_operations ncp_sops =
107 .show_options = ncp_show_options, 107 .show_options = ncp_show_options,
108}; 108};
109 109
110extern struct dentry_operations ncp_root_dentry_operations;
111#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
112extern const struct address_space_operations ncp_symlink_aops;
113extern int ncp_symlink(struct inode*, struct dentry*, const char*);
114#endif
115
116/* 110/*
117 * Fill in the ncpfs-specific information in the inode. 111 * Fill in the ncpfs-specific information in the inode.
118 */ 112 */
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index ad8f167e54bc..3a97c95e1ca2 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -389,11 +389,11 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
389 struct dentry* dentry = inode->i_sb->s_root; 389 struct dentry* dentry = inode->i_sb->s_root;
390 390
391 if (dentry) { 391 if (dentry) {
392 struct inode* inode = dentry->d_inode; 392 struct inode* s_inode = dentry->d_inode;
393 393
394 if (inode) { 394 if (s_inode) {
395 sr.volNumber = NCP_FINFO(inode)->volNumber; 395 sr.volNumber = NCP_FINFO(s_inode)->volNumber;
396 sr.dirEntNum = NCP_FINFO(inode)->dirEntNum; 396 sr.dirEntNum = NCP_FINFO(s_inode)->dirEntNum;
397 sr.namespace = server->name_space[sr.volNumber]; 397 sr.namespace = server->name_space[sr.volNumber];
398 } else 398 } else
399 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 399 DPRINTK("ncpfs: s_root->d_inode==NULL\n");
@@ -439,12 +439,12 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
439 dentry = inode->i_sb->s_root; 439 dentry = inode->i_sb->s_root;
440 server->root_setuped = 1; 440 server->root_setuped = 1;
441 if (dentry) { 441 if (dentry) {
442 struct inode* inode = dentry->d_inode; 442 struct inode* s_inode = dentry->d_inode;
443 443
444 if (inode) { 444 if (inode) {
445 NCP_FINFO(inode)->volNumber = vnum; 445 NCP_FINFO(s_inode)->volNumber = vnum;
446 NCP_FINFO(inode)->dirEntNum = de; 446 NCP_FINFO(s_inode)->dirEntNum = de;
447 NCP_FINFO(inode)->DosDirNum = dosde; 447 NCP_FINFO(s_inode)->DosDirNum = dosde;
448 } else 448 } else
449 DPRINTK("ncpfs: s_root->d_inode==NULL\n"); 449 DPRINTK("ncpfs: s_root->d_inode==NULL\n");
450 } else 450 } else
@@ -519,7 +519,6 @@ static int __ncp_ioctl(struct inode *inode, struct file *filp,
519 } 519 }
520 { 520 {
521 struct ncp_lock_ioctl rqdata; 521 struct ncp_lock_ioctl rqdata;
522 int result;
523 522
524 if (copy_from_user(&rqdata, argp, sizeof(rqdata))) 523 if (copy_from_user(&rqdata, argp, sizeof(rqdata)))
525 return -EFAULT; 524 return -EFAULT;
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index df6d60bdfcd3..97645f112114 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction
102} 102}
103 103
104static inline char * 104static inline char *
105 ncp_reply_data(struct ncp_server *server, int offset) 105ncp_reply_data(struct ncp_server *server, int offset)
106{ 106{
107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]); 107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]);
108} 108}
109 109
110static inline __u8 BVAL(void* data) 110static inline u8 BVAL(void *data)
111{ 111{
112 return get_unaligned((__u8*)data); 112 return *(u8 *)data;
113} 113}
114 114
115static __u8 115static u8 ncp_reply_byte(struct ncp_server *server, int offset)
116 ncp_reply_byte(struct ncp_server *server, int offset)
117{ 116{
118 return get_unaligned((__u8 *) ncp_reply_data(server, offset)); 117 return *(u8 *)ncp_reply_data(server, offset);
119} 118}
120 119
121static inline __u16 WVAL_LH(void* data) 120static inline u16 WVAL_LH(void *data)
122{ 121{
123 return le16_to_cpu(get_unaligned((__le16*)data)); 122 return get_unaligned_le16(data);
124} 123}
125 124
126static __u16 125static u16
127 ncp_reply_le16(struct ncp_server *server, int offset) 126ncp_reply_le16(struct ncp_server *server, int offset)
128{ 127{
129 return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); 128 return get_unaligned_le16(ncp_reply_data(server, offset));
130} 129}
131 130
132static __u16 131static u16
133 ncp_reply_be16(struct ncp_server *server, int offset) 132ncp_reply_be16(struct ncp_server *server, int offset)
134{ 133{
135 return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); 134 return get_unaligned_be16(ncp_reply_data(server, offset));
136} 135}
137 136
138static inline __u32 DVAL_LH(void* data) 137static inline u32 DVAL_LH(void *data)
139{ 138{
140 return le32_to_cpu(get_unaligned((__le32*)data)); 139 return get_unaligned_le32(data);
141} 140}
142 141
143static __le32 142static __le32
144 ncp_reply_dword(struct ncp_server *server, int offset) 143ncp_reply_dword(struct ncp_server *server, int offset)
145{ 144{
146 return get_unaligned((__le32 *) ncp_reply_data(server, offset)); 145 return get_unaligned((__le32 *)ncp_reply_data(server, offset));
147} 146}
148 147
149static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { 148static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) {
@@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
1006 result = ncp_request2(server, 72, bounce, bufsize); 1005 result = ncp_request2(server, 72, bounce, bufsize);
1007 ncp_unlock_server(server); 1006 ncp_unlock_server(server);
1008 if (!result) { 1007 if (!result) {
1009 int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + 1008 int len = get_unaligned_be16((char *)bounce +
1010 sizeof(struct ncp_reply_header)))); 1009 sizeof(struct ncp_reply_header));
1011 result = -EIO; 1010 result = -EIO;
1012 if (len <= to_read) { 1011 if (len <= to_read) {
1013 char* source; 1012 char* source;
diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c
index 749a18d33599..7c0b5c21e6cf 100644
--- a/fs/ncpfs/ncpsign_kernel.c
+++ b/fs/ncpfs/ncpsign_kernel.c
@@ -55,7 +55,7 @@ static void nwsign(char *r_data1, char *r_data2, char *outdata) {
55 unsigned int w0,w1,w2,w3; 55 unsigned int w0,w1,w2,w3;
56 static int rbit[4]={0, 2, 1, 3}; 56 static int rbit[4]={0, 2, 1, 3};
57#ifdef __i386__ 57#ifdef __i386__
58 unsigned int *data2=(int *)r_data2; 58 unsigned int *data2=(unsigned int *)r_data2;
59#else 59#else
60 unsigned int data2[16]; 60 unsigned int data2[16];
61 for (i=0;i<16;i++) 61 for (i=0;i<16;i++)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f2f3b284e6dd..89ac5bb0401c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = {
1321 .read = seq_read, 1321 .read = seq_read,
1322 .llseek = seq_lseek, 1322 .llseek = seq_lseek,
1323 .release = seq_release, 1323 .release = seq_release,
1324 .owner = THIS_MODULE,
1324}; 1325};
1325 1326
1326static int nfs_volume_list_open(struct inode *inode, struct file *file); 1327static int nfs_volume_list_open(struct inode *inode, struct file *file);
@@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = {
1341 .read = seq_read, 1342 .read = seq_read,
1342 .llseek = seq_lseek, 1343 .llseek = seq_lseek,
1343 .release = seq_release, 1344 .release = seq_release,
1345 .owner = THIS_MODULE,
1344}; 1346};
1345 1347
1346/* 1348/*
@@ -1500,33 +1502,29 @@ int __init nfs_fs_proc_init(void)
1500{ 1502{
1501 struct proc_dir_entry *p; 1503 struct proc_dir_entry *p;
1502 1504
1503 proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); 1505 proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
1504 if (!proc_fs_nfs) 1506 if (!proc_fs_nfs)
1505 goto error_0; 1507 goto error_0;
1506 1508
1507 proc_fs_nfs->owner = THIS_MODULE; 1509 proc_fs_nfs->owner = THIS_MODULE;
1508 1510
1509 /* a file of servers with which we're dealing */ 1511 /* a file of servers with which we're dealing */
1510 p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); 1512 p = proc_create("servers", S_IFREG|S_IRUGO,
1513 proc_fs_nfs, &nfs_server_list_fops);
1511 if (!p) 1514 if (!p)
1512 goto error_1; 1515 goto error_1;
1513 1516
1514 p->proc_fops = &nfs_server_list_fops;
1515 p->owner = THIS_MODULE;
1516
1517 /* a file of volumes that we have mounted */ 1517 /* a file of volumes that we have mounted */
1518 p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); 1518 p = proc_create("volumes", S_IFREG|S_IRUGO,
1519 proc_fs_nfs, &nfs_volume_list_fops);
1519 if (!p) 1520 if (!p)
1520 goto error_2; 1521 goto error_2;
1521
1522 p->proc_fops = &nfs_volume_list_fops;
1523 p->owner = THIS_MODULE;
1524 return 0; 1522 return 0;
1525 1523
1526error_2: 1524error_2:
1527 remove_proc_entry("servers", proc_fs_nfs); 1525 remove_proc_entry("servers", proc_fs_nfs);
1528error_1: 1526error_1:
1529 remove_proc_entry("nfsfs", proc_root_fs); 1527 remove_proc_entry("fs/nfsfs", NULL);
1530error_0: 1528error_0:
1531 return -ENOMEM; 1529 return -ENOMEM;
1532} 1530}
@@ -1538,7 +1536,7 @@ void nfs_fs_proc_exit(void)
1538{ 1536{
1539 remove_proc_entry("volumes", proc_fs_nfs); 1537 remove_proc_entry("volumes", proc_fs_nfs);
1540 remove_proc_entry("servers", proc_fs_nfs); 1538 remove_proc_entry("servers", proc_fs_nfs);
1541 remove_proc_entry("nfsfs", proc_root_fs); 1539 remove_proc_entry("fs/nfsfs", NULL);
1542} 1540}
1543 1541
1544#endif /* CONFIG_PROC_FS */ 1542#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa220dc74609..7226a506f3ca 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data)
1575 return nfs_compare_mount_options(sb, server, mntflags); 1575 return nfs_compare_mount_options(sb, server, mntflags);
1576} 1576}
1577 1577
1578static int nfs_bdi_register(struct nfs_server *server)
1579{
1580 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
1581}
1582
1578static int nfs_get_sb(struct file_system_type *fs_type, 1583static int nfs_get_sb(struct file_system_type *fs_type,
1579 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1584 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1580{ 1585{
@@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1617 if (s->s_fs_info != server) { 1622 if (s->s_fs_info != server) {
1618 nfs_free_server(server); 1623 nfs_free_server(server);
1619 server = NULL; 1624 server = NULL;
1625 } else {
1626 error = nfs_bdi_register(server);
1627 if (error)
1628 goto error_splat_super;
1620 } 1629 }
1621 1630
1622 if (!s->s_root) { 1631 if (!s->s_root) {
@@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s)
1664{ 1673{
1665 struct nfs_server *server = NFS_SB(s); 1674 struct nfs_server *server = NFS_SB(s);
1666 1675
1676 bdi_unregister(&server->backing_dev_info);
1667 kill_anon_super(s); 1677 kill_anon_super(s);
1668 nfs_free_server(server); 1678 nfs_free_server(server);
1669} 1679}
@@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1708 if (s->s_fs_info != server) { 1718 if (s->s_fs_info != server) {
1709 nfs_free_server(server); 1719 nfs_free_server(server);
1710 server = NULL; 1720 server = NULL;
1721 } else {
1722 error = nfs_bdi_register(server);
1723 if (error)
1724 goto error_splat_super;
1711 } 1725 }
1712 1726
1713 if (!s->s_root) { 1727 if (!s->s_root) {
@@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1984 if (s->s_fs_info != server) { 1998 if (s->s_fs_info != server) {
1985 nfs_free_server(server); 1999 nfs_free_server(server);
1986 server = NULL; 2000 server = NULL;
2001 } else {
2002 error = nfs_bdi_register(server);
2003 if (error)
2004 goto error_splat_super;
1987 } 2005 }
1988 2006
1989 if (!s->s_root) { 2007 if (!s->s_root) {
@@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2070 if (s->s_fs_info != server) { 2088 if (s->s_fs_info != server) {
2071 nfs_free_server(server); 2089 nfs_free_server(server);
2072 server = NULL; 2090 server = NULL;
2091 } else {
2092 error = nfs_bdi_register(server);
2093 if (error)
2094 goto error_splat_super;
2073 } 2095 }
2074 2096
2075 if (!s->s_root) { 2097 if (!s->s_root) {
@@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2149 if (s->s_fs_info != server) { 2171 if (s->s_fs_info != server) {
2150 nfs_free_server(server); 2172 nfs_free_server(server);
2151 server = NULL; 2173 server = NULL;
2174 } else {
2175 error = nfs_bdi_register(server);
2176 if (error)
2177 goto error_splat_super;
2152 } 2178 }
2153 2179
2154 if (!s->s_root) { 2180 if (!s->s_root) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 562abf3380d0..0b3ffa9840c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes)
104} while (0) 104} while (0)
105#define RESERVE_SPACE(nbytes) do { \ 105#define RESERVE_SPACE(nbytes) do { \
106 p = xdr_reserve_space(xdr, nbytes); \ 106 p = xdr_reserve_space(xdr, nbytes); \
107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ 107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
108 BUG_ON(!p); \ 108 BUG_ON(!p); \
109} while (0) 109} while (0)
110 110
@@ -134,7 +134,7 @@ xdr_error: \
134 p = xdr_inline_decode(xdr, nbytes); \ 134 p = xdr_inline_decode(xdr, nbytes); \
135 if (!p) { \ 135 if (!p) { \
136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ 136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
137 __FUNCTION__, __LINE__); \ 137 __func__, __LINE__); \
138 return -EIO; \ 138 return -EIO; \
139 } \ 139 } \
140} while (0) 140} while (0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 42f3820ee8f5..5ac00c4fee91 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -169,6 +169,7 @@ static const struct file_operations exports_operations = {
169 .read = seq_read, 169 .read = seq_read,
170 .llseek = seq_lseek, 170 .llseek = seq_lseek,
171 .release = seq_release, 171 .release = seq_release,
172 .owner = THIS_MODULE,
172}; 173};
173 174
174/*----------------------------------------------------------------------------*/ 175/*----------------------------------------------------------------------------*/
@@ -801,10 +802,9 @@ static int create_proc_exports_entry(void)
801 entry = proc_mkdir("fs/nfs", NULL); 802 entry = proc_mkdir("fs/nfs", NULL);
802 if (!entry) 803 if (!entry)
803 return -ENOMEM; 804 return -ENOMEM;
804 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 805 entry = proc_create("exports", 0, entry, &exports_operations);
805 if (!entry) 806 if (!entry)
806 return -ENOMEM; 807 return -ENOMEM;
807 entry->proc_fops = &exports_operations;
808 return 0; 808 return 0;
809} 809}
810#else /* CONFIG_PROC_FS */ 810#else /* CONFIG_PROC_FS */
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d127..5e6724c1afd1 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...);
45extern void __ntfs_debug (const char *file, int line, const char *function, 45extern void __ntfs_debug (const char *file, int line, const char *function,
46 const char *format, ...) __attribute__ ((format (printf, 4, 5))); 46 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
47#define ntfs_debug(f, a...) \ 47#define ntfs_debug(f, a...) \
48 __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) 48 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
49 49
50extern void ntfs_debug_dump_runlist(const runlist_element *rl); 50extern void ntfs_debug_dump_runlist(const runlist_element *rl);
51 51
@@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
58 58
59extern void __ntfs_warning(const char *function, const struct super_block *sb, 59extern void __ntfs_warning(const char *function, const struct super_block *sb,
60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
61#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) 61#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
62 62
63extern void __ntfs_error(const char *function, const struct super_block *sb, 63extern void __ntfs_error(const char *function, const struct super_block *sb,
64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
65#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) 65#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
66 66
67#endif /* _LINUX_NTFS_DEBUG_H */ 67#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2ad5c8b104b9..790defb847e7 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1191 if (size) { 1191 if (size) {
1192 page = ntfs_map_page(mftbmp_mapping, 1192 page = ntfs_map_page(mftbmp_mapping,
1193 ofs >> PAGE_CACHE_SHIFT); 1193 ofs >> PAGE_CACHE_SHIFT);
1194 if (unlikely(IS_ERR(page))) { 1194 if (IS_ERR(page)) {
1195 ntfs_error(vol->sb, "Failed to read mft " 1195 ntfs_error(vol->sb, "Failed to read mft "
1196 "bitmap, aborting."); 1196 "bitmap, aborting.");
1197 return PTR_ERR(page); 1197 return PTR_ERR(page);
@@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2118 } 2118 }
2119 /* Read, map, and pin the page containing the mft record. */ 2119 /* Read, map, and pin the page containing the mft record. */
2120 page = ntfs_map_page(mft_vi->i_mapping, index); 2120 page = ntfs_map_page(mft_vi->i_mapping, index);
2121 if (unlikely(IS_ERR(page))) { 2121 if (IS_ERR(page)) {
2122 ntfs_error(vol->sb, "Failed to map page containing mft record " 2122 ntfs_error(vol->sb, "Failed to map page containing mft record "
2123 "to format 0x%llx.", (long long)mft_no); 2123 "to format 0x%llx.", (long long)mft_no);
2124 return PTR_ERR(page); 2124 return PTR_ERR(page);
@@ -2519,7 +2519,7 @@ mft_rec_already_initialized:
2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2520 /* Read, map, and pin the page containing the mft record. */ 2520 /* Read, map, and pin the page containing the mft record. */
2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index); 2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index);
2522 if (unlikely(IS_ERR(page))) { 2522 if (IS_ERR(page)) {
2523 ntfs_error(vol->sb, "Failed to map page containing allocated " 2523 ntfs_error(vol->sb, "Failed to map page containing allocated "
2524 "mft record 0x%llx.", (long long)bit); 2524 "mft record 0x%llx.", (long long)bit);
2525 err = PTR_ERR(page); 2525 err = PTR_ERR(page);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 61a000f8524c..e48aba698b77 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -327,7 +327,7 @@ clear_fields:
327 327
328static struct backing_dev_info dlmfs_backing_dev_info = { 328static struct backing_dev_info dlmfs_backing_dev_info = {
329 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
330 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
331}; 331};
332 332
333static struct inode *dlmfs_get_root_inode(struct super_block *sb) 333static struct inode *dlmfs_get_root_inode(struct super_block *sb)
diff --git a/fs/open.c b/fs/open.c
index b70e7666bb2c..7af1f05d5978 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -837,7 +837,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
837 if (f->f_flags & O_DIRECT) { 837 if (f->f_flags & O_DIRECT) {
838 if (!f->f_mapping->a_ops || 838 if (!f->f_mapping->a_ops ||
839 ((!f->f_mapping->a_ops->direct_IO) && 839 ((!f->f_mapping->a_ops->direct_IO) &&
840 (!f->f_mapping->a_ops->get_xip_page))) { 840 (!f->f_mapping->a_ops->get_xip_mem))) {
841 fput(f); 841 fput(f);
842 f = ERR_PTR(-EINVAL); 842 f = ERR_PTR(-EINVAL);
843 } 843 }
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index e7dd1d4e3473..0fdda2e8a4cc 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -41,12 +41,12 @@
41#ifndef CONFIG_LDM_DEBUG 41#ifndef CONFIG_LDM_DEBUG
42#define ldm_debug(...) do {} while (0) 42#define ldm_debug(...) do {} while (0)
43#else 43#else
44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) 44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
45#endif 45#endif
46 46
47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) 47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) 48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) 49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
50 50
51__attribute__ ((format (printf, 3, 4))) 51__attribute__ ((format (printf, 3, 4)))
52static void _ldm_printk (const char *level, const char *function, 52static void _ldm_printk (const char *level, const char *function,
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 5567ec0d03a3..796511886f28 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -18,7 +18,7 @@
18 * 18 *
19 * Re-organised Feb 1998 Russell King 19 * Re-organised Feb 1998 Russell King
20 */ 20 */
21 21#include <linux/msdos_fs.h>
22 22
23#include "check.h" 23#include "check.h"
24#include "msdos.h" 24#include "msdos.h"
@@ -419,6 +419,7 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
419 Sector sect; 419 Sector sect;
420 unsigned char *data; 420 unsigned char *data;
421 struct partition *p; 421 struct partition *p;
422 struct fat_boot_sector *fb;
422 int slot; 423 int slot;
423 424
424 data = read_dev_sector(bdev, 0, &sect); 425 data = read_dev_sector(bdev, 0, &sect);
@@ -444,8 +445,21 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev)
444 p = (struct partition *) (data + 0x1be); 445 p = (struct partition *) (data + 0x1be);
445 for (slot = 1; slot <= 4; slot++, p++) { 446 for (slot = 1; slot <= 4; slot++, p++) {
446 if (p->boot_ind != 0 && p->boot_ind != 0x80) { 447 if (p->boot_ind != 0 && p->boot_ind != 0x80) {
447 put_dev_sector(sect); 448 /*
448 return 0; 449 * Even without a valid boot inidicator value
450 * its still possible this is valid FAT filesystem
451 * without a partition table.
452 */
453 fb = (struct fat_boot_sector *) data;
454 if (slot == 1 && fb->reserved && fb->fats
455 && fat_valid_media(fb->media)) {
456 printk("\n");
457 put_dev_sector(sect);
458 return 1;
459 } else {
460 put_dev_sector(sect);
461 return 0;
462 }
449 } 463 }
450 } 464 }
451 465
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07d6c4853fe8..c135cbdd9127 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -425,12 +425,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
425 cutime = cstime = utime = stime = cputime_zero; 425 cutime = cstime = utime = stime = cputime_zero;
426 cgtime = gtime = cputime_zero; 426 cgtime = gtime = cputime_zero;
427 427
428 rcu_read_lock();
429 if (lock_task_sighand(task, &flags)) { 428 if (lock_task_sighand(task, &flags)) {
430 struct signal_struct *sig = task->signal; 429 struct signal_struct *sig = task->signal;
431 430
432 if (sig->tty) { 431 if (sig->tty) {
433 tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); 432 struct pid *pgrp = tty_get_pgrp(sig->tty);
433 tty_pgrp = pid_nr_ns(pgrp, ns);
434 put_pid(pgrp);
434 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 435 tty_nr = new_encode_dev(tty_devnum(sig->tty));
435 } 436 }
436 437
@@ -469,7 +470,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
469 470
470 unlock_task_sighand(task, &flags); 471 unlock_task_sighand(task, &flags);
471 } 472 }
472 rcu_read_unlock();
473 473
474 if (!whole || num_threads < 2) 474 if (!whole || num_threads < 2)
475 wchan = get_wchan(task); 475 wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c5e412a00b17..fcf02f2deeba 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -195,12 +195,32 @@ static int proc_root_link(struct inode *inode, struct path *path)
195 return result; 195 return result;
196} 196}
197 197
198#define MAY_PTRACE(task) \ 198/*
199 (task == current || \ 199 * Return zero if current may access user memory in @task, -error if not.
200 (task->parent == current && \ 200 */
201 (task->ptrace & PT_PTRACED) && \ 201static int check_mem_permission(struct task_struct *task)
202 (task_is_stopped_or_traced(task)) && \ 202{
203 security_ptrace(current,task) == 0)) 203 /*
204 * A task can always look at itself, in case it chooses
205 * to use system calls instead of load instructions.
206 */
207 if (task == current)
208 return 0;
209
210 /*
211 * If current is actively ptrace'ing, and would also be
212 * permitted to freshly attach with ptrace now, permit it.
213 */
214 if (task->parent == current && (task->ptrace & PT_PTRACED) &&
215 task_is_stopped_or_traced(task) &&
216 ptrace_may_attach(task))
217 return 0;
218
219 /*
220 * Noone else is allowed.
221 */
222 return -EPERM;
223}
204 224
205struct mm_struct *mm_for_maps(struct task_struct *task) 225struct mm_struct *mm_for_maps(struct task_struct *task)
206{ 226{
@@ -722,7 +742,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
722 if (!task) 742 if (!task)
723 goto out_no_task; 743 goto out_no_task;
724 744
725 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 745 if (check_mem_permission(task))
726 goto out; 746 goto out;
727 747
728 ret = -ENOMEM; 748 ret = -ENOMEM;
@@ -748,7 +768,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
748 768
749 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 769 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
750 retval = access_process_vm(task, src, page, this_len, 0); 770 retval = access_process_vm(task, src, page, this_len, 0);
751 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 771 if (!retval || check_mem_permission(task)) {
752 if (!ret) 772 if (!ret)
753 ret = -EIO; 773 ret = -EIO;
754 break; 774 break;
@@ -792,7 +812,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
792 if (!task) 812 if (!task)
793 goto out_no_task; 813 goto out_no_task;
794 814
795 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 815 if (check_mem_permission(task))
796 goto out; 816 goto out;
797 817
798 copied = -ENOMEM; 818 copied = -ENOMEM;
@@ -1181,6 +1201,81 @@ static const struct file_operations proc_pid_sched_operations = {
1181 1201
1182#endif 1202#endif
1183 1203
1204/*
1205 * We added or removed a vma mapping the executable. The vmas are only mapped
1206 * during exec and are not mapped with the mmap system call.
1207 * Callers must hold down_write() on the mm's mmap_sem for these
1208 */
1209void added_exe_file_vma(struct mm_struct *mm)
1210{
1211 mm->num_exe_file_vmas++;
1212}
1213
1214void removed_exe_file_vma(struct mm_struct *mm)
1215{
1216 mm->num_exe_file_vmas--;
1217 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1218 fput(mm->exe_file);
1219 mm->exe_file = NULL;
1220 }
1221
1222}
1223
1224void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1225{
1226 if (new_exe_file)
1227 get_file(new_exe_file);
1228 if (mm->exe_file)
1229 fput(mm->exe_file);
1230 mm->exe_file = new_exe_file;
1231 mm->num_exe_file_vmas = 0;
1232}
1233
1234struct file *get_mm_exe_file(struct mm_struct *mm)
1235{
1236 struct file *exe_file;
1237
1238 /* We need mmap_sem to protect against races with removal of
1239 * VM_EXECUTABLE vmas */
1240 down_read(&mm->mmap_sem);
1241 exe_file = mm->exe_file;
1242 if (exe_file)
1243 get_file(exe_file);
1244 up_read(&mm->mmap_sem);
1245 return exe_file;
1246}
1247
1248void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1249{
1250 /* It's safe to write the exe_file pointer without exe_file_lock because
1251 * this is called during fork when the task is not yet in /proc */
1252 newmm->exe_file = get_mm_exe_file(oldmm);
1253}
1254
1255static int proc_exe_link(struct inode *inode, struct path *exe_path)
1256{
1257 struct task_struct *task;
1258 struct mm_struct *mm;
1259 struct file *exe_file;
1260
1261 task = get_proc_task(inode);
1262 if (!task)
1263 return -ENOENT;
1264 mm = get_task_mm(task);
1265 put_task_struct(task);
1266 if (!mm)
1267 return -ENOENT;
1268 exe_file = get_mm_exe_file(mm);
1269 mmput(mm);
1270 if (exe_file) {
1271 *exe_path = exe_file->f_path;
1272 path_get(&exe_file->f_path);
1273 fput(exe_file);
1274 return 0;
1275 } else
1276 return -ENOENT;
1277}
1278
1184static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1279static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1185{ 1280{
1186 struct inode *inode = dentry->d_inode; 1281 struct inode *inode = dentry->d_inode;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a36ad3c75cf4..9d53b39a9cf8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
70 70
71 start = NULL; 71 start = NULL;
72 if (dp->get_info) { 72 if (dp->read_proc) {
73 /* Handle old net routines */
74 n = dp->get_info(page, &start, *ppos, count);
75 if (n < count)
76 eof = 1;
77 } else if (dp->read_proc) {
78 /* 73 /*
79 * How to be a proc read function 74 * How to be a proc read function
80 * ------------------------------ 75 * ------------------------------
@@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name,
277 int len; 272 int len;
278 int rtn = 0; 273 int rtn = 0;
279 274
275 de = *ret;
276 if (!de)
277 de = &proc_root;
278
280 spin_lock(&proc_subdir_lock); 279 spin_lock(&proc_subdir_lock);
281 de = &proc_root;
282 while (1) { 280 while (1) {
283 next = strchr(cp, '/'); 281 next = strchr(cp, '/');
284 if (!next) 282 if (!next)
@@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
385 383
386 lock_kernel(); 384 lock_kernel();
387 spin_lock(&proc_subdir_lock); 385 spin_lock(&proc_subdir_lock);
388 if (de) { 386 for (de = de->subdir; de ; de = de->next) {
389 for (de = de->subdir; de ; de = de->next) { 387 if (de->namelen != dentry->d_name.len)
390 if (de->namelen != dentry->d_name.len) 388 continue;
391 continue; 389 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 390 unsigned int ino;
393 unsigned int ino;
394 391
395 ino = de->low_ino; 392 ino = de->low_ino;
396 de_get(de); 393 de_get(de);
397 spin_unlock(&proc_subdir_lock); 394 spin_unlock(&proc_subdir_lock);
398 error = -EINVAL; 395 error = -EINVAL;
399 inode = proc_get_inode(dir->i_sb, ino, de); 396 inode = proc_get_inode(dir->i_sb, ino, de);
400 goto out_unlock; 397 goto out_unlock;
401 }
402 } 398 }
403 } 399 }
404 spin_unlock(&proc_subdir_lock); 400 spin_unlock(&proc_subdir_lock);
@@ -410,7 +406,8 @@ out_unlock:
410 d_add(dentry, inode); 406 d_add(dentry, inode);
411 return NULL; 407 return NULL;
412 } 408 }
413 de_put(de); 409 if (de)
410 de_put(de);
414 return ERR_PTR(error); 411 return ERR_PTR(error);
415} 412}
416 413
@@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
440 lock_kernel(); 437 lock_kernel();
441 438
442 ino = inode->i_ino; 439 ino = inode->i_ino;
443 if (!de) {
444 ret = -EINVAL;
445 goto out;
446 }
447 i = filp->f_pos; 440 i = filp->f_pos;
448 switch (i) { 441 switch (i) {
449 case 0: 442 case 0:
@@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
582 /* make sure name is valid */ 575 /* make sure name is valid */
583 if (!name || !strlen(name)) goto out; 576 if (!name || !strlen(name)) goto out;
584 577
585 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 578 if (xlate_proc_name(name, parent, &fn) != 0)
586 goto out; 579 goto out;
587 580
588 /* At this point there must not be any '/' characters beyond *fn */ 581 /* At this point there must not be any '/' characters beyond *fn */
@@ -682,9 +675,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
682 return ent; 675 return ent;
683} 676}
684 677
685struct proc_dir_entry *proc_create(const char *name, mode_t mode, 678struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
686 struct proc_dir_entry *parent, 679 struct proc_dir_entry *parent,
687 const struct file_operations *proc_fops) 680 const struct file_operations *proc_fops,
681 void *data)
688{ 682{
689 struct proc_dir_entry *pde; 683 struct proc_dir_entry *pde;
690 nlink_t nlink; 684 nlink_t nlink;
@@ -705,6 +699,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode,
705 if (!pde) 699 if (!pde)
706 goto out; 700 goto out;
707 pde->proc_fops = proc_fops; 701 pde->proc_fops = proc_fops;
702 pde->data = data;
708 if (proc_register(parent, pde) < 0) 703 if (proc_register(parent, pde) < 0)
709 goto out_free; 704 goto out_free;
710 return pde; 705 return pde;
@@ -734,55 +729,58 @@ void free_proc_entry(struct proc_dir_entry *de)
734void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 729void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
735{ 730{
736 struct proc_dir_entry **p; 731 struct proc_dir_entry **p;
737 struct proc_dir_entry *de; 732 struct proc_dir_entry *de = NULL;
738 const char *fn = name; 733 const char *fn = name;
739 int len; 734 int len;
740 735
741 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 736 if (xlate_proc_name(name, &parent, &fn) != 0)
742 goto out; 737 return;
743 len = strlen(fn); 738 len = strlen(fn);
744 739
745 spin_lock(&proc_subdir_lock); 740 spin_lock(&proc_subdir_lock);
746 for (p = &parent->subdir; *p; p=&(*p)->next ) { 741 for (p = &parent->subdir; *p; p=&(*p)->next ) {
747 if (!proc_match(len, fn, *p)) 742 if (proc_match(len, fn, *p)) {
748 continue; 743 de = *p;
749 de = *p; 744 *p = de->next;
750 *p = de->next; 745 de->next = NULL;
751 de->next = NULL; 746 break;
752 747 }
753 spin_lock(&de->pde_unload_lock); 748 }
754 /* 749 spin_unlock(&proc_subdir_lock);
755 * Stop accepting new callers into module. If you're 750 if (!de)
756 * dynamically allocating ->proc_fops, save a pointer somewhere. 751 return;
757 */
758 de->proc_fops = NULL;
759 /* Wait until all existing callers into module are done. */
760 if (de->pde_users > 0) {
761 DECLARE_COMPLETION_ONSTACK(c);
762
763 if (!de->pde_unload_completion)
764 de->pde_unload_completion = &c;
765
766 spin_unlock(&de->pde_unload_lock);
767 spin_unlock(&proc_subdir_lock);
768 752
769 wait_for_completion(de->pde_unload_completion); 753 spin_lock(&de->pde_unload_lock);
754 /*
755 * Stop accepting new callers into module. If you're
756 * dynamically allocating ->proc_fops, save a pointer somewhere.
757 */
758 de->proc_fops = NULL;
759 /* Wait until all existing callers into module are done. */
760 if (de->pde_users > 0) {
761 DECLARE_COMPLETION_ONSTACK(c);
762
763 if (!de->pde_unload_completion)
764 de->pde_unload_completion = &c;
770 765
771 spin_lock(&proc_subdir_lock);
772 goto continue_removing;
773 }
774 spin_unlock(&de->pde_unload_lock); 766 spin_unlock(&de->pde_unload_lock);
775 767
768 wait_for_completion(de->pde_unload_completion);
769
770 goto continue_removing;
771 }
772 spin_unlock(&de->pde_unload_lock);
773
776continue_removing: 774continue_removing:
777 if (S_ISDIR(de->mode)) 775 if (S_ISDIR(de->mode))
778 parent->nlink--; 776 parent->nlink--;
779 de->nlink = 0; 777 de->nlink = 0;
780 WARN_ON(de->subdir); 778 if (de->subdir) {
781 if (atomic_dec_and_test(&de->count)) 779 printk(KERN_WARNING "%s: removing non-empty directory "
782 free_proc_entry(de); 780 "'%s/%s', leaking at least '%s'\n", __func__,
783 break; 781 de->parent->name, de->name, de->subdir->name);
782 WARN_ON(1);
784 } 783 }
785 spin_unlock(&proc_subdir_lock); 784 if (atomic_dec_and_test(&de->count))
786out: 785 free_proc_entry(de);
787 return;
788} 786}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 82b3a1b5a70b..6f4e8dc97da1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,8 +25,7 @@
25 25
26struct proc_dir_entry *de_get(struct proc_dir_entry *de) 26struct proc_dir_entry *de_get(struct proc_dir_entry *de)
27{ 27{
28 if (de) 28 atomic_inc(&de->count);
29 atomic_inc(&de->count);
30 return de; 29 return de;
31} 30}
32 31
@@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de)
35 */ 34 */
36void de_put(struct proc_dir_entry *de) 35void de_put(struct proc_dir_entry *de)
37{ 36{
38 if (de) { 37 lock_kernel();
39 lock_kernel(); 38 if (!atomic_read(&de->count)) {
40 if (!atomic_read(&de->count)) { 39 printk("de_put: entry %s already free!\n", de->name);
41 printk("de_put: entry %s already free!\n", de->name);
42 unlock_kernel();
43 return;
44 }
45
46 if (atomic_dec_and_test(&de->count))
47 free_proc_entry(de);
48 unlock_kernel(); 40 unlock_kernel();
41 return;
49 } 42 }
43
44 if (atomic_dec_and_test(&de->count))
45 free_proc_entry(de);
46 unlock_kernel();
50} 47}
51 48
52/* 49/*
@@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
392{ 389{
393 struct inode * inode; 390 struct inode * inode;
394 391
395 if (de != NULL && !try_module_get(de->owner)) 392 if (!try_module_get(de->owner))
396 goto out_mod; 393 goto out_mod;
397 394
398 inode = iget_locked(sb, ino); 395 inode = iget_locked(sb, ino);
@@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
402 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 399 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
403 PROC_I(inode)->fd = 0; 400 PROC_I(inode)->fd = 0;
404 PROC_I(inode)->pde = de; 401 PROC_I(inode)->pde = de;
405 if (de) { 402
406 if (de->mode) { 403 if (de->mode) {
407 inode->i_mode = de->mode; 404 inode->i_mode = de->mode;
408 inode->i_uid = de->uid; 405 inode->i_uid = de->uid;
409 inode->i_gid = de->gid; 406 inode->i_gid = de->gid;
410 } 407 }
411 if (de->size) 408 if (de->size)
412 inode->i_size = de->size; 409 inode->i_size = de->size;
413 if (de->nlink) 410 if (de->nlink)
414 inode->i_nlink = de->nlink; 411 inode->i_nlink = de->nlink;
415 if (de->proc_iops) 412 if (de->proc_iops)
416 inode->i_op = de->proc_iops; 413 inode->i_op = de->proc_iops;
417 if (de->proc_fops) { 414 if (de->proc_fops) {
418 if (S_ISREG(inode->i_mode)) { 415 if (S_ISREG(inode->i_mode)) {
419#ifdef CONFIG_COMPAT 416#ifdef CONFIG_COMPAT
420 if (!de->proc_fops->compat_ioctl) 417 if (!de->proc_fops->compat_ioctl)
421 inode->i_fop = 418 inode->i_fop =
422 &proc_reg_file_ops_no_compat; 419 &proc_reg_file_ops_no_compat;
423 else 420 else
424#endif 421#endif
425 inode->i_fop = &proc_reg_file_ops; 422 inode->i_fop = &proc_reg_file_ops;
426 } else { 423 } else {
427 inode->i_fop = de->proc_fops; 424 inode->i_fop = de->proc_fops;
428 }
429 } 425 }
430 } 426 }
431 unlock_new_inode(inode); 427 unlock_new_inode(inode);
@@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
433 return inode; 429 return inode;
434 430
435out_ino: 431out_ino:
436 if (de != NULL) 432 module_put(de->owner);
437 module_put(de->owner);
438out_mod: 433out_mod:
439 return NULL; 434 return NULL;
440} 435}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index bc72f5c8c47d..28cbca805905 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern struct proc_dir_entry proc_root;
14#ifdef CONFIG_PROC_SYSCTL 15#ifdef CONFIG_PROC_SYSCTL
15extern int proc_sys_init(void); 16extern int proc_sys_init(void);
16#else 17#else
@@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46 47
47extern int maps_protect; 48extern int maps_protect;
48 49
49extern void create_seq_entry(char *name, mode_t mode,
50 const struct file_operations *f);
51extern int proc_exe_link(struct inode *, struct path *);
52extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 50extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 51 struct pid *pid, struct task_struct *task);
54extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 52extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 941e95114b5a..79ecd281d2cb 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 137
138static int __init proc_nommu_init(void) 138static int __init proc_nommu_init(void)
139{ 139{
140 create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); 140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
141 return 0; 141 return 0;
142} 142}
143 143
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 2d563979cb02..74a323d2b850 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
179 "PageTables: %8lu kB\n" 179 "PageTables: %8lu kB\n"
180 "NFS_Unstable: %8lu kB\n" 180 "NFS_Unstable: %8lu kB\n"
181 "Bounce: %8lu kB\n" 181 "Bounce: %8lu kB\n"
182 "WritebackTmp: %8lu kB\n"
182 "CommitLimit: %8lu kB\n" 183 "CommitLimit: %8lu kB\n"
183 "Committed_AS: %8lu kB\n" 184 "Committed_AS: %8lu kB\n"
184 "VmallocTotal: %8lu kB\n" 185 "VmallocTotal: %8lu kB\n"
@@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
210 K(global_page_state(NR_PAGETABLE)), 211 K(global_page_state(NR_PAGETABLE)),
211 K(global_page_state(NR_UNSTABLE_NFS)), 212 K(global_page_state(NR_UNSTABLE_NFS)),
212 K(global_page_state(NR_BOUNCE)), 213 K(global_page_state(NR_BOUNCE)),
214 K(global_page_state(NR_WRITEBACK_TEMP)),
213 K(allowed), 215 K(allowed),
214 K(committed), 216 K(committed),
215 (unsigned long)VMALLOC_TOTAL >> 10, 217 (unsigned long)VMALLOC_TOTAL >> 10,
@@ -456,6 +458,20 @@ static const struct file_operations proc_slabstats_operations = {
456#endif 458#endif
457#endif 459#endif
458 460
461#ifdef CONFIG_MMU
462static int vmalloc_open(struct inode *inode, struct file *file)
463{
464 return seq_open(file, &vmalloc_op);
465}
466
467static const struct file_operations proc_vmalloc_operations = {
468 .open = vmalloc_open,
469 .read = seq_read,
470 .llseek = seq_lseek,
471 .release = seq_release,
472};
473#endif
474
459static int show_stat(struct seq_file *p, void *v) 475static int show_stat(struct seq_file *p, void *v)
460{ 476{
461 int i; 477 int i;
@@ -812,14 +828,6 @@ static struct file_operations proc_kpageflags_operations = {
812 828
813struct proc_dir_entry *proc_root_kcore; 829struct proc_dir_entry *proc_root_kcore;
814 830
815void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
816{
817 struct proc_dir_entry *entry;
818 entry = create_proc_entry(name, mode, NULL);
819 if (entry)
820 entry->proc_fops = f;
821}
822
823void __init proc_misc_init(void) 831void __init proc_misc_init(void)
824{ 832{
825 static struct { 833 static struct {
@@ -848,63 +856,52 @@ void __init proc_misc_init(void)
848 856
849 /* And now for trickier ones */ 857 /* And now for trickier ones */
850#ifdef CONFIG_PRINTK 858#ifdef CONFIG_PRINTK
851 { 859 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
852 struct proc_dir_entry *entry;
853 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
854 if (entry)
855 entry->proc_fops = &proc_kmsg_operations;
856 }
857#endif 860#endif
858 create_seq_entry("locks", 0, &proc_locks_operations); 861 proc_create("locks", 0, NULL, &proc_locks_operations);
859 create_seq_entry("devices", 0, &proc_devinfo_operations); 862 proc_create("devices", 0, NULL, &proc_devinfo_operations);
860 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 863 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
861#ifdef CONFIG_BLOCK 864#ifdef CONFIG_BLOCK
862 create_seq_entry("partitions", 0, &proc_partitions_operations); 865 proc_create("partitions", 0, NULL, &proc_partitions_operations);
863#endif 866#endif
864 create_seq_entry("stat", 0, &proc_stat_operations); 867 proc_create("stat", 0, NULL, &proc_stat_operations);
865 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 868 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
866#ifdef CONFIG_SLABINFO 869#ifdef CONFIG_SLABINFO
867 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 870 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
868#ifdef CONFIG_DEBUG_SLAB_LEAK 871#ifdef CONFIG_DEBUG_SLAB_LEAK
869 create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); 872 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
873#endif
870#endif 874#endif
875#ifdef CONFIG_MMU
876 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
871#endif 877#endif
872 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 878 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
873 create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); 879 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
874 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 880 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
875 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 881 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
876#ifdef CONFIG_BLOCK 882#ifdef CONFIG_BLOCK
877 create_seq_entry("diskstats", 0, &proc_diskstats_operations); 883 proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
878#endif 884#endif
879#ifdef CONFIG_MODULES 885#ifdef CONFIG_MODULES
880 create_seq_entry("modules", 0, &proc_modules_operations); 886 proc_create("modules", 0, NULL, &proc_modules_operations);
881#endif 887#endif
882#ifdef CONFIG_SCHEDSTATS 888#ifdef CONFIG_SCHEDSTATS
883 create_seq_entry("schedstat", 0, &proc_schedstat_operations); 889 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
884#endif 890#endif
885#ifdef CONFIG_PROC_KCORE 891#ifdef CONFIG_PROC_KCORE
886 proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); 892 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
887 if (proc_root_kcore) { 893 if (proc_root_kcore)
888 proc_root_kcore->proc_fops = &proc_kcore_operations;
889 proc_root_kcore->size = 894 proc_root_kcore->size =
890 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 895 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
891 }
892#endif 896#endif
893#ifdef CONFIG_PROC_PAGE_MONITOR 897#ifdef CONFIG_PROC_PAGE_MONITOR
894 create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); 898 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
895 create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); 899 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
896#endif 900#endif
897#ifdef CONFIG_PROC_VMCORE 901#ifdef CONFIG_PROC_VMCORE
898 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); 902 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
899 if (proc_vmcore)
900 proc_vmcore->proc_fops = &proc_vmcore_operations;
901#endif 903#endif
902#ifdef CONFIG_MAGIC_SYSRQ 904#ifdef CONFIG_MAGIC_SYSRQ
903 { 905 proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
904 struct proc_dir_entry *entry;
905 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
906 if (entry)
907 entry->proc_fops = &proc_sysrq_trigger_operations;
908 }
909#endif 906#endif
910} 907}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 614c34b6d1c2..5acc001d49f6 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -165,8 +165,8 @@ out:
165 return err; 165 return err;
166} 166}
167 167
168static ssize_t proc_sys_read(struct file *filp, char __user *buf, 168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos) 169 size_t count, loff_t *ppos, int write)
170{ 170{
171 struct dentry *dentry = filp->f_dentry; 171 struct dentry *dentry = filp->f_dentry;
172 struct ctl_table_header *head; 172 struct ctl_table_header *head;
@@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf,
190 * and won't be until we finish. 190 * and won't be until we finish.
191 */ 191 */
192 error = -EPERM; 192 error = -EPERM;
193 if (sysctl_perm(table, MAY_READ)) 193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 194 goto out;
195 195
196 /* careful: calling conventions are nasty here */ 196 /* careful: calling conventions are nasty here */
197 res = count; 197 res = count;
198 error = table->proc_handler(table, 0, filp, buf, &res, ppos); 198 error = table->proc_handler(table, write, filp, buf, &res, ppos);
199 if (!error) 199 if (!error)
200 error = res; 200 error = res;
201out: 201out:
@@ -204,44 +204,16 @@ out:
204 return error; 204 return error;
205} 205}
206 206
207static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 207static ssize_t proc_sys_read(struct file *filp, char __user *buf,
208 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
209{ 209{
210 struct dentry *dentry = filp->f_dentry; 210 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
211 struct ctl_table_header *head; 211}
212 struct ctl_table *table;
213 ssize_t error;
214 size_t res;
215
216 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
217 /* Has the sysctl entry disappeared on us? */
218 error = -ENOENT;
219 if (!table)
220 goto out;
221
222 /* Has the sysctl entry been replaced by a directory? */
223 error = -EISDIR;
224 if (!table->proc_handler)
225 goto out;
226
227 /*
228 * At this point we know that the sysctl was not unregistered
229 * and won't be until we finish.
230 */
231 error = -EPERM;
232 if (sysctl_perm(table, MAY_WRITE))
233 goto out;
234
235 /* careful: calling conventions are nasty here */
236 res = count;
237 error = table->proc_handler(table, 1, filp, (char __user *)buf,
238 &res, ppos);
239 if (!error)
240 error = res;
241out:
242 sysctl_head_finish(head);
243 212
244 return error; 213static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
214 size_t count, loff_t *ppos)
215{
216 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
245} 217}
246 218
247 219
@@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
416 goto out; 388 goto out;
417 389
418 /* Use the permissions on the sysctl table entry */ 390 /* Use the permissions on the sysctl table entry */
419 error = sysctl_perm(table, mask); 391 error = sysctl_perm(head->root, table, mask);
420out: 392out:
421 sysctl_head_finish(head); 393 sysctl_head_finish(head);
422 return error; 394 return error;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 49816e00b51a..21f490f5d65c 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -5,7 +5,7 @@
5 */ 5 */
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8#include <linux/module.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/time.h> 11#include <linux/time.h>
@@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139/* 139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140 * This is the handler for /proc/tty/ldiscs
141 */
142static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
143 int count, int *eof, void *data)
144{ 140{
145 int i; 141 return (*pos < NR_LDISCS) ? pos : NULL;
146 int len = 0; 142}
147 off_t begin = 0; 143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
148 struct tty_ldisc *ld; 157 struct tty_ldisc *ld;
149 158
150 for (i=0; i < NR_LDISCS; i++) { 159 ld = tty_ldisc_get(i);
151 ld = tty_ldisc_get(i); 160 if (ld == NULL)
152 if (ld == NULL)
153 continue;
154 len += sprintf(page+len, "%-10s %2d\n",
155 ld->name ? ld->name : "???", i);
156 tty_ldisc_put(i);
157 if (len+begin > off+count)
158 break;
159 if (len+begin < off) {
160 begin += len;
161 len = 0;
162 }
163 }
164 if (i >= NR_LDISCS)
165 *eof = 1;
166 if (off >= len+begin)
167 return 0; 161 return 0;
168 *start = page + (off-begin); 162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
169 return ((count < begin+len-off) ? count : begin+len-off); 163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
170} 177}
171 178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
172/* 187/*
173 * This function is called by tty_register_driver() to handle 188 * This function is called by tty_register_driver() to handle
174 * registering the driver's /proc handler into /proc/tty/driver/<foo> 189 * registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -177,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver)
177{ 192{
178 struct proc_dir_entry *ent; 193 struct proc_dir_entry *ent;
179 194
180 if ((!driver->read_proc && !driver->write_proc) || 195 if (!driver->ops->read_proc || !driver->driver_name ||
181 !driver->driver_name ||
182 driver->proc_entry) 196 driver->proc_entry)
183 return; 197 return;
184 198
185 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 199 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
186 if (!ent) 200 if (!ent)
187 return; 201 return;
188 ent->read_proc = driver->read_proc; 202 ent->read_proc = driver->ops->read_proc;
189 ent->write_proc = driver->write_proc;
190 ent->owner = driver->owner; 203 ent->owner = driver->owner;
191 ent->data = driver; 204 ent->data = driver;
192 205
@@ -214,7 +227,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
214 */ 227 */
215void __init proc_tty_init(void) 228void __init proc_tty_init(void)
216{ 229{
217 struct proc_dir_entry *entry;
218 if (!proc_mkdir("tty", NULL)) 230 if (!proc_mkdir("tty", NULL))
219 return; 231 return;
220 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 232 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
@@ -224,10 +236,7 @@ void __init proc_tty_init(void)
224 * password lengths and inter-keystroke timings during password 236 * password lengths and inter-keystroke timings during password
225 * entry. 237 * entry.
226 */ 238 */
227 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); 239 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
228 240 proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
229 create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); 241 proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
230 entry = create_proc_entry("tty/drivers", 0, NULL);
231 if (entry)
232 entry->proc_fops = &proc_tty_drivers_operations;
233} 242}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef0fb57fc9ef..95117538a4f6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -22,8 +22,6 @@
22 22
23#include "internal.h" 23#include "internal.h"
24 24
25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
26
27static int proc_test_super(struct super_block *sb, void *data) 25static int proc_test_super(struct super_block *sb, void *data)
28{ 26{
29 return sb->s_fs_info == data; 27 return sb->s_fs_info == data;
@@ -126,8 +124,8 @@ void __init proc_root_init(void)
126#ifdef CONFIG_SYSVIPC 124#ifdef CONFIG_SYSVIPC
127 proc_mkdir("sysvipc", NULL); 125 proc_mkdir("sysvipc", NULL);
128#endif 126#endif
129 proc_root_fs = proc_mkdir("fs", NULL); 127 proc_mkdir("fs", NULL);
130 proc_root_driver = proc_mkdir("driver", NULL); 128 proc_mkdir("driver", NULL);
131 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 129 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
132#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) 130#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
133 /* just give it a mountpoint */ 131 /* just give it a mountpoint */
@@ -137,7 +135,7 @@ void __init proc_root_init(void)
137#ifdef CONFIG_PROC_DEVICETREE 135#ifdef CONFIG_PROC_DEVICETREE
138 proc_device_tree_init(); 136 proc_device_tree_init();
139#endif 137#endif
140 proc_bus = proc_mkdir("bus", NULL); 138 proc_mkdir("bus", NULL);
141 proc_sys_init(); 139 proc_sys_init();
142} 140}
143 141
@@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns)
232EXPORT_SYMBOL(proc_symlink); 230EXPORT_SYMBOL(proc_symlink);
233EXPORT_SYMBOL(proc_mkdir); 231EXPORT_SYMBOL(proc_mkdir);
234EXPORT_SYMBOL(create_proc_entry); 232EXPORT_SYMBOL(create_proc_entry);
235EXPORT_SYMBOL(proc_create); 233EXPORT_SYMBOL(proc_create_data);
236EXPORT_SYMBOL(remove_proc_entry); 234EXPORT_SYMBOL(remove_proc_entry);
237EXPORT_SYMBOL(proc_root);
238EXPORT_SYMBOL(proc_root_fs);
239EXPORT_SYMBOL(proc_bus);
240EXPORT_SYMBOL(proc_root_driver);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9dfb5ff24209..e2b8e769f510 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,40 +75,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
75 return mm->total_vm; 75 return mm->total_vm;
76} 76}
77 77
78int proc_exe_link(struct inode *inode, struct path *path)
79{
80 struct vm_area_struct * vma;
81 int result = -ENOENT;
82 struct task_struct *task = get_proc_task(inode);
83 struct mm_struct * mm = NULL;
84
85 if (task) {
86 mm = get_task_mm(task);
87 put_task_struct(task);
88 }
89 if (!mm)
90 goto out;
91 down_read(&mm->mmap_sem);
92
93 vma = mm->mmap;
94 while (vma) {
95 if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
96 break;
97 vma = vma->vm_next;
98 }
99
100 if (vma) {
101 *path = vma->vm_file->f_path;
102 path_get(&vma->vm_file->f_path);
103 result = 0;
104 }
105
106 up_read(&mm->mmap_sem);
107 mmput(mm);
108out:
109 return result;
110}
111
112static void pad_len_spaces(struct seq_file *m, int len) 78static void pad_len_spaces(struct seq_file *m, int len)
113{ 79{
114 len = 25 + sizeof(void*) * 6 - len; 80 len = 25 + sizeof(void*) * 6 - len;
@@ -338,8 +304,7 @@ const struct file_operations proc_maps_operations = {
338#define PSS_SHIFT 12 304#define PSS_SHIFT 12
339 305
340#ifdef CONFIG_PROC_PAGE_MONITOR 306#ifdef CONFIG_PROC_PAGE_MONITOR
341struct mem_size_stats 307struct mem_size_stats {
342{
343 struct vm_area_struct *vma; 308 struct vm_area_struct *vma;
344 unsigned long resident; 309 unsigned long resident;
345 unsigned long shared_clean; 310 unsigned long shared_clean;
@@ -347,6 +312,7 @@ struct mem_size_stats
347 unsigned long private_clean; 312 unsigned long private_clean;
348 unsigned long private_dirty; 313 unsigned long private_dirty;
349 unsigned long referenced; 314 unsigned long referenced;
315 unsigned long swap;
350 u64 pss; 316 u64 pss;
351}; 317};
352 318
@@ -363,6 +329,12 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
363 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 329 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
364 for (; addr != end; pte++, addr += PAGE_SIZE) { 330 for (; addr != end; pte++, addr += PAGE_SIZE) {
365 ptent = *pte; 331 ptent = *pte;
332
333 if (is_swap_pte(ptent)) {
334 mss->swap += PAGE_SIZE;
335 continue;
336 }
337
366 if (!pte_present(ptent)) 338 if (!pte_present(ptent))
367 continue; 339 continue;
368 340
@@ -421,7 +393,8 @@ static int show_smap(struct seq_file *m, void *v)
421 "Shared_Dirty: %8lu kB\n" 393 "Shared_Dirty: %8lu kB\n"
422 "Private_Clean: %8lu kB\n" 394 "Private_Clean: %8lu kB\n"
423 "Private_Dirty: %8lu kB\n" 395 "Private_Dirty: %8lu kB\n"
424 "Referenced: %8lu kB\n", 396 "Referenced: %8lu kB\n"
397 "Swap: %8lu kB\n",
425 (vma->vm_end - vma->vm_start) >> 10, 398 (vma->vm_end - vma->vm_start) >> 10,
426 mss.resident >> 10, 399 mss.resident >> 10,
427 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), 400 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
@@ -429,7 +402,8 @@ static int show_smap(struct seq_file *m, void *v)
429 mss.shared_dirty >> 10, 402 mss.shared_dirty >> 10,
430 mss.private_clean >> 10, 403 mss.private_clean >> 10,
431 mss.private_dirty >> 10, 404 mss.private_dirty >> 10,
432 mss.referenced >> 10); 405 mss.referenced >> 10,
406 mss.swap >> 10);
433 407
434 return ret; 408 return ret;
435} 409}
@@ -579,7 +553,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
579 return err; 553 return err;
580} 554}
581 555
582u64 swap_pte_to_pagemap_entry(pte_t pte) 556static u64 swap_pte_to_pagemap_entry(pte_t pte)
583{ 557{
584 swp_entry_t e = pte_to_swp_entry(pte); 558 swp_entry_t e = pte_to_swp_entry(pte);
585 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 559 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8011528518bd..4b733f108455 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -103,40 +103,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
103 return size; 103 return size;
104} 104}
105 105
106int proc_exe_link(struct inode *inode, struct path *path)
107{
108 struct vm_list_struct *vml;
109 struct vm_area_struct *vma;
110 struct task_struct *task = get_proc_task(inode);
111 struct mm_struct *mm = get_task_mm(task);
112 int result = -ENOENT;
113
114 if (!mm)
115 goto out;
116 down_read(&mm->mmap_sem);
117
118 vml = mm->context.vmlist;
119 vma = NULL;
120 while (vml) {
121 if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
122 vma = vml->vma;
123 break;
124 }
125 vml = vml->next;
126 }
127
128 if (vma) {
129 *path = vma->vm_file->f_path;
130 path_get(&vma->vm_file->f_path);
131 result = 0;
132 }
133
134 up_read(&mm->mmap_sem);
135 mmput(mm);
136out:
137 return result;
138}
139
140/* 106/*
141 * display mapping lines for a particular process's /proc/pid/maps 107 * display mapping lines for a particular process's /proc/pid/maps
142 */ 108 */
diff --git a/fs/quota.c b/fs/quota.c
index 84f28dd72116..db1cc9f3c7aa 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -69,7 +69,6 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
69 switch (cmd) { 69 switch (cmd) {
70 case Q_GETFMT: 70 case Q_GETFMT:
71 case Q_GETINFO: 71 case Q_GETINFO:
72 case Q_QUOTAOFF:
73 case Q_SETINFO: 72 case Q_SETINFO:
74 case Q_SETQUOTA: 73 case Q_SETQUOTA:
75 case Q_GETQUOTA: 74 case Q_GETQUOTA:
@@ -229,12 +228,12 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
229 228
230 if (IS_ERR(pathname = getname(addr))) 229 if (IS_ERR(pathname = getname(addr)))
231 return PTR_ERR(pathname); 230 return PTR_ERR(pathname);
232 ret = sb->s_qcop->quota_on(sb, type, id, pathname); 231 ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
233 putname(pathname); 232 putname(pathname);
234 return ret; 233 return ret;
235 } 234 }
236 case Q_QUOTAOFF: 235 case Q_QUOTAOFF:
237 return sb->s_qcop->quota_off(sb, type); 236 return sb->s_qcop->quota_off(sb, type, 0);
238 237
239 case Q_GETFMT: { 238 case Q_GETFMT: {
240 __u32 fmt; 239 __u32 fmt;
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index f3841f233069..a6cf9269105c 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -139,6 +139,9 @@ static int v1_read_file_info(struct super_block *sb, int type)
139 goto out; 139 goto out;
140 } 140 }
141 ret = 0; 141 ret = 0;
142 /* limits are stored as unsigned 32-bit data */
143 dqopt->info[type].dqi_maxblimit = 0xffffffff;
144 dqopt->info[type].dqi_maxilimit = 0xffffffff;
142 dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME; 145 dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
143 dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME; 146 dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
144out: 147out:
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index c519a583e681..234ada903633 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -59,6 +59,9 @@ static int v2_read_file_info(struct super_block *sb, int type)
59 sb->s_id); 59 sb->s_id);
60 return -1; 60 return -1;
61 } 61 }
62 /* limits are stored as unsigned 32-bit data */
63 info->dqi_maxblimit = 0xffffffff;
64 info->dqi_maxilimit = 0xffffffff;
62 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 65 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
63 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 66 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
64 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); 67 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
@@ -303,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err)
303 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); 306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
304 goto out_buf; 307 goto out_buf;
305 } 308 }
306 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); 309 le16_add_cpu(&dh->dqdh_entries, 1);
307 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); 310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
308 /* Find free structure in block */ 311 /* Find free structure in block */
309 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); 312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
@@ -445,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk)
445 goto out_buf; 448 goto out_buf;
446 } 449 }
447 dh = (struct v2_disk_dqdbheader *)buf; 450 dh = (struct v2_disk_dqdbheader *)buf;
448 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); 451 le16_add_cpu(&dh->dqdh_entries, -1);
449 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ 452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
450 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || 453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
451 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { 454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b41a514b0976..9590b9024300 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,6 +26,9 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/ramfs.h>
30
31#include "internal.h"
29 32
30const struct address_space_operations ramfs_aops = { 33const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 34 .readpage = simple_readpage,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8428d5b2711d..b13123424e49 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations;
44 44
45static struct backing_dev_info ramfs_backing_dev_info = { 45static struct backing_dev_info ramfs_backing_dev_info = {
46 .ra_pages = 0, /* No readahead */ 46 .ra_pages = 0, /* No readahead */
47 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | 47 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, 49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
50}; 50};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index af7cc074a476..6b330639b51d 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,4 @@
11 11
12 12
13extern const struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations;
15extern const struct inode_operations ramfs_file_inode_operations; 14extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index f491ceb5af02..4646caa60455 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -479,7 +479,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
479 if (ei->i_prealloc_count < 0) 479 if (ei->i_prealloc_count < 0)
480 reiserfs_warning(th->t_super, 480 reiserfs_warning(th->t_super,
481 "zam-4001:%s: inode has negative prealloc blocks count.", 481 "zam-4001:%s: inode has negative prealloc blocks count.",
482 __FUNCTION__); 482 __func__);
483#endif 483#endif
484 while (ei->i_prealloc_count > 0) { 484 while (ei->i_prealloc_count > 0) {
485 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); 485 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
@@ -517,7 +517,7 @@ void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
517 if (!ei->i_prealloc_count) { 517 if (!ei->i_prealloc_count) {
518 reiserfs_warning(th->t_super, 518 reiserfs_warning(th->t_super,
519 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", 519 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
520 __FUNCTION__); 520 __func__);
521 } 521 }
522#endif 522#endif
523 __discard_prealloc(th, ei); 523 __discard_prealloc(th, ei);
@@ -632,7 +632,7 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
632 } 632 }
633 633
634 reiserfs_warning(s, "zam-4001: %s : unknown option - %s", 634 reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
635 __FUNCTION__, this_char); 635 __func__, this_char);
636 return 1; 636 return 1;
637 } 637 }
638 638
@@ -1254,7 +1254,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1254 bh = sb_bread(sb, block); 1254 bh = sb_bread(sb, block);
1255 if (bh == NULL) 1255 if (bh == NULL)
1256 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " 1256 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1257 "reading failed", __FUNCTION__, block); 1257 "reading failed", __func__, block);
1258 else { 1258 else {
1259 if (buffer_locked(bh)) { 1259 if (buffer_locked(bh)) {
1260 PROC_INFO_INC(sb, scan_bitmap.wait); 1260 PROC_INFO_INC(sb, scan_bitmap.wait);
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 7ee4208793b6..2f87f5b14630 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -1464,29 +1464,29 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
1464 } else 1464 } else
1465 /* item falls wholly into S_new[i] */ 1465 /* item falls wholly into S_new[i] */
1466 { 1466 {
1467 int ret_val; 1467 int leaf_mi;
1468 struct item_head *pasted; 1468 struct item_head *pasted;
1469 1469
1470#ifdef CONFIG_REISERFS_CHECK 1470#ifdef CONFIG_REISERFS_CHECK
1471 struct item_head *ih = 1471 struct item_head *ih_check =
1472 B_N_PITEM_HEAD(tbS0, item_pos); 1472 B_N_PITEM_HEAD(tbS0, item_pos);
1473 1473
1474 if (!is_direntry_le_ih(ih) 1474 if (!is_direntry_le_ih(ih_check)
1475 && (pos_in_item != ih_item_len(ih) 1475 && (pos_in_item != ih_item_len(ih_check)
1476 || tb->insert_size[0] <= 0)) 1476 || tb->insert_size[0] <= 0))
1477 reiserfs_panic(tb->tb_sb, 1477 reiserfs_panic(tb->tb_sb,
1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); 1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1479#endif /* CONFIG_REISERFS_CHECK */ 1479#endif /* CONFIG_REISERFS_CHECK */
1480 1480
1481 ret_val = 1481 leaf_mi =
1482 leaf_move_items(LEAF_FROM_S_TO_SNEW, 1482 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1483 tb, snum[i], 1483 tb, snum[i],
1484 sbytes[i], 1484 sbytes[i],
1485 S_new[i]); 1485 S_new[i]);
1486 1486
1487 RFALSE(ret_val, 1487 RFALSE(leaf_mi,
1488 "PAP-12240: unexpected value returned by leaf_move_items (%d)", 1488 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1489 ret_val); 1489 leaf_mi);
1490 1490
1491 /* paste into item */ 1491 /* paste into item */
1492 bi.tb = tb; 1492 bi.tb = tb;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 74363a7aacbc..830332021ed4 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -12,8 +12,6 @@
12#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include <linux/compat.h> 13#include <linux/compat.h>
14 14
15static int reiserfs_unpack(struct inode *inode, struct file *filp);
16
17/* 15/*
18** reiserfs_ioctl - handler for ioctl for inode 16** reiserfs_ioctl - handler for ioctl for inode
19** supported commands: 17** supported commands:
@@ -159,7 +157,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
159** Function try to convert tail from direct item into indirect. 157** Function try to convert tail from direct item into indirect.
160** It set up nopack attribute in the REISERFS_I(inode)->nopack 158** It set up nopack attribute in the REISERFS_I(inode)->nopack
161*/ 159*/
162static int reiserfs_unpack(struct inode *inode, struct file *filp) 160int reiserfs_unpack(struct inode *inode, struct file *filp)
163{ 161{
164 int retval = 0; 162 int retval = 0;
165 int index; 163 int index;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 060eb3f598e7..e396b2fa4743 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1187,7 +1187,7 @@ static int flush_commit_list(struct super_block *s,
1187 1187
1188 if (retval) 1188 if (retval)
1189 reiserfs_abort(s, retval, "Journal write error in %s", 1189 reiserfs_abort(s, retval, "Journal write error in %s",
1190 __FUNCTION__); 1190 __func__);
1191 put_fs_excl(); 1191 put_fs_excl();
1192 return retval; 1192 return retval;
1193} 1193}
@@ -1534,7 +1534,7 @@ static int flush_journal_list(struct super_block *s,
1534 reiserfs_warning(s, 1534 reiserfs_warning(s,
1535 "clm-2082: Unable to flush buffer %llu in %s", 1535 "clm-2082: Unable to flush buffer %llu in %s",
1536 (unsigned long long)saved_bh-> 1536 (unsigned long long)saved_bh->
1537 b_blocknr, __FUNCTION__); 1537 b_blocknr, __func__);
1538 } 1538 }
1539 free_cnode: 1539 free_cnode:
1540 last = cn; 1540 last = cn;
@@ -1586,7 +1586,7 @@ static int flush_journal_list(struct super_block *s,
1586 if (err) 1586 if (err)
1587 reiserfs_abort(s, -EIO, 1587 reiserfs_abort(s, -EIO,
1588 "Write error while pushing transaction to disk in %s", 1588 "Write error while pushing transaction to disk in %s",
1589 __FUNCTION__); 1589 __func__);
1590 flush_older_and_return: 1590 flush_older_and_return:
1591 1591
1592 /* before we can update the journal header block, we _must_ flush all 1592 /* before we can update the journal header block, we _must_ flush all
@@ -1616,7 +1616,7 @@ static int flush_journal_list(struct super_block *s,
1616 if (err) 1616 if (err)
1617 reiserfs_abort(s, -EIO, 1617 reiserfs_abort(s, -EIO,
1618 "Write error while updating journal header in %s", 1618 "Write error while updating journal header in %s",
1619 __FUNCTION__); 1619 __func__);
1620 } 1620 }
1621 remove_all_from_journal_list(s, jl, 0); 1621 remove_all_from_journal_list(s, jl, 0);
1622 list_del_init(&jl->j_list); 1622 list_del_init(&jl->j_list);
@@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super,
2574 2574
2575 result = 0; 2575 result = 0;
2576 2576
2577 if (journal->j_dev_file != NULL) { 2577 if (journal->j_dev_bd != NULL) {
2578 result = filp_close(journal->j_dev_file, NULL); 2578 if (journal->j_dev_bd->bd_dev != super->s_dev)
2579 journal->j_dev_file = NULL; 2579 bd_release(journal->j_dev_bd);
2580 journal->j_dev_bd = NULL;
2581 } else if (journal->j_dev_bd != NULL) {
2582 result = blkdev_put(journal->j_dev_bd); 2580 result = blkdev_put(journal->j_dev_bd);
2583 journal->j_dev_bd = NULL; 2581 journal->j_dev_bd = NULL;
2584 } 2582 }
@@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super,
2603 result = 0; 2601 result = 0;
2604 2602
2605 journal->j_dev_bd = NULL; 2603 journal->j_dev_bd = NULL;
2606 journal->j_dev_file = NULL;
2607 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 2604 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2608 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2605 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2609 2606
@@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super,
2620 "cannot init journal device '%s': %i", 2617 "cannot init journal device '%s': %i",
2621 __bdevname(jdev, b), result); 2618 __bdevname(jdev, b), result);
2622 return result; 2619 return result;
2623 } else if (jdev != super->s_dev) 2620 } else if (jdev != super->s_dev) {
2621 result = bd_claim(journal->j_dev_bd, journal);
2622 if (result) {
2623 blkdev_put(journal->j_dev_bd);
2624 return result;
2625 }
2626
2624 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2627 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2628 }
2629
2625 return 0; 2630 return 0;
2626 } 2631 }
2627 2632
2628 journal->j_dev_file = filp_open(jdev_name, 0, 0); 2633 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal);
2629 if (!IS_ERR(journal->j_dev_file)) { 2634 if (IS_ERR(journal->j_dev_bd)) {
2630 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2635 result = PTR_ERR(journal->j_dev_bd);
2631 if (!S_ISBLK(jdev_inode->i_mode)) { 2636 journal->j_dev_bd = NULL;
2632 reiserfs_warning(super, "journal_init_dev: '%s' is "
2633 "not a block device", jdev_name);
2634 result = -ENOTBLK;
2635 release_journal_dev(super, journal);
2636 } else {
2637 /* ok */
2638 journal->j_dev_bd = I_BDEV(jdev_inode);
2639 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2640 reiserfs_info(super,
2641 "journal_init_dev: journal device: %s\n",
2642 bdevname(journal->j_dev_bd, b));
2643 }
2644 } else {
2645 result = PTR_ERR(journal->j_dev_file);
2646 journal->j_dev_file = NULL;
2647 reiserfs_warning(super, 2637 reiserfs_warning(super,
2648 "journal_init_dev: Cannot open '%s': %i", 2638 "journal_init_dev: Cannot open '%s': %i",
2649 jdev_name, result); 2639 jdev_name, result);
2640 return result;
2650 } 2641 }
2651 return result; 2642
2643 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2644 reiserfs_info(super,
2645 "journal_init_dev: journal device: %s\n",
2646 bdevname(journal->j_dev_bd, b));
2647 return 0;
2652} 2648}
2653 2649
2654/** 2650/**
@@ -4316,5 +4312,5 @@ static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
4316 4312
4317void reiserfs_journal_abort(struct super_block *sb, int errno) 4313void reiserfs_journal_abort(struct super_block *sb, int errno)
4318{ 4314{
4319 return __reiserfs_journal_abort_soft(sb, errno); 4315 __reiserfs_journal_abort_soft(sb, errno);
4320} 4316}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 8867533cb727..c1add28dd45e 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -301,7 +301,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
301 path_to_entry, de); 301 path_to_entry, de);
302 if (retval == IO_ERROR) { 302 if (retval == IO_ERROR) {
303 reiserfs_warning(dir->i_sb, "zam-7001: io error in %s", 303 reiserfs_warning(dir->i_sb, "zam-7001: io error in %s",
304 __FUNCTION__); 304 __func__);
305 return IO_ERROR; 305 return IO_ERROR;
306 } 306 }
307 307
@@ -496,7 +496,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
496 reiserfs_warning(dir->i_sb, 496 reiserfs_warning(dir->i_sb,
497 "zam-7002:%s: \"reiserfs_find_entry\" " 497 "zam-7002:%s: \"reiserfs_find_entry\" "
498 "has returned unexpected value (%d)", 498 "has returned unexpected value (%d)",
499 __FUNCTION__, retval); 499 __func__, retval);
500 } 500 }
501 501
502 return -EEXIST; 502 return -EEXIST;
@@ -907,7 +907,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
907 907
908 if (inode->i_nlink != 2 && inode->i_nlink != 1) 908 if (inode->i_nlink != 2 && inode->i_nlink != 1)
909 reiserfs_warning(inode->i_sb, "%s: empty directory has nlink " 909 reiserfs_warning(inode->i_sb, "%s: empty directory has nlink "
910 "!= 2 (%d)", __FUNCTION__, inode->i_nlink); 910 "!= 2 (%d)", __func__, inode->i_nlink);
911 911
912 clear_nlink(inode); 912 clear_nlink(inode);
913 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; 913 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -984,7 +984,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
984 984
985 if (!inode->i_nlink) { 985 if (!inode->i_nlink) {
986 reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file " 986 reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file "
987 "(%s:%lu), %d", __FUNCTION__, 987 "(%s:%lu), %d", __func__,
988 reiserfs_bdevname(inode->i_sb), inode->i_ino, 988 reiserfs_bdevname(inode->i_sb), inode->i_ino,
989 inode->i_nlink); 989 inode->i_nlink);
990 inode->i_nlink = 1; 990 inode->i_nlink = 1;
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 65feba4deb69..ea0cf8c28a99 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -61,7 +61,7 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
61 /* comment needed -Hans */ 61 /* comment needed -Hans */
62 unused_objectid = le32_to_cpu(map[1]); 62 unused_objectid = le32_to_cpu(map[1]);
63 if (unused_objectid == U32_MAX) { 63 if (unused_objectid == U32_MAX) {
64 reiserfs_warning(s, "%s: no more object ids", __FUNCTION__); 64 reiserfs_warning(s, "%s: no more object ids", __func__);
65 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)); 65 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
66 return 0; 66 return 0;
67 } 67 }
@@ -114,7 +114,7 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
114 if (objectid_to_release == le32_to_cpu(map[i])) { 114 if (objectid_to_release == le32_to_cpu(map[i])) {
115 /* This incrementation unallocates the objectid. */ 115 /* This incrementation unallocates the objectid. */
116 //map[i]++; 116 //map[i]++;
117 map[i] = cpu_to_le32(le32_to_cpu(map[i]) + 1); 117 le32_add_cpu(&map[i], 1);
118 118
119 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ 119 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */
120 if (map[i] == map[i + 1]) { 120 if (map[i] == map[i + 1]) {
@@ -138,8 +138,7 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
138 /* size of objectid map is not changed */ 138 /* size of objectid map is not changed */
139 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { 139 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
140 //objectid_map[i+1]--; 140 //objectid_map[i+1]--;
141 map[i + 1] = 141 le32_add_cpu(&map[i + 1], -1);
142 cpu_to_le32(le32_to_cpu(map[i + 1]) - 1);
143 return; 142 return;
144 } 143 }
145 144
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 8f86c52b30d8..b9dbeeca7049 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = {
467 .read = seq_read, 467 .read = seq_read,
468 .llseek = seq_lseek, 468 .llseek = seq_lseek,
469 .release = seq_release, 469 .release = seq_release,
470 .owner = THIS_MODULE,
470}; 471};
471 472
472static struct proc_dir_entry *proc_info_root = NULL; 473static struct proc_dir_entry *proc_info_root = NULL;
@@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs";
475static void add_file(struct super_block *sb, char *name, 476static void add_file(struct super_block *sb, char *name,
476 int (*func) (struct seq_file *, struct super_block *)) 477 int (*func) (struct seq_file *, struct super_block *))
477{ 478{
478 struct proc_dir_entry *de; 479 proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
479 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 480 &r_file_operations, func);
480 if (de) {
481 de->data = func;
482 de->proc_fops = &r_file_operations;
483 }
484} 481}
485 482
486int reiserfs_proc_info_init(struct super_block *sb) 483int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d2db2417b2bd..abbc64dcc8d4 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1419,8 +1419,7 @@ int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1419 1419
1420 inode_generation = 1420 inode_generation =
1421 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation; 1421 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1422 *inode_generation = 1422 le32_add_cpu(inode_generation, 1);
1423 cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1424 } 1423 }
1425/* USE_INODE_GENERATION_COUNTER */ 1424/* USE_INODE_GENERATION_COUNTER */
1426#endif 1425#endif
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 393cc22c1717..ed424d708e69 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -304,7 +304,7 @@ static int finish_unfinished(struct super_block *s)
304 /* Turn quotas off */ 304 /* Turn quotas off */
305 for (i = 0; i < MAXQUOTAS; i++) { 305 for (i = 0; i < MAXQUOTAS; i++) {
306 if (sb_dqopt(s)->files[i]) 306 if (sb_dqopt(s)->files[i])
307 vfs_quota_off_mount(s, i); 307 vfs_quota_off(s, i, 0);
308 } 308 }
309 if (ms_active_set) 309 if (ms_active_set)
310 /* Restore the flag back */ 310 /* Restore the flag back */
@@ -634,7 +634,7 @@ static int reiserfs_acquire_dquot(struct dquot *);
634static int reiserfs_release_dquot(struct dquot *); 634static int reiserfs_release_dquot(struct dquot *);
635static int reiserfs_mark_dquot_dirty(struct dquot *); 635static int reiserfs_mark_dquot_dirty(struct dquot *);
636static int reiserfs_write_info(struct super_block *, int); 636static int reiserfs_write_info(struct super_block *, int);
637static int reiserfs_quota_on(struct super_block *, int, int, char *); 637static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
638 638
639static struct dquot_operations reiserfs_quota_operations = { 639static struct dquot_operations reiserfs_quota_operations = {
640 .initialize = reiserfs_dquot_initialize, 640 .initialize = reiserfs_dquot_initialize,
@@ -1890,8 +1890,14 @@ static int reiserfs_dquot_drop(struct inode *inode)
1890 ret = 1890 ret =
1891 journal_begin(&th, inode->i_sb, 1891 journal_begin(&th, inode->i_sb,
1892 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); 1892 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1893 if (ret) 1893 if (ret) {
1894 /*
1895 * We call dquot_drop() anyway to at least release references
1896 * to quota structures so that umount does not hang.
1897 */
1898 dquot_drop(inode);
1894 goto out; 1899 goto out;
1900 }
1895 ret = dquot_drop(inode); 1901 ret = dquot_drop(inode);
1896 err = 1902 err =
1897 journal_end(&th, inode->i_sb, 1903 journal_end(&th, inode->i_sb,
@@ -2015,13 +2021,17 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type)
2015 * Standard function to be called on quota_on 2021 * Standard function to be called on quota_on
2016 */ 2022 */
2017static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, 2023static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2018 char *path) 2024 char *path, int remount)
2019{ 2025{
2020 int err; 2026 int err;
2021 struct nameidata nd; 2027 struct nameidata nd;
2028 struct inode *inode;
2022 2029
2023 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2030 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
2024 return -EINVAL; 2031 return -EINVAL;
2032 /* No more checks needed? Path and format_id are bogus anyway... */
2033 if (remount)
2034 return vfs_quota_on(sb, type, format_id, path, 1);
2025 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2035 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2026 if (err) 2036 if (err)
2027 return err; 2037 return err;
@@ -2030,18 +2040,24 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2030 path_put(&nd.path); 2040 path_put(&nd.path);
2031 return -EXDEV; 2041 return -EXDEV;
2032 } 2042 }
2043 inode = nd.path.dentry->d_inode;
2033 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2044 /* We must not pack tails for quota files on reiserfs for quota IO to work */
2034 if (!(REISERFS_I(nd.path.dentry->d_inode)->i_flags & i_nopack_mask)) { 2045 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2035 reiserfs_warning(sb, 2046 err = reiserfs_unpack(inode, NULL);
2036 "reiserfs: Quota file must have tail packing disabled."); 2047 if (err) {
2037 path_put(&nd.path); 2048 reiserfs_warning(sb,
2038 return -EINVAL; 2049 "reiserfs: Unpacking tail of quota file failed"
2050 " (%d). Cannot turn on quotas.", err);
2051 path_put(&nd.path);
2052 return -EINVAL;
2053 }
2054 mark_inode_dirty(inode);
2039 } 2055 }
2040 /* Not journalling quota? No more tests needed... */ 2056 /* Not journalling quota? No more tests needed... */
2041 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2057 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
2042 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2058 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
2043 path_put(&nd.path); 2059 path_put(&nd.path);
2044 return vfs_quota_on(sb, type, format_id, path); 2060 return vfs_quota_on(sb, type, format_id, path, 0);
2045 } 2061 }
2046 /* Quotafile not of fs root? */ 2062 /* Quotafile not of fs root? */
2047 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2063 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
@@ -2049,7 +2065,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2049 "reiserfs: Quota file not on filesystem root. " 2065 "reiserfs: Quota file not on filesystem root. "
2050 "Journalled quota will not work."); 2066 "Journalled quota will not work.");
2051 path_put(&nd.path); 2067 path_put(&nd.path);
2052 return vfs_quota_on(sb, type, format_id, path); 2068 return vfs_quota_on(sb, type, format_id, path, 0);
2053} 2069}
2054 2070
2055/* Read data from quotafile - avoid pagecache and such because we cannot afford 2071/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/select.c b/fs/select.c
index 00f58c5c7e05..2c292146e246 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -425,7 +425,7 @@ sticky:
425 return ret; 425 return ret;
426} 426}
427 427
428#ifdef TIF_RESTORE_SIGMASK 428#ifdef HAVE_SET_RESTORE_SIGMASK
429asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, 429asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
430 fd_set __user *exp, struct timespec __user *tsp, 430 fd_set __user *exp, struct timespec __user *tsp,
431 const sigset_t __user *sigmask, size_t sigsetsize) 431 const sigset_t __user *sigmask, size_t sigsetsize)
@@ -498,7 +498,7 @@ sticky:
498 if (sigmask) { 498 if (sigmask) {
499 memcpy(&current->saved_sigmask, &sigsaved, 499 memcpy(&current->saved_sigmask, &sigsaved,
500 sizeof(sigsaved)); 500 sizeof(sigsaved));
501 set_thread_flag(TIF_RESTORE_SIGMASK); 501 set_restore_sigmask();
502 } 502 }
503 } else if (sigmask) 503 } else if (sigmask)
504 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 504 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -528,7 +528,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
528 528
529 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); 529 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
530} 530}
531#endif /* TIF_RESTORE_SIGMASK */ 531#endif /* HAVE_SET_RESTORE_SIGMASK */
532 532
533struct poll_list { 533struct poll_list {
534 struct poll_list *next; 534 struct poll_list *next;
@@ -759,7 +759,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
759 return ret; 759 return ret;
760} 760}
761 761
762#ifdef TIF_RESTORE_SIGMASK 762#ifdef HAVE_SET_RESTORE_SIGMASK
763asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, 763asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
764 struct timespec __user *tsp, const sigset_t __user *sigmask, 764 struct timespec __user *tsp, const sigset_t __user *sigmask,
765 size_t sigsetsize) 765 size_t sigsetsize)
@@ -805,7 +805,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
805 if (sigmask) { 805 if (sigmask) {
806 memcpy(&current->saved_sigmask, &sigsaved, 806 memcpy(&current->saved_sigmask, &sigsaved,
807 sizeof(sigsaved)); 807 sizeof(sigsaved));
808 set_thread_flag(TIF_RESTORE_SIGMASK); 808 set_restore_sigmask();
809 } 809 }
810 ret = -ERESTARTNOHAND; 810 ret = -ERESTARTNOHAND;
811 } else if (sigmask) 811 } else if (sigmask)
@@ -839,4 +839,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
839 839
840 return ret; 840 return ret;
841} 841}
842#endif /* TIF_RESTORE_SIGMASK */ 842#endif /* HAVE_SET_RESTORE_SIGMASK */
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
index 734972b92694..fc4b1a5dd755 100644
--- a/fs/smbfs/smb_debug.h
+++ b/fs/smbfs/smb_debug.h
@@ -11,14 +11,14 @@
11 * these are normally enabled. 11 * these are normally enabled.
12 */ 12 */
13#ifdef SMBFS_PARANOIA 13#ifdef SMBFS_PARANOIA
14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) 14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
15#else 15#else
16# define PARANOIA(f, a...) do { ; } while(0) 16# define PARANOIA(f, a...) do { ; } while(0)
17#endif 17#endif
18 18
19/* lots of debug messages */ 19/* lots of debug messages */
20#ifdef SMBFS_DEBUG_VERBOSE 20#ifdef SMBFS_DEBUG_VERBOSE
21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
22#else 22#else
23# define VERBOSE(f, a...) do { ; } while(0) 23# define VERBOSE(f, a...) do { ; } while(0)
24#endif 24#endif
@@ -28,7 +28,7 @@
28 * too common name. 28 * too common name.
29 */ 29 */
30#ifdef SMBFS_DEBUG 30#ifdef SMBFS_DEBUG
31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
32#else 32#else
33#define DEBUG1(f, a...) do { ; } while(0) 33#define DEBUG1(f, a...) do { ; } while(0)
34#endif 34#endif
diff --git a/fs/splice.c b/fs/splice.c
index eeb1a86a7014..633f58ebfb72 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1075 1075
1076 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1076 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1077 if (ret > 0) 1077 if (ret > 0)
1078 *ppos += ret; 1078 *ppos = sd.pos;
1079 1079
1080 return ret; 1080 return ret;
1081} 1081}
diff --git a/fs/super.c b/fs/super.c
index 4798350b2bc9..453877c5697b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s)
117 * Drop a superblock's refcount. Returns non-zero if the superblock was 117 * Drop a superblock's refcount. Returns non-zero if the superblock was
118 * destroyed. The caller must hold sb_lock. 118 * destroyed. The caller must hold sb_lock.
119 */ 119 */
120int __put_super(struct super_block *sb) 120static int __put_super(struct super_block *sb)
121{ 121{
122 int ret = 0; 122 int ret = 0;
123 123
@@ -179,7 +179,7 @@ void deactivate_super(struct super_block *s)
179 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { 179 if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
180 s->s_count -= S_BIAS-1; 180 s->s_count -= S_BIAS-1;
181 spin_unlock(&sb_lock); 181 spin_unlock(&sb_lock);
182 DQUOT_OFF(s); 182 DQUOT_OFF(s, 0);
183 down_write(&s->s_umount); 183 down_write(&s->s_umount);
184 fs->kill_sb(s); 184 fs->kill_sb(s);
185 put_filesystem(fs); 185 put_filesystem(fs);
@@ -608,6 +608,7 @@ retry:
608int do_remount_sb(struct super_block *sb, int flags, void *data, int force) 608int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
609{ 609{
610 int retval; 610 int retval;
611 int remount_rw;
611 612
612#ifdef CONFIG_BLOCK 613#ifdef CONFIG_BLOCK
613 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev)) 614 if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
@@ -625,8 +626,11 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
625 mark_files_ro(sb); 626 mark_files_ro(sb);
626 else if (!fs_may_remount_ro(sb)) 627 else if (!fs_may_remount_ro(sb))
627 return -EBUSY; 628 return -EBUSY;
628 DQUOT_OFF(sb); 629 retval = DQUOT_OFF(sb, 1);
630 if (retval < 0 && retval != -ENOSYS)
631 return -EBUSY;
629 } 632 }
633 remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
630 634
631 if (sb->s_op->remount_fs) { 635 if (sb->s_op->remount_fs) {
632 lock_super(sb); 636 lock_super(sb);
@@ -636,6 +640,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
636 return retval; 640 return retval;
637 } 641 }
638 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 642 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
643 if (remount_rw)
644 DQUOT_ON_REMOUNT(sb);
639 return 0; 645 return 0;
640} 646}
641 647
diff --git a/fs/sync.c b/fs/sync.c
index 7cd005ea7639..228e17b5e9ee 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
64 /* sync the superblock to buffers */ 64 /* sync the superblock to buffers */
65 sb = inode->i_sb; 65 sb = inode->i_sb;
66 lock_super(sb); 66 lock_super(sb);
67 if (sb->s_op->write_super) 67 if (sb->s_dirt && sb->s_op->write_super)
68 sb->s_op->write_super(sb); 68 sb->s_op->write_super(sb);
69 unlock_super(sb); 69 unlock_super(sb);
70 70
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index ade9a7e6a757..e7735f643cd1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
135 goto out; 135 goto out;
136 } 136 }
137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
138 __FUNCTION__, count, *ppos, buffer->page); 138 __func__, count, *ppos, buffer->page);
139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
140 buffer->count); 140 buffer->count);
141out: 141out:
@@ -477,11 +477,10 @@ const struct file_operations sysfs_file_operations = {
477 .poll = sysfs_poll, 477 .poll = sysfs_poll,
478}; 478};
479 479
480 480int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
481int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, 481 const struct attribute *attr, int type, mode_t amode)
482 int type)
483{ 482{
484 umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; 483 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
485 struct sysfs_addrm_cxt acxt; 484 struct sysfs_addrm_cxt acxt;
486 struct sysfs_dirent *sd; 485 struct sysfs_dirent *sd;
487 int rc; 486 int rc;
@@ -502,6 +501,13 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
502} 501}
503 502
504 503
504int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
505 int type)
506{
507 return sysfs_add_file_mode(dir_sd, attr, type, attr->mode);
508}
509
510
505/** 511/**
506 * sysfs_create_file - create an attribute file for an object. 512 * sysfs_create_file - create an attribute file for an object.
507 * @kobj: object we're creating for. 513 * @kobj: object we're creating for.
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 477904915032..eeba38417b1d 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -23,35 +23,50 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
23 int i; 23 int i;
24 24
25 for (i = 0, attr = grp->attrs; *attr; i++, attr++) 25 for (i = 0, attr = grp->attrs; *attr; i++, attr++)
26 if (!grp->is_visible || 26 sysfs_hash_and_remove(dir_sd, (*attr)->name);
27 grp->is_visible(kobj, *attr, i))
28 sysfs_hash_and_remove(dir_sd, (*attr)->name);
29} 27}
30 28
31static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
32 const struct attribute_group *grp) 30 const struct attribute_group *grp, int update)
33{ 31{
34 struct attribute *const* attr; 32 struct attribute *const* attr;
35 int error = 0, i; 33 int error = 0, i;
36 34
37 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) 35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
38 if (!grp->is_visible || 36 mode_t mode = 0;
39 grp->is_visible(kobj, *attr, i)) 37
40 error |= 38 /* in update mode, we're changing the permissions or
41 sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); 39 * visibility. Do this by first removing then
40 * re-adding (if required) the file */
41 if (update)
42 sysfs_hash_and_remove(dir_sd, (*attr)->name);
43 if (grp->is_visible) {
44 mode = grp->is_visible(kobj, *attr, i);
45 if (!mode)
46 continue;
47 }
48 error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR,
49 (*attr)->mode | mode);
50 if (unlikely(error))
51 break;
52 }
42 if (error) 53 if (error)
43 remove_files(dir_sd, kobj, grp); 54 remove_files(dir_sd, kobj, grp);
44 return error; 55 return error;
45} 56}
46 57
47 58
48int sysfs_create_group(struct kobject * kobj, 59static int internal_create_group(struct kobject *kobj, int update,
49 const struct attribute_group * grp) 60 const struct attribute_group *grp)
50{ 61{
51 struct sysfs_dirent *sd; 62 struct sysfs_dirent *sd;
52 int error; 63 int error;
53 64
54 BUG_ON(!kobj || !kobj->sd); 65 BUG_ON(!kobj || (!update && !kobj->sd));
66
67 /* Updates may happen before the object has been instantiated */
68 if (unlikely(update && !kobj->sd))
69 return -EINVAL;
55 70
56 if (grp->name) { 71 if (grp->name) {
57 error = sysfs_create_subdir(kobj, grp->name, &sd); 72 error = sysfs_create_subdir(kobj, grp->name, &sd);
@@ -60,7 +75,7 @@ int sysfs_create_group(struct kobject * kobj,
60 } else 75 } else
61 sd = kobj->sd; 76 sd = kobj->sd;
62 sysfs_get(sd); 77 sysfs_get(sd);
63 error = create_files(sd, kobj, grp); 78 error = create_files(sd, kobj, grp, update);
64 if (error) { 79 if (error) {
65 if (grp->name) 80 if (grp->name)
66 sysfs_remove_subdir(sd); 81 sysfs_remove_subdir(sd);
@@ -69,6 +84,47 @@ int sysfs_create_group(struct kobject * kobj,
69 return error; 84 return error;
70} 85}
71 86
87/**
88 * sysfs_create_group - given a directory kobject, create an attribute group
89 * @kobj: The kobject to create the group on
90 * @grp: The attribute group to create
91 *
92 * This function creates a group for the first time. It will explicitly
93 * warn and error if any of the attribute files being created already exist.
94 *
95 * Returns 0 on success or error.
96 */
97int sysfs_create_group(struct kobject *kobj,
98 const struct attribute_group *grp)
99{
100 return internal_create_group(kobj, 0, grp);
101}
102
103/**
104 * sysfs_update_group - given a directory kobject, create an attribute group
105 * @kobj: The kobject to create the group on
106 * @grp: The attribute group to create
107 *
108 * This function updates an attribute group. Unlike
109 * sysfs_create_group(), it will explicitly not warn or error if any
110 * of the attribute files being created already exist. Furthermore,
111 * if the visibility of the files has changed through the is_visible()
112 * callback, it will update the permissions and add or remove the
113 * relevant files.
114 *
115 * The primary use for this function is to call it after making a change
116 * that affects group visibility.
117 *
118 * Returns 0 on success or error.
119 */
120int sysfs_update_group(struct kobject *kobj,
121 const struct attribute_group *grp)
122{
123 return internal_create_group(kobj, 1, grp);
124}
125
126
127
72void sysfs_remove_group(struct kobject * kobj, 128void sysfs_remove_group(struct kobject * kobj,
73 const struct attribute_group * grp) 129 const struct attribute_group * grp)
74{ 130{
@@ -95,4 +151,5 @@ void sysfs_remove_group(struct kobject * kobj,
95 151
96 152
97EXPORT_SYMBOL_GPL(sysfs_create_group); 153EXPORT_SYMBOL_GPL(sysfs_create_group);
154EXPORT_SYMBOL_GPL(sysfs_update_group);
98EXPORT_SYMBOL_GPL(sysfs_remove_group); 155EXPORT_SYMBOL_GPL(sysfs_remove_group);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d9262f74f94e..eb53c632f856 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = {
30 30
31static struct backing_dev_info sysfs_backing_dev_info = { 31static struct backing_dev_info sysfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 34};
35 35
36static const struct inode_operations sysfs_inode_operations ={ 36static const struct inode_operations sysfs_inode_operations ={
@@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
59 if (error) 59 if (error)
60 return error; 60 return error;
61 61
62 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
63
62 error = inode_setattr(inode, iattr); 64 error = inode_setattr(inode, iattr);
63 if (error) 65 if (error)
64 return error; 66 return error;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd59..14f0023984d7 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 /* instantiate and link root dentry */ 61 /* instantiate and link root dentry */
62 root = d_alloc_root(inode); 62 root = d_alloc_root(inode);
63 if (!root) { 63 if (!root) {
64 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 64 pr_debug("%s: could not get root dentry!\n",__func__);
65 iput(inode); 65 iput(inode);
66 return -ENOMEM; 66 return -ENOMEM;
67 } 67 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ff17f8da9b43..ce4e15f8aaeb 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -154,6 +154,8 @@ extern const struct file_operations sysfs_file_operations;
154int sysfs_add_file(struct sysfs_dirent *dir_sd, 154int sysfs_add_file(struct sysfs_dirent *dir_sd,
155 const struct attribute *attr, int type); 155 const struct attribute *attr, int type);
156 156
157int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
158 const struct attribute *attr, int type, mode_t amode);
157/* 159/*
158 * bin.c 160 * bin.c
159 */ 161 */
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 42d51d1c05cd..38ebe3f85b3d 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d)
217 if (sbi->s_bytesex == BYTESEX_PDP) 217 if (sbi->s_bytesex == BYTESEX_PDP)
218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); 218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d);
219 else if (sbi->s_bytesex == BYTESEX_LE) 219 else if (sbi->s_bytesex == BYTESEX_LE)
220 *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); 220 le32_add_cpu((__le32 *)n, d);
221 else 221 else
222 *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); 222 be32_add_cpu((__be32 *)n, d);
223 return *n; 223 return *n;
224} 224}
225 225
@@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n)
242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) 242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d)
243{ 243{
244 if (sbi->s_bytesex != BYTESEX_BE) 244 if (sbi->s_bytesex != BYTESEX_BE)
245 *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 245 le16_add_cpu((__le16 *)n, d);
246 else 246 else
247 *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 247 be16_add_cpu((__be16 *)n, d);
248 return *n; 248 return *n;
249} 249}
250 250
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 10c80b59ec4b..5400524e9cb1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -20,6 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/anon_inodes.h> 21#include <linux/anon_inodes.h>
22#include <linux/timerfd.h> 22#include <linux/timerfd.h>
23#include <linux/syscalls.h>
23 24
24struct timerfd_ctx { 25struct timerfd_ctx {
25 struct hrtimer tmr; 26 struct hrtimer tmr;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index ba5537d4bc15..2b34c8ca6c83 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -890,7 +890,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
890 890
891 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { 891 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
892 kernel_lb_addr eloc; 892 kernel_lb_addr eloc;
893 uint32_t elen; 893 uint32_t bsize;
894 894
895 block = udf_new_block(inode->i_sb, inode, 895 block = udf_new_block(inode->i_sb, inode,
896 iinfo->i_location.partitionReferenceNum, 896 iinfo->i_location.partitionReferenceNum,
@@ -903,9 +903,9 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
903 eloc.logicalBlockNum = block; 903 eloc.logicalBlockNum = block;
904 eloc.partitionReferenceNum = 904 eloc.partitionReferenceNum =
905 iinfo->i_location.partitionReferenceNum; 905 iinfo->i_location.partitionReferenceNum;
906 elen = inode->i_sb->s_blocksize; 906 bsize = inode->i_sb->s_blocksize;
907 iinfo->i_lenExtents = elen; 907 iinfo->i_lenExtents = bsize;
908 udf_add_aext(inode, &epos, eloc, elen, 0); 908 udf_add_aext(inode, &epos, eloc, bsize, 0);
909 brelse(epos.bh); 909 brelse(epos.bh);
910 910
911 block = udf_get_pblock(inode->i_sb, block, 911 block = udf_get_pblock(inode->i_sb, block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe4..9fb18a340fc1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
241 GFP_KERNEL); 241 GFP_KERNEL);
242 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
243 udf_error(sb, __FUNCTION__, 243 udf_error(sb, __func__,
244 "Unable to allocate space for %d partition maps", 244 "Unable to allocate space for %d partition maps",
245 count); 245 count);
246 sbi->s_partitions = 0; 246 sbi->s_partitions = 0;
@@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
1086 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ 1086 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
1087 1087
1088 if (bitmap == NULL) { 1088 if (bitmap == NULL) {
1089 udf_error(sb, __FUNCTION__, 1089 udf_error(sb, __func__,
1090 "Unable to allocate space for bitmap " 1090 "Unable to allocate space for bitmap "
1091 "and %d buffer_head pointers", nr_groups); 1091 "and %d buffer_head pointers", nr_groups);
1092 return NULL; 1092 return NULL;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 1e7598fb9787..0d9ada173739 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -277,7 +277,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
277 if (!page)/* it was truncated */ 277 if (!page)/* it was truncated */
278 continue; 278 continue;
279 if (IS_ERR(page)) {/* or EIO */ 279 if (IS_ERR(page)) {/* or EIO */
280 ufs_error(inode->i_sb, __FUNCTION__, 280 ufs_error(inode->i_sb, __func__,
281 "read of page %llu failed\n", 281 "read of page %llu failed\n",
282 (unsigned long long)index); 282 (unsigned long long)index);
283 continue; 283 continue;
@@ -308,7 +308,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
308 ll_rw_block(READ, 1, &bh); 308 ll_rw_block(READ, 1, &bh);
309 wait_on_buffer(bh); 309 wait_on_buffer(bh);
310 if (!buffer_uptodate(bh)) { 310 if (!buffer_uptodate(bh)) {
311 ufs_error(inode->i_sb, __FUNCTION__, 311 ufs_error(inode->i_sb, __func__,
312 "read of block failed\n"); 312 "read of block failed\n");
313 break; 313 break;
314 } 314 }
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index ef563fc8d72c..df0bef18742d 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -179,7 +179,7 @@ bad_entry:
179 goto fail; 179 goto fail;
180Eend: 180Eend:
181 p = (struct ufs_dir_entry *)(kaddr + offs); 181 p = (struct ufs_dir_entry *)(kaddr + offs);
182 ufs_error(sb, __FUNCTION__, 182 ufs_error(sb, __func__,
183 "entry in directory #%lu spans the page boundary" 183 "entry in directory #%lu spans the page boundary"
184 "offset=%lu", 184 "offset=%lu",
185 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs); 185 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs);
@@ -284,7 +284,7 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, struct dentry *dentry,
284 kaddr += ufs_last_byte(dir, n) - reclen; 284 kaddr += ufs_last_byte(dir, n) - reclen;
285 while ((char *) de <= kaddr) { 285 while ((char *) de <= kaddr) {
286 if (de->d_reclen == 0) { 286 if (de->d_reclen == 0) {
287 ufs_error(dir->i_sb, __FUNCTION__, 287 ufs_error(dir->i_sb, __func__,
288 "zero-length directory entry"); 288 "zero-length directory entry");
289 ufs_put_page(page); 289 ufs_put_page(page);
290 goto out; 290 goto out;
@@ -356,7 +356,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
356 goto got_it; 356 goto got_it;
357 } 357 }
358 if (de->d_reclen == 0) { 358 if (de->d_reclen == 0) {
359 ufs_error(dir->i_sb, __FUNCTION__, 359 ufs_error(dir->i_sb, __func__,
360 "zero-length directory entry"); 360 "zero-length directory entry");
361 err = -EIO; 361 err = -EIO;
362 goto out_unlock; 362 goto out_unlock;
@@ -456,7 +456,7 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
456 struct page *page = ufs_get_page(inode, n); 456 struct page *page = ufs_get_page(inode, n);
457 457
458 if (IS_ERR(page)) { 458 if (IS_ERR(page)) {
459 ufs_error(sb, __FUNCTION__, 459 ufs_error(sb, __func__,
460 "bad page in #%lu", 460 "bad page in #%lu",
461 inode->i_ino); 461 inode->i_ino);
462 filp->f_pos += PAGE_CACHE_SIZE - offset; 462 filp->f_pos += PAGE_CACHE_SIZE - offset;
@@ -475,7 +475,7 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
475 limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1); 475 limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1);
476 for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) { 476 for ( ;(char*)de <= limit; de = ufs_next_entry(sb, de)) {
477 if (de->d_reclen == 0) { 477 if (de->d_reclen == 0) {
478 ufs_error(sb, __FUNCTION__, 478 ufs_error(sb, __func__,
479 "zero-length directory entry"); 479 "zero-length directory entry");
480 ufs_put_page(page); 480 ufs_put_page(page);
481 return -EIO; 481 return -EIO;
@@ -536,7 +536,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
536 536
537 while ((char*)de < (char*)dir) { 537 while ((char*)de < (char*)dir) {
538 if (de->d_reclen == 0) { 538 if (de->d_reclen == 0) {
539 ufs_error(inode->i_sb, __FUNCTION__, 539 ufs_error(inode->i_sb, __func__,
540 "zero-length directory entry"); 540 "zero-length directory entry");
541 err = -EIO; 541 err = -EIO;
542 goto out; 542 goto out;
@@ -633,7 +633,7 @@ int ufs_empty_dir(struct inode * inode)
633 633
634 while ((char *)de <= kaddr) { 634 while ((char *)de <= kaddr) {
635 if (de->d_reclen == 0) { 635 if (de->d_reclen == 0) {
636 ufs_error(inode->i_sb, __FUNCTION__, 636 ufs_error(inode->i_sb, __func__,
637 "zero-length directory entry: " 637 "zero-length directory entry: "
638 "kaddr=%p, de=%p\n", kaddr, de); 638 "kaddr=%p, de=%p\n", kaddr, de);
639 goto not_empty; 639 goto not_empty;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 5446b888fc8e..39f877898565 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -929,7 +929,7 @@ void ufs_delete_inode (struct inode * inode)
929 old_i_size = inode->i_size; 929 old_i_size = inode->i_size;
930 inode->i_size = 0; 930 inode->i_size = 0;
931 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 931 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
932 ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n"); 932 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
933 ufs_free_inode (inode); 933 ufs_free_inode (inode);
934 unlock_kernel(); 934 unlock_kernel();
935 return; 935 return;
diff --git a/fs/ufs/swab.h b/fs/ufs/swab.h
index 1683d2bee614..8d974c4fd18b 100644
--- a/fs/ufs/swab.h
+++ b/fs/ufs/swab.h
@@ -40,25 +40,7 @@ cpu_to_fs64(struct super_block *sbp, u64 n)
40 return (__force __fs64)cpu_to_be64(n); 40 return (__force __fs64)cpu_to_be64(n);
41} 41}
42 42
43static __inline u32 43static inline u32
44fs64_add(struct super_block *sbp, u32 *n, int d)
45{
46 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
47 return *n = cpu_to_le64(le64_to_cpu(*n)+d);
48 else
49 return *n = cpu_to_be64(be64_to_cpu(*n)+d);
50}
51
52static __inline u32
53fs64_sub(struct super_block *sbp, u32 *n, int d)
54{
55 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
56 return *n = cpu_to_le64(le64_to_cpu(*n)-d);
57 else
58 return *n = cpu_to_be64(be64_to_cpu(*n)-d);
59}
60
61static __inline u32
62fs32_to_cpu(struct super_block *sbp, __fs32 n) 44fs32_to_cpu(struct super_block *sbp, __fs32 n)
63{ 45{
64 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 46 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
@@ -80,18 +62,18 @@ static inline void
80fs32_add(struct super_block *sbp, __fs32 *n, int d) 62fs32_add(struct super_block *sbp, __fs32 *n, int d)
81{ 63{
82 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 64 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
83 *(__le32 *)n = cpu_to_le32(le32_to_cpu(*(__le32 *)n)+d); 65 le32_add_cpu((__le32 *)n, d);
84 else 66 else
85 *(__be32 *)n = cpu_to_be32(be32_to_cpu(*(__be32 *)n)+d); 67 be32_add_cpu((__be32 *)n, d);
86} 68}
87 69
88static inline void 70static inline void
89fs32_sub(struct super_block *sbp, __fs32 *n, int d) 71fs32_sub(struct super_block *sbp, __fs32 *n, int d)
90{ 72{
91 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 73 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
92 *(__le32 *)n = cpu_to_le32(le32_to_cpu(*(__le32 *)n)-d); 74 le32_add_cpu((__le32 *)n, -d);
93 else 75 else
94 *(__be32 *)n = cpu_to_be32(be32_to_cpu(*(__be32 *)n)-d); 76 be32_add_cpu((__be32 *)n, -d);
95} 77}
96 78
97static inline u16 79static inline u16
@@ -116,18 +98,18 @@ static inline void
116fs16_add(struct super_block *sbp, __fs16 *n, int d) 98fs16_add(struct super_block *sbp, __fs16 *n, int d)
117{ 99{
118 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 100 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
119 *(__le16 *)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 101 le16_add_cpu((__le16 *)n, d);
120 else 102 else
121 *(__be16 *)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 103 be16_add_cpu((__be16 *)n, d);
122} 104}
123 105
124static inline void 106static inline void
125fs16_sub(struct super_block *sbp, __fs16 *n, int d) 107fs16_sub(struct super_block *sbp, __fs16 *n, int d)
126{ 108{
127 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE) 109 if (UFS_SB(sbp)->s_bytesex == BYTESEX_LE)
128 *(__le16 *)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)-d); 110 le16_add_cpu((__le16 *)n, -d);
129 else 111 else
130 *(__be16 *)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)-d); 112 be16_add_cpu((__be16 *)n, -d);
131} 113}
132 114
133#endif /* _UFS_SWAB_H */ 115#endif /* _UFS_SWAB_H */
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index fcb9231bb9ed..244a1aaa940e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -66,7 +66,7 @@ struct ufs_inode_info {
66#ifdef CONFIG_UFS_DEBUG 66#ifdef CONFIG_UFS_DEBUG
67# define UFSD(f, a...) { \ 67# define UFSD(f, a...) { \
68 printk ("UFSD (%s, %d): %s:", \ 68 printk ("UFSD (%s, %d): %s:", \
69 __FILE__, __LINE__, __FUNCTION__); \ 69 __FILE__, __LINE__, __func__); \
70 printk (f, ## a); \ 70 printk (f, ## a); \
71 } 71 }
72#else 72#else
diff --git a/fs/utimes.c b/fs/utimes.c
index a2bef77dc9c9..af059d5cb485 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
40 40
41#endif 41#endif
42 42
43static bool nsec_special(long nsec)
44{
45 return nsec == UTIME_OMIT || nsec == UTIME_NOW;
46}
47
43static bool nsec_valid(long nsec) 48static bool nsec_valid(long nsec)
44{ 49{
45 if (nsec == UTIME_OMIT || nsec == UTIME_NOW) 50 if (nsec_special(nsec))
46 return true; 51 return true;
47 52
48 return nsec >= 0 && nsec <= 999999999; 53 return nsec >= 0 && nsec <= 999999999;
@@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
119 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; 124 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
120 newattrs.ia_valid |= ATTR_MTIME_SET; 125 newattrs.ia_valid |= ATTR_MTIME_SET;
121 } 126 }
122 } else { 127 }
128
129 /*
130 * If times is NULL or both times are either UTIME_OMIT or
131 * UTIME_NOW, then need to check permissions, because
132 * inode_change_ok() won't do it.
133 */
134 if (!times || (nsec_special(times[0].tv_nsec) &&
135 nsec_special(times[1].tv_nsec))) {
123 error = -EACCES; 136 error = -EACCES;
124 if (IS_IMMUTABLE(inode)) 137 if (IS_IMMUTABLE(inode))
125 goto mnt_drop_write_and_out; 138 goto mnt_drop_write_and_out;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index cd450bea9f1a..a3522727ea5b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -176,15 +176,10 @@ static inline int vfat_is_used_badchars(const wchar_t *s, int len)
176 for (i = 0; i < len; i++) 176 for (i = 0; i < len; i++)
177 if (vfat_bad_char(s[i])) 177 if (vfat_bad_char(s[i]))
178 return -EINVAL; 178 return -EINVAL;
179 return 0;
180}
181 179
182static int vfat_valid_longname(const unsigned char *name, unsigned int len) 180 if (s[i - 1] == ' ') /* last character cannot be space */
183{
184 if (name[len - 1] == ' ')
185 return -EINVAL; 181 return -EINVAL;
186 if (len >= 256) 182
187 return -ENAMETOOLONG;
188 return 0; 183 return 0;
189} 184}
190 185
@@ -477,7 +472,7 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
477 if (utf8) { 472 if (utf8) {
478 int name_len = strlen(name); 473 int name_len = strlen(name);
479 474
480 *outlen = utf8_mbstowcs((wchar_t *)outname, name, PAGE_SIZE); 475 *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX);
481 476
482 /* 477 /*
483 * We stripped '.'s before and set len appropriately, 478 * We stripped '.'s before and set len appropriately,
@@ -485,11 +480,14 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
485 */ 480 */
486 *outlen -= (name_len - len); 481 *outlen -= (name_len - len);
487 482
483 if (*outlen > 255)
484 return -ENAMETOOLONG;
485
488 op = &outname[*outlen * sizeof(wchar_t)]; 486 op = &outname[*outlen * sizeof(wchar_t)];
489 } else { 487 } else {
490 if (nls) { 488 if (nls) {
491 for (i = 0, ip = name, op = outname, *outlen = 0; 489 for (i = 0, ip = name, op = outname, *outlen = 0;
492 i < len && *outlen <= 260; 490 i < len && *outlen <= 255;
493 *outlen += 1) 491 *outlen += 1)
494 { 492 {
495 if (escape && (*ip == ':')) { 493 if (escape && (*ip == ':')) {
@@ -525,18 +523,20 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
525 op += 2; 523 op += 2;
526 } 524 }
527 } 525 }
526 if (i < len)
527 return -ENAMETOOLONG;
528 } else { 528 } else {
529 for (i = 0, ip = name, op = outname, *outlen = 0; 529 for (i = 0, ip = name, op = outname, *outlen = 0;
530 i < len && *outlen <= 260; 530 i < len && *outlen <= 255;
531 i++, *outlen += 1) 531 i++, *outlen += 1)
532 { 532 {
533 *op++ = *ip++; 533 *op++ = *ip++;
534 *op++ = 0; 534 *op++ = 0;
535 } 535 }
536 if (i < len)
537 return -ENAMETOOLONG;
536 } 538 }
537 } 539 }
538 if (*outlen > 260)
539 return -ENAMETOOLONG;
540 540
541 *longlen = *outlen; 541 *longlen = *outlen;
542 if (*outlen % 13) { 542 if (*outlen % 13) {
@@ -565,7 +565,6 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
565 struct fat_mount_options *opts = &sbi->options; 565 struct fat_mount_options *opts = &sbi->options;
566 struct msdos_dir_slot *ps; 566 struct msdos_dir_slot *ps;
567 struct msdos_dir_entry *de; 567 struct msdos_dir_entry *de;
568 unsigned long page;
569 unsigned char cksum, lcase; 568 unsigned char cksum, lcase;
570 unsigned char msdos_name[MSDOS_NAME]; 569 unsigned char msdos_name[MSDOS_NAME];
571 wchar_t *uname; 570 wchar_t *uname;
@@ -574,15 +573,11 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
574 loff_t offset; 573 loff_t offset;
575 574
576 *nr_slots = 0; 575 *nr_slots = 0;
577 err = vfat_valid_longname(name, len);
578 if (err)
579 return err;
580 576
581 page = __get_free_page(GFP_KERNEL); 577 uname = __getname();
582 if (!page) 578 if (!uname)
583 return -ENOMEM; 579 return -ENOMEM;
584 580
585 uname = (wchar_t *)page;
586 err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize, 581 err = xlate_to_uni(name, len, (unsigned char *)uname, &ulen, &usize,
587 opts->unicode_xlate, opts->utf8, sbi->nls_io); 582 opts->unicode_xlate, opts->utf8, sbi->nls_io);
588 if (err) 583 if (err)
@@ -634,7 +629,7 @@ shortname:
634 de->starthi = cpu_to_le16(cluster >> 16); 629 de->starthi = cpu_to_le16(cluster >> 16);
635 de->size = 0; 630 de->size = 0;
636out_free: 631out_free:
637 free_page(page); 632 __putname(uname);
638 return err; 633 return err;
639} 634}
640 635
@@ -991,7 +986,7 @@ error_inode:
991 if (corrupt < 0) { 986 if (corrupt < 0) {
992 fat_fs_panic(new_dir->i_sb, 987 fat_fs_panic(new_dir->i_sb,
993 "%s: Filesystem corrupted (i_pos %lld)", 988 "%s: Filesystem corrupted (i_pos %lld)",
994 __FUNCTION__, sinfo.i_pos); 989 __func__, sinfo.i_pos);
995 } 990 }
996 goto out; 991 goto out;
997} 992}
@@ -1003,7 +998,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
1003 .mkdir = vfat_mkdir, 998 .mkdir = vfat_mkdir,
1004 .rmdir = vfat_rmdir, 999 .rmdir = vfat_rmdir,
1005 .rename = vfat_rename, 1000 .rename = vfat_rename,
1006 .setattr = fat_notify_change, 1001 .setattr = fat_setattr,
1007 .getattr = fat_getattr, 1002 .getattr = fat_getattr,
1008}; 1003};
1009 1004
diff --git a/fs/xattr.c b/fs/xattr.c
index 89a942f07e1b..4706a8b1f495 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
67} 67}
68 68
69int 69int
70vfs_setxattr(struct dentry *dentry, char *name, void *value, 70vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
71 size_t size, int flags) 71 size_t size, int flags)
72{ 72{
73 struct inode *inode = dentry->d_inode; 73 struct inode *inode = dentry->d_inode;
@@ -131,7 +131,7 @@ out_noalloc:
131EXPORT_SYMBOL_GPL(xattr_getsecurity); 131EXPORT_SYMBOL_GPL(xattr_getsecurity);
132 132
133ssize_t 133ssize_t
134vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) 134vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
135{ 135{
136 struct inode *inode = dentry->d_inode; 136 struct inode *inode = dentry->d_inode;
137 int error; 137 int error;
@@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size)
187EXPORT_SYMBOL_GPL(vfs_listxattr); 187EXPORT_SYMBOL_GPL(vfs_listxattr);
188 188
189int 189int
190vfs_removexattr(struct dentry *dentry, char *name) 190vfs_removexattr(struct dentry *dentry, const char *name)
191{ 191{
192 struct inode *inode = dentry->d_inode; 192 struct inode *inode = dentry->d_inode;
193 int error; 193 int error;
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
218 * Extended attribute SET operations 218 * Extended attribute SET operations
219 */ 219 */
220static long 220static long
221setxattr(struct dentry *d, char __user *name, void __user *value, 221setxattr(struct dentry *d, const char __user *name, const void __user *value,
222 size_t size, int flags) 222 size_t size, int flags)
223{ 223{
224 int error; 224 int error;
@@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
252} 252}
253 253
254asmlinkage long 254asmlinkage long
255sys_setxattr(char __user *path, char __user *name, void __user *value, 255sys_setxattr(const char __user *path, const char __user *name,
256 size_t size, int flags) 256 const void __user *value, size_t size, int flags)
257{ 257{
258 struct nameidata nd; 258 struct nameidata nd;
259 int error; 259 int error;
@@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
271} 271}
272 272
273asmlinkage long 273asmlinkage long
274sys_lsetxattr(char __user *path, char __user *name, void __user *value, 274sys_lsetxattr(const char __user *path, const char __user *name,
275 size_t size, int flags) 275 const void __user *value, size_t size, int flags)
276{ 276{
277 struct nameidata nd; 277 struct nameidata nd;
278 int error; 278 int error;
@@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
290} 290}
291 291
292asmlinkage long 292asmlinkage long
293sys_fsetxattr(int fd, char __user *name, void __user *value, 293sys_fsetxattr(int fd, const char __user *name, const void __user *value,
294 size_t size, int flags) 294 size_t size, int flags)
295{ 295{
296 struct file *f; 296 struct file *f;
@@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
315 * Extended attribute GET operations 315 * Extended attribute GET operations
316 */ 316 */
317static ssize_t 317static ssize_t
318getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) 318getxattr(struct dentry *d, const char __user *name, void __user *value,
319 size_t size)
319{ 320{
320 ssize_t error; 321 ssize_t error;
321 void *kvalue = NULL; 322 void *kvalue = NULL;
@@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
349} 350}
350 351
351asmlinkage ssize_t 352asmlinkage ssize_t
352sys_getxattr(char __user *path, char __user *name, void __user *value, 353sys_getxattr(const char __user *path, const char __user *name,
353 size_t size) 354 void __user *value, size_t size)
354{ 355{
355 struct nameidata nd; 356 struct nameidata nd;
356 ssize_t error; 357 ssize_t error;
@@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
364} 365}
365 366
366asmlinkage ssize_t 367asmlinkage ssize_t
367sys_lgetxattr(char __user *path, char __user *name, void __user *value, 368sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
368 size_t size) 369 size_t size)
369{ 370{
370 struct nameidata nd; 371 struct nameidata nd;
@@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
379} 380}
380 381
381asmlinkage ssize_t 382asmlinkage ssize_t
382sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) 383sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size)
383{ 384{
384 struct file *f; 385 struct file *f;
385 ssize_t error = -EBADF; 386 ssize_t error = -EBADF;
@@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size)
424} 425}
425 426
426asmlinkage ssize_t 427asmlinkage ssize_t
427sys_listxattr(char __user *path, char __user *list, size_t size) 428sys_listxattr(const char __user *path, char __user *list, size_t size)
428{ 429{
429 struct nameidata nd; 430 struct nameidata nd;
430 ssize_t error; 431 ssize_t error;
@@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
438} 439}
439 440
440asmlinkage ssize_t 441asmlinkage ssize_t
441sys_llistxattr(char __user *path, char __user *list, size_t size) 442sys_llistxattr(const char __user *path, char __user *list, size_t size)
442{ 443{
443 struct nameidata nd; 444 struct nameidata nd;
444 ssize_t error; 445 ssize_t error;
@@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size)
470 * Extended attribute REMOVE operations 471 * Extended attribute REMOVE operations
471 */ 472 */
472static long 473static long
473removexattr(struct dentry *d, char __user *name) 474removexattr(struct dentry *d, const char __user *name)
474{ 475{
475 int error; 476 int error;
476 char kname[XATTR_NAME_MAX + 1]; 477 char kname[XATTR_NAME_MAX + 1];
@@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name)
485} 486}
486 487
487asmlinkage long 488asmlinkage long
488sys_removexattr(char __user *path, char __user *name) 489sys_removexattr(const char __user *path, const char __user *name)
489{ 490{
490 struct nameidata nd; 491 struct nameidata nd;
491 int error; 492 int error;
@@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name)
503} 504}
504 505
505asmlinkage long 506asmlinkage long
506sys_lremovexattr(char __user *path, char __user *name) 507sys_lremovexattr(const char __user *path, const char __user *name)
507{ 508{
508 struct nameidata nd; 509 struct nameidata nd;
509 int error; 510 int error;
@@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name)
521} 522}
522 523
523asmlinkage long 524asmlinkage long
524sys_fremovexattr(int fd, char __user *name) 525sys_fremovexattr(int fd, const char __user *name)
525{ 526{
526 struct file *f; 527 struct file *f;
527 struct dentry *dentry; 528 struct dentry *dentry;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 524021ff5436..3f53dd101f99 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -64,3 +64,16 @@ config XFS_RT
64 See the xfs man page in section 5 for additional information. 64 See the xfs man page in section 5 for additional information.
65 65
66 If unsure, say N. 66 If unsure, say N.
67
68config XFS_DEBUG
69 bool "XFS Debugging support (EXPERIMENTAL)"
70 depends on XFS_FS && EXPERIMENTAL
71 help
72 Say Y here to get an XFS build with many debugging features,
73 including ASSERT checks, function wrappers around macros,
74 and extra sanity-checking functions in various code paths.
75
76 Note that the resulting code will be HUGE and SLOW, and probably
77 not useful unless you are debugging a particular problem.
78
79 Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index c110bb002665..ff6a19873e5c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -20,29 +20,24 @@
20 20
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22 22
23enum { MR_NONE, MR_ACCESS, MR_UPDATE };
24
25typedef struct { 23typedef struct {
26 struct rw_semaphore mr_lock; 24 struct rw_semaphore mr_lock;
25#ifdef DEBUG
27 int mr_writer; 26 int mr_writer;
27#endif
28} mrlock_t; 28} mrlock_t;
29 29
30#ifdef DEBUG
30#define mrinit(mrp, name) \ 31#define mrinit(mrp, name) \
31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) 32 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
33#else
34#define mrinit(mrp, name) \
35 do { init_rwsem(&(mrp)->mr_lock); } while (0)
36#endif
37
32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) 38#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
33#define mrfree(mrp) do { } while (0) 39#define mrfree(mrp) do { } while (0)
34 40
35static inline void mraccess(mrlock_t *mrp)
36{
37 down_read(&mrp->mr_lock);
38}
39
40static inline void mrupdate(mrlock_t *mrp)
41{
42 down_write(&mrp->mr_lock);
43 mrp->mr_writer = 1;
44}
45
46static inline void mraccess_nested(mrlock_t *mrp, int subclass) 41static inline void mraccess_nested(mrlock_t *mrp, int subclass)
47{ 42{
48 down_read_nested(&mrp->mr_lock, subclass); 43 down_read_nested(&mrp->mr_lock, subclass);
@@ -51,10 +46,11 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
51static inline void mrupdate_nested(mrlock_t *mrp, int subclass) 46static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
52{ 47{
53 down_write_nested(&mrp->mr_lock, subclass); 48 down_write_nested(&mrp->mr_lock, subclass);
49#ifdef DEBUG
54 mrp->mr_writer = 1; 50 mrp->mr_writer = 1;
51#endif
55} 52}
56 53
57
58static inline int mrtryaccess(mrlock_t *mrp) 54static inline int mrtryaccess(mrlock_t *mrp)
59{ 55{
60 return down_read_trylock(&mrp->mr_lock); 56 return down_read_trylock(&mrp->mr_lock);
@@ -64,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
64{ 60{
65 if (!down_write_trylock(&mrp->mr_lock)) 61 if (!down_write_trylock(&mrp->mr_lock))
66 return 0; 62 return 0;
63#ifdef DEBUG
67 mrp->mr_writer = 1; 64 mrp->mr_writer = 1;
65#endif
68 return 1; 66 return 1;
69} 67}
70 68
71static inline void mrunlock(mrlock_t *mrp) 69static inline void mrunlock_excl(mrlock_t *mrp)
72{ 70{
73 if (mrp->mr_writer) { 71#ifdef DEBUG
74 mrp->mr_writer = 0; 72 mrp->mr_writer = 0;
75 up_write(&mrp->mr_lock); 73#endif
76 } else { 74 up_write(&mrp->mr_lock);
77 up_read(&mrp->mr_lock);
78 }
79} 75}
80 76
81static inline void mrdemote(mrlock_t *mrp) 77static inline void mrunlock_shared(mrlock_t *mrp)
82{ 78{
83 mrp->mr_writer = 0; 79 up_read(&mrp->mr_lock);
84 downgrade_write(&mrp->mr_lock);
85} 80}
86 81
87#ifdef DEBUG 82static inline void mrdemote(mrlock_t *mrp)
88/*
89 * Debug-only routine, without some platform-specific asm code, we can
90 * now only answer requests regarding whether we hold the lock for write
91 * (reader state is outside our visibility, we only track writer state).
92 * Note: means !ismrlocked would give false positives, so don't do that.
93 */
94static inline int ismrlocked(mrlock_t *mrp, int type)
95{ 83{
96 if (mrp && type == MR_UPDATE) 84#ifdef DEBUG
97 return mrp->mr_writer; 85 mrp->mr_writer = 0;
98 return 1;
99}
100#endif 86#endif
87 downgrade_write(&mrp->mr_lock);
88}
101 89
102#endif /* __XFS_SUPPORT_MRLOCK_H__ */ 90#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52f6846101d5..5105015a75ad 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -886,7 +886,7 @@ int
886xfs_buf_lock_value( 886xfs_buf_lock_value(
887 xfs_buf_t *bp) 887 xfs_buf_t *bp)
888{ 888{
889 return atomic_read(&bp->b_sema.count); 889 return bp->b_sema.count;
890} 890}
891#endif 891#endif
892 892
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 265f0168ab76..c672b3238b14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -133,7 +133,7 @@ xfs_nfs_get_inode(
133 if (!ip) 133 if (!ip)
134 return ERR_PTR(-EIO); 134 return ERR_PTR(-EIO);
135 135
136 if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { 136 if (ip->i_d.di_gen != generation) {
137 xfs_iput_new(ip, XFS_ILOCK_SHARED); 137 xfs_iput_new(ip, XFS_ILOCK_SHARED);
138 return ERR_PTR(-ENOENT); 138 return ERR_PTR(-ENOENT);
139 } 139 }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 05905246434d..65e78c13d4ae 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,9 +43,6 @@
43#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
44 44
45static struct vm_operations_struct xfs_file_vm_ops; 45static struct vm_operations_struct xfs_file_vm_ops;
46#ifdef CONFIG_XFS_DMAPI
47static struct vm_operations_struct xfs_dmapi_file_vm_ops;
48#endif
49 46
50STATIC_INLINE ssize_t 47STATIC_INLINE ssize_t
51__xfs_file_read( 48__xfs_file_read(
@@ -202,22 +199,6 @@ xfs_file_fsync(
202 (xfs_off_t)0, (xfs_off_t)-1); 199 (xfs_off_t)0, (xfs_off_t)-1);
203} 200}
204 201
205#ifdef CONFIG_XFS_DMAPI
206STATIC int
207xfs_vm_fault(
208 struct vm_area_struct *vma,
209 struct vm_fault *vmf)
210{
211 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
212 bhv_vnode_t *vp = vn_from_inode(inode);
213
214 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
215 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
216 return VM_FAULT_SIGBUS;
217 return filemap_fault(vma, vmf);
218}
219#endif /* CONFIG_XFS_DMAPI */
220
221/* 202/*
222 * Unfortunately we can't just use the clean and simple readdir implementation 203 * Unfortunately we can't just use the clean and simple readdir implementation
223 * below, because nfs might call back into ->lookup from the filldir callback 204 * below, because nfs might call back into ->lookup from the filldir callback
@@ -386,11 +367,6 @@ xfs_file_mmap(
386 vma->vm_ops = &xfs_file_vm_ops; 367 vma->vm_ops = &xfs_file_vm_ops;
387 vma->vm_flags |= VM_CAN_NONLINEAR; 368 vma->vm_flags |= VM_CAN_NONLINEAR;
388 369
389#ifdef CONFIG_XFS_DMAPI
390 if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
391 vma->vm_ops = &xfs_dmapi_file_vm_ops;
392#endif /* CONFIG_XFS_DMAPI */
393
394 file_accessed(filp); 370 file_accessed(filp);
395 return 0; 371 return 0;
396} 372}
@@ -437,47 +413,6 @@ xfs_file_ioctl_invis(
437 return error; 413 return error;
438} 414}
439 415
440#ifdef CONFIG_XFS_DMAPI
441#ifdef HAVE_VMOP_MPROTECT
442STATIC int
443xfs_vm_mprotect(
444 struct vm_area_struct *vma,
445 unsigned int newflags)
446{
447 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
448 struct xfs_mount *mp = XFS_M(inode->i_sb);
449 int error = 0;
450
451 if (mp->m_flags & XFS_MOUNT_DMAPI) {
452 if ((vma->vm_flags & VM_MAYSHARE) &&
453 (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
454 error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
455 }
456 return error;
457}
458#endif /* HAVE_VMOP_MPROTECT */
459#endif /* CONFIG_XFS_DMAPI */
460
461#ifdef HAVE_FOP_OPEN_EXEC
462/* If the user is attempting to execute a file that is offline then
463 * we have to trigger a DMAPI READ event before the file is marked as busy
464 * otherwise the invisible I/O will not be able to write to the file to bring
465 * it back online.
466 */
467STATIC int
468xfs_file_open_exec(
469 struct inode *inode)
470{
471 struct xfs_mount *mp = XFS_M(inode->i_sb);
472 struct xfs_inode *ip = XFS_I(inode);
473
474 if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
475 DM_EVENT_ENABLED(ip, DM_EVENT_READ))
476 return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
477 return 0;
478}
479#endif /* HAVE_FOP_OPEN_EXEC */
480
481/* 416/*
482 * mmap()d file has taken write protection fault and is being made 417 * mmap()d file has taken write protection fault and is being made
483 * writable. We can set the page state up correctly for a writable 418 * writable. We can set the page state up correctly for a writable
@@ -546,13 +481,3 @@ static struct vm_operations_struct xfs_file_vm_ops = {
546 .fault = filemap_fault, 481 .fault = filemap_fault,
547 .page_mkwrite = xfs_vm_page_mkwrite, 482 .page_mkwrite = xfs_vm_page_mkwrite,
548}; 483};
549
550#ifdef CONFIG_XFS_DMAPI
551static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
552 .fault = xfs_vm_fault,
553 .page_mkwrite = xfs_vm_page_mkwrite,
554#ifdef HAVE_VMOP_MPROTECT
555 .mprotect = xfs_vm_mprotect,
556#endif
557};
558#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ddb86b73c6b..a42ba9d71156 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq(
238 return error; 238 return error;
239 if (ip == NULL) 239 if (ip == NULL)
240 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
241 if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { 241 if (ip->i_d.di_gen != igen) {
242 xfs_iput_new(ip, XFS_ILOCK_SHARED); 242 xfs_iput_new(ip, XFS_ILOCK_SHARED);
243 return XFS_ERROR(ENOENT); 243 return XFS_ERROR(ENOENT);
244 } 244 }
@@ -505,14 +505,14 @@ xfs_attrmulti_attr_get(
505{ 505{
506 char *kbuf; 506 char *kbuf;
507 int error = EFAULT; 507 int error = EFAULT;
508 508
509 if (*len > XATTR_SIZE_MAX) 509 if (*len > XATTR_SIZE_MAX)
510 return EINVAL; 510 return EINVAL;
511 kbuf = kmalloc(*len, GFP_KERNEL); 511 kbuf = kmalloc(*len, GFP_KERNEL);
512 if (!kbuf) 512 if (!kbuf)
513 return ENOMEM; 513 return ENOMEM;
514 514
515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); 515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
516 if (error) 516 if (error)
517 goto out_kfree; 517 goto out_kfree;
518 518
@@ -546,7 +546,7 @@ xfs_attrmulti_attr_set(
546 546
547 if (copy_from_user(kbuf, ubuf, len)) 547 if (copy_from_user(kbuf, ubuf, len))
548 goto out_kfree; 548 goto out_kfree;
549 549
550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
551 551
552 out_kfree: 552 out_kfree:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index a1237dad6430..2bf287ef5489 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -511,7 +511,8 @@ xfs_vn_rename(
511 xfs_dentry_to_name(&nname, ndentry); 511 xfs_dentry_to_name(&nname, ndentry);
512 512
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname); 514 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL);
515 if (likely(!error)) { 516 if (likely(!error)) {
516 if (new_inode) 517 if (new_inode)
517 xfs_validate_fields(new_inode); 518 xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e5143323e71f..4edc46915b57 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -75,6 +75,7 @@
75#include <linux/delay.h> 75#include <linux/delay.h>
76#include <linux/log2.h> 76#include <linux/log2.h>
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h>
78 79
79#include <asm/page.h> 80#include <asm/page.h>
80#include <asm/div64.h> 81#include <asm/div64.h>
@@ -99,7 +100,6 @@
99/* 100/*
100 * Feature macros (disable/enable) 101 * Feature macros (disable/enable)
101 */ 102 */
102#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
103#ifdef CONFIG_SMP 103#ifdef CONFIG_SMP
104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
105#else 105#else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1ebd8004469c..5e3b57516ec7 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -394,7 +394,7 @@ xfs_zero_last_block(
394 int error = 0; 394 int error = 0;
395 xfs_bmbt_irec_t imap; 395 xfs_bmbt_irec_t imap;
396 396
397 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 397 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
398 398
399 zero_offset = XFS_B_FSB_OFFSET(mp, isize); 399 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
400 if (zero_offset == 0) { 400 if (zero_offset == 0) {
@@ -425,14 +425,14 @@ xfs_zero_last_block(
425 * out sync. We need to drop the ilock while we do this so we 425 * out sync. We need to drop the ilock while we do this so we
426 * don't deadlock when the buffer cache calls back to us. 426 * don't deadlock when the buffer cache calls back to us.
427 */ 427 */
428 xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 428 xfs_iunlock(ip, XFS_ILOCK_EXCL);
429 429
430 zero_len = mp->m_sb.sb_blocksize - zero_offset; 430 zero_len = mp->m_sb.sb_blocksize - zero_offset;
431 if (isize + zero_len > offset) 431 if (isize + zero_len > offset)
432 zero_len = offset - isize; 432 zero_len = offset - isize;
433 error = xfs_iozero(ip, isize, zero_len); 433 error = xfs_iozero(ip, isize, zero_len);
434 434
435 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 435 xfs_ilock(ip, XFS_ILOCK_EXCL);
436 ASSERT(error >= 0); 436 ASSERT(error >= 0);
437 return error; 437 return error;
438} 438}
@@ -465,8 +465,7 @@ xfs_zero_eof(
465 int error = 0; 465 int error = 0;
466 xfs_bmbt_irec_t imap; 466 xfs_bmbt_irec_t imap;
467 467
468 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 468 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
469 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
470 ASSERT(offset > isize); 469 ASSERT(offset > isize);
471 470
472 /* 471 /*
@@ -475,8 +474,7 @@ xfs_zero_eof(
475 */ 474 */
476 error = xfs_zero_last_block(ip, offset, isize); 475 error = xfs_zero_last_block(ip, offset, isize);
477 if (error) { 476 if (error) {
478 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 477 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
479 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
480 return error; 478 return error;
481 } 479 }
482 480
@@ -507,8 +505,7 @@ xfs_zero_eof(
507 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 505 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
508 0, NULL, 0, &imap, &nimaps, NULL, NULL); 506 0, NULL, 0, &imap, &nimaps, NULL, NULL);
509 if (error) { 507 if (error) {
510 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 508 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
511 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
512 return error; 509 return error;
513 } 510 }
514 ASSERT(nimaps > 0); 511 ASSERT(nimaps > 0);
@@ -532,7 +529,7 @@ xfs_zero_eof(
532 * Drop the inode lock while we're doing the I/O. 529 * Drop the inode lock while we're doing the I/O.
533 * We'll still have the iolock to protect us. 530 * We'll still have the iolock to protect us.
534 */ 531 */
535 xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 532 xfs_iunlock(ip, XFS_ILOCK_EXCL);
536 533
537 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); 534 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
538 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); 535 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -548,13 +545,13 @@ xfs_zero_eof(
548 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 545 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
549 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 546 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
550 547
551 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 548 xfs_ilock(ip, XFS_ILOCK_EXCL);
552 } 549 }
553 550
554 return 0; 551 return 0;
555 552
556out_lock: 553out_lock:
557 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 554 xfs_ilock(ip, XFS_ILOCK_EXCL);
558 ASSERT(error >= 0); 555 ASSERT(error >= 0);
559 return error; 556 return error;
560} 557}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e1d498b4ba7a..e6be37dbd0e9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -50,7 +50,6 @@ struct xfs_iomap;
50#define XFS_INVAL_CACHED 18 50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19 51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20 52#define XFS_DIOWR_ENTER 20
53#define XFS_SENDFILE_ENTER 21
54#define XFS_WRITEPAGE_ENTER 22 53#define XFS_WRITEPAGE_ENTER 22
55#define XFS_RELEASEPAGE_ENTER 23 54#define XFS_RELEASEPAGE_ENTER 23
56#define XFS_INVALIDPAGE_ENTER 24 55#define XFS_INVALIDPAGE_ENTER 24
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 865eb708aa95..742b2c7852c1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1181,7 +1181,7 @@ xfs_fs_statfs(
1181 statp->f_fsid.val[0] = (u32)id; 1181 statp->f_fsid.val[0] = (u32)id;
1182 statp->f_fsid.val[1] = (u32)(id >> 32); 1182 statp->f_fsid.val[1] = (u32)(id >> 32);
1183 1183
1184 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 1184 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
1185 1185
1186 spin_lock(&mp->m_sb_lock); 1186 spin_lock(&mp->m_sb_lock);
1187 statp->f_bsize = sbp->sb_blocksize; 1187 statp->f_bsize = sbp->sb_blocksize;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 8b4d63ce8694..9d73cb5c0fc7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -25,12 +25,6 @@ struct attrlist_cursor_kern;
25 25
26typedef struct inode bhv_vnode_t; 26typedef struct inode bhv_vnode_t;
27 27
28#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode)
29#define VN_ISREG(vp) S_ISREG((vp)->i_mode)
30#define VN_ISDIR(vp) S_ISDIR((vp)->i_mode)
31#define VN_ISCHR(vp) S_ISCHR((vp)->i_mode)
32#define VN_ISBLK(vp) S_ISBLK((vp)->i_mode)
33
34/* 28/*
35 * Vnode to Linux inode mapping. 29 * Vnode to Linux inode mapping.
36 */ 30 */
@@ -151,24 +145,6 @@ typedef struct bhv_vattr {
151 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ 145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
152 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) 146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
153 147
154/*
155 * Modes.
156 */
157#define VSUID S_ISUID /* set user id on execution */
158#define VSGID S_ISGID /* set group id on execution */
159#define VSVTX S_ISVTX /* save swapped text even after use */
160#define VREAD S_IRUSR /* read, write, execute permissions */
161#define VWRITE S_IWUSR
162#define VEXEC S_IXUSR
163
164#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
165
166/*
167 * Check whether mandatory file locking is enabled.
168 */
169#define MANDLOCK(vp, mode) \
170 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
171
172extern void vn_init(void); 148extern void vn_init(void);
173extern int vn_revalidate(bhv_vnode_t *); 149extern int vn_revalidate(bhv_vnode_t *);
174 150
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 631ebb31b295..85df3288efd5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -933,7 +933,7 @@ xfs_qm_dqget(
933 type == XFS_DQ_PROJ || 933 type == XFS_DQ_PROJ ||
934 type == XFS_DQ_GROUP); 934 type == XFS_DQ_GROUP);
935 if (ip) { 935 if (ip) {
936 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 936 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
937 if (type == XFS_DQ_USER) 937 if (type == XFS_DQ_USER)
938 ASSERT(ip->i_udquot == NULL); 938 ASSERT(ip->i_udquot == NULL);
939 else 939 else
@@ -1088,7 +1088,7 @@ xfs_qm_dqget(
1088 xfs_qm_mplist_unlock(mp); 1088 xfs_qm_mplist_unlock(mp);
1089 XFS_DQ_HASH_UNLOCK(h); 1089 XFS_DQ_HASH_UNLOCK(h);
1090 dqret: 1090 dqret:
1091 ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip)); 1091 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1092 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1092 xfs_dqtrace_entry(dqp, "DQGET DONE");
1093 *O_dqpp = dqp; 1093 *O_dqpp = dqp;
1094 return (0); 1094 return (0);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 40ea56409561..d31cce1165c5 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -670,7 +670,7 @@ xfs_qm_dqattach_one(
670 xfs_dquot_t *dqp; 670 xfs_dquot_t *dqp;
671 int error; 671 int error;
672 672
673 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 673 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
674 error = 0; 674 error = 0;
675 /* 675 /*
676 * See if we already have it in the inode itself. IO_idqpp is 676 * See if we already have it in the inode itself. IO_idqpp is
@@ -874,7 +874,7 @@ xfs_qm_dqattach(
874 return 0; 874 return 0;
875 875
876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || 876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
877 XFS_ISLOCKED_INODE_EXCL(ip)); 877 xfs_isilocked(ip, XFS_ILOCK_EXCL));
878 878
879 if (! (flags & XFS_QMOPT_ILOCKED)) 879 if (! (flags & XFS_QMOPT_ILOCKED))
880 xfs_ilock(ip, XFS_ILOCK_EXCL); 880 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -888,7 +888,8 @@ xfs_qm_dqattach(
888 goto done; 888 goto done;
889 nquotas++; 889 nquotas++;
890 } 890 }
891 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 891
892 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
892 if (XFS_IS_OQUOTA_ON(mp)) { 893 if (XFS_IS_OQUOTA_ON(mp)) {
893 error = XFS_IS_GQUOTA_ON(mp) ? 894 error = XFS_IS_GQUOTA_ON(mp) ?
894 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 895 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
@@ -913,7 +914,7 @@ xfs_qm_dqattach(
913 * This WON'T, in general, result in a thrash. 914 * This WON'T, in general, result in a thrash.
914 */ 915 */
915 if (nquotas == 2) { 916 if (nquotas == 2) {
916 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 917 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
917 ASSERT(ip->i_udquot); 918 ASSERT(ip->i_udquot);
918 ASSERT(ip->i_gdquot); 919 ASSERT(ip->i_gdquot);
919 920
@@ -956,7 +957,7 @@ xfs_qm_dqattach(
956 957
957#ifdef QUOTADEBUG 958#ifdef QUOTADEBUG
958 else 959 else
959 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 960 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
960#endif 961#endif
961 return error; 962 return error;
962} 963}
@@ -1291,7 +1292,7 @@ xfs_qm_dqget_noattach(
1291 xfs_mount_t *mp; 1292 xfs_mount_t *mp;
1292 xfs_dquot_t *udqp, *gdqp; 1293 xfs_dquot_t *udqp, *gdqp;
1293 1294
1294 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 1295 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1295 mp = ip->i_mount; 1296 mp = ip->i_mount;
1296 udqp = NULL; 1297 udqp = NULL;
1297 gdqp = NULL; 1298 gdqp = NULL;
@@ -1392,7 +1393,7 @@ xfs_qm_qino_alloc(
1392 * Keep an extra reference to this quota inode. This inode is 1393 * Keep an extra reference to this quota inode. This inode is
1393 * locked exclusively and joined to the transaction already. 1394 * locked exclusively and joined to the transaction already.
1394 */ 1395 */
1395 ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip)); 1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1396 VN_HOLD(XFS_ITOV((*ip))); 1397 VN_HOLD(XFS_ITOV((*ip)));
1397 1398
1398 /* 1399 /*
@@ -1737,12 +1738,6 @@ xfs_qm_dqusage_adjust(
1737 return error; 1738 return error;
1738 } 1739 }
1739 1740
1740 if (ip->i_d.di_mode == 0) {
1741 xfs_iput_new(ip, XFS_ILOCK_EXCL);
1742 *res = BULKSTAT_RV_NOTHING;
1743 return XFS_ERROR(ENOENT);
1744 }
1745
1746 /* 1741 /*
1747 * Obtain the locked dquots. In case of an error (eg. allocation 1742 * Obtain the locked dquots. In case of an error (eg. allocation
1748 * fails for ENOSPC), we return the negative of the error number 1743 * fails for ENOSPC), we return the negative of the error number
@@ -2563,7 +2558,7 @@ xfs_qm_vop_chown(
2563 uint bfield = XFS_IS_REALTIME_INODE(ip) ? 2558 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2564 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 2559 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2565 2560
2566 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2561 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2567 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 2562 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2568 2563
2569 /* old dquot */ 2564 /* old dquot */
@@ -2607,7 +2602,7 @@ xfs_qm_vop_chown_reserve(
2607 uint delblks, blkflags, prjflags = 0; 2602 uint delblks, blkflags, prjflags = 0;
2608 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 2603 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2609 2604
2610 ASSERT(XFS_ISLOCKED_INODE(ip)); 2605 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2611 mp = ip->i_mount; 2606 mp = ip->i_mount;
2612 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2607 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2613 2608
@@ -2717,7 +2712,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2717 if (!XFS_IS_QUOTA_ON(tp->t_mountp)) 2712 if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2718 return; 2713 return;
2719 2714
2720 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2715 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2721 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); 2716 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2722 2717
2723 if (udqp) { 2718 if (udqp) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 8342823dbdc3..768a3b27d2b6 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1366,12 +1366,6 @@ xfs_qm_internalqcheck_adjust(
1366 return (error); 1366 return (error);
1367 } 1367 }
1368 1368
1369 if (ip->i_d.di_mode == 0) {
1370 xfs_iput_new(ip, lock_flags);
1371 *res = BULKSTAT_RV_NOTHING;
1372 return XFS_ERROR(ENOENT);
1373 }
1374
1375 /* 1369 /*
1376 * This inode can have blocks after eof which can get released 1370 * This inode can have blocks after eof which can get released
1377 * when we send it to inactive. Since we don't check the dquot 1371 * when we send it to inactive. Since we don't check the dquot
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index a8b85e2be9d5..5e4a40b1c565 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -27,11 +27,6 @@
27/* Number of dquots that fit in to a dquot block */ 27/* Number of dquots that fit in to a dquot block */
28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) 28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
29 29
30#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
31 MR_UPDATE | MR_ACCESS) != 0)
32#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
33 MR_UPDATE) != 0)
34
35#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) 30#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
36 31
37#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) 32#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index f441f836ca8b..99611381e740 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks(
834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); 834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); 835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
836 836
837 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 837 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == 839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
840 XFS_TRANS_DQ_RES_RTBLKS || 840 XFS_TRANS_DQ_RES_RTBLKS ||
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 855da0408647..75845f950814 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -49,8 +49,6 @@ extern void assfail(char *expr, char *f, int l);
49 49
50#else /* DEBUG */ 50#else /* DEBUG */
51 51
52#include <linux/random.h>
53
54#define ASSERT(expr) \ 52#define ASSERT(expr) \
55 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 53 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
56 54
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 765aaf65e2d3..540e4c989825 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25#define QUOTADEBUG 1 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE 28#ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8e130b9720ae..ebee3a4f703a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -72,7 +72,7 @@ xfs_acl_vhasacl_default(
72{ 72{
73 int error; 73 int error;
74 74
75 if (!VN_ISDIR(vp)) 75 if (!S_ISDIR(vp->i_mode))
76 return 0; 76 return 0;
77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); 77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
78 return (error == 0); 78 return (error == 0);
@@ -238,15 +238,8 @@ xfs_acl_vget(
238 error = EINVAL; 238 error = EINVAL;
239 goto out; 239 goto out;
240 } 240 }
241 if (kind == _ACL_TYPE_ACCESS) { 241 if (kind == _ACL_TYPE_ACCESS)
242 bhv_vattr_t va; 242 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
243
244 va.va_mask = XFS_AT_MODE;
245 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
246 if (error)
247 goto out;
248 xfs_acl_sync_mode(va.va_mode, xfs_acl);
249 }
250 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); 243 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
251 } 244 }
252out: 245out:
@@ -341,14 +334,15 @@ xfs_acl_iaccess(
341{ 334{
342 xfs_acl_t *acl; 335 xfs_acl_t *acl;
343 int rval; 336 int rval;
337 struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
344 338
345 if (!(_ACL_ALLOC(acl))) 339 if (!(_ACL_ALLOC(acl)))
346 return -1; 340 return -1;
347 341
348 /* If the file has no ACL return -1. */ 342 /* If the file has no ACL return -1. */
349 rval = sizeof(xfs_acl_t); 343 rval = sizeof(xfs_acl_t);
350 if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE, 344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
351 (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) { 345 ATTR_ROOT | ATTR_KERNACCESS)) {
352 _ACL_FREE(acl); 346 _ACL_FREE(acl);
353 return -1; 347 return -1;
354 } 348 }
@@ -373,23 +367,15 @@ xfs_acl_allow_set(
373 bhv_vnode_t *vp, 367 bhv_vnode_t *vp,
374 int kind) 368 int kind)
375{ 369{
376 xfs_inode_t *ip = xfs_vtoi(vp);
377 bhv_vattr_t va;
378 int error;
379
380 if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) 370 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
381 return EPERM; 371 return EPERM;
382 if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) 372 if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
383 return ENOTDIR; 373 return ENOTDIR;
384 if (vp->i_sb->s_flags & MS_RDONLY) 374 if (vp->i_sb->s_flags & MS_RDONLY)
385 return EROFS; 375 return EROFS;
386 va.va_mask = XFS_AT_UID; 376 if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
387 error = xfs_getattr(ip, &va, 0);
388 if (error)
389 return error;
390 if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
391 return EPERM; 377 return EPERM;
392 return error; 378 return 0;
393} 379}
394 380
395/* 381/*
@@ -594,7 +580,7 @@ xfs_acl_get_attr(
594 *error = xfs_attr_get(xfs_vtoi(vp), 580 *error = xfs_attr_get(xfs_vtoi(vp),
595 kind == _ACL_TYPE_ACCESS ? 581 kind == _ACL_TYPE_ACCESS ?
596 SGI_ACL_FILE : SGI_ACL_DEFAULT, 582 SGI_ACL_FILE : SGI_ACL_DEFAULT,
597 (char *)aclp, &len, flags, sys_cred); 583 (char *)aclp, &len, flags);
598 if (*error || (flags & ATTR_KERNOVAL)) 584 if (*error || (flags & ATTR_KERNOVAL))
599 return; 585 return;
600 xfs_acl_get_endian(aclp); 586 xfs_acl_get_endian(aclp);
@@ -643,7 +629,6 @@ xfs_acl_vtoacl(
643 xfs_acl_t *access_acl, 629 xfs_acl_t *access_acl,
644 xfs_acl_t *default_acl) 630 xfs_acl_t *default_acl)
645{ 631{
646 bhv_vattr_t va;
647 int error = 0; 632 int error = 0;
648 633
649 if (access_acl) { 634 if (access_acl) {
@@ -652,16 +637,10 @@ xfs_acl_vtoacl(
652 * be obtained for some reason, invalidate the access ACL. 637 * be obtained for some reason, invalidate the access ACL.
653 */ 638 */
654 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); 639 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
655 if (!error) {
656 /* Got the ACL, need the mode... */
657 va.va_mask = XFS_AT_MODE;
658 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
659 }
660
661 if (error) 640 if (error)
662 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; 641 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
663 else /* We have a good ACL and the file mode, synchronize. */ 642 else /* We have a good ACL and the file mode, synchronize. */
664 xfs_acl_sync_mode(va.va_mode, access_acl); 643 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
665 } 644 }
666 645
667 if (default_acl) { 646 if (default_acl) {
@@ -719,7 +698,7 @@ xfs_acl_inherit(
719 * If the new file is a directory, its default ACL is a copy of 698 * If the new file is a directory, its default ACL is a copy of
720 * the containing directory's default ACL. 699 * the containing directory's default ACL.
721 */ 700 */
722 if (VN_ISDIR(vp)) 701 if (S_ISDIR(vp->i_mode))
723 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 702 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
724 if (!error && !basicperms) 703 if (!error && !basicperms)
725 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 704 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
@@ -744,7 +723,7 @@ xfs_acl_setmode(
744 bhv_vattr_t va; 723 bhv_vattr_t va;
745 xfs_acl_entry_t *ap; 724 xfs_acl_entry_t *ap;
746 xfs_acl_entry_t *gap = NULL; 725 xfs_acl_entry_t *gap = NULL;
747 int i, error, nomask = 1; 726 int i, nomask = 1;
748 727
749 *basicperms = 1; 728 *basicperms = 1;
750 729
@@ -756,11 +735,7 @@ xfs_acl_setmode(
756 * mode. The m:: bits take precedence over the g:: bits. 735 * mode. The m:: bits take precedence over the g:: bits.
757 */ 736 */
758 va.va_mask = XFS_AT_MODE; 737 va.va_mask = XFS_AT_MODE;
759 error = xfs_getattr(xfs_vtoi(vp), &va, 0); 738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
760 if (error)
761 return error;
762
763 va.va_mask = XFS_AT_MODE;
764 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
765 ap = acl->acl_entry; 740 ap = acl->acl_entry;
766 for (i = 0; i < acl->acl_cnt; ++i) { 741 for (i = 0; i < acl->acl_cnt; ++i) {
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36d781ee5fcc..df151a859186 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -101,14 +101,28 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
101ktrace_t *xfs_attr_trace_buf; 101ktrace_t *xfs_attr_trace_buf;
102#endif 102#endif
103 103
104STATIC int
105xfs_attr_name_to_xname(
106 struct xfs_name *xname,
107 const char *aname)
108{
109 if (!aname)
110 return EINVAL;
111 xname->name = aname;
112 xname->len = strlen(aname);
113 if (xname->len >= MAXNAMELEN)
114 return EFAULT; /* match IRIX behaviour */
115
116 return 0;
117}
104 118
105/*======================================================================== 119/*========================================================================
106 * Overall external interface routines. 120 * Overall external interface routines.
107 *========================================================================*/ 121 *========================================================================*/
108 122
109int 123int
110xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen, 124xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
111 char *value, int *valuelenp, int flags, struct cred *cred) 125 char *value, int *valuelenp, int flags)
112{ 126{
113 xfs_da_args_t args; 127 xfs_da_args_t args;
114 int error; 128 int error;
@@ -122,8 +136,8 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
122 * Fill in the arg structure for this request. 136 * Fill in the arg structure for this request.
123 */ 137 */
124 memset((char *)&args, 0, sizeof(args)); 138 memset((char *)&args, 0, sizeof(args));
125 args.name = name; 139 args.name = name->name;
126 args.namelen = namelen; 140 args.namelen = name->len;
127 args.value = value; 141 args.value = value;
128 args.valuelen = *valuelenp; 142 args.valuelen = *valuelenp;
129 args.flags = flags; 143 args.flags = flags;
@@ -162,31 +176,29 @@ xfs_attr_get(
162 const char *name, 176 const char *name,
163 char *value, 177 char *value,
164 int *valuelenp, 178 int *valuelenp,
165 int flags, 179 int flags)
166 cred_t *cred)
167{ 180{
168 int error, namelen; 181 int error;
182 struct xfs_name xname;
169 183
170 XFS_STATS_INC(xs_attr_get); 184 XFS_STATS_INC(xs_attr_get);
171 185
172 if (!name)
173 return(EINVAL);
174 namelen = strlen(name);
175 if (namelen >= MAXNAMELEN)
176 return(EFAULT); /* match IRIX behaviour */
177
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 return(EIO); 187 return(EIO);
180 188
189 error = xfs_attr_name_to_xname(&xname, name);
190 if (error)
191 return error;
192
181 xfs_ilock(ip, XFS_ILOCK_SHARED); 193 xfs_ilock(ip, XFS_ILOCK_SHARED);
182 error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred); 194 error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
183 xfs_iunlock(ip, XFS_ILOCK_SHARED); 195 xfs_iunlock(ip, XFS_ILOCK_SHARED);
184 return(error); 196 return(error);
185} 197}
186 198
187int 199STATIC int
188xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, 200xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
189 char *value, int valuelen, int flags) 201 char *value, int valuelen, int flags)
190{ 202{
191 xfs_da_args_t args; 203 xfs_da_args_t args;
192 xfs_fsblock_t firstblock; 204 xfs_fsblock_t firstblock;
@@ -209,7 +221,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
209 */ 221 */
210 if (XFS_IFORK_Q(dp) == 0) { 222 if (XFS_IFORK_Q(dp) == 0) {
211 int sf_size = sizeof(xfs_attr_sf_hdr_t) + 223 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
212 XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen); 224 XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
213 225
214 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) 226 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
215 return(error); 227 return(error);
@@ -219,8 +231,8 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
219 * Fill in the arg structure for this request. 231 * Fill in the arg structure for this request.
220 */ 232 */
221 memset((char *)&args, 0, sizeof(args)); 233 memset((char *)&args, 0, sizeof(args));
222 args.name = name; 234 args.name = name->name;
223 args.namelen = namelen; 235 args.namelen = name->len;
224 args.value = value; 236 args.value = value;
225 args.valuelen = valuelen; 237 args.valuelen = valuelen;
226 args.flags = flags; 238 args.flags = flags;
@@ -236,7 +248,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
236 * Determine space new attribute will use, and if it would be 248 * Determine space new attribute will use, and if it would be
237 * "local" or "remote" (note: local != inline). 249 * "local" or "remote" (note: local != inline).
238 */ 250 */
239 size = xfs_attr_leaf_newentsize(namelen, valuelen, 251 size = xfs_attr_leaf_newentsize(name->len, valuelen,
240 mp->m_sb.sb_blocksize, &local); 252 mp->m_sb.sb_blocksize, &local);
241 253
242 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); 254 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
@@ -429,26 +441,27 @@ xfs_attr_set(
429 int valuelen, 441 int valuelen,
430 int flags) 442 int flags)
431{ 443{
432 int namelen; 444 int error;
433 445 struct xfs_name xname;
434 namelen = strlen(name);
435 if (namelen >= MAXNAMELEN)
436 return EFAULT; /* match IRIX behaviour */
437 446
438 XFS_STATS_INC(xs_attr_set); 447 XFS_STATS_INC(xs_attr_set);
439 448
440 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 449 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
441 return (EIO); 450 return (EIO);
442 451
443 return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); 452 error = xfs_attr_name_to_xname(&xname, name);
453 if (error)
454 return error;
455
456 return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
444} 457}
445 458
446/* 459/*
447 * Generic handler routine to remove a name from an attribute list. 460 * Generic handler routine to remove a name from an attribute list.
448 * Transitions attribute list from Btree to shortform as necessary. 461 * Transitions attribute list from Btree to shortform as necessary.
449 */ 462 */
450int 463STATIC int
451xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) 464xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
452{ 465{
453 xfs_da_args_t args; 466 xfs_da_args_t args;
454 xfs_fsblock_t firstblock; 467 xfs_fsblock_t firstblock;
@@ -460,8 +473,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
460 * Fill in the arg structure for this request. 473 * Fill in the arg structure for this request.
461 */ 474 */
462 memset((char *)&args, 0, sizeof(args)); 475 memset((char *)&args, 0, sizeof(args));
463 args.name = name; 476 args.name = name->name;
464 args.namelen = namelen; 477 args.namelen = name->len;
465 args.flags = flags; 478 args.flags = flags;
466 args.hashval = xfs_da_hashname(args.name, args.namelen); 479 args.hashval = xfs_da_hashname(args.name, args.namelen);
467 args.dp = dp; 480 args.dp = dp;
@@ -575,17 +588,18 @@ xfs_attr_remove(
575 const char *name, 588 const char *name,
576 int flags) 589 int flags)
577{ 590{
578 int namelen; 591 int error;
579 592 struct xfs_name xname;
580 namelen = strlen(name);
581 if (namelen >= MAXNAMELEN)
582 return EFAULT; /* match IRIX behaviour */
583 593
584 XFS_STATS_INC(xs_attr_remove); 594 XFS_STATS_INC(xs_attr_remove);
585 595
586 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 596 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
587 return (EIO); 597 return (EIO);
588 598
599 error = xfs_attr_name_to_xname(&xname, name);
600 if (error)
601 return error;
602
589 xfs_ilock(dp, XFS_ILOCK_SHARED); 603 xfs_ilock(dp, XFS_ILOCK_SHARED);
590 if (XFS_IFORK_Q(dp) == 0 || 604 if (XFS_IFORK_Q(dp) == 0 ||
591 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
@@ -595,10 +609,10 @@ xfs_attr_remove(
595 } 609 }
596 xfs_iunlock(dp, XFS_ILOCK_SHARED); 610 xfs_iunlock(dp, XFS_ILOCK_SHARED);
597 611
598 return xfs_attr_remove_int(dp, name, namelen, flags); 612 return xfs_attr_remove_int(dp, &xname, flags);
599} 613}
600 614
601int /* error */ 615STATIC int
602xfs_attr_list_int(xfs_attr_list_context_t *context) 616xfs_attr_list_int(xfs_attr_list_context_t *context)
603{ 617{
604 int error; 618 int error;
@@ -2522,8 +2536,7 @@ attr_generic_get(
2522{ 2536{
2523 int error, asize = size; 2537 int error, asize = size;
2524 2538
2525 error = xfs_attr_get(xfs_vtoi(vp), name, data, 2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2526 &asize, xflags, NULL);
2527 if (!error) 2540 if (!error)
2528 return asize; 2541 return asize;
2529 return -error; 2542 return -error;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 786eba3121c4..6cfc9384fe35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -158,14 +158,10 @@ struct xfs_da_args;
158/* 158/*
159 * Overall external interface routines. 159 * Overall external interface routines.
160 */ 160 */
161int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
162int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
163int xfs_attr_list_int(struct xfs_attr_list_context *);
164int xfs_attr_inactive(struct xfs_inode *dp); 161int xfs_attr_inactive(struct xfs_inode *dp);
165 162
166int xfs_attr_shortform_getvalue(struct xfs_da_args *); 163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
167int xfs_attr_fetch(struct xfs_inode *, const char *, int, 164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
168 char *, int *, int, struct cred *);
169int xfs_attr_rmtval_get(struct xfs_da_args *args); 165int xfs_attr_rmtval_get(struct xfs_da_args *args);
170 166
171#endif /* __XFS_ATTR_H__ */ 167#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index eb198c01c35d..53c259f5a5af 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4074,7 +4074,6 @@ xfs_bmap_add_attrfork(
4074error2: 4074error2:
4075 xfs_bmap_cancel(&flist); 4075 xfs_bmap_cancel(&flist);
4076error1: 4076error1:
4077 ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
4078 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4077 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4079error0: 4078error0:
4080 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 4079 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3f53fad356a3..5f3647cb9885 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -162,7 +162,7 @@ xfs_swap_extents(
162 ips[1] = ip; 162 ips[1] = ip;
163 } 163 }
164 164
165 xfs_lock_inodes(ips, 2, 0, lock_flags); 165 xfs_lock_inodes(ips, 2, lock_flags);
166 locked = 1; 166 locked = 1;
167 167
168 /* Verify that both files have the same format */ 168 /* Verify that both files have the same format */
@@ -265,7 +265,7 @@ xfs_swap_extents(
265 locked = 0; 265 locked = 0;
266 goto error0; 266 goto error0;
267 } 267 }
268 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 268 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
269 269
270 /* 270 /*
271 * Count the number of extended attribute blocks 271 * Count the number of extended attribute blocks
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d3a0f538d6a6..381ebda4f7bc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
462 xfs_mount_t *mp, 462 xfs_mount_t *mp,
463 xfs_fsop_counts_t *cnt) 463 xfs_fsop_counts_t *cnt)
464{ 464{
465 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 465 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
466 spin_lock(&mp->m_sb_lock); 466 spin_lock(&mp->m_sb_lock);
467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
468 cnt->freertx = mp->m_sb.sb_frextents; 468 cnt->freertx = mp->m_sb.sb_frextents;
@@ -524,7 +524,7 @@ xfs_reserve_blocks(
524 */ 524 */
525retry: 525retry:
526 spin_lock(&mp->m_sb_lock); 526 spin_lock(&mp->m_sb_lock);
527 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); 527 xfs_icsb_sync_counters_locked(mp, 0);
528 528
529 /* 529 /*
530 * If our previous reservation was larger than the current value, 530 * If our previous reservation was larger than the current value,
@@ -552,11 +552,8 @@ retry:
552 mp->m_resblks += free; 552 mp->m_resblks += free;
553 mp->m_resblks_avail += free; 553 mp->m_resblks_avail += free;
554 fdblks_delta = -free; 554 fdblks_delta = -free;
555 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
556 } else { 555 } else {
557 fdblks_delta = -delta; 556 fdblks_delta = -delta;
558 mp->m_sb.sb_fdblocks =
559 lcounter + XFS_ALLOC_SET_ASIDE(mp);
560 mp->m_resblks = request; 557 mp->m_resblks = request;
561 mp->m_resblks_avail += delta; 558 mp->m_resblks_avail += delta;
562 } 559 }
@@ -587,7 +584,6 @@ out:
587 if (error == ENOSPC) 584 if (error == ENOSPC)
588 goto retry; 585 goto retry;
589 } 586 }
590
591 return 0; 587 return 0;
592} 588}
593 589
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a64dfbd565a5..aad8c5da38af 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -147,6 +147,7 @@ xfs_ialloc_ag_alloc(
147 int version; /* inode version number to use */ 147 int version; /* inode version number to use */
148 int isaligned = 0; /* inode allocation at stripe unit */ 148 int isaligned = 0; /* inode allocation at stripe unit */
149 /* boundary */ 149 /* boundary */
150 unsigned int gen;
150 151
151 args.tp = tp; 152 args.tp = tp;
152 args.mp = tp->t_mountp; 153 args.mp = tp->t_mountp;
@@ -290,6 +291,14 @@ xfs_ialloc_ag_alloc(
290 else 291 else
291 version = XFS_DINODE_VERSION_1; 292 version = XFS_DINODE_VERSION_1;
292 293
294 /*
295 * Seed the new inode cluster with a random generation number. This
296 * prevents short-term reuse of generation numbers if a chunk is
297 * freed and then immediately reallocated. We use random numbers
298 * rather than a linear progression to prevent the next generation
299 * number from being easily guessable.
300 */
301 gen = random32();
293 for (j = 0; j < nbufs; j++) { 302 for (j = 0; j < nbufs; j++) {
294 /* 303 /*
295 * Get the block. 304 * Get the block.
@@ -309,6 +318,7 @@ xfs_ialloc_ag_alloc(
309 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 318 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
310 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 319 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
311 free->di_core.di_version = version; 320 free->di_core.di_version = version;
321 free->di_core.di_gen = cpu_to_be32(gen);
312 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 322 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
313 xfs_ialloc_log_di(tp, fbuf, i, 323 xfs_ialloc_log_di(tp, fbuf, i,
314 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); 324 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e657c5128460..b07604b94d9f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -593,8 +593,9 @@ xfs_iunlock_map_shared(
593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
594 */ 594 */
595void 595void
596xfs_ilock(xfs_inode_t *ip, 596xfs_ilock(
597 uint lock_flags) 597 xfs_inode_t *ip,
598 uint lock_flags)
598{ 599{
599 /* 600 /*
600 * You can't set both SHARED and EXCL for the same lock, 601 * You can't set both SHARED and EXCL for the same lock,
@@ -607,16 +608,16 @@ xfs_ilock(xfs_inode_t *ip,
607 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 608 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
608 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 609 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
609 610
610 if (lock_flags & XFS_IOLOCK_EXCL) { 611 if (lock_flags & XFS_IOLOCK_EXCL)
611 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 612 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
612 } else if (lock_flags & XFS_IOLOCK_SHARED) { 613 else if (lock_flags & XFS_IOLOCK_SHARED)
613 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 614 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
614 } 615
615 if (lock_flags & XFS_ILOCK_EXCL) { 616 if (lock_flags & XFS_ILOCK_EXCL)
616 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 617 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
617 } else if (lock_flags & XFS_ILOCK_SHARED) { 618 else if (lock_flags & XFS_ILOCK_SHARED)
618 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 619 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
619 } 620
620 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 621 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
621} 622}
622 623
@@ -631,15 +632,12 @@ xfs_ilock(xfs_inode_t *ip,
631 * lock_flags -- this parameter indicates the inode's locks to be 632 * lock_flags -- this parameter indicates the inode's locks to be
632 * to be locked. See the comment for xfs_ilock() for a list 633 * to be locked. See the comment for xfs_ilock() for a list
633 * of valid values. 634 * of valid values.
634 *
635 */ 635 */
636int 636int
637xfs_ilock_nowait(xfs_inode_t *ip, 637xfs_ilock_nowait(
638 uint lock_flags) 638 xfs_inode_t *ip,
639 uint lock_flags)
639{ 640{
640 int iolocked;
641 int ilocked;
642
643 /* 641 /*
644 * You can't set both SHARED and EXCL for the same lock, 642 * You can't set both SHARED and EXCL for the same lock,
645 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 643 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
@@ -651,37 +649,30 @@ xfs_ilock_nowait(xfs_inode_t *ip,
651 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 649 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
652 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 650 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
653 651
654 iolocked = 0;
655 if (lock_flags & XFS_IOLOCK_EXCL) { 652 if (lock_flags & XFS_IOLOCK_EXCL) {
656 iolocked = mrtryupdate(&ip->i_iolock); 653 if (!mrtryupdate(&ip->i_iolock))
657 if (!iolocked) { 654 goto out;
658 return 0;
659 }
660 } else if (lock_flags & XFS_IOLOCK_SHARED) { 655 } else if (lock_flags & XFS_IOLOCK_SHARED) {
661 iolocked = mrtryaccess(&ip->i_iolock); 656 if (!mrtryaccess(&ip->i_iolock))
662 if (!iolocked) { 657 goto out;
663 return 0;
664 }
665 } 658 }
666 if (lock_flags & XFS_ILOCK_EXCL) { 659 if (lock_flags & XFS_ILOCK_EXCL) {
667 ilocked = mrtryupdate(&ip->i_lock); 660 if (!mrtryupdate(&ip->i_lock))
668 if (!ilocked) { 661 goto out_undo_iolock;
669 if (iolocked) {
670 mrunlock(&ip->i_iolock);
671 }
672 return 0;
673 }
674 } else if (lock_flags & XFS_ILOCK_SHARED) { 662 } else if (lock_flags & XFS_ILOCK_SHARED) {
675 ilocked = mrtryaccess(&ip->i_lock); 663 if (!mrtryaccess(&ip->i_lock))
676 if (!ilocked) { 664 goto out_undo_iolock;
677 if (iolocked) {
678 mrunlock(&ip->i_iolock);
679 }
680 return 0;
681 }
682 } 665 }
683 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 666 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
684 return 1; 667 return 1;
668
669 out_undo_iolock:
670 if (lock_flags & XFS_IOLOCK_EXCL)
671 mrunlock_excl(&ip->i_iolock);
672 else if (lock_flags & XFS_IOLOCK_SHARED)
673 mrunlock_shared(&ip->i_iolock);
674 out:
675 return 0;
685} 676}
686 677
687/* 678/*
@@ -697,8 +688,9 @@ xfs_ilock_nowait(xfs_inode_t *ip,
697 * 688 *
698 */ 689 */
699void 690void
700xfs_iunlock(xfs_inode_t *ip, 691xfs_iunlock(
701 uint lock_flags) 692 xfs_inode_t *ip,
693 uint lock_flags)
702{ 694{
703 /* 695 /*
704 * You can't set both SHARED and EXCL for the same lock, 696 * You can't set both SHARED and EXCL for the same lock,
@@ -713,31 +705,25 @@ xfs_iunlock(xfs_inode_t *ip,
713 XFS_LOCK_DEP_MASK)) == 0); 705 XFS_LOCK_DEP_MASK)) == 0);
714 ASSERT(lock_flags != 0); 706 ASSERT(lock_flags != 0);
715 707
716 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { 708 if (lock_flags & XFS_IOLOCK_EXCL)
717 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || 709 mrunlock_excl(&ip->i_iolock);
718 (ismrlocked(&ip->i_iolock, MR_ACCESS))); 710 else if (lock_flags & XFS_IOLOCK_SHARED)
719 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || 711 mrunlock_shared(&ip->i_iolock);
720 (ismrlocked(&ip->i_iolock, MR_UPDATE)));
721 mrunlock(&ip->i_iolock);
722 }
723 712
724 if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { 713 if (lock_flags & XFS_ILOCK_EXCL)
725 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || 714 mrunlock_excl(&ip->i_lock);
726 (ismrlocked(&ip->i_lock, MR_ACCESS))); 715 else if (lock_flags & XFS_ILOCK_SHARED)
727 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || 716 mrunlock_shared(&ip->i_lock);
728 (ismrlocked(&ip->i_lock, MR_UPDATE)));
729 mrunlock(&ip->i_lock);
730 717
718 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
719 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
731 /* 720 /*
732 * Let the AIL know that this item has been unlocked in case 721 * Let the AIL know that this item has been unlocked in case
733 * it is in the AIL and anyone is waiting on it. Don't do 722 * it is in the AIL and anyone is waiting on it. Don't do
734 * this if the caller has asked us not to. 723 * this if the caller has asked us not to.
735 */ 724 */
736 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && 725 xfs_trans_unlocked_item(ip->i_mount,
737 ip->i_itemp != NULL) { 726 (xfs_log_item_t*)(ip->i_itemp));
738 xfs_trans_unlocked_item(ip->i_mount,
739 (xfs_log_item_t*)(ip->i_itemp));
740 }
741 } 727 }
742 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 728 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
743} 729}
@@ -747,21 +733,47 @@ xfs_iunlock(xfs_inode_t *ip,
747 * if it is being demoted. 733 * if it is being demoted.
748 */ 734 */
749void 735void
750xfs_ilock_demote(xfs_inode_t *ip, 736xfs_ilock_demote(
751 uint lock_flags) 737 xfs_inode_t *ip,
738 uint lock_flags)
752{ 739{
753 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 740 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
754 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 741 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
755 742
756 if (lock_flags & XFS_ILOCK_EXCL) { 743 if (lock_flags & XFS_ILOCK_EXCL)
757 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
758 mrdemote(&ip->i_lock); 744 mrdemote(&ip->i_lock);
759 } 745 if (lock_flags & XFS_IOLOCK_EXCL)
760 if (lock_flags & XFS_IOLOCK_EXCL) {
761 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
762 mrdemote(&ip->i_iolock); 746 mrdemote(&ip->i_iolock);
747}
748
749#ifdef DEBUG
750/*
751 * Debug-only routine, without additional rw_semaphore APIs, we can
752 * now only answer requests regarding whether we hold the lock for write
753 * (reader state is outside our visibility, we only track writer state).
754 *
755 * Note: this means !xfs_isilocked would give false positives, so don't do that.
756 */
757int
758xfs_isilocked(
759 xfs_inode_t *ip,
760 uint lock_flags)
761{
762 if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
763 XFS_ILOCK_EXCL) {
764 if (!ip->i_lock.mr_writer)
765 return 0;
763 } 766 }
767
768 if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
769 XFS_IOLOCK_EXCL) {
770 if (!ip->i_iolock.mr_writer)
771 return 0;
772 }
773
774 return 1;
764} 775}
776#endif
765 777
766/* 778/*
767 * The following three routines simply manage the i_flock 779 * The following three routines simply manage the i_flock
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ca12acb90394..cf0bb9c1d621 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1291,7 +1291,7 @@ xfs_file_last_byte(
1291 xfs_fileoff_t size_last_block; 1291 xfs_fileoff_t size_last_block;
1292 int error; 1292 int error;
1293 1293
1294 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 1294 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1295 1295
1296 mp = ip->i_mount; 1296 mp = ip->i_mount;
1297 /* 1297 /*
@@ -1402,7 +1402,7 @@ xfs_itruncate_start(
1402 bhv_vnode_t *vp; 1402 bhv_vnode_t *vp;
1403 int error = 0; 1403 int error = 0;
1404 1404
1405 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1405 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1406 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1406 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1408 (flags == XFS_ITRUNC_MAYBE)); 1408 (flags == XFS_ITRUNC_MAYBE));
@@ -1528,8 +1528,7 @@ xfs_itruncate_finish(
1528 xfs_bmap_free_t free_list; 1528 xfs_bmap_free_t free_list;
1529 int error; 1529 int error;
1530 1530
1531 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1531 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1532 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1533 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1532 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1534 ASSERT(*tp != NULL); 1533 ASSERT(*tp != NULL);
1535 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1534 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1780,8 +1779,7 @@ xfs_igrow_start(
1780 xfs_fsize_t new_size, 1779 xfs_fsize_t new_size,
1781 cred_t *credp) 1780 cred_t *credp)
1782{ 1781{
1783 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1784 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1785 ASSERT(new_size > ip->i_size); 1783 ASSERT(new_size > ip->i_size);
1786 1784
1787 /* 1785 /*
@@ -1809,8 +1807,7 @@ xfs_igrow_finish(
1809 xfs_fsize_t new_size, 1807 xfs_fsize_t new_size,
1810 int change_flag) 1808 int change_flag)
1811{ 1809{
1812 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1813 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1814 ASSERT(ip->i_transp == tp); 1811 ASSERT(ip->i_transp == tp);
1815 ASSERT(new_size > ip->i_size); 1812 ASSERT(new_size > ip->i_size);
1816 1813
@@ -2287,7 +2284,7 @@ xfs_ifree(
2287 xfs_dinode_t *dip; 2284 xfs_dinode_t *dip;
2288 xfs_buf_t *ibp; 2285 xfs_buf_t *ibp;
2289 2286
2290 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2287 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2291 ASSERT(ip->i_transp == tp); 2288 ASSERT(ip->i_transp == tp);
2292 ASSERT(ip->i_d.di_nlink == 0); 2289 ASSERT(ip->i_d.di_nlink == 0);
2293 ASSERT(ip->i_d.di_nextents == 0); 2290 ASSERT(ip->i_d.di_nextents == 0);
@@ -2746,7 +2743,7 @@ void
2746xfs_ipin( 2743xfs_ipin(
2747 xfs_inode_t *ip) 2744 xfs_inode_t *ip)
2748{ 2745{
2749 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2746 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2750 2747
2751 atomic_inc(&ip->i_pincount); 2748 atomic_inc(&ip->i_pincount);
2752} 2749}
@@ -2779,7 +2776,7 @@ __xfs_iunpin_wait(
2779{ 2776{
2780 xfs_inode_log_item_t *iip = ip->i_itemp; 2777 xfs_inode_log_item_t *iip = ip->i_itemp;
2781 2778
2782 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2783 if (atomic_read(&ip->i_pincount) == 0) 2780 if (atomic_read(&ip->i_pincount) == 0)
2784 return; 2781 return;
2785 2782
@@ -2829,7 +2826,7 @@ xfs_iextents_copy(
2829 xfs_fsblock_t start_block; 2826 xfs_fsblock_t start_block;
2830 2827
2831 ifp = XFS_IFORK_PTR(ip, whichfork); 2828 ifp = XFS_IFORK_PTR(ip, whichfork);
2832 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 2829 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2833 ASSERT(ifp->if_bytes > 0); 2830 ASSERT(ifp->if_bytes > 0);
2834 2831
2835 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2832 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3132,7 +3129,7 @@ xfs_iflush(
3132 3129
3133 XFS_STATS_INC(xs_iflush_count); 3130 XFS_STATS_INC(xs_iflush_count);
3134 3131
3135 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3132 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3136 ASSERT(issemalocked(&(ip->i_flock))); 3133 ASSERT(issemalocked(&(ip->i_flock)));
3137 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3134 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3138 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3135 ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3297,7 +3294,7 @@ xfs_iflush_int(
3297 int first; 3294 int first;
3298#endif 3295#endif
3299 3296
3300 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3297 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3301 ASSERT(issemalocked(&(ip->i_flock))); 3298 ASSERT(issemalocked(&(ip->i_flock)));
3302 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3299 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3303 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3300 ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 93c37697a72c..0a999fee4f03 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -386,20 +386,9 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
386#define XFS_ILOCK_EXCL (1<<2) 386#define XFS_ILOCK_EXCL (1<<2)
387#define XFS_ILOCK_SHARED (1<<3) 387#define XFS_ILOCK_SHARED (1<<3)
388#define XFS_IUNLOCK_NONOTIFY (1<<4) 388#define XFS_IUNLOCK_NONOTIFY (1<<4)
389/* #define XFS_IOLOCK_NESTED (1<<5) */
390#define XFS_EXTENT_TOKEN_RD (1<<6)
391#define XFS_SIZE_TOKEN_RD (1<<7)
392#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
393#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */
394#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
395#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
396#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
397/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */
398 389
399#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 390#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
400 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ 391 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
401 | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
402 | XFS_WILLLEND)
403 392
404/* 393/*
405 * Flags for lockdep annotations. 394 * Flags for lockdep annotations.
@@ -483,6 +472,7 @@ void xfs_ilock(xfs_inode_t *, uint);
483int xfs_ilock_nowait(xfs_inode_t *, uint); 472int xfs_ilock_nowait(xfs_inode_t *, uint);
484void xfs_iunlock(xfs_inode_t *, uint); 473void xfs_iunlock(xfs_inode_t *, uint);
485void xfs_ilock_demote(xfs_inode_t *, uint); 474void xfs_ilock_demote(xfs_inode_t *, uint);
475int xfs_isilocked(xfs_inode_t *, uint);
486void xfs_iflock(xfs_inode_t *); 476void xfs_iflock(xfs_inode_t *);
487int xfs_iflock_nowait(xfs_inode_t *); 477int xfs_iflock_nowait(xfs_inode_t *);
488uint xfs_ilock_map_shared(xfs_inode_t *); 478uint xfs_ilock_map_shared(xfs_inode_t *);
@@ -534,7 +524,7 @@ int xfs_iflush(xfs_inode_t *, uint);
534void xfs_iflush_all(struct xfs_mount *); 524void xfs_iflush_all(struct xfs_mount *);
535void xfs_ichgtime(xfs_inode_t *, int); 525void xfs_ichgtime(xfs_inode_t *, int);
536xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 526xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
537void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 527void xfs_lock_inodes(xfs_inode_t **, int, uint);
538 528
539void xfs_synchronize_atime(xfs_inode_t *); 529void xfs_synchronize_atime(xfs_inode_t *);
540void xfs_mark_inode_dirty_sync(xfs_inode_t *); 530void xfs_mark_inode_dirty_sync(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 93b5db453ea2..167b33f15772 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -547,7 +547,7 @@ STATIC void
547xfs_inode_item_pin( 547xfs_inode_item_pin(
548 xfs_inode_log_item_t *iip) 548 xfs_inode_log_item_t *iip)
549{ 549{
550 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 550 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
551 xfs_ipin(iip->ili_inode); 551 xfs_ipin(iip->ili_inode);
552} 552}
553 553
@@ -664,13 +664,13 @@ xfs_inode_item_unlock(
664 664
665 ASSERT(iip != NULL); 665 ASSERT(iip != NULL);
666 ASSERT(iip->ili_inode->i_itemp != NULL); 666 ASSERT(iip->ili_inode->i_itemp != NULL);
667 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 667 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
669 XFS_ILI_IOLOCKED_EXCL)) || 669 XFS_ILI_IOLOCKED_EXCL)) ||
670 ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE)); 670 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
672 XFS_ILI_IOLOCKED_SHARED)) || 672 XFS_ILI_IOLOCKED_SHARED)) ||
673 ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS)); 673 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
674 /* 674 /*
675 * Clear the transaction pointer in the inode. 675 * Clear the transaction pointer in the inode.
676 */ 676 */
@@ -769,7 +769,7 @@ xfs_inode_item_pushbuf(
769 769
770 ip = iip->ili_inode; 770 ip = iip->ili_inode;
771 771
772 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 772 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
773 773
774 /* 774 /*
775 * The ili_pushbuf_flag keeps others from 775 * The ili_pushbuf_flag keeps others from
@@ -857,7 +857,7 @@ xfs_inode_item_push(
857 857
858 ip = iip->ili_inode; 858 ip = iip->ili_inode;
859 859
860 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 860 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
861 ASSERT(issemalocked(&(ip->i_flock))); 861 ASSERT(issemalocked(&(ip->i_flock)));
862 /* 862 /*
863 * Since we were able to lock the inode's flush lock and 863 * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb3cf1191419..7edcde691d1a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -196,14 +196,14 @@ xfs_iomap(
196 break; 196 break;
197 case BMAPI_WRITE: 197 case BMAPI_WRITE:
198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); 198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
199 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; 199 lockmode = XFS_ILOCK_EXCL;
200 if (flags & BMAPI_IGNSTATE) 200 if (flags & BMAPI_IGNSTATE)
201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
202 xfs_ilock(ip, lockmode); 202 xfs_ilock(ip, lockmode);
203 break; 203 break;
204 case BMAPI_ALLOCATE: 204 case BMAPI_ALLOCATE:
205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); 205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
206 lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; 206 lockmode = XFS_ILOCK_SHARED;
207 bmapi_flags = XFS_BMAPI_ENTIRE; 207 bmapi_flags = XFS_BMAPI_ENTIRE;
208 208
209 /* Attempt non-blocking lock */ 209 /* Attempt non-blocking lock */
@@ -523,8 +523,7 @@ xfs_iomap_write_direct(
523 goto error_out; 523 goto error_out;
524 } 524 }
525 525
526 if (unlikely(!imap.br_startblock && 526 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
527 !(XFS_IS_REALTIME_INODE(ip)))) {
528 error = xfs_cmn_err_fsblock_zero(ip, &imap); 527 error = xfs_cmn_err_fsblock_zero(ip, &imap);
529 goto error_out; 528 goto error_out;
530 } 529 }
@@ -624,7 +623,7 @@ xfs_iomap_write_delay(
624 int prealloc, fsynced = 0; 623 int prealloc, fsynced = 0;
625 int error; 624 int error;
626 625
627 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 626 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
628 627
629 /* 628 /*
630 * Make sure that the dquots are there. This doesn't hold 629 * Make sure that the dquots are there. This doesn't hold
@@ -686,8 +685,7 @@ retry:
686 goto retry; 685 goto retry;
687 } 686 }
688 687
689 if (unlikely(!imap[0].br_startblock && 688 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
690 !(XFS_IS_REALTIME_INODE(ip))))
691 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 689 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
692 690
693 *ret_imap = imap[0]; 691 *ret_imap = imap[0];
@@ -838,9 +836,9 @@ xfs_iomap_write_allocate(
838 * See if we were able to allocate an extent that 836 * See if we were able to allocate an extent that
839 * covers at least part of the callers request 837 * covers at least part of the callers request
840 */ 838 */
841 if (unlikely(!imap.br_startblock && 839 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
842 XFS_IS_REALTIME_INODE(ip)))
843 return xfs_cmn_err_fsblock_zero(ip, &imap); 840 return xfs_cmn_err_fsblock_zero(ip, &imap);
841
844 if ((offset_fsb >= imap.br_startoff) && 842 if ((offset_fsb >= imap.br_startoff) &&
845 (offset_fsb < (imap.br_startoff + 843 (offset_fsb < (imap.br_startoff +
846 imap.br_blockcount))) { 844 imap.br_blockcount))) {
@@ -934,8 +932,7 @@ xfs_iomap_write_unwritten(
934 if (error) 932 if (error)
935 return XFS_ERROR(error); 933 return XFS_ERROR(error);
936 934
937 if (unlikely(!imap.br_startblock && 935 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
938 !(XFS_IS_REALTIME_INODE(ip))))
939 return xfs_cmn_err_fsblock_zero(ip, &imap); 936 return xfs_cmn_err_fsblock_zero(ip, &imap);
940 937
941 if ((numblks_fsb = imap.br_blockcount) == 0) { 938 if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eb85bdedad0c..419de15aeb43 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -71,11 +71,6 @@ xfs_bulkstat_one_iget(
71 71
72 ASSERT(ip != NULL); 72 ASSERT(ip != NULL);
73 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 73 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
74 if (ip->i_d.di_mode == 0) {
75 *stat = BULKSTAT_RV_NOTHING;
76 error = XFS_ERROR(ENOENT);
77 goto out_iput;
78 }
79 74
80 vp = XFS_ITOV(ip); 75 vp = XFS_ITOV(ip);
81 dic = &ip->i_d; 76 dic = &ip->i_d;
@@ -124,7 +119,6 @@ xfs_bulkstat_one_iget(
124 break; 119 break;
125 } 120 }
126 121
127 out_iput:
128 xfs_iput(ip, XFS_ILOCK_SHARED); 122 xfs_iput(ip, XFS_ILOCK_SHARED);
129 return error; 123 return error;
130} 124}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2fec452afbcc..da3988453b71 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -54,8 +54,9 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
54#ifdef HAVE_PERCPU_SB 54#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int, int); 57 int);
58STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
59 int);
59STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
60 int64_t, int); 61 int64_t, int);
61STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 62STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
@@ -63,8 +64,8 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63#else 64#else
64 65
65#define xfs_icsb_destroy_counters(mp) do { } while (0) 66#define xfs_icsb_destroy_counters(mp) do { } while (0)
66#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0) 67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
67#define xfs_icsb_sync_counters(mp) do { } while (0) 68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
68#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
69 70
70#endif 71#endif
@@ -1400,7 +1401,7 @@ xfs_log_sbcount(
1400 if (!xfs_fs_writable(mp)) 1401 if (!xfs_fs_writable(mp))
1401 return 0; 1402 return 0;
1402 1403
1403 xfs_icsb_sync_counters(mp); 1404 xfs_icsb_sync_counters(mp, 0);
1404 1405
1405 /* 1406 /*
1406 * we don't need to do this if we are updating the superblock 1407 * we don't need to do this if we are updating the superblock
@@ -2026,9 +2027,9 @@ xfs_icsb_cpu_notify(
2026 case CPU_ONLINE: 2027 case CPU_ONLINE:
2027 case CPU_ONLINE_FROZEN: 2028 case CPU_ONLINE_FROZEN:
2028 xfs_icsb_lock(mp); 2029 xfs_icsb_lock(mp);
2029 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2030 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2030 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2031 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2031 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2032 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2032 xfs_icsb_unlock(mp); 2033 xfs_icsb_unlock(mp);
2033 break; 2034 break;
2034 case CPU_DEAD: 2035 case CPU_DEAD:
@@ -2048,12 +2049,9 @@ xfs_icsb_cpu_notify(
2048 2049
2049 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 2050 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
2050 2051
2051 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 2052 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
2052 XFS_ICSB_SB_LOCKED, 0); 2053 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
2053 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 2054 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
2054 XFS_ICSB_SB_LOCKED, 0);
2055 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
2056 XFS_ICSB_SB_LOCKED, 0);
2057 spin_unlock(&mp->m_sb_lock); 2055 spin_unlock(&mp->m_sb_lock);
2058 xfs_icsb_unlock(mp); 2056 xfs_icsb_unlock(mp);
2059 break; 2057 break;
@@ -2105,9 +2103,9 @@ xfs_icsb_reinit_counters(
2105 * initial balance kicks us off correctly 2103 * initial balance kicks us off correctly
2106 */ 2104 */
2107 mp->m_icsb_counters = -1; 2105 mp->m_icsb_counters = -1;
2108 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2106 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2109 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2107 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2110 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2108 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2111 xfs_icsb_unlock(mp); 2109 xfs_icsb_unlock(mp);
2112} 2110}
2113 2111
@@ -2223,7 +2221,7 @@ xfs_icsb_disable_counter(
2223 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 2221 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
2224 /* drain back to superblock */ 2222 /* drain back to superblock */
2225 2223
2226 xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 2224 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
2227 switch(field) { 2225 switch(field) {
2228 case XFS_SBS_ICOUNT: 2226 case XFS_SBS_ICOUNT:
2229 mp->m_sb.sb_icount = cnt.icsb_icount; 2227 mp->m_sb.sb_icount = cnt.icsb_icount;
@@ -2278,38 +2276,33 @@ xfs_icsb_enable_counter(
2278} 2276}
2279 2277
2280void 2278void
2281xfs_icsb_sync_counters_flags( 2279xfs_icsb_sync_counters_locked(
2282 xfs_mount_t *mp, 2280 xfs_mount_t *mp,
2283 int flags) 2281 int flags)
2284{ 2282{
2285 xfs_icsb_cnts_t cnt; 2283 xfs_icsb_cnts_t cnt;
2286 2284
2287 /* Pass 1: lock all counters */
2288 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2289 spin_lock(&mp->m_sb_lock);
2290
2291 xfs_icsb_count(mp, &cnt, flags); 2285 xfs_icsb_count(mp, &cnt, flags);
2292 2286
2293 /* Step 3: update mp->m_sb fields */
2294 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 2287 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
2295 mp->m_sb.sb_icount = cnt.icsb_icount; 2288 mp->m_sb.sb_icount = cnt.icsb_icount;
2296 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2289 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
2297 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2290 mp->m_sb.sb_ifree = cnt.icsb_ifree;
2298 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2291 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
2299 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2292 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
2300
2301 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2302 spin_unlock(&mp->m_sb_lock);
2303} 2293}
2304 2294
2305/* 2295/*
2306 * Accurate update of per-cpu counters to incore superblock 2296 * Accurate update of per-cpu counters to incore superblock
2307 */ 2297 */
2308STATIC void 2298void
2309xfs_icsb_sync_counters( 2299xfs_icsb_sync_counters(
2310 xfs_mount_t *mp) 2300 xfs_mount_t *mp,
2301 int flags)
2311{ 2302{
2312 xfs_icsb_sync_counters_flags(mp, 0); 2303 spin_lock(&mp->m_sb_lock);
2304 xfs_icsb_sync_counters_locked(mp, flags);
2305 spin_unlock(&mp->m_sb_lock);
2313} 2306}
2314 2307
2315/* 2308/*
@@ -2332,19 +2325,15 @@ xfs_icsb_sync_counters(
2332#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ 2325#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2333 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) 2326 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
2334STATIC void 2327STATIC void
2335xfs_icsb_balance_counter( 2328xfs_icsb_balance_counter_locked(
2336 xfs_mount_t *mp, 2329 xfs_mount_t *mp,
2337 xfs_sb_field_t field, 2330 xfs_sb_field_t field,
2338 int flags,
2339 int min_per_cpu) 2331 int min_per_cpu)
2340{ 2332{
2341 uint64_t count, resid; 2333 uint64_t count, resid;
2342 int weight = num_online_cpus(); 2334 int weight = num_online_cpus();
2343 uint64_t min = (uint64_t)min_per_cpu; 2335 uint64_t min = (uint64_t)min_per_cpu;
2344 2336
2345 if (!(flags & XFS_ICSB_SB_LOCKED))
2346 spin_lock(&mp->m_sb_lock);
2347
2348 /* disable counter and sync counter */ 2337 /* disable counter and sync counter */
2349 xfs_icsb_disable_counter(mp, field); 2338 xfs_icsb_disable_counter(mp, field);
2350 2339
@@ -2354,19 +2343,19 @@ xfs_icsb_balance_counter(
2354 count = mp->m_sb.sb_icount; 2343 count = mp->m_sb.sb_icount;
2355 resid = do_div(count, weight); 2344 resid = do_div(count, weight);
2356 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2345 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2357 goto out; 2346 return;
2358 break; 2347 break;
2359 case XFS_SBS_IFREE: 2348 case XFS_SBS_IFREE:
2360 count = mp->m_sb.sb_ifree; 2349 count = mp->m_sb.sb_ifree;
2361 resid = do_div(count, weight); 2350 resid = do_div(count, weight);
2362 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2351 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2363 goto out; 2352 return;
2364 break; 2353 break;
2365 case XFS_SBS_FDBLOCKS: 2354 case XFS_SBS_FDBLOCKS:
2366 count = mp->m_sb.sb_fdblocks; 2355 count = mp->m_sb.sb_fdblocks;
2367 resid = do_div(count, weight); 2356 resid = do_div(count, weight);
2368 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) 2357 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
2369 goto out; 2358 return;
2370 break; 2359 break;
2371 default: 2360 default:
2372 BUG(); 2361 BUG();
@@ -2375,9 +2364,17 @@ xfs_icsb_balance_counter(
2375 } 2364 }
2376 2365
2377 xfs_icsb_enable_counter(mp, field, count, resid); 2366 xfs_icsb_enable_counter(mp, field, count, resid);
2378out: 2367}
2379 if (!(flags & XFS_ICSB_SB_LOCKED)) 2368
2380 spin_unlock(&mp->m_sb_lock); 2369STATIC void
2370xfs_icsb_balance_counter(
2371 xfs_mount_t *mp,
2372 xfs_sb_field_t fields,
2373 int min_per_cpu)
2374{
2375 spin_lock(&mp->m_sb_lock);
2376 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
2377 spin_unlock(&mp->m_sb_lock);
2381} 2378}
2382 2379
2383STATIC int 2380STATIC int
@@ -2484,7 +2481,7 @@ slow_path:
2484 * we are done. 2481 * we are done.
2485 */ 2482 */
2486 if (ret != ENOSPC) 2483 if (ret != ENOSPC)
2487 xfs_icsb_balance_counter(mp, field, 0, 0); 2484 xfs_icsb_balance_counter(mp, field, 0);
2488 xfs_icsb_unlock(mp); 2485 xfs_icsb_unlock(mp);
2489 return ret; 2486 return ret;
2490 2487
@@ -2508,7 +2505,7 @@ balance_counter:
2508 * will either succeed through the fast path or slow path without 2505 * will either succeed through the fast path or slow path without
2509 * another balance operation being required. 2506 * another balance operation being required.
2510 */ 2507 */
2511 xfs_icsb_balance_counter(mp, field, 0, delta); 2508 xfs_icsb_balance_counter(mp, field, delta);
2512 xfs_icsb_unlock(mp); 2509 xfs_icsb_unlock(mp);
2513 goto again; 2510 goto again;
2514} 2511}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ed575110ff0..63e0693a358a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -206,17 +206,18 @@ typedef struct xfs_icsb_cnts {
206 206
207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ 207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
208 208
209#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */
210#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ 209#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
211 210
212extern int xfs_icsb_init_counters(struct xfs_mount *); 211extern int xfs_icsb_init_counters(struct xfs_mount *);
213extern void xfs_icsb_reinit_counters(struct xfs_mount *); 212extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); 213extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 215
216#else 216#else
217#define xfs_icsb_init_counters(mp) (0) 217#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 218#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) 219#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
220#endif 221#endif
221 222
222typedef struct xfs_ail { 223typedef struct xfs_ail {
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index ee371890d85d..d8063e1ad298 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -55,85 +55,32 @@ xfs_rename_unlock4(
55 55
56 xfs_iunlock(i_tab[0], lock_mode); 56 xfs_iunlock(i_tab[0], lock_mode);
57 for (i = 1; i < 4; i++) { 57 for (i = 1; i < 4; i++) {
58 if (i_tab[i] == NULL) { 58 if (i_tab[i] == NULL)
59 break; 59 break;
60 } 60
61 /* 61 /*
62 * Watch out for duplicate entries in the table. 62 * Watch out for duplicate entries in the table.
63 */ 63 */
64 if (i_tab[i] != i_tab[i-1]) { 64 if (i_tab[i] != i_tab[i-1])
65 xfs_iunlock(i_tab[i], lock_mode); 65 xfs_iunlock(i_tab[i], lock_mode);
66 }
67 } 66 }
68} 67}
69 68
70#ifdef DEBUG
71int xfs_rename_skip, xfs_rename_nskip;
72#endif
73
74/* 69/*
75 * The following routine will acquire the locks required for a rename 70 * Enter all inodes for a rename transaction into a sorted array.
76 * operation. The code understands the semantics of renames and will
77 * validate that name1 exists under dp1 & that name2 may or may not
78 * exist under dp2.
79 *
80 * We are renaming dp1/name1 to dp2/name2.
81 *
82 * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success.
83 */ 71 */
84STATIC int 72STATIC void
85xfs_lock_for_rename( 73xfs_sort_for_rename(
86 xfs_inode_t *dp1, /* in: old (source) directory inode */ 74 xfs_inode_t *dp1, /* in: old (source) directory inode */
87 xfs_inode_t *dp2, /* in: new (target) directory inode */ 75 xfs_inode_t *dp2, /* in: new (target) directory inode */
88 xfs_inode_t *ip1, /* in: inode of old entry */ 76 xfs_inode_t *ip1, /* in: inode of old entry */
89 struct xfs_name *name2, /* in: new entry name */ 77 xfs_inode_t *ip2, /* in: inode of new entry, if it
90 xfs_inode_t **ipp2, /* out: inode of new entry, if it
91 already exists, NULL otherwise. */ 78 already exists, NULL otherwise. */
92 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 79 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
93 int *num_inodes) /* out: number of inodes in array */ 80 int *num_inodes) /* out: number of inodes in array */
94{ 81{
95 xfs_inode_t *ip2 = NULL;
96 xfs_inode_t *temp; 82 xfs_inode_t *temp;
97 xfs_ino_t inum1, inum2;
98 int error;
99 int i, j; 83 int i, j;
100 uint lock_mode;
101 int diff_dirs = (dp1 != dp2);
102
103 /*
104 * First, find out the current inums of the entries so that we
105 * can determine the initial locking order. We'll have to
106 * sanity check stuff after all the locks have been acquired
107 * to see if we still have the right inodes, directories, etc.
108 */
109 lock_mode = xfs_ilock_map_shared(dp1);
110 IHOLD(ip1);
111 xfs_itrace_ref(ip1);
112
113 inum1 = ip1->i_ino;
114
115 /*
116 * Unlock dp1 and lock dp2 if they are different.
117 */
118 if (diff_dirs) {
119 xfs_iunlock_map_shared(dp1, lock_mode);
120 lock_mode = xfs_ilock_map_shared(dp2);
121 }
122
123 error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
124 if (error == ENOENT) { /* target does not need to exist. */
125 inum2 = 0;
126 } else if (error) {
127 /*
128 * If dp2 and dp1 are the same, the next line unlocks dp1.
129 * Got it?
130 */
131 xfs_iunlock_map_shared(dp2, lock_mode);
132 IRELE (ip1);
133 return error;
134 } else {
135 xfs_itrace_ref(ip2);
136 }
137 84
138 /* 85 /*
139 * i_tab contains a list of pointers to inodes. We initialize 86 * i_tab contains a list of pointers to inodes. We initialize
@@ -145,21 +92,20 @@ xfs_lock_for_rename(
145 i_tab[0] = dp1; 92 i_tab[0] = dp1;
146 i_tab[1] = dp2; 93 i_tab[1] = dp2;
147 i_tab[2] = ip1; 94 i_tab[2] = ip1;
148 if (inum2 == 0) { 95 if (ip2) {
149 *num_inodes = 3;
150 i_tab[3] = NULL;
151 } else {
152 *num_inodes = 4; 96 *num_inodes = 4;
153 i_tab[3] = ip2; 97 i_tab[3] = ip2;
98 } else {
99 *num_inodes = 3;
100 i_tab[3] = NULL;
154 } 101 }
155 *ipp2 = i_tab[3];
156 102
157 /* 103 /*
158 * Sort the elements via bubble sort. (Remember, there are at 104 * Sort the elements via bubble sort. (Remember, there are at
159 * most 4 elements to sort, so this is adequate.) 105 * most 4 elements to sort, so this is adequate.)
160 */ 106 */
161 for (i=0; i < *num_inodes; i++) { 107 for (i = 0; i < *num_inodes; i++) {
162 for (j=1; j < *num_inodes; j++) { 108 for (j = 1; j < *num_inodes; j++) {
163 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 109 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
164 temp = i_tab[j]; 110 temp = i_tab[j];
165 i_tab[j] = i_tab[j-1]; 111 i_tab[j] = i_tab[j-1];
@@ -167,30 +113,6 @@ xfs_lock_for_rename(
167 } 113 }
168 } 114 }
169 } 115 }
170
171 /*
172 * We have dp2 locked. If it isn't first, unlock it.
173 * If it is first, tell xfs_lock_inodes so it can skip it
174 * when locking. if dp1 == dp2, xfs_lock_inodes will skip both
175 * since they are equal. xfs_lock_inodes needs all these inodes
176 * so that it can unlock and retry if there might be a dead-lock
177 * potential with the log.
178 */
179
180 if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) {
181#ifdef DEBUG
182 xfs_rename_skip++;
183#endif
184 xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED);
185 } else {
186#ifdef DEBUG
187 xfs_rename_nskip++;
188#endif
189 xfs_iunlock_map_shared(dp2, lock_mode);
190 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
191 }
192
193 return 0;
194} 116}
195 117
196/* 118/*
@@ -202,10 +124,10 @@ xfs_rename(
202 struct xfs_name *src_name, 124 struct xfs_name *src_name,
203 xfs_inode_t *src_ip, 125 xfs_inode_t *src_ip,
204 xfs_inode_t *target_dp, 126 xfs_inode_t *target_dp,
205 struct xfs_name *target_name) 127 struct xfs_name *target_name,
128 xfs_inode_t *target_ip)
206{ 129{
207 xfs_trans_t *tp; 130 xfs_trans_t *tp = NULL;
208 xfs_inode_t *target_ip;
209 xfs_mount_t *mp = src_dp->i_mount; 131 xfs_mount_t *mp = src_dp->i_mount;
210 int new_parent; /* moving to a new dir */ 132 int new_parent; /* moving to a new dir */
211 int src_is_directory; /* src_name is a directory */ 133 int src_is_directory; /* src_name is a directory */
@@ -215,9 +137,7 @@ xfs_rename(
215 int cancel_flags; 137 int cancel_flags;
216 int committed; 138 int committed;
217 xfs_inode_t *inodes[4]; 139 xfs_inode_t *inodes[4];
218 int target_ip_dropped = 0; /* dropped target_ip link? */
219 int spaceres; 140 int spaceres;
220 int target_link_zero = 0;
221 int num_inodes; 141 int num_inodes;
222 142
223 xfs_itrace_entry(src_dp); 143 xfs_itrace_entry(src_dp);
@@ -230,64 +150,27 @@ xfs_rename(
230 target_dp, DM_RIGHT_NULL, 150 target_dp, DM_RIGHT_NULL,
231 src_name->name, target_name->name, 151 src_name->name, target_name->name,
232 0, 0, 0); 152 0, 0, 0);
233 if (error) { 153 if (error)
234 return error; 154 return error;
235 }
236 } 155 }
237 /* Return through std_return after this point. */ 156 /* Return through std_return after this point. */
238 157
239 /* 158 new_parent = (src_dp != target_dp);
240 * Lock all the participating inodes. Depending upon whether 159 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
241 * the target_name exists in the target directory, and
242 * whether the target directory is the same as the source
243 * directory, we can lock from 2 to 4 inodes.
244 * xfs_lock_for_rename() will return ENOENT if src_name
245 * does not exist in the source directory.
246 */
247 tp = NULL;
248 error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
249 &target_ip, inodes, &num_inodes);
250 if (error) {
251 /*
252 * We have nothing locked, no inode references, and
253 * no transaction, so just get out.
254 */
255 goto std_return;
256 }
257
258 ASSERT(src_ip != NULL);
259 160
260 if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 161 if (src_is_directory) {
261 /* 162 /*
262 * Check for link count overflow on target_dp 163 * Check for link count overflow on target_dp
263 */ 164 */
264 if (target_ip == NULL && (src_dp != target_dp) && 165 if (target_ip == NULL && new_parent &&
265 target_dp->i_d.di_nlink >= XFS_MAXLINK) { 166 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
266 error = XFS_ERROR(EMLINK); 167 error = XFS_ERROR(EMLINK);
267 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); 168 goto std_return;
268 goto rele_return;
269 } 169 }
270 } 170 }
271 171
272 /* 172 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
273 * If we are using project inheritance, we only allow renames 173 inodes, &num_inodes);
274 * into our tree when the project IDs are the same; else the
275 * tree quota mechanism would be circumvented.
276 */
277 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
278 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
279 error = XFS_ERROR(EXDEV);
280 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
281 goto rele_return;
282 }
283
284 new_parent = (src_dp != target_dp);
285 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
286
287 /*
288 * Drop the locks on our inodes so that we can start the transaction.
289 */
290 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
291 174
292 XFS_BMAP_INIT(&free_list, &first_block); 175 XFS_BMAP_INIT(&free_list, &first_block);
293 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 176 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
@@ -302,7 +185,7 @@ xfs_rename(
302 } 185 }
303 if (error) { 186 if (error) {
304 xfs_trans_cancel(tp, 0); 187 xfs_trans_cancel(tp, 0);
305 goto rele_return; 188 goto std_return;
306 } 189 }
307 190
308 /* 191 /*
@@ -310,13 +193,29 @@ xfs_rename(
310 */ 193 */
311 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { 194 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
312 xfs_trans_cancel(tp, cancel_flags); 195 xfs_trans_cancel(tp, cancel_flags);
313 goto rele_return; 196 goto std_return;
314 } 197 }
315 198
316 /* 199 /*
317 * Reacquire the inode locks we dropped above. 200 * Lock all the participating inodes. Depending upon whether
201 * the target_name exists in the target directory, and
202 * whether the target directory is the same as the source
203 * directory, we can lock from 2 to 4 inodes.
204 */
205 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
206
207 /*
208 * If we are using project inheritance, we only allow renames
209 * into our tree when the project IDs are the same; else the
210 * tree quota mechanism would be circumvented.
318 */ 211 */
319 xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); 212 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
213 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
214 error = XFS_ERROR(EXDEV);
215 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
216 xfs_trans_cancel(tp, cancel_flags);
217 goto std_return;
218 }
320 219
321 /* 220 /*
322 * Join all the inodes to the transaction. From this point on, 221 * Join all the inodes to the transaction. From this point on,
@@ -328,17 +227,17 @@ xfs_rename(
328 */ 227 */
329 IHOLD(src_dp); 228 IHOLD(src_dp);
330 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 229 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
230
331 if (new_parent) { 231 if (new_parent) {
332 IHOLD(target_dp); 232 IHOLD(target_dp);
333 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 233 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
334 } 234 }
335 if ((src_ip != src_dp) && (src_ip != target_dp)) { 235
336 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 236 IHOLD(src_ip);
337 } 237 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
338 if ((target_ip != NULL) && 238
339 (target_ip != src_ip) && 239 if (target_ip) {
340 (target_ip != src_dp) && 240 IHOLD(target_ip);
341 (target_ip != target_dp)) {
342 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 241 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
343 } 242 }
344 243
@@ -412,7 +311,6 @@ xfs_rename(
412 error = xfs_droplink(tp, target_ip); 311 error = xfs_droplink(tp, target_ip);
413 if (error) 312 if (error)
414 goto abort_return; 313 goto abort_return;
415 target_ip_dropped = 1;
416 314
417 if (src_is_directory) { 315 if (src_is_directory) {
418 /* 316 /*
@@ -422,10 +320,6 @@ xfs_rename(
422 if (error) 320 if (error)
423 goto abort_return; 321 goto abort_return;
424 } 322 }
425
426 /* Do this test while we still hold the locks */
427 target_link_zero = (target_ip)->i_d.di_nlink==0;
428
429 } /* target_ip != NULL */ 323 } /* target_ip != NULL */
430 324
431 /* 325 /*
@@ -492,15 +386,6 @@ xfs_rename(
492 } 386 }
493 387
494 /* 388 /*
495 * If there was a target inode, take an extra reference on
496 * it here so that it doesn't go to xfs_inactive() from
497 * within the commit.
498 */
499 if (target_ip != NULL) {
500 IHOLD(target_ip);
501 }
502
503 /*
504 * If this is a synchronous mount, make sure that the 389 * If this is a synchronous mount, make sure that the
505 * rename transaction goes to disk before returning to 390 * rename transaction goes to disk before returning to
506 * the user. 391 * the user.
@@ -509,30 +394,11 @@ xfs_rename(
509 xfs_trans_set_sync(tp); 394 xfs_trans_set_sync(tp);
510 } 395 }
511 396
512 /*
513 * Take refs. for vop_link_removed calls below. No need to worry
514 * about directory refs. because the caller holds them.
515 *
516 * Do holds before the xfs_bmap_finish since it might rele them down
517 * to zero.
518 */
519
520 if (target_ip_dropped)
521 IHOLD(target_ip);
522 IHOLD(src_ip);
523
524 error = xfs_bmap_finish(&tp, &free_list, &committed); 397 error = xfs_bmap_finish(&tp, &free_list, &committed);
525 if (error) { 398 if (error) {
526 xfs_bmap_cancel(&free_list); 399 xfs_bmap_cancel(&free_list);
527 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 400 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
528 XFS_TRANS_ABORT)); 401 XFS_TRANS_ABORT));
529 if (target_ip != NULL) {
530 IRELE(target_ip);
531 }
532 if (target_ip_dropped) {
533 IRELE(target_ip);
534 }
535 IRELE(src_ip);
536 goto std_return; 402 goto std_return;
537 } 403 }
538 404
@@ -541,15 +407,6 @@ xfs_rename(
541 * the vnode references. 407 * the vnode references.
542 */ 408 */
543 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 409 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
544 if (target_ip != NULL)
545 IRELE(target_ip);
546 /*
547 * Let interposed file systems know about removed links.
548 */
549 if (target_ip_dropped)
550 IRELE(target_ip);
551
552 IRELE(src_ip);
553 410
554 /* Fall through to std_return with error = 0 or errno from 411 /* Fall through to std_return with error = 0 or errno from
555 * xfs_trans_commit */ 412 * xfs_trans_commit */
@@ -571,11 +428,4 @@ std_return:
571 xfs_bmap_cancel(&free_list); 428 xfs_bmap_cancel(&free_list);
572 xfs_trans_cancel(tp, cancel_flags); 429 xfs_trans_cancel(tp, cancel_flags);
573 goto std_return; 430 goto std_return;
574
575 rele_return:
576 IRELE(src_ip);
577 if (target_ip != NULL) {
578 IRELE(target_ip);
579 }
580 goto std_return;
581} 431}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b8db1d5cde5a..4c70bf5e9985 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,13 +111,13 @@ xfs_trans_iget(
111 */ 111 */
112 ASSERT(ip->i_itemp != NULL); 112 ASSERT(ip->i_itemp != NULL);
113 ASSERT(lock_flags & XFS_ILOCK_EXCL); 113 ASSERT(lock_flags & XFS_ILOCK_EXCL);
114 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 114 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
116 ismrlocked(&ip->i_iolock, MR_UPDATE)); 116 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); 118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
120 ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS))); 120 xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); 122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
123 123
@@ -185,7 +185,7 @@ xfs_trans_ijoin(
185 xfs_inode_log_item_t *iip; 185 xfs_inode_log_item_t *iip;
186 186
187 ASSERT(ip->i_transp == NULL); 187 ASSERT(ip->i_transp == NULL);
188 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 188 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
189 ASSERT(lock_flags & XFS_ILOCK_EXCL); 189 ASSERT(lock_flags & XFS_ILOCK_EXCL);
190 if (ip->i_itemp == NULL) 190 if (ip->i_itemp == NULL)
191 xfs_inode_item_init(ip, ip->i_mount); 191 xfs_inode_item_init(ip, ip->i_mount);
@@ -232,7 +232,7 @@ xfs_trans_ihold(
232{ 232{
233 ASSERT(ip->i_transp == tp); 233 ASSERT(ip->i_transp == tp);
234 ASSERT(ip->i_itemp != NULL); 234 ASSERT(ip->i_itemp != NULL);
235 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 235 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
236 236
237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD; 237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
238} 238}
@@ -257,7 +257,7 @@ xfs_trans_log_inode(
257 257
258 ASSERT(ip->i_transp == tp); 258 ASSERT(ip->i_transp == tp);
259 ASSERT(ip->i_itemp != NULL); 259 ASSERT(ip->i_itemp != NULL);
260 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 260 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
261 261
262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); 262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
263 ASSERT(lidp != NULL); 263 ASSERT(lidp != NULL);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 2b8dc7e40772..98e5f110ba5f 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -41,49 +41,6 @@
41#include "xfs_utils.h" 41#include "xfs_utils.h"
42 42
43 43
44int
45xfs_dir_lookup_int(
46 xfs_inode_t *dp,
47 uint lock_mode,
48 struct xfs_name *name,
49 xfs_ino_t *inum,
50 xfs_inode_t **ipp)
51{
52 int error;
53
54 xfs_itrace_entry(dp);
55
56 error = xfs_dir_lookup(NULL, dp, name, inum);
57 if (!error) {
58 /*
59 * Unlock the directory. We do this because we can't
60 * hold the directory lock while doing the vn_get()
61 * in xfs_iget(). Doing so could cause us to hold
62 * a lock while waiting for the inode to finish
63 * being inactive while it's waiting for a log
64 * reservation in the inactive routine.
65 */
66 xfs_iunlock(dp, lock_mode);
67 error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
68 xfs_ilock(dp, lock_mode);
69
70 if (error) {
71 *ipp = NULL;
72 } else if ((*ipp)->i_d.di_mode == 0) {
73 /*
74 * The inode has been freed. Something is
75 * wrong so just get out of here.
76 */
77 xfs_iunlock(dp, lock_mode);
78 xfs_iput_new(*ipp, 0);
79 *ipp = NULL;
80 xfs_ilock(dp, lock_mode);
81 error = XFS_ERROR(ENOENT);
82 }
83 }
84 return error;
85}
86
87/* 44/*
88 * Allocates a new inode from disk and return a pointer to the 45 * Allocates a new inode from disk and return a pointer to the
89 * incore copy. This routine will internally commit the current 46 * incore copy. This routine will internally commit the current
@@ -310,7 +267,7 @@ xfs_bump_ino_vers2(
310{ 267{
311 xfs_mount_t *mp; 268 xfs_mount_t *mp;
312 269
313 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 270 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
314 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 271 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
315 272
316 ip->i_d.di_version = XFS_DINODE_VERSION_2; 273 ip->i_d.di_version = XFS_DINODE_VERSION_2;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 175b126d2cab..f316cb85d8e2 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,8 +21,6 @@
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) 21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) 22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23 23
24extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
25 xfs_ino_t *, xfs_inode_t **);
26extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 24extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
27extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 25extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
28 xfs_dev_t, cred_t *, prid_t, int, 26 xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fc48158fe479..30bacd8bb0e5 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -186,6 +186,7 @@ xfs_cleanup(void)
186 kmem_zone_destroy(xfs_efi_zone); 186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone); 187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone); 188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
189} 190}
190 191
191/* 192/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6650601c64f7..70702a60b4bb 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -76,132 +76,6 @@ xfs_open(
76} 76}
77 77
78/* 78/*
79 * xfs_getattr
80 */
81int
82xfs_getattr(
83 xfs_inode_t *ip,
84 bhv_vattr_t *vap,
85 int flags)
86{
87 bhv_vnode_t *vp = XFS_ITOV(ip);
88 xfs_mount_t *mp = ip->i_mount;
89
90 xfs_itrace_entry(ip);
91
92 if (XFS_FORCED_SHUTDOWN(mp))
93 return XFS_ERROR(EIO);
94
95 if (!(flags & ATTR_LAZY))
96 xfs_ilock(ip, XFS_ILOCK_SHARED);
97
98 vap->va_size = XFS_ISIZE(ip);
99 if (vap->va_mask == XFS_AT_SIZE)
100 goto all_done;
101
102 vap->va_nblocks =
103 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
104 vap->va_nodeid = ip->i_ino;
105#if XFS_BIG_INUMS
106 vap->va_nodeid += mp->m_inoadd;
107#endif
108 vap->va_nlink = ip->i_d.di_nlink;
109
110 /*
111 * Quick exit for non-stat callers
112 */
113 if ((vap->va_mask &
114 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
115 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
116 goto all_done;
117
118 /*
119 * Copy from in-core inode.
120 */
121 vap->va_mode = ip->i_d.di_mode;
122 vap->va_uid = ip->i_d.di_uid;
123 vap->va_gid = ip->i_d.di_gid;
124 vap->va_projid = ip->i_d.di_projid;
125
126 /*
127 * Check vnode type block/char vs. everything else.
128 */
129 switch (ip->i_d.di_mode & S_IFMT) {
130 case S_IFBLK:
131 case S_IFCHR:
132 vap->va_rdev = ip->i_df.if_u2.if_rdev;
133 vap->va_blocksize = BLKDEV_IOSIZE;
134 break;
135 default:
136 vap->va_rdev = 0;
137
138 if (!(XFS_IS_REALTIME_INODE(ip))) {
139 vap->va_blocksize = xfs_preferred_iosize(mp);
140 } else {
141
142 /*
143 * If the file blocks are being allocated from a
144 * realtime partition, then return the inode's
145 * realtime extent size or the realtime volume's
146 * extent size.
147 */
148 vap->va_blocksize =
149 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
150 }
151 break;
152 }
153
154 vn_atime_to_timespec(vp, &vap->va_atime);
155 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
156 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
157 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
158 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
159
160 /*
161 * Exit for stat callers. See if any of the rest of the fields
162 * to be filled in are needed.
163 */
164 if ((vap->va_mask &
165 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
166 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
167 goto all_done;
168
169 /*
170 * Convert di_flags to xflags.
171 */
172 vap->va_xflags = xfs_ip2xflags(ip);
173
174 /*
175 * Exit for inode revalidate. See if any of the rest of
176 * the fields to be filled in are needed.
177 */
178 if ((vap->va_mask &
179 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
180 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
181 goto all_done;
182
183 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
184 vap->va_nextents =
185 (ip->i_df.if_flags & XFS_IFEXTENTS) ?
186 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
187 ip->i_d.di_nextents;
188 if (ip->i_afp)
189 vap->va_anextents =
190 (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
191 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
192 ip->i_d.di_anextents;
193 else
194 vap->va_anextents = 0;
195 vap->va_gen = ip->i_d.di_gen;
196
197 all_done:
198 if (!(flags & ATTR_LAZY))
199 xfs_iunlock(ip, XFS_ILOCK_SHARED);
200 return 0;
201}
202
203
204/*
205 * xfs_setattr 79 * xfs_setattr
206 */ 80 */
207int 81int
@@ -211,7 +85,6 @@ xfs_setattr(
211 int flags, 85 int flags,
212 cred_t *credp) 86 cred_t *credp)
213{ 87{
214 bhv_vnode_t *vp = XFS_ITOV(ip);
215 xfs_mount_t *mp = ip->i_mount; 88 xfs_mount_t *mp = ip->i_mount;
216 xfs_trans_t *tp; 89 xfs_trans_t *tp;
217 int mask; 90 int mask;
@@ -222,7 +95,6 @@ xfs_setattr(
222 gid_t gid=0, igid=0; 95 gid_t gid=0, igid=0;
223 int timeflags = 0; 96 int timeflags = 0;
224 xfs_prid_t projid=0, iprojid=0; 97 xfs_prid_t projid=0, iprojid=0;
225 int mandlock_before, mandlock_after;
226 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
227 int file_owner; 99 int file_owner;
228 int need_iolock = 1; 100 int need_iolock = 1;
@@ -383,7 +255,7 @@ xfs_setattr(
383 m |= S_ISGID; 255 m |= S_ISGID;
384#if 0 256#if 0
385 /* Linux allows this, Irix doesn't. */ 257 /* Linux allows this, Irix doesn't. */
386 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
387 m |= S_ISVTX; 259 m |= S_ISVTX;
388#endif 260#endif
389 if (m && !capable(CAP_FSETID)) 261 if (m && !capable(CAP_FSETID))
@@ -461,10 +333,10 @@ xfs_setattr(
461 goto error_return; 333 goto error_return;
462 } 334 }
463 335
464 if (VN_ISDIR(vp)) { 336 if (S_ISDIR(ip->i_d.di_mode)) {
465 code = XFS_ERROR(EISDIR); 337 code = XFS_ERROR(EISDIR);
466 goto error_return; 338 goto error_return;
467 } else if (!VN_ISREG(vp)) { 339 } else if (!S_ISREG(ip->i_d.di_mode)) {
468 code = XFS_ERROR(EINVAL); 340 code = XFS_ERROR(EINVAL);
469 goto error_return; 341 goto error_return;
470 } 342 }
@@ -626,9 +498,6 @@ xfs_setattr(
626 xfs_trans_ihold(tp, ip); 498 xfs_trans_ihold(tp, ip);
627 } 499 }
628 500
629 /* determine whether mandatory locking mode changes */
630 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
631
632 /* 501 /*
633 * Truncate file. Must have write permission and not be a directory. 502 * Truncate file. Must have write permission and not be a directory.
634 */ 503 */
@@ -858,13 +727,6 @@ xfs_setattr(
858 code = xfs_trans_commit(tp, commit_flags); 727 code = xfs_trans_commit(tp, commit_flags);
859 } 728 }
860 729
861 /*
862 * If the (regular) file's mandatory locking mode changed, then
863 * notify the vnode. We do this under the inode lock to prevent
864 * racing calls to vop_vnode_change.
865 */
866 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
867
868 xfs_iunlock(ip, lock_flags); 730 xfs_iunlock(ip, lock_flags);
869 731
870 /* 732 /*
@@ -1443,7 +1305,7 @@ xfs_inactive_attrs(
1443 int error; 1305 int error;
1444 xfs_mount_t *mp; 1306 xfs_mount_t *mp;
1445 1307
1446 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1308 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1447 tp = *tpp; 1309 tp = *tpp;
1448 mp = ip->i_mount; 1310 mp = ip->i_mount;
1449 ASSERT(ip->i_d.di_forkoff != 0); 1311 ASSERT(ip->i_d.di_forkoff != 0);
@@ -1491,7 +1353,7 @@ xfs_release(
1491 xfs_mount_t *mp = ip->i_mount; 1353 xfs_mount_t *mp = ip->i_mount;
1492 int error; 1354 int error;
1493 1355
1494 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) 1356 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
1495 return 0; 1357 return 0;
1496 1358
1497 /* If this is a read-only mount, don't do this (would generate I/O) */ 1359 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1774,8 +1636,7 @@ xfs_lookup(
1774 struct xfs_name *name, 1636 struct xfs_name *name,
1775 xfs_inode_t **ipp) 1637 xfs_inode_t **ipp)
1776{ 1638{
1777 xfs_inode_t *ip; 1639 xfs_ino_t inum;
1778 xfs_ino_t e_inum;
1779 int error; 1640 int error;
1780 uint lock_mode; 1641 uint lock_mode;
1781 1642
@@ -1785,12 +1646,21 @@ xfs_lookup(
1785 return XFS_ERROR(EIO); 1646 return XFS_ERROR(EIO);
1786 1647
1787 lock_mode = xfs_ilock_map_shared(dp); 1648 lock_mode = xfs_ilock_map_shared(dp);
1788 error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); 1649 error = xfs_dir_lookup(NULL, dp, name, &inum);
1789 if (!error) {
1790 *ipp = ip;
1791 xfs_itrace_ref(ip);
1792 }
1793 xfs_iunlock_map_shared(dp, lock_mode); 1650 xfs_iunlock_map_shared(dp, lock_mode);
1651
1652 if (error)
1653 goto out;
1654
1655 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1656 if (error)
1657 goto out;
1658
1659 xfs_itrace_ref(*ipp);
1660 return 0;
1661
1662 out:
1663 *ipp = NULL;
1794 return error; 1664 return error;
1795} 1665}
1796 1666
@@ -1906,7 +1776,7 @@ xfs_create(
1906 * It is locked (and joined to the transaction). 1776 * It is locked (and joined to the transaction).
1907 */ 1777 */
1908 1778
1909 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 1779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1910 1780
1911 /* 1781 /*
1912 * Now we join the directory inode to the transaction. We do not do it 1782 * Now we join the directory inode to the transaction. We do not do it
@@ -2112,7 +1982,7 @@ again:
2112 1982
2113 ips[0] = ip; 1983 ips[0] = ip;
2114 ips[1] = dp; 1984 ips[1] = dp;
2115 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 1985 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2116 } 1986 }
2117 /* else e_inum == dp->i_ino */ 1987 /* else e_inum == dp->i_ino */
2118 /* This can happen if we're asked to lock /x/.. 1988 /* This can happen if we're asked to lock /x/..
@@ -2160,7 +2030,6 @@ void
2160xfs_lock_inodes( 2030xfs_lock_inodes(
2161 xfs_inode_t **ips, 2031 xfs_inode_t **ips,
2162 int inodes, 2032 int inodes,
2163 int first_locked,
2164 uint lock_mode) 2033 uint lock_mode)
2165{ 2034{
2166 int attempts = 0, i, j, try_lock; 2035 int attempts = 0, i, j, try_lock;
@@ -2168,13 +2037,8 @@ xfs_lock_inodes(
2168 2037
2169 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2038 ASSERT(ips && (inodes >= 2)); /* we need at least two */
2170 2039
2171 if (first_locked) { 2040 try_lock = 0;
2172 try_lock = 1; 2041 i = 0;
2173 i = 1;
2174 } else {
2175 try_lock = 0;
2176 i = 0;
2177 }
2178 2042
2179again: 2043again:
2180 for (; i < inodes; i++) { 2044 for (; i < inodes; i++) {
@@ -2298,29 +2162,14 @@ xfs_remove(
2298 return error; 2162 return error;
2299 } 2163 }
2300 2164
2301 /*
2302 * We need to get a reference to ip before we get our log
2303 * reservation. The reason for this is that we cannot call
2304 * xfs_iget for an inode for which we do not have a reference
2305 * once we've acquired a log reservation. This is because the
2306 * inode we are trying to get might be in xfs_inactive going
2307 * for a log reservation. Since we'll have to wait for the
2308 * inactive code to complete before returning from xfs_iget,
2309 * we need to make sure that we don't have log space reserved
2310 * when we call xfs_iget. Instead we get an unlocked reference
2311 * to the inode before getting our log reservation.
2312 */
2313 IHOLD(ip);
2314
2315 xfs_itrace_entry(ip); 2165 xfs_itrace_entry(ip);
2316 xfs_itrace_ref(ip); 2166 xfs_itrace_ref(ip);
2317 2167
2318 error = XFS_QM_DQATTACH(mp, dp, 0); 2168 error = XFS_QM_DQATTACH(mp, dp, 0);
2319 if (!error && dp != ip) 2169 if (!error)
2320 error = XFS_QM_DQATTACH(mp, ip, 0); 2170 error = XFS_QM_DQATTACH(mp, ip, 0);
2321 if (error) { 2171 if (error) {
2322 REMOVE_DEBUG_TRACE(__LINE__); 2172 REMOVE_DEBUG_TRACE(__LINE__);
2323 IRELE(ip);
2324 goto std_return; 2173 goto std_return;
2325 } 2174 }
2326 2175
@@ -2347,7 +2196,6 @@ xfs_remove(
2347 ASSERT(error != ENOSPC); 2196 ASSERT(error != ENOSPC);
2348 REMOVE_DEBUG_TRACE(__LINE__); 2197 REMOVE_DEBUG_TRACE(__LINE__);
2349 xfs_trans_cancel(tp, 0); 2198 xfs_trans_cancel(tp, 0);
2350 IRELE(ip);
2351 return error; 2199 return error;
2352 } 2200 }
2353 2201
@@ -2355,7 +2203,6 @@ xfs_remove(
2355 if (error) { 2203 if (error) {
2356 REMOVE_DEBUG_TRACE(__LINE__); 2204 REMOVE_DEBUG_TRACE(__LINE__);
2357 xfs_trans_cancel(tp, cancel_flags); 2205 xfs_trans_cancel(tp, cancel_flags);
2358 IRELE(ip);
2359 goto std_return; 2206 goto std_return;
2360 } 2207 }
2361 2208
@@ -2363,23 +2210,18 @@ xfs_remove(
2363 * At this point, we've gotten both the directory and the entry 2210 * At this point, we've gotten both the directory and the entry
2364 * inodes locked. 2211 * inodes locked.
2365 */ 2212 */
2213 IHOLD(ip);
2366 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2214 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2367 if (dp != ip) { 2215
2368 /* 2216 IHOLD(dp);
2369 * Increment vnode ref count only in this case since 2217 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2370 * there's an extra vnode reference in the case where
2371 * dp == ip.
2372 */
2373 IHOLD(dp);
2374 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2375 }
2376 2218
2377 /* 2219 /*
2378 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2220 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2379 */ 2221 */
2380 XFS_BMAP_INIT(&free_list, &first_block); 2222 XFS_BMAP_INIT(&free_list, &first_block);
2381 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2223 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2382 &first_block, &free_list, 0); 2224 &first_block, &free_list, resblks);
2383 if (error) { 2225 if (error) {
2384 ASSERT(error != ENOENT); 2226 ASSERT(error != ENOENT);
2385 REMOVE_DEBUG_TRACE(__LINE__); 2227 REMOVE_DEBUG_TRACE(__LINE__);
@@ -2402,12 +2244,6 @@ xfs_remove(
2402 link_zero = (ip)->i_d.di_nlink==0; 2244 link_zero = (ip)->i_d.di_nlink==0;
2403 2245
2404 /* 2246 /*
2405 * Take an extra ref on the inode so that it doesn't
2406 * go to xfs_inactive() from within the commit.
2407 */
2408 IHOLD(ip);
2409
2410 /*
2411 * If this is a synchronous mount, make sure that the 2247 * If this is a synchronous mount, make sure that the
2412 * remove transaction goes to disk before returning to 2248 * remove transaction goes to disk before returning to
2413 * the user. 2249 * the user.
@@ -2423,10 +2259,8 @@ xfs_remove(
2423 } 2259 }
2424 2260
2425 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2261 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2426 if (error) { 2262 if (error)
2427 IRELE(ip);
2428 goto std_return; 2263 goto std_return;
2429 }
2430 2264
2431 /* 2265 /*
2432 * If we are using filestreams, kill the stream association. 2266 * If we are using filestreams, kill the stream association.
@@ -2438,7 +2272,6 @@ xfs_remove(
2438 xfs_filestream_deassociate(ip); 2272 xfs_filestream_deassociate(ip);
2439 2273
2440 xfs_itrace_exit(ip); 2274 xfs_itrace_exit(ip);
2441 IRELE(ip);
2442 2275
2443/* Fall through to std_return with error = 0 */ 2276/* Fall through to std_return with error = 0 */
2444 std_return: 2277 std_return:
@@ -2467,8 +2300,6 @@ xfs_remove(
2467 cancel_flags |= XFS_TRANS_ABORT; 2300 cancel_flags |= XFS_TRANS_ABORT;
2468 xfs_trans_cancel(tp, cancel_flags); 2301 xfs_trans_cancel(tp, cancel_flags);
2469 2302
2470 IRELE(ip);
2471
2472 goto std_return; 2303 goto std_return;
2473} 2304}
2474 2305
@@ -2536,7 +2367,7 @@ xfs_link(
2536 ips[1] = sip; 2367 ips[1] = sip;
2537 } 2368 }
2538 2369
2539 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2370 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2540 2371
2541 /* 2372 /*
2542 * Increment vnode ref counts since xfs_trans_commit & 2373 * Increment vnode ref counts since xfs_trans_commit &
@@ -2840,7 +2671,6 @@ xfs_rmdir(
2840 struct xfs_name *name, 2671 struct xfs_name *name,
2841 xfs_inode_t *cdp) 2672 xfs_inode_t *cdp)
2842{ 2673{
2843 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2844 xfs_mount_t *mp = dp->i_mount; 2674 xfs_mount_t *mp = dp->i_mount;
2845 xfs_trans_t *tp; 2675 xfs_trans_t *tp;
2846 int error; 2676 int error;
@@ -2866,27 +2696,12 @@ xfs_rmdir(
2866 } 2696 }
2867 2697
2868 /* 2698 /*
2869 * We need to get a reference to cdp before we get our log
2870 * reservation. The reason for this is that we cannot call
2871 * xfs_iget for an inode for which we do not have a reference
2872 * once we've acquired a log reservation. This is because the
2873 * inode we are trying to get might be in xfs_inactive going
2874 * for a log reservation. Since we'll have to wait for the
2875 * inactive code to complete before returning from xfs_iget,
2876 * we need to make sure that we don't have log space reserved
2877 * when we call xfs_iget. Instead we get an unlocked reference
2878 * to the inode before getting our log reservation.
2879 */
2880 IHOLD(cdp);
2881
2882 /*
2883 * Get the dquots for the inodes. 2699 * Get the dquots for the inodes.
2884 */ 2700 */
2885 error = XFS_QM_DQATTACH(mp, dp, 0); 2701 error = XFS_QM_DQATTACH(mp, dp, 0);
2886 if (!error && dp != cdp) 2702 if (!error)
2887 error = XFS_QM_DQATTACH(mp, cdp, 0); 2703 error = XFS_QM_DQATTACH(mp, cdp, 0);
2888 if (error) { 2704 if (error) {
2889 IRELE(cdp);
2890 REMOVE_DEBUG_TRACE(__LINE__); 2705 REMOVE_DEBUG_TRACE(__LINE__);
2891 goto std_return; 2706 goto std_return;
2892 } 2707 }
@@ -2913,7 +2728,6 @@ xfs_rmdir(
2913 if (error) { 2728 if (error) {
2914 ASSERT(error != ENOSPC); 2729 ASSERT(error != ENOSPC);
2915 cancel_flags = 0; 2730 cancel_flags = 0;
2916 IRELE(cdp);
2917 goto error_return; 2731 goto error_return;
2918 } 2732 }
2919 XFS_BMAP_INIT(&free_list, &first_block); 2733 XFS_BMAP_INIT(&free_list, &first_block);
@@ -2927,21 +2741,13 @@ xfs_rmdir(
2927 error = xfs_lock_dir_and_entry(dp, cdp); 2741 error = xfs_lock_dir_and_entry(dp, cdp);
2928 if (error) { 2742 if (error) {
2929 xfs_trans_cancel(tp, cancel_flags); 2743 xfs_trans_cancel(tp, cancel_flags);
2930 IRELE(cdp);
2931 goto std_return; 2744 goto std_return;
2932 } 2745 }
2933 2746
2747 IHOLD(dp);
2934 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2748 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2935 if (dp != cdp) {
2936 /*
2937 * Only increment the parent directory vnode count if
2938 * we didn't bump it in looking up cdp. The only time
2939 * we don't bump it is when we're looking up ".".
2940 */
2941 VN_HOLD(dir_vp);
2942 }
2943 2749
2944 xfs_itrace_ref(cdp); 2750 IHOLD(cdp);
2945 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 2751 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2946 2752
2947 ASSERT(cdp->i_d.di_nlink >= 2); 2753 ASSERT(cdp->i_d.di_nlink >= 2);
@@ -2995,12 +2801,6 @@ xfs_rmdir(
2995 last_cdp_link = (cdp)->i_d.di_nlink==0; 2801 last_cdp_link = (cdp)->i_d.di_nlink==0;
2996 2802
2997 /* 2803 /*
2998 * Take an extra ref on the child vnode so that it
2999 * does not go to xfs_inactive() from within the commit.
3000 */
3001 IHOLD(cdp);
3002
3003 /*
3004 * If this is a synchronous mount, make sure that the 2804 * If this is a synchronous mount, make sure that the
3005 * rmdir transaction goes to disk before returning to 2805 * rmdir transaction goes to disk before returning to
3006 * the user. 2806 * the user.
@@ -3014,19 +2814,15 @@ xfs_rmdir(
3014 xfs_bmap_cancel(&free_list); 2814 xfs_bmap_cancel(&free_list);
3015 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2815 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
3016 XFS_TRANS_ABORT)); 2816 XFS_TRANS_ABORT));
3017 IRELE(cdp);
3018 goto std_return; 2817 goto std_return;
3019 } 2818 }
3020 2819
3021 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2820 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3022 if (error) { 2821 if (error) {
3023 IRELE(cdp);
3024 goto std_return; 2822 goto std_return;
3025 } 2823 }
3026 2824
3027 2825
3028 IRELE(cdp);
3029
3030 /* Fall through to std_return with error = 0 or the errno 2826 /* Fall through to std_return with error = 0 or the errno
3031 * from xfs_trans_commit. */ 2827 * from xfs_trans_commit. */
3032 std_return: 2828 std_return:
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 24c53923dc2c..8abe8f186e20 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,7 +15,6 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
19int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
20 struct cred *credp); 19 struct cred *credp);
21int xfs_readlink(struct xfs_inode *ip, char *link); 20int xfs_readlink(struct xfs_inode *ip, char *link);
@@ -48,9 +47,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
48 struct cred *credp, int attr_flags); 47 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 48int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 49 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name); 50 struct xfs_name *target_name, struct xfs_inode *target_ip);
52int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 51int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
53 int *valuelenp, int flags, cred_t *cred); 52 int *valuelenp, int flags);
54int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 53int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
55 int valuelen, int flags); 54 int valuelen, int flags);
56int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 55int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
@@ -61,9 +60,6 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
61ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 60ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
62 const struct iovec *iovp, unsigned int segs, 61 const struct iovec *iovp, unsigned int segs,
63 loff_t *offset, int ioflags); 62 loff_t *offset, int ioflags);
64ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp,
65 loff_t *offset, int ioflags, size_t count,
66 read_actor_t actor, void *target);
67ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp, 63ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
68 loff_t *ppos, struct pipe_inode_info *pipe, size_t count, 64 loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
69 int flags, int ioflags); 65 int flags, int ioflags);