aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2008-05-06 13:55:32 -0400
committerSteve French <sfrench@us.ibm.com>2008-05-06 13:55:32 -0400
commita815752ac0ffdb910e92958d41d28f4fb28e5296 (patch)
treea3aa16a282354da0debe8e3a3a7ed8aac6e54001 /fs
parent5ade9deaaa3e1f7291467d97b238648e43eae15e (diff)
parenta15306365a16380f3bafee9e181ba01231d4acd7 (diff)
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/adfs/adfs.h2
-rw-r--r--fs/adfs/dir_f.c4
-rw-r--r--fs/affs/file.c25
-rw-r--r--fs/affs/super.c3
-rw-r--r--fs/afs/afs_cm.h3
-rw-r--r--fs/afs/cell.c2
-rw-r--r--fs/afs/cmservice.c133
-rw-r--r--fs/afs/dir.c4
-rw-r--r--fs/afs/internal.h8
-rw-r--r--fs/afs/proc.c33
-rw-r--r--fs/aio.c79
-rw-r--r--fs/anon_inodes.c13
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/expire.c26
-rw-r--r--fs/autofs4/root.c40
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/befs/linuxvfs.c6
-rw-r--r--fs/bfs/bfs.h2
-rw-r--r--fs/binfmt_aout.c12
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/binfmt_elf_fdpic.c14
-rw-r--r--fs/binfmt_em86.c2
-rw-r--r--fs/binfmt_flat.c9
-rw-r--r--fs/binfmt_misc.c6
-rw-r--r--fs/binfmt_script.c2
-rw-r--r--fs/bio.c90
-rw-r--r--fs/buffer.c24
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/cifs_debug.c4
-rw-r--r--fs/coda/coda_linux.c2
-rw-r--r--fs/coda/dir.c6
-rw-r--r--fs/compat.c15
-rw-r--r--fs/compat_ioctl.c4
-rw-r--r--fs/configfs/file.c2
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/debugfs/file.c2
-rw-r--r--fs/devpts/inode.c43
-rw-r--r--fs/dlm/lockspace.c2
-rw-r--r--fs/dnotify.c11
-rw-r--r--fs/drop_caches.c14
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c33
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h102
-rw-r--r--fs/ecryptfs/file.c2
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/keystore.c89
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ecryptfs/messaging.c524
-rw-r--r--fs/ecryptfs/miscdev.c598
-rw-r--r--fs/ecryptfs/mmap.c18
-rw-r--r--fs/ecryptfs/netlink.c33
-rw-r--r--fs/ecryptfs/read_write.c16
-rw-r--r--fs/eventfd.c15
-rw-r--r--fs/eventpoll.c57
-rw-r--r--fs/exec.c24
-rw-r--r--fs/exportfs/expfs.c10
-rw-r--r--fs/ext3/inode.c14
-rw-r--r--fs/ext4/acl.c12
-rw-r--r--fs/ext4/balloc.c33
-rw-r--r--fs/ext4/bitmap.c2
-rw-r--r--fs/ext4/dir.c4
-rw-r--r--fs/ext4/ext4.h1205
-rw-r--r--fs/ext4/ext4_extents.h232
-rw-r--r--fs/ext4/ext4_i.h167
-rw-r--r--fs/ext4/ext4_jbd2.c14
-rw-r--r--fs/ext4/ext4_jbd2.h231
-rw-r--r--fs/ext4/ext4_sb.h148
-rw-r--r--fs/ext4/extents.c354
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsync.c7
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/ialloc.c44
-rw-r--r--fs/ext4/inode.c57
-rw-r--r--fs/ext4/ioctl.c16
-rw-r--r--fs/ext4/mballoc.c459
-rw-r--r--fs/ext4/mballoc.h304
-rw-r--r--fs/ext4/migrate.c43
-rw-r--r--fs/ext4/namei.c44
-rw-r--r--fs/ext4/resize.c83
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/ext4/symlink.c2
-rw-r--r--fs/ext4/xattr.c40
-rw-r--r--fs/ext4/xattr.h7
-rw-r--r--fs/ext4/xattr_security.c4
-rw-r--r--fs/ext4/xattr_trusted.c4
-rw-r--r--fs/ext4/xattr_user.c4
-rw-r--r--fs/fat/cache.c6
-rw-r--r--fs/fat/fatent.c2
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fcntl.c1
-rw-r--r--fs/file.c23
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/freevxfs/vxfs_extern.h5
-rw-r--r--fs/freevxfs/vxfs_immed.c1
-rw-r--r--fs/freevxfs/vxfs_inode.c5
-rw-r--r--fs/fs-writeback.c78
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/dev.c23
-rw-r--r--fs/fuse/dir.c86
-rw-r--r--fs/fuse/file.c633
-rw-r--r--fs/fuse/fuse_i.h52
-rw-r--r--fs/fuse/inode.c90
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c2
-rw-r--r--fs/gfs2/util.h18
-rw-r--r--fs/hfs/btree.c10
-rw-r--r--fs/hfs/mdb.c2
-rw-r--r--fs/hfs/super.c6
-rw-r--r--fs/hfsplus/btree.c10
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/inode.c3
-rw-r--r--fs/hfsplus/options.c3
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c6
-rw-r--r--fs/inotify_user.c2
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/isofs/dir.c8
-rw-r--r--fs/isofs/isofs.h12
-rw-r--r--fs/isofs/namei.c7
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c55
-rw-r--r--fs/jbd2/revoke.c165
-rw-r--r--fs/jbd2/transaction.c41
-rw-r--r--fs/jffs2/build.c31
-rw-r--r--fs/jffs2/debug.h8
-rw-r--r--fs/jffs2/dir.c42
-rw-r--r--fs/jffs2/erase.c9
-rw-r--r--fs/jffs2/fs.c14
-rw-r--r--fs/jffs2/gc.c8
-rw-r--r--fs/jffs2/nodelist.h5
-rw-r--r--fs/jffs2/nodemgmt.c2
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/readinode.c16
-rw-r--r--fs/jffs2/scan.c9
-rw-r--r--fs/jffs2/super.c15
-rw-r--r--fs/jffs2/wbuf.c2
-rw-r--r--fs/jffs2/write.c17
-rw-r--r--fs/jffs2/xattr.c8
-rw-r--r--fs/jfs/jfs_debug.c4
-rw-r--r--fs/lockd/clntproc.c2
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/locks.c1
-rw-r--r--fs/msdos/namei.c2
-rw-r--r--fs/namei.c9
-rw-r--r--fs/namespace.c17
-rw-r--r--fs/ncpfs/ncplib_kernel.c39
-rw-r--r--fs/nfs/client.c20
-rw-r--r--fs/nfs/super.c26
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfsctl.c4
-rw-r--r--fs/ntfs/debug.h6
-rw-r--r--fs/ntfs/mft.c6
-rw-r--r--fs/ocfs2/cluster/sys.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c8
-rw-r--r--fs/ocfs2/dlm/dlmfs.c2
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/open.c1
-rw-r--r--fs/partitions/ldm.c8
-rw-r--r--fs/pipe.c17
-rw-r--r--fs/proc/array.c7
-rw-r--r--fs/proc/base.c114
-rw-r--r--fs/proc/generic.c149
-rw-r--r--fs/proc/inode.c69
-rw-r--r--fs/proc/internal.h4
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/proc_misc.c68
-rw-r--r--fs/proc/proc_net.c11
-rw-r--r--fs/proc/proc_sysctl.c52
-rw-r--r--fs/proc/proc_tty.c87
-rw-r--r--fs/proc/root.c14
-rw-r--r--fs/proc/task_mmu.c34
-rw-r--r--fs/proc/task_nommu.c35
-rw-r--r--fs/quota_v2.c4
-rw-r--r--fs/ramfs/file-mmu.c3
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/ramfs/internal.h1
-rw-r--r--fs/reiserfs/journal.c50
-rw-r--r--fs/reiserfs/procfs.c9
-rw-r--r--fs/select.c15
-rw-r--r--fs/signalfd.c17
-rw-r--r--fs/smbfs/smb_debug.h6
-rw-r--r--fs/splice.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sync.c2
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/sysfs/inode.c4
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysv/sysv.h8
-rw-r--r--fs/timerfd.c12
-rw-r--r--fs/udf/super.c4
-rw-r--r--fs/utimes.c17
-rw-r--r--fs/vfat/namei.c2
-rw-r--r--fs/xattr.c41
-rw-r--r--fs/xfs/Kconfig13
-rw-r--r--fs/xfs/linux-2.6/mrlock.h60
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c75
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c21
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h24
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c27
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c6
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h5
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/support/debug.h2
-rw-r--r--fs/xfs/xfs.h2
-rw-r--r--fs/xfs/xfs_acl.c53
-rw-r--r--fs/xfs/xfs_attr.c93
-rw-r--r--fs/xfs/xfs_attr.h6
-rw-r--r--fs/xfs/xfs_bmap.c1
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/xfs_iget.c140
-rw-r--r--fs/xfs/xfs_inode.c25
-rw-r--r--fs/xfs/xfs_inode.h16
-rw-r--r--fs/xfs/xfs_inode_item.c12
-rw-r--r--fs/xfs/xfs_iomap.c19
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_mount.c83
-rw-r--r--fs/xfs/xfs_mount.h7
-rw-r--r--fs/xfs/xfs_rename.c252
-rw-r--r--fs/xfs/xfs_trans_inode.c12
-rw-r--r--fs/xfs/xfs_utils.c45
-rw-r--r--fs/xfs/xfs_utils.h2
-rw-r--r--fs/xfs/xfs_vfsops.c1
-rw-r--r--fs/xfs/xfs_vnodeops.c274
-rw-r--r--fs/xfs/xfs_vnodeops.h8
244 files changed, 6688 insertions, 3120 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2e43d46f65d6..cf12c403b8c7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
1005 1005
1006config HUGETLBFS 1006config HUGETLBFS
1007 bool "HugeTLB file system support" 1007 bool "HugeTLB file system support"
1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN 1008 depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
1009 (S390 && 64BIT) || BROKEN
1009 help 1010 help
1010 hugetlbfs is a filesystem backing for HugeTLB pages, based on 1011 hugetlbfs is a filesystem backing for HugeTLB pages, based on
1011 ramfs. For architectures that support it, say Y here and read 1012 ramfs. For architectures that support it, say Y here and read
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 853845abcca6..55e8ee1900a5 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC
41 It is also possible to run FDPIC ELF binaries on MMU linux also. 41 It is also possible to run FDPIC ELF binaries on MMU linux also.
42 42
43config BINFMT_FLAT 43config BINFMT_FLAT
44 tristate "Kernel support for flat binaries" 44 bool "Kernel support for flat binaries"
45 depends on !MMU 45 depends on !MMU
46 help 46 help
47 Support uClinux FLAT format binaries. 47 Support uClinux FLAT format binaries.
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 936f2af39c43..831157502d5a 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb);
75/* Misc */ 75/* Misc */
76void __adfs_error(struct super_block *sb, const char *function, 76void __adfs_error(struct super_block *sb, const char *function,
77 const char *fmt, ...); 77 const char *fmt, ...);
78#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt) 78#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
79 79
80/* super.c */ 80/* super.c */
81 81
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index b9b2b27b68c3..ea7df2146921 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir)
122 ptr.ptr8 = bufoff(bh, i); 122 ptr.ptr8 = bufoff(bh, i);
123 end.ptr8 = ptr.ptr8 + last - i; 123 end.ptr8 = ptr.ptr8 + last - i;
124 124
125 do 125 do {
126 dircheck = *ptr.ptr8++ ^ ror13(dircheck); 126 dircheck = *ptr.ptr8++ ^ ror13(dircheck);
127 while (ptr.ptr8 < end.ptr8); 127 } while (ptr.ptr8 < end.ptr8);
128 } 128 }
129 129
130 /* 130 /*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6e0c9399200e..1a4f092f24ef 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -325,8 +325,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); 325 pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
326 326
327 327
328 if (block > (sector_t)0x7fffffffUL) 328 BUG_ON(block > (sector_t)0x7fffffffUL);
329 BUG();
330 329
331 if (block >= AFFS_I(inode)->i_blkcnt) { 330 if (block >= AFFS_I(inode)->i_blkcnt) {
332 if (block > AFFS_I(inode)->i_blkcnt || !create) 331 if (block > AFFS_I(inode)->i_blkcnt || !create)
@@ -493,8 +492,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
493 u32 tmp; 492 u32 tmp;
494 493
495 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); 494 pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
496 if (from > to || to > PAGE_CACHE_SIZE) 495 BUG_ON(from > to || to > PAGE_CACHE_SIZE);
497 BUG();
498 kmap(page); 496 kmap(page);
499 data = page_address(page); 497 data = page_address(page);
500 bsize = AFFS_SB(sb)->s_data_blksize; 498 bsize = AFFS_SB(sb)->s_data_blksize;
@@ -507,8 +505,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign
507 if (IS_ERR(bh)) 505 if (IS_ERR(bh))
508 return PTR_ERR(bh); 506 return PTR_ERR(bh);
509 tmp = min(bsize - boff, to - from); 507 tmp = min(bsize - boff, to - from);
510 if (from + tmp > to || tmp > bsize) 508 BUG_ON(from + tmp > to || tmp > bsize);
511 BUG();
512 memcpy(data + from, AFFS_DATA(bh) + boff, tmp); 509 memcpy(data + from, AFFS_DATA(bh) + boff, tmp);
513 affs_brelse(bh); 510 affs_brelse(bh);
514 bidx++; 511 bidx++;
@@ -540,10 +537,9 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
540 if (IS_ERR(bh)) 537 if (IS_ERR(bh))
541 return PTR_ERR(bh); 538 return PTR_ERR(bh);
542 tmp = min(bsize - boff, newsize - size); 539 tmp = min(bsize - boff, newsize - size);
543 if (boff + tmp > bsize || tmp > bsize) 540 BUG_ON(boff + tmp > bsize || tmp > bsize);
544 BUG();
545 memset(AFFS_DATA(bh) + boff, 0, tmp); 541 memset(AFFS_DATA(bh) + boff, 0, tmp);
546 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 542 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
547 affs_fix_checksum(sb, bh); 543 affs_fix_checksum(sb, bh);
548 mark_buffer_dirty_inode(bh, inode); 544 mark_buffer_dirty_inode(bh, inode);
549 size += tmp; 545 size += tmp;
@@ -560,8 +556,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
560 if (IS_ERR(bh)) 556 if (IS_ERR(bh))
561 goto out; 557 goto out;
562 tmp = min(bsize, newsize - size); 558 tmp = min(bsize, newsize - size);
563 if (tmp > bsize) 559 BUG_ON(tmp > bsize);
564 BUG();
565 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 560 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
566 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); 561 AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino);
567 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); 562 AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx);
@@ -683,10 +678,9 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
683 if (IS_ERR(bh)) 678 if (IS_ERR(bh))
684 return PTR_ERR(bh); 679 return PTR_ERR(bh);
685 tmp = min(bsize - boff, to - from); 680 tmp = min(bsize - boff, to - from);
686 if (boff + tmp > bsize || tmp > bsize) 681 BUG_ON(boff + tmp > bsize || tmp > bsize);
687 BUG();
688 memcpy(AFFS_DATA(bh) + boff, data + from, tmp); 682 memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
689 AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); 683 be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
690 affs_fix_checksum(sb, bh); 684 affs_fix_checksum(sb, bh);
691 mark_buffer_dirty_inode(bh, inode); 685 mark_buffer_dirty_inode(bh, inode);
692 written += tmp; 686 written += tmp;
@@ -732,8 +726,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
732 if (IS_ERR(bh)) 726 if (IS_ERR(bh))
733 goto out; 727 goto out;
734 tmp = min(bsize, to - from); 728 tmp = min(bsize, to - from);
735 if (tmp > bsize) 729 BUG_ON(tmp > bsize);
736 BUG();
737 memcpy(AFFS_DATA(bh), data + from, tmp); 730 memcpy(AFFS_DATA(bh), data + from, tmp);
738 if (buffer_new(bh)) { 731 if (buffer_new(bh)) {
739 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); 732 AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d2dc047cb479..01d25d532541 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -199,7 +199,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
199 case Opt_prefix: 199 case Opt_prefix:
200 /* Free any previous prefix */ 200 /* Free any previous prefix */
201 kfree(*prefix); 201 kfree(*prefix);
202 *prefix = NULL;
203 *prefix = match_strdup(&args[0]); 202 *prefix = match_strdup(&args[0]);
204 if (!*prefix) 203 if (!*prefix)
205 return 0; 204 return 0;
@@ -233,6 +232,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
233 break; 232 break;
234 case Opt_volume: { 233 case Opt_volume: {
235 char *vol = match_strdup(&args[0]); 234 char *vol = match_strdup(&args[0]);
235 if (!vol)
236 return 0;
236 strlcpy(volume, vol, 32); 237 strlcpy(volume, vol, 32);
237 kfree(vol); 238 kfree(vol);
238 break; 239 break;
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 7b4d4fab4c80..255f5dd6040c 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -24,7 +24,8 @@ enum AFS_CM_Operations {
24 CBGetXStatsVersion = 209, /* get version of extended statistics */ 24 CBGetXStatsVersion = 209, /* get version of extended statistics */
25 CBGetXStats = 210, /* get contents of extended statistics data */ 25 CBGetXStats = 210, /* get contents of extended statistics data */
26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ 26 CBInitCallBackState3 = 213, /* initialise callback state, version 3 */
27 CBGetCapabilities = 65538, /* get client capabilities */ 27 CBProbeUuid = 214, /* check the client hasn't rebooted */
28 CBTellMeAboutYourself = 65538, /* get client capabilities */
28}; 29};
29 30
30#define AFS_CAP_ERROR_TRANSLATION 0x1 31#define AFS_CAP_ERROR_TRANSLATION 0x1
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 584bb0f9c36a..5e1df14e16b1 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,7 +20,7 @@
20DECLARE_RWSEM(afs_proc_cells_sem); 20DECLARE_RWSEM(afs_proc_cells_sem);
21LIST_HEAD(afs_proc_cells); 21LIST_HEAD(afs_proc_cells);
22 22
23static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); 23static LIST_HEAD(afs_cells);
24static DEFINE_RWLOCK(afs_cells_lock); 24static DEFINE_RWLOCK(afs_cells_lock);
25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ 25static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); 26static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 47b71c8947f9..eb765489164f 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
26 struct sk_buff *, bool); 26 struct sk_buff *, bool);
27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); 27static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); 28static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
29static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, 29static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
30 bool); 30static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
31 struct sk_buff *, bool);
31static void afs_cm_destructor(struct afs_call *); 32static void afs_cm_destructor(struct afs_call *);
32 33
33/* 34/*
@@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = {
71}; 72};
72 73
73/* 74/*
74 * CB.GetCapabilities operation type 75 * CB.ProbeUuid operation type
75 */ 76 */
76static const struct afs_call_type afs_SRXCBGetCapabilites = { 77static const struct afs_call_type afs_SRXCBProbeUuid = {
77 .name = "CB.GetCapabilities", 78 .name = "CB.ProbeUuid",
78 .deliver = afs_deliver_cb_get_capabilities, 79 .deliver = afs_deliver_cb_probe_uuid,
80 .abort_to_error = afs_abort_to_error,
81 .destructor = afs_cm_destructor,
82};
83
84/*
85 * CB.TellMeAboutYourself operation type
86 */
87static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
88 .name = "CB.TellMeAboutYourself",
89 .deliver = afs_deliver_cb_tell_me_about_yourself,
79 .abort_to_error = afs_abort_to_error, 90 .abort_to_error = afs_abort_to_error,
80 .destructor = afs_cm_destructor, 91 .destructor = afs_cm_destructor,
81}; 92};
@@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
103 case CBProbe: 114 case CBProbe:
104 call->type = &afs_SRXCBProbe; 115 call->type = &afs_SRXCBProbe;
105 return true; 116 return true;
106 case CBGetCapabilities: 117 case CBTellMeAboutYourself:
107 call->type = &afs_SRXCBGetCapabilites; 118 call->type = &afs_SRXCBTellMeAboutYourself;
108 return true; 119 return true;
109 default: 120 default:
110 return false; 121 return false;
@@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
393} 404}
394 405
395/* 406/*
407 * allow the fileserver to quickly find out if the fileserver has been rebooted
408 */
409static void SRXAFSCB_ProbeUuid(struct work_struct *work)
410{
411 struct afs_call *call = container_of(work, struct afs_call, work);
412 struct afs_uuid *r = call->request;
413
414 struct {
415 __be32 match;
416 } reply;
417
418 _enter("");
419
420
421 if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
422 reply.match = htonl(0);
423 else
424 reply.match = htonl(1);
425
426 afs_send_simple_reply(call, &reply, sizeof(reply));
427 _leave("");
428}
429
430/*
431 * deliver request data to a CB.ProbeUuid call
432 */
433static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
434 bool last)
435{
436 struct afs_uuid *r;
437 unsigned loop;
438 __be32 *b;
439 int ret;
440
441 _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
442
443 if (skb->len > 0)
444 return -EBADMSG;
445 if (!last)
446 return 0;
447
448 switch (call->unmarshall) {
449 case 0:
450 call->offset = 0;
451 call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
452 if (!call->buffer)
453 return -ENOMEM;
454 call->unmarshall++;
455
456 case 1:
457 _debug("extract UUID");
458 ret = afs_extract_data(call, skb, last, call->buffer,
459 11 * sizeof(__be32));
460 switch (ret) {
461 case 0: break;
462 case -EAGAIN: return 0;
463 default: return ret;
464 }
465
466 _debug("unmarshall UUID");
467 call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
468 if (!call->request)
469 return -ENOMEM;
470
471 b = call->buffer;
472 r = call->request;
473 r->time_low = ntohl(b[0]);
474 r->time_mid = ntohl(b[1]);
475 r->time_hi_and_version = ntohl(b[2]);
476 r->clock_seq_hi_and_reserved = ntohl(b[3]);
477 r->clock_seq_low = ntohl(b[4]);
478
479 for (loop = 0; loop < 6; loop++)
480 r->node[loop] = ntohl(b[loop + 5]);
481
482 call->offset = 0;
483 call->unmarshall++;
484
485 case 2:
486 _debug("trailer");
487 if (skb->len != 0)
488 return -EBADMSG;
489 break;
490 }
491
492 if (!last)
493 return 0;
494
495 call->state = AFS_CALL_REPLYING;
496
497 INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
498 schedule_work(&call->work);
499 return 0;
500}
501
502/*
396 * allow the fileserver to ask about the cache manager's capabilities 503 * allow the fileserver to ask about the cache manager's capabilities
397 */ 504 */
398static void SRXAFSCB_GetCapabilities(struct work_struct *work) 505static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
399{ 506{
400 struct afs_interface *ifs; 507 struct afs_interface *ifs;
401 struct afs_call *call = container_of(work, struct afs_call, work); 508 struct afs_call *call = container_of(work, struct afs_call, work);
@@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work)
456} 563}
457 564
458/* 565/*
459 * deliver request data to a CB.GetCapabilities call 566 * deliver request data to a CB.TellMeAboutYourself call
460 */ 567 */
461static int afs_deliver_cb_get_capabilities(struct afs_call *call, 568static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
462 struct sk_buff *skb, bool last) 569 struct sk_buff *skb, bool last)
463{ 570{
464 _enter(",{%u},%d", skb->len, last); 571 _enter(",{%u},%d", skb->len, last);
465 572
@@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call,
471 /* no unmarshalling required */ 578 /* no unmarshalling required */
472 call->state = AFS_CALL_REPLYING; 579 call->state = AFS_CALL_REPLYING;
473 580
474 INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); 581 INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself);
475 schedule_work(&call->work); 582 schedule_work(&call->work);
476 return 0; 583 return 0;
477} 584}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b58af8f18bc4..dfda03d4397d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
140 140
141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) { 141 if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n", 142 printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
143 __FUNCTION__, dir->i_ino, qty, 143 __func__, dir->i_ino, qty,
144 ntohs(dbuf->blocks[0].pagehdr.npages)); 144 ntohs(dbuf->blocks[0].pagehdr.npages));
145 goto error; 145 goto error;
146 } 146 }
@@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
159 for (tmp = 0; tmp < qty; tmp++) { 159 for (tmp = 0; tmp < qty; tmp++) {
160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) { 160 if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n", 161 printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
162 __FUNCTION__, dir->i_ino, tmp, qty, 162 __func__, dir->i_ino, tmp, qty,
163 ntohs(dbuf->blocks[tmp].pagehdr.magic)); 163 ntohs(dbuf->blocks[tmp].pagehdr.magic));
164 goto error; 164 goto error;
165 } 165 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index eec41c76de72..7102824ba847 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...)
757{ 757{
758} 758}
759 759
760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 760#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 761#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) 762#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
763 763
764 764
@@ -791,8 +791,8 @@ do { \
791} while (0) 791} while (0)
792 792
793#else 793#else
794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) 794#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) 795#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__) 796#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
797#endif 797#endif
798 798
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 846c7615ac9e..9f7d1ae70269 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = {
41 .write = afs_proc_cells_write, 41 .write = afs_proc_cells_write,
42 .llseek = seq_lseek, 42 .llseek = seq_lseek,
43 .release = seq_release, 43 .release = seq_release,
44 .owner = THIS_MODULE,
44}; 45};
45 46
46static int afs_proc_rootcell_open(struct inode *inode, struct file *file); 47static int afs_proc_rootcell_open(struct inode *inode, struct file *file);
@@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = {
56 .read = afs_proc_rootcell_read, 57 .read = afs_proc_rootcell_read,
57 .write = afs_proc_rootcell_write, 58 .write = afs_proc_rootcell_write,
58 .llseek = no_llseek, 59 .llseek = no_llseek,
59 .release = afs_proc_rootcell_release 60 .release = afs_proc_rootcell_release,
61 .owner = THIS_MODULE,
60}; 62};
61 63
62static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); 64static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
@@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = {
80 .read = seq_read, 82 .read = seq_read,
81 .llseek = seq_lseek, 83 .llseek = seq_lseek,
82 .release = afs_proc_cell_volumes_release, 84 .release = afs_proc_cell_volumes_release,
85 .owner = THIS_MODULE,
83}; 86};
84 87
85static int afs_proc_cell_vlservers_open(struct inode *inode, 88static int afs_proc_cell_vlservers_open(struct inode *inode,
@@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = {
104 .read = seq_read, 107 .read = seq_read,
105 .llseek = seq_lseek, 108 .llseek = seq_lseek,
106 .release = afs_proc_cell_vlservers_release, 109 .release = afs_proc_cell_vlservers_release,
110 .owner = THIS_MODULE,
107}; 111};
108 112
109static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); 113static int afs_proc_cell_servers_open(struct inode *inode, struct file *file);
@@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = {
127 .read = seq_read, 131 .read = seq_read,
128 .llseek = seq_lseek, 132 .llseek = seq_lseek,
129 .release = afs_proc_cell_servers_release, 133 .release = afs_proc_cell_servers_release,
134 .owner = THIS_MODULE,
130}; 135};
131 136
132/* 137/*
@@ -143,17 +148,13 @@ int afs_proc_init(void)
143 goto error_dir; 148 goto error_dir;
144 proc_afs->owner = THIS_MODULE; 149 proc_afs->owner = THIS_MODULE;
145 150
146 p = create_proc_entry("cells", 0, proc_afs); 151 p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
147 if (!p) 152 if (!p)
148 goto error_cells; 153 goto error_cells;
149 p->proc_fops = &afs_proc_cells_fops;
150 p->owner = THIS_MODULE;
151 154
152 p = create_proc_entry("rootcell", 0, proc_afs); 155 p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
153 if (!p) 156 if (!p)
154 goto error_rootcell; 157 goto error_rootcell;
155 p->proc_fops = &afs_proc_rootcell_fops;
156 p->owner = THIS_MODULE;
157 158
158 _leave(" = 0"); 159 _leave(" = 0");
159 return 0; 160 return 0;
@@ -395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell)
395 if (!cell->proc_dir) 396 if (!cell->proc_dir)
396 goto error_dir; 397 goto error_dir;
397 398
398 p = create_proc_entry("servers", 0, cell->proc_dir); 399 p = proc_create_data("servers", 0, cell->proc_dir,
400 &afs_proc_cell_servers_fops, cell);
399 if (!p) 401 if (!p)
400 goto error_servers; 402 goto error_servers;
401 p->proc_fops = &afs_proc_cell_servers_fops;
402 p->owner = THIS_MODULE;
403 p->data = cell;
404 403
405 p = create_proc_entry("vlservers", 0, cell->proc_dir); 404 p = proc_create_data("vlservers", 0, cell->proc_dir,
405 &afs_proc_cell_vlservers_fops, cell);
406 if (!p) 406 if (!p)
407 goto error_vlservers; 407 goto error_vlservers;
408 p->proc_fops = &afs_proc_cell_vlservers_fops;
409 p->owner = THIS_MODULE;
410 p->data = cell;
411 408
412 p = create_proc_entry("volumes", 0, cell->proc_dir); 409 p = proc_create_data("volumes", 0, cell->proc_dir,
410 &afs_proc_cell_volumes_fops, cell);
413 if (!p) 411 if (!p)
414 goto error_volumes; 412 goto error_volumes;
415 p->proc_fops = &afs_proc_cell_volumes_fops;
416 p->owner = THIS_MODULE;
417 p->data = cell;
418 413
419 _leave(" = 0"); 414 _leave(" = 0");
420 return 0; 415 return 0;
diff --git a/fs/aio.c b/fs/aio.c
index ae94e1dea266..b5253e77eb2f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194
195/* __put_ioctx
196 * Called when the last user of an aio context has gone away,
197 * and the struct needs to be freed.
198 */
199static void __put_ioctx(struct kioctx *ctx)
200{
201 unsigned nr_events = ctx->max_reqs;
202
203 BUG_ON(ctx->reqs_active);
204
205 cancel_delayed_work(&ctx->wq);
206 cancel_work_sync(&ctx->wq.work);
207 aio_free_ring(ctx);
208 mmdrop(ctx->mm);
209 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx);
212
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219}
220
221#define get_ioctx(kioctx) do { \
222 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
223 atomic_inc(&(kioctx)->users); \
224} while (0)
225#define put_ioctx(kioctx) do { \
226 BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
227 if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
228 __put_ioctx(kioctx); \
229} while (0)
230
194/* ioctx_alloc 231/* ioctx_alloc
195 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 232 * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
196 */ 233 */
@@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
240 if (ctx->max_reqs == 0) 277 if (ctx->max_reqs == 0)
241 goto out_cleanup; 278 goto out_cleanup;
242 279
243 /* now link into global list. kludge. FIXME */ 280 /* now link into global list. */
244 write_lock(&mm->ioctx_list_lock); 281 write_lock(&mm->ioctx_list_lock);
245 ctx->next = mm->ioctx_list; 282 ctx->next = mm->ioctx_list;
246 mm->ioctx_list = ctx; 283 mm->ioctx_list = ctx;
@@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm)
361 } 398 }
362} 399}
363 400
364/* __put_ioctx
365 * Called when the last user of an aio context has gone away,
366 * and the struct needs to be freed.
367 */
368void __put_ioctx(struct kioctx *ctx)
369{
370 unsigned nr_events = ctx->max_reqs;
371
372 BUG_ON(ctx->reqs_active);
373
374 cancel_delayed_work(&ctx->wq);
375 cancel_work_sync(&ctx->wq.work);
376 aio_free_ring(ctx);
377 mmdrop(ctx->mm);
378 ctx->mm = NULL;
379 pr_debug("__put_ioctx: freeing %p\n", ctx);
380 kmem_cache_free(kioctx_cachep, ctx);
381
382 if (nr_events) {
383 spin_lock(&aio_nr_lock);
384 BUG_ON(aio_nr - nr_events > aio_nr);
385 aio_nr -= nr_events;
386 spin_unlock(&aio_nr_lock);
387 }
388}
389
390/* aio_get_req 401/* aio_get_req
391 * Allocate a slot for an aio request. Increments the users count 402 * Allocate a slot for an aio request. Increments the users count
392 * of the kioctx so that the kioctx stays around until all requests are 403 * of the kioctx so that the kioctx stays around until all requests are
@@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req)
542 return ret; 553 return ret;
543} 554}
544 555
545/* Lookup an ioctx id. ioctx_list is lockless for reads. 556static struct kioctx *lookup_ioctx(unsigned long ctx_id)
546 * FIXME: this is O(n) and is only suitable for development.
547 */
548struct kioctx *lookup_ioctx(unsigned long ctx_id)
549{ 557{
550 struct kioctx *ioctx; 558 struct kioctx *ioctx;
551 struct mm_struct *mm; 559 struct mm_struct *mm;
@@ -1070,9 +1078,7 @@ static void timeout_func(unsigned long data)
1070 1078
1071static inline void init_timeout(struct aio_timeout *to) 1079static inline void init_timeout(struct aio_timeout *to)
1072{ 1080{
1073 init_timer(&to->timer); 1081 setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
1074 to->timer.data = (unsigned long)to;
1075 to->timer.function = timeout_func;
1076 to->timed_out = 0; 1082 to->timed_out = 0;
1077 to->p = current; 1083 to->p = current;
1078} 1084}
@@ -1205,6 +1211,7 @@ retry:
1205 if (timeout) 1211 if (timeout)
1206 clear_timeout(&to); 1212 clear_timeout(&to);
1207out: 1213out:
1214 destroy_timer_on_stack(&to.timer);
1208 return i ? i : ret; 1215 return i ? i : ret;
1209} 1216}
1210 1217
@@ -1552,7 +1559,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1552 return 1; 1559 return 1;
1553} 1560}
1554 1561
1555int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1562static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1556 struct iocb *iocb) 1563 struct iocb *iocb)
1557{ 1564{
1558 struct kiocb *req; 1565 struct kiocb *req;
@@ -1593,7 +1600,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1593 * event using the eventfd_signal() function. 1600 * event using the eventfd_signal() function.
1594 */ 1601 */
1595 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1602 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd);
1596 if (unlikely(IS_ERR(req->ki_eventfd))) { 1603 if (IS_ERR(req->ki_eventfd)) {
1597 ret = PTR_ERR(req->ki_eventfd); 1604 ret = PTR_ERR(req->ki_eventfd);
1598 goto out_put_req; 1605 goto out_put_req;
1599 } 1606 }
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be069e085..977ef208c051 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
57 * anonymous inode, and a dentry that describe the "class" 57 * anonymous inode, and a dentry that describe the "class"
58 * of the file 58 * of the file
59 * 59 *
60 * @pfd: [out] pointer to the file descriptor
61 * @dpinode: [out] pointer to the inode
62 * @pfile: [out] pointer to the file struct
63 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
64 * @fops [in] file operations for the new file 61 * @fops [in] file operations for the new file
65 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv [in] private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * that do not need to have a full-fledged inode in order to operate correctly. 65 * that do not need to have a full-fledged inode in order to operate correctly.
69 * All the files created with anon_inode_getfd() will share a single inode, 66 * All the files created with anon_inode_getfd() will share a single inode,
70 * hence saving memory and avoiding code duplication for the file/inode/dentry 67 * hence saving memory and avoiding code duplication for the file/inode/dentry
71 * setup. 68 * setup. Returns new descriptor or -error.
72 */ 69 */
73int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, 70int anon_inode_getfd(const char *name, const struct file_operations *fops,
74 const char *name, const struct file_operations *fops,
75 void *priv) 71 void *priv)
76{ 72{
77 struct qstr this; 73 struct qstr this;
@@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
125 121
126 fd_install(fd, file); 122 fd_install(fd, file);
127 123
128 *pfd = fd; 124 return fd;
129 *pinode = anon_inode_inode;
130 *pfile = file;
131 return 0;
132 125
133err_dput: 126err_dput:
134 dput(dentry); 127 dput(dentry);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2d4ae40718d9..c3d352d7fa93 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -35,7 +35,7 @@
35/* #define DEBUG */ 35/* #define DEBUG */
36 36
37#ifdef DEBUG 37#ifdef DEBUG
38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0) 38#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
39#else 39#else
40#define DPRINTK(fmt,args...) do {} while(0) 40#define DPRINTK(fmt,args...) do {} while(0)
41#endif 41#endif
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d96e5c14a9ca..894fee54d4d8 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
73 status = 0; 73 status = 0;
74done: 74done:
75 DPRINTK("returning = %d", status); 75 DPRINTK("returning = %d", status);
76 mntput(mnt);
77 dput(dentry); 76 dput(dentry);
77 mntput(mnt);
78 return status; 78 return status;
79} 79}
80 80
@@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
333 /* Can we expire this guy */ 333 /* Can we expire this guy */
334 if (autofs4_can_expire(dentry, timeout, do_now)) { 334 if (autofs4_can_expire(dentry, timeout, do_now)) {
335 expired = dentry; 335 expired = dentry;
336 break; 336 goto found;
337 } 337 }
338 goto next; 338 goto next;
339 } 339 }
@@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
352 inf->flags |= AUTOFS_INF_EXPIRING; 352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock); 353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 354 expired = dentry;
355 break; 355 goto found;
356 } 356 }
357 spin_unlock(&sbi->fs_lock); 357 spin_unlock(&sbi->fs_lock);
358 /* 358 /*
@@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 364 if (expired) {
365 dput(dentry); 365 dput(dentry);
366 break; 366 goto found;
367 } 367 }
368 } 368 }
369next: 369next:
@@ -371,18 +371,16 @@ next:
371 spin_lock(&dcache_lock); 371 spin_lock(&dcache_lock);
372 next = next->next; 372 next = next->next;
373 } 373 }
374
375 if (expired) {
376 DPRINTK("returning %p %.*s",
377 expired, (int)expired->d_name.len, expired->d_name.name);
378 spin_lock(&dcache_lock);
379 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
380 spin_unlock(&dcache_lock);
381 return expired;
382 }
383 spin_unlock(&dcache_lock); 374 spin_unlock(&dcache_lock);
384
385 return NULL; 375 return NULL;
376
377found:
378 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name);
380 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock);
383 return expired;
386} 384}
387 385
388/* Perform an expiry operation */ 386/* Perform an expiry operation */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index a54a946a50ae..edf5b6bddb52 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
146 146
147 if (d_mountpoint(dentry)) { 147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL; 148 struct file *fp = NULL;
149 struct vfsmount *fp_mnt = mntget(mnt); 149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150 struct dentry *fp_dentry = dget(dentry);
151 150
152 if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) { 151 path_get(&fp_path);
153 dput(fp_dentry); 152
154 mntput(fp_mnt); 153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file); 155 dcache_dir_close(inode, file);
156 goto out; 156 goto out;
157 } 157 }
158 158
159 fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); 159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp); 160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) { 161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file); 162 dcache_dir_close(inode, file);
@@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 242{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 244 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 int status = 0; 245 struct dentry *new;
246 int status;
246 247
247 /* Block on any pending expiry here; invalidate the dentry 248 /* Block on any pending expiry here; invalidate the dentry
248 when expiration is done to trigger mount request with a new 249 when expiration is done to trigger mount request with a new
@@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
318 spin_lock(&dentry->d_lock); 319 spin_lock(&dentry->d_lock);
319 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
320 spin_unlock(&dentry->d_lock); 321 spin_unlock(&dentry->d_lock);
321 return status; 322
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0;
322} 344}
323 345
324/* For autofs direct mounts the follow link triggers the mount */ 346/* For autofs direct mounts the follow link triggers the mount */
@@ -533,9 +555,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
533 goto next; 555 goto next;
534 556
535 if (d_unhashed(dentry)) { 557 if (d_unhashed(dentry)) {
536 struct autofs_info *ino = autofs4_dentry_ino(dentry);
537 struct inode *inode = dentry->d_inode; 558 struct inode *inode = dentry->d_inode;
538 559
560 ino = autofs4_dentry_ino(dentry);
539 list_del_init(&ino->rehash); 561 list_del_init(&ino->rehash);
540 dget(dentry); 562 dget(dentry);
541 /* 563 /*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1fe28e4754c2..75e5955c3f6d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 171 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
172 len += tmp->d_name.len + 1; 172 len += tmp->d_name.len + 1;
173 173
174 if (--len > NAME_MAX) { 174 if (!len || --len > NAME_MAX) {
175 spin_unlock(&dcache_lock); 175 spin_unlock(&dcache_lock);
176 return 0; 176 return 0;
177 } 177 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 82123ff3e1dd..e8717de3bab3 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
489{ 489{
490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); 490 befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { 491 if (befs_ino->i_flags & BEFS_LONG_SYMLINK) {
492 char *p = nd_get_link(nd); 492 char *link = nd_get_link(nd);
493 if (!IS_ERR(p)) 493 if (!IS_ERR(link))
494 kfree(p); 494 kfree(link);
495 } 495 }
496} 496}
497 497
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 71faf4d23908..70f5d3a8eede 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
42 42
43 43
44#define printf(format, args...) \ 44#define printf(format, args...) \
45 printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) 45 printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args)
46 46
47/* inode.c */ 47/* inode.c */
48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); 48extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a1bb2244cac7..ba4cddb92f1d 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
372 372
373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); 373 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
374 } else { 374 } else {
375 static unsigned long error_time, error_time2;
376 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && 375 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
377 (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) 376 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
378 { 377 {
379 printk(KERN_NOTICE "executable not page aligned\n"); 378 printk(KERN_NOTICE "executable not page aligned\n");
380 error_time2 = jiffies;
381 } 379 }
382 380
383 if ((fd_offset & ~PAGE_MASK) != 0 && 381 if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
384 (jiffies-error_time) > 5*HZ)
385 { 382 {
386 printk(KERN_WARNING 383 printk(KERN_WARNING
387 "fd_offset is not page aligned. Please convert program: %s\n", 384 "fd_offset is not page aligned. Please convert program: %s\n",
388 bprm->file->f_path.dentry->d_name.name); 385 bprm->file->f_path.dentry->d_name.name);
389 error_time = jiffies;
390 } 386 }
391 387
392 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 388 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -495,15 +491,13 @@ static int load_aout_library(struct file *file)
495 start_addr = ex.a_entry & 0xfffff000; 491 start_addr = ex.a_entry & 0xfffff000;
496 492
497 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { 493 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
498 static unsigned long error_time;
499 loff_t pos = N_TXTOFF(ex); 494 loff_t pos = N_TXTOFF(ex);
500 495
501 if ((jiffies-error_time) > 5*HZ) 496 if (printk_ratelimit())
502 { 497 {
503 printk(KERN_WARNING 498 printk(KERN_WARNING
504 "N_TXTOFF is not page aligned. Please convert library: %s\n", 499 "N_TXTOFF is not page aligned. Please convert library: %s\n",
505 file->f_path.dentry->d_name.name); 500 file->f_path.dentry->d_name.name);
506 error_time = jiffies;
507 } 501 }
508 down_write(&current->mm->mmap_sem); 502 down_write(&current->mm->mmap_sem);
509 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 503 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9924581df6f6..b25707fee2cc 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file,
1255static void fill_elf_header(struct elfhdr *elf, int segs, 1255static void fill_elf_header(struct elfhdr *elf, int segs,
1256 u16 machine, u32 flags, u8 osabi) 1256 u16 machine, u32 flags, u8 osabi)
1257{ 1257{
1258 memset(elf, 0, sizeof(*elf));
1259
1258 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1260 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1259 elf->e_ident[EI_CLASS] = ELF_CLASS; 1261 elf->e_ident[EI_CLASS] = ELF_CLASS;
1260 elf->e_ident[EI_DATA] = ELF_DATA; 1262 elf->e_ident[EI_DATA] = ELF_DATA;
1261 elf->e_ident[EI_VERSION] = EV_CURRENT; 1263 elf->e_ident[EI_VERSION] = EV_CURRENT;
1262 elf->e_ident[EI_OSABI] = ELF_OSABI; 1264 elf->e_ident[EI_OSABI] = ELF_OSABI;
1263 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1264 1265
1265 elf->e_type = ET_CORE; 1266 elf->e_type = ET_CORE;
1266 elf->e_machine = machine; 1267 elf->e_machine = machine;
1267 elf->e_version = EV_CURRENT; 1268 elf->e_version = EV_CURRENT;
1268 elf->e_entry = 0;
1269 elf->e_phoff = sizeof(struct elfhdr); 1269 elf->e_phoff = sizeof(struct elfhdr);
1270 elf->e_shoff = 0;
1271 elf->e_flags = flags; 1270 elf->e_flags = flags;
1272 elf->e_ehsize = sizeof(struct elfhdr); 1271 elf->e_ehsize = sizeof(struct elfhdr);
1273 elf->e_phentsize = sizeof(struct elf_phdr); 1272 elf->e_phentsize = sizeof(struct elf_phdr);
1274 elf->e_phnum = segs; 1273 elf->e_phnum = segs;
1275 elf->e_shentsize = 0; 1274
1276 elf->e_shnum = 0;
1277 elf->e_shstrndx = 0;
1278 return; 1275 return;
1279} 1276}
1280 1277
@@ -1725,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1725 1722
1726 info->thread_status_size = 0; 1723 info->thread_status_size = 0;
1727 if (signr) { 1724 if (signr) {
1728 struct elf_thread_status *tmp; 1725 struct elf_thread_status *ets;
1729 rcu_read_lock(); 1726 rcu_read_lock();
1730 do_each_thread(g, p) 1727 do_each_thread(g, p)
1731 if (current->mm == p->mm && current != p) { 1728 if (current->mm == p->mm && current != p) {
1732 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1729 ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
1733 if (!tmp) { 1730 if (!ets) {
1734 rcu_read_unlock(); 1731 rcu_read_unlock();
1735 return 0; 1732 return 0;
1736 } 1733 }
1737 tmp->thread = p; 1734 ets->thread = p;
1738 list_add(&tmp->list, &info->thread_list); 1735 list_add(&ets->list, &info->thread_list);
1739 } 1736 }
1740 while_each_thread(g, p); 1737 while_each_thread(g, p);
1741 rcu_read_unlock(); 1738 rcu_read_unlock();
1742 list_for_each(t, &info->thread_list) { 1739 list_for_each(t, &info->thread_list) {
1743 struct elf_thread_status *tmp;
1744 int sz; 1740 int sz;
1745 1741
1746 tmp = list_entry(t, struct elf_thread_status, list); 1742 ets = list_entry(t, struct elf_thread_status, list);
1747 sz = elf_dump_thread_status(signr, tmp); 1743 sz = elf_dump_thread_status(signr, ets);
1748 info->thread_status_size += sz; 1744 info->thread_status_size += sz;
1749 } 1745 }
1750 } 1746 }
@@ -2000,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2000 1996
2001 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { 1997 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2002 struct page *page; 1998 struct page *page;
2003 struct vm_area_struct *vma; 1999 struct vm_area_struct *tmp_vma;
2004 2000
2005 if (get_user_pages(current, current->mm, addr, 1, 0, 1, 2001 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2006 &page, &vma) <= 0) { 2002 &page, &tmp_vma) <= 0) {
2007 DUMP_SEEK(PAGE_SIZE); 2003 DUMP_SEEK(PAGE_SIZE);
2008 } else { 2004 } else {
2009 if (page == ZERO_PAGE(0)) { 2005 if (page == ZERO_PAGE(0)) {
@@ -2013,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
2013 } 2009 }
2014 } else { 2010 } else {
2015 void *kaddr; 2011 void *kaddr;
2016 flush_cache_page(vma, addr, 2012 flush_cache_page(tmp_vma, addr,
2017 page_to_pfn(page)); 2013 page_to_pfn(page));
2018 kaddr = kmap(page); 2014 kaddr = kmap(page);
2019 if ((size += PAGE_SIZE) > limit || 2015 if ((size += PAGE_SIZE) > limit ||
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 32649f2a1654..ddd35d873391 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params,
136 136
137 retval = kernel_read(file, params->hdr.e_phoff, 137 retval = kernel_read(file, params->hdr.e_phoff,
138 (char *) params->phdrs, size); 138 (char *) params->phdrs, size);
139 if (retval < 0) 139 if (unlikely(retval != size))
140 return retval; 140 return retval < 0 ? retval : -ENOEXEC;
141 141
142 /* determine stack size for this binary */ 142 /* determine stack size for this binary */
143 phdr = params->phdrs; 143 phdr = params->phdrs;
@@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
218 phdr->p_offset, 218 phdr->p_offset,
219 interpreter_name, 219 interpreter_name,
220 phdr->p_filesz); 220 phdr->p_filesz);
221 if (retval < 0) 221 if (unlikely(retval != phdr->p_filesz)) {
222 if (retval >= 0)
223 retval = -ENOEXEC;
222 goto error; 224 goto error;
225 }
223 226
224 retval = -ENOENT; 227 retval = -ENOENT;
225 if (interpreter_name[phdr->p_filesz - 1] != '\0') 228 if (interpreter_name[phdr->p_filesz - 1] != '\0')
@@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 248
246 retval = kernel_read(interpreter, 0, bprm->buf, 249 retval = kernel_read(interpreter, 0, bprm->buf,
247 BINPRM_BUF_SIZE); 250 BINPRM_BUF_SIZE);
248 if (retval < 0) 251 if (unlikely(retval != BINPRM_BUF_SIZE)) {
252 if (retval >= 0)
253 retval = -ENOEXEC;
249 goto error; 254 goto error;
255 }
250 256
251 interp_params.hdr = *((struct elfhdr *) bprm->buf); 257 interp_params.hdr = *((struct elfhdr *) bprm->buf);
252 break; 258 break;
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index f95ae9789c91..f9c88d0c8ced 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
43 return -ENOEXEC; 43 return -ENOEXEC;
44 } 44 }
45 45
46 bprm->sh_bang++; /* Well, the bang-shell is implicit... */ 46 bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */
47 allow_write_access(bprm->file); 47 allow_write_access(bprm->file);
48 fput(bprm->file); 48 fput(bprm->file);
49 bprm->file = NULL; 49 bprm->file = NULL;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 0498b181dd52..3b40d45a3a16 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm,
531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); 531 DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
532 532
533 down_write(&current->mm->mmap_sem); 533 down_write(&current->mm->mmap_sem);
534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); 534 textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
535 MAP_PRIVATE|MAP_EXECUTABLE, 0);
535 up_write(&current->mm->mmap_sem); 536 up_write(&current->mm->mmap_sem);
536 if (!textpos || textpos >= (unsigned long) -4096) { 537 if (!textpos || textpos >= (unsigned long) -4096) {
537 if (!textpos) 538 if (!textpos)
@@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void)
932 return register_binfmt(&flat_format); 933 return register_binfmt(&flat_format);
933} 934}
934 935
935static void __exit exit_flat_binfmt(void)
936{
937 unregister_binfmt(&flat_format);
938}
939
940/****************************************************************************/ 936/****************************************************************************/
941 937
942core_initcall(init_flat_binfmt); 938core_initcall(init_flat_binfmt);
943module_exit(exit_flat_binfmt);
944 939
945/****************************************************************************/ 940/****************************************************************************/
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index dbf0ac0523de..7191306367c5 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
115 if (!enabled) 115 if (!enabled)
116 goto _ret; 116 goto _ret;
117 117
118 retval = -ENOEXEC;
119 if (bprm->misc_bang)
120 goto _ret;
121
122 bprm->misc_bang = 1;
123
118 /* to keep locking time low, we copy the interpreter string */ 124 /* to keep locking time low, we copy the interpreter string */
119 read_lock(&entries_lock); 125 read_lock(&entries_lock);
120 fmt = check_file(bprm); 126 fmt = check_file(bprm);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index ab33939b12a7..9e3963f7ebf1 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
29 * Sorta complicated, but hopefully it will work. -TYT 29 * Sorta complicated, but hopefully it will work. -TYT
30 */ 30 */
31 31
32 bprm->sh_bang++; 32 bprm->sh_bang = 1;
33 allow_write_access(bprm->file); 33 allow_write_access(bprm->file);
34 fput(bprm->file); 34 fput(bprm->file);
35 bprm->file = NULL; 35 bprm->file = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index 6e0b6f66df03..799f86deff24 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
937 return ERR_PTR(-EINVAL); 937 return ERR_PTR(-EINVAL);
938} 938}
939 939
940static void bio_copy_kern_endio(struct bio *bio, int err)
941{
942 struct bio_vec *bvec;
943 const int read = bio_data_dir(bio) == READ;
944 char *p = bio->bi_private;
945 int i;
946
947 __bio_for_each_segment(bvec, bio, i, 0) {
948 char *addr = page_address(bvec->bv_page);
949
950 if (read && !err)
951 memcpy(p, addr, bvec->bv_len);
952
953 __free_page(bvec->bv_page);
954 p += bvec->bv_len;
955 }
956
957 bio_put(bio);
958}
959
960/**
961 * bio_copy_kern - copy kernel address into bio
962 * @q: the struct request_queue for the bio
963 * @data: pointer to buffer to copy
964 * @len: length in bytes
965 * @gfp_mask: allocation flags for bio and page allocation
966 *
967 * copy the kernel address into a bio suitable for io to a block
968 * device. Returns an error pointer in case of error.
969 */
970struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
971 gfp_t gfp_mask, int reading)
972{
973 unsigned long kaddr = (unsigned long)data;
974 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
975 unsigned long start = kaddr >> PAGE_SHIFT;
976 const int nr_pages = end - start;
977 struct bio *bio;
978 struct bio_vec *bvec;
979 int i, ret;
980
981 bio = bio_alloc(gfp_mask, nr_pages);
982 if (!bio)
983 return ERR_PTR(-ENOMEM);
984
985 while (len) {
986 struct page *page;
987 unsigned int bytes = PAGE_SIZE;
988
989 if (bytes > len)
990 bytes = len;
991
992 page = alloc_page(q->bounce_gfp | gfp_mask);
993 if (!page) {
994 ret = -ENOMEM;
995 goto cleanup;
996 }
997
998 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
999 ret = -EINVAL;
1000 goto cleanup;
1001 }
1002
1003 len -= bytes;
1004 }
1005
1006 if (!reading) {
1007 void *p = data;
1008
1009 bio_for_each_segment(bvec, bio, i) {
1010 char *addr = page_address(bvec->bv_page);
1011
1012 memcpy(addr, p, bvec->bv_len);
1013 p += bvec->bv_len;
1014 }
1015 }
1016
1017 bio->bi_private = data;
1018 bio->bi_end_io = bio_copy_kern_endio;
1019 return bio;
1020cleanup:
1021 bio_for_each_segment(bvec, bio, i)
1022 __free_page(bvec->bv_page);
1023
1024 bio_put(bio);
1025
1026 return ERR_PTR(ret);
1027}
1028
940/* 1029/*
941 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions 1030 * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
942 * for performing direct-IO in BIOs. 1031 * for performing direct-IO in BIOs.
@@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs);
1273EXPORT_SYMBOL(bio_map_user); 1362EXPORT_SYMBOL(bio_map_user);
1274EXPORT_SYMBOL(bio_unmap_user); 1363EXPORT_SYMBOL(bio_unmap_user);
1275EXPORT_SYMBOL(bio_map_kern); 1364EXPORT_SYMBOL(bio_map_kern);
1365EXPORT_SYMBOL(bio_copy_kern);
1276EXPORT_SYMBOL(bio_pair_release); 1366EXPORT_SYMBOL(bio_pair_release);
1277EXPORT_SYMBOL(bio_split); 1367EXPORT_SYMBOL(bio_split);
1278EXPORT_SYMBOL(bio_split_pool); 1368EXPORT_SYMBOL(bio_split_pool);
diff --git a/fs/buffer.c b/fs/buffer.c
index 3db4a26adc44..a073f3f4f013 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1101,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
1101 1101
1102 printk(KERN_ERR "%s: requested out-of-range block %llu for " 1102 printk(KERN_ERR "%s: requested out-of-range block %llu for "
1103 "device %s\n", 1103 "device %s\n",
1104 __FUNCTION__, (unsigned long long)block, 1104 __func__, (unsigned long long)block,
1105 bdevname(bdev, b)); 1105 bdevname(bdev, b));
1106 return -EIO; 1106 return -EIO;
1107 } 1107 }
@@ -2211,8 +2211,8 @@ out:
2211 return err; 2211 return err;
2212} 2212}
2213 2213
2214int cont_expand_zero(struct file *file, struct address_space *mapping, 2214static int cont_expand_zero(struct file *file, struct address_space *mapping,
2215 loff_t pos, loff_t *bytes) 2215 loff_t pos, loff_t *bytes)
2216{ 2216{
2217 struct inode *inode = mapping->host; 2217 struct inode *inode = mapping->host;
2218 unsigned blocksize = 1 << inode->i_blkbits; 2218 unsigned blocksize = 1 << inode->i_blkbits;
@@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
2328 return 0; 2328 return 0;
2329} 2329}
2330 2330
2331int generic_commit_write(struct file *file, struct page *page,
2332 unsigned from, unsigned to)
2333{
2334 struct inode *inode = page->mapping->host;
2335 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2336 __block_commit_write(inode,page,from,to);
2337 /*
2338 * No need to use i_size_read() here, the i_size
2339 * cannot change under us because we hold i_mutex.
2340 */
2341 if (pos > inode->i_size) {
2342 i_size_write(inode, pos);
2343 mark_inode_dirty(inode);
2344 }
2345 return 0;
2346}
2347
2348/* 2331/*
2349 * block_page_mkwrite() is not allowed to change the file size as it gets 2332 * block_page_mkwrite() is not allowed to change the file size as it gets
2350 * called from a page fault handler when a page is first dirtied. Hence we must 2333 * called from a page fault handler when a page is first dirtied. Hence we must
@@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync);
3315EXPORT_SYMBOL(file_fsync); 3298EXPORT_SYMBOL(file_fsync);
3316EXPORT_SYMBOL(fsync_bdev); 3299EXPORT_SYMBOL(fsync_bdev);
3317EXPORT_SYMBOL(generic_block_bmap); 3300EXPORT_SYMBOL(generic_block_bmap);
3318EXPORT_SYMBOL(generic_commit_write);
3319EXPORT_SYMBOL(generic_cont_expand_simple); 3301EXPORT_SYMBOL(generic_cont_expand_simple);
3320EXPORT_SYMBOL(init_buffer); 3302EXPORT_SYMBOL(init_buffer);
3321EXPORT_SYMBOL(invalidate_bdev); 3303EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 038674aa88a7..68e510b88457 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -55,7 +55,6 @@ static struct char_device_struct {
55 unsigned int baseminor; 55 unsigned int baseminor;
56 int minorct; 56 int minorct;
57 char name[64]; 57 char name[64];
58 struct file_operations *fops;
59 struct cdev *cdev; /* will die */ 58 struct cdev *cdev; /* will die */
60} *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; 59} *chrdevs[CHRDEV_MAJOR_HASH_SIZE];
61 60
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0228ed06069e..cc950f69e51e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -468,7 +468,7 @@ cifs_proc_init(void)
468{ 468{
469 struct proc_dir_entry *pde; 469 struct proc_dir_entry *pde;
470 470
471 proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); 471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 472 if (proc_fs_cifs == NULL)
473 return; 473 return;
474 474
@@ -559,7 +559,7 @@ cifs_proc_clean(void)
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 560 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("cifs", proc_root_fs); 562 remove_proc_entry("fs/cifs", NULL);
563} 563}
564 564
565static int 565static int
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 95a54253c047..e1c854890f94 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr)
134 unsigned int valid; 134 unsigned int valid;
135 135
136 /* clean out */ 136 /* clean out */
137 vattr->va_mode = (umode_t) -1; 137 vattr->va_mode = -1;
138 vattr->va_uid = (vuid_t) -1; 138 vattr->va_uid = (vuid_t) -1;
139 vattr->va_gid = (vgid_t) -1; 139 vattr->va_gid = (vgid_t) -1;
140 vattr->va_size = (off_t) -1; 140 vattr->va_size = (off_t) -1;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index f89ff083079b..3d2580e00a3e 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
345} 345}
346 346
347/* destruction routines: unlink, rmdir */ 347/* destruction routines: unlink, rmdir */
348int coda_unlink(struct inode *dir, struct dentry *de) 348static int coda_unlink(struct inode *dir, struct dentry *de)
349{ 349{
350 int error; 350 int error;
351 const char *name = de->d_name.name; 351 const char *name = de->d_name.name;
@@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de)
365 return 0; 365 return 0;
366} 366}
367 367
368int coda_rmdir(struct inode *dir, struct dentry *de) 368static int coda_rmdir(struct inode *dir, struct dentry *de)
369{ 369{
370 const char *name = de->d_name.name; 370 const char *name = de->d_name.name;
371 int len = de->d_name.len; 371 int len = de->d_name.len;
@@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
424 424
425 425
426/* file operations for directories */ 426/* file operations for directories */
427int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) 427static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir)
428{ 428{
429 struct coda_file_info *cfi; 429 struct coda_file_info *cfi;
430 struct file *host_file; 430 struct file *host_file;
diff --git a/fs/compat.c b/fs/compat.c
index 2ce4456aad30..332a869d2c53 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
24#include <linux/fcntl.h> 24#include <linux/fcntl.h>
25#include <linux/namei.h> 25#include <linux/namei.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/vfs.h> 28#include <linux/vfs.h>
28#include <linux/ioctl.h> 29#include <linux/ioctl.h>
29#include <linux/init.h> 30#include <linux/init.h>
@@ -1634,7 +1635,7 @@ sticky:
1634 return ret; 1635 return ret;
1635} 1636}
1636 1637
1637#ifdef TIF_RESTORE_SIGMASK 1638#ifdef HAVE_SET_RESTORE_SIGMASK
1638asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, 1639asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1639 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1640 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1640 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, 1641 struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1720,7 +1721,7 @@ sticky:
1720 if (sigmask) { 1721 if (sigmask) {
1721 memcpy(&current->saved_sigmask, &sigsaved, 1722 memcpy(&current->saved_sigmask, &sigsaved,
1722 sizeof(sigsaved)); 1723 sizeof(sigsaved));
1723 set_thread_flag(TIF_RESTORE_SIGMASK); 1724 set_restore_sigmask();
1724 } 1725 }
1725 } else if (sigmask) 1726 } else if (sigmask)
1726 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1727 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1791,7 +1792,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1791 if (sigmask) { 1792 if (sigmask) {
1792 memcpy(&current->saved_sigmask, &sigsaved, 1793 memcpy(&current->saved_sigmask, &sigsaved,
1793 sizeof(sigsaved)); 1794 sizeof(sigsaved));
1794 set_thread_flag(TIF_RESTORE_SIGMASK); 1795 set_restore_sigmask();
1795 } 1796 }
1796 ret = -ERESTARTNOHAND; 1797 ret = -ERESTARTNOHAND;
1797 } else if (sigmask) 1798 } else if (sigmask)
@@ -1825,7 +1826,7 @@ sticky:
1825 1826
1826 return ret; 1827 return ret;
1827} 1828}
1828#endif /* TIF_RESTORE_SIGMASK */ 1829#endif /* HAVE_SET_RESTORE_SIGMASK */
1829 1830
1830#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1831#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
1831/* Stuff for NFS server syscalls... */ 1832/* Stuff for NFS server syscalls... */
@@ -2080,7 +2081,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
2080 2081
2081#ifdef CONFIG_EPOLL 2082#ifdef CONFIG_EPOLL
2082 2083
2083#ifdef TIF_RESTORE_SIGMASK 2084#ifdef HAVE_SET_RESTORE_SIGMASK
2084asmlinkage long compat_sys_epoll_pwait(int epfd, 2085asmlinkage long compat_sys_epoll_pwait(int epfd,
2085 struct compat_epoll_event __user *events, 2086 struct compat_epoll_event __user *events,
2086 int maxevents, int timeout, 2087 int maxevents, int timeout,
@@ -2117,14 +2118,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2117 if (err == -EINTR) { 2118 if (err == -EINTR) {
2118 memcpy(&current->saved_sigmask, &sigsaved, 2119 memcpy(&current->saved_sigmask, &sigsaved,
2119 sizeof(sigsaved)); 2120 sizeof(sigsaved));
2120 set_thread_flag(TIF_RESTORE_SIGMASK); 2121 set_restore_sigmask();
2121 } else 2122 } else
2122 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 2123 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2123 } 2124 }
2124 2125
2125 return err; 2126 return err;
2126} 2127}
2127#endif /* TIF_RESTORE_SIGMASK */ 2128#endif /* HAVE_SET_RESTORE_SIGMASK */
2128 2129
2129#endif /* CONFIG_EPOLL */ 2130#endif /* CONFIG_EPOLL */
2130 2131
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c6e72aebd16b..97dba0d92348 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1046,14 +1046,14 @@ static int vt_check(struct file *file)
1046 struct inode *inode = file->f_path.dentry->d_inode; 1046 struct inode *inode = file->f_path.dentry->d_inode;
1047 struct vc_data *vc; 1047 struct vc_data *vc;
1048 1048
1049 if (file->f_op->ioctl != tty_ioctl) 1049 if (file->f_op->unlocked_ioctl != tty_ioctl)
1050 return -EINVAL; 1050 return -EINVAL;
1051 1051
1052 tty = (struct tty_struct *)file->private_data; 1052 tty = (struct tty_struct *)file->private_data;
1053 if (tty_paranoia_check(tty, inode, "tty_ioctl")) 1053 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1054 return -EINVAL; 1054 return -EINVAL;
1055 1055
1056 if (tty->driver->ioctl != vt_ioctl) 1056 if (tty->ops->ioctl != vt_ioctl)
1057 return -EINVAL; 1057 return -EINVAL;
1058 1058
1059 vc = (struct vc_data *)tty->driver_data; 1059 vc = (struct vc_data *)tty->driver_data;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 397cb503a180..2b6cb23dd14e 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
115 goto out; 115 goto out;
116 } 116 }
117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 117 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
118 __FUNCTION__, count, *ppos, buffer->page); 118 __func__, count, *ppos, buffer->page);
119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 119 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
120 buffer->count); 120 buffer->count);
121out: 121out:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4c1ebff778ee..b9a1d810346d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = {
47 47
48static struct backing_dev_info configfs_backing_dev_info = { 48static struct backing_dev_info configfs_backing_dev_info = {
49 .ra_pages = 0, /* No readahead */ 49 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
51}; 51};
52 52
53static const struct inode_operations configfs_inode_operations ={ 53static const struct inode_operations configfs_inode_operations ={
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index de3b31d0a37d..8421cea7d8c7 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
92 92
93 root = d_alloc_root(inode); 93 root = d_alloc_root(inode);
94 if (!root) { 94 if (!root) {
95 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 95 pr_debug("%s: could not get root dentry!\n",__func__);
96 iput(inode); 96 iput(inode);
97 return -ENOMEM; 97 return -ENOMEM;
98 } 98 }
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78929ea84ff2..2a731ef5f305 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite
210 if (size > PATH_MAX) 210 if (size > PATH_MAX)
211 return -ENAMETOOLONG; 211 return -ENAMETOOLONG;
212 212
213 pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size); 213 pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size);
214 214
215 for (s = path; depth--; s += 3) 215 for (s = path; depth--; s += 3)
216 strcpy(s,"../"); 216 strcpy(s,"../");
217 217
218 fill_item_path(target, path, size); 218 fill_item_path(target, path, size);
219 pr_debug("%s: path = '%s'\n", __FUNCTION__, path); 219 pr_debug("%s: path = '%s'\n", __func__, path);
220 220
221 return 0; 221 return 0;
222} 222}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fddffe4851f5..159a5efd6a8a 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
9 * 2 as published by the Free Software Foundation. 9 * 2 as published by the Free Software Foundation.
10 * 10 *
11 * debugfs is for people to use instead of /proc or /sys. 11 * debugfs is for people to use instead of /proc or /sys.
12 * See Documentation/DocBook/kernel-api for more details. 12 * See Documentation/DocBook/filesystems for more details.
13 * 13 *
14 */ 14 */
15 15
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f120e1207874..285b64a8b06e 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -17,6 +17,8 @@
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/tty.h> 19#include <linux/tty.h>
20#include <linux/mutex.h>
21#include <linux/idr.h>
20#include <linux/devpts_fs.h> 22#include <linux/devpts_fs.h>
21#include <linux/parser.h> 23#include <linux/parser.h>
22#include <linux/fsnotify.h> 24#include <linux/fsnotify.h>
@@ -26,6 +28,10 @@
26 28
27#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
28 30
31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock);
34
29static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
30static struct dentry *devpts_root; 36static struct dentry *devpts_root;
31 37
@@ -171,9 +177,44 @@ static struct dentry *get_node(int num)
171 return lookup_one_len(s, root, sprintf(s, "%d", num)); 177 return lookup_one_len(s, root, sprintf(s, "%d", num));
172} 178}
173 179
180int devpts_new_index(void)
181{
182 int index;
183 int idr_ret;
184
185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM;
188 }
189
190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
192 if (idr_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN)
195 goto retry;
196 return -EIO;
197 }
198
199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO;
203 }
204 mutex_unlock(&allocated_ptys_lock);
205 return index;
206}
207
208void devpts_kill_index(int idx)
209{
210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock);
213}
214
174int devpts_pty_new(struct tty_struct *tty) 215int devpts_pty_new(struct tty_struct *tty)
175{ 216{
176 int number = tty->index; 217 int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
177 struct tty_driver *driver = tty->driver; 218 struct tty_driver *driver = tty->driver;
178 dev_t device = MKDEV(driver->major, driver->minor_start+number); 219 dev_t device = MKDEV(driver->major, driver->minor_start+number);
179 struct dentry *dentry; 220 struct dentry *dentry;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b64e55e0515d..499e16759e96 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void)
200 200
201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj); 201 dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
202 if (!dlm_kset) { 202 if (!dlm_kset) {
203 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 203 printk(KERN_WARNING "%s: can not create kset\n", __func__);
204 return -ENOMEM; 204 return -ENOMEM;
205 } 205 }
206 return 0; 206 return 0;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 28d01ed66de0..676073b8dda5 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/fdtable.h>
23 24
24int dir_notify_enable __read_mostly = 1; 25int dir_notify_enable __read_mostly = 1;
25 26
@@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
66 struct dnotify_struct **prev; 67 struct dnotify_struct **prev;
67 struct inode *inode; 68 struct inode *inode;
68 fl_owner_t id = current->files; 69 fl_owner_t id = current->files;
70 struct file *f;
69 int error = 0; 71 int error = 0;
70 72
71 if ((arg & ~DN_MULTISHOT) == 0) { 73 if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
92 prev = &odn->dn_next; 94 prev = &odn->dn_next;
93 } 95 }
94 96
97 rcu_read_lock();
98 f = fcheck(fd);
99 rcu_read_unlock();
100 /* we'd lost the race with close(), sod off silently */
101 /* note that inode->i_lock prevents reordering problems
102 * between accesses to descriptor table and ->i_dnotify */
103 if (f != filp)
104 goto out_free;
105
95 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 106 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
96 if (error) 107 if (error)
97 goto out_free; 108 goto out_free;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 59375efcf39d..3e5637fc3779 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -14,18 +14,26 @@ int sysctl_drop_caches;
14 14
15static void drop_pagecache_sb(struct super_block *sb) 15static void drop_pagecache_sb(struct super_block *sb)
16{ 16{
17 struct inode *inode; 17 struct inode *inode, *toput_inode = NULL;
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_WILL_FREE)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0)
24 continue;
25 __iget(inode);
26 spin_unlock(&inode_lock);
23 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); 27 __invalidate_mapping_pages(inode->i_mapping, 0, -1, true);
28 iput(toput_inode);
29 toput_inode = inode;
30 spin_lock(&inode_lock);
24 } 31 }
25 spin_unlock(&inode_lock); 32 spin_unlock(&inode_lock);
33 iput(toput_inode);
26} 34}
27 35
28void drop_pagecache(void) 36static void drop_pagecache(void)
29{ 37{
30 struct super_block *sb; 38 struct super_block *sb;
31 39
@@ -45,7 +53,7 @@ restart:
45 spin_unlock(&sb_lock); 53 spin_unlock(&sb_lock);
46} 54}
47 55
48void drop_slab(void) 56static void drop_slab(void)
49{ 57{
50 int nr_objects; 58 int nr_objects;
51 59
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 768857015516..1e34a7fd4884 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a066e109ad9c..cd62d75b2cc0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst,
119 if (rc) { 119 if (rc) {
120 printk(KERN_ERR 120 printk(KERN_ERR
121 "%s: Error initializing crypto hash; rc = [%d]\n", 121 "%s: Error initializing crypto hash; rc = [%d]\n",
122 __FUNCTION__, rc); 122 __func__, rc);
123 goto out; 123 goto out;
124 } 124 }
125 rc = crypto_hash_update(&desc, &sg, len); 125 rc = crypto_hash_update(&desc, &sg, len);
126 if (rc) { 126 if (rc) {
127 printk(KERN_ERR 127 printk(KERN_ERR
128 "%s: Error updating crypto hash; rc = [%d]\n", 128 "%s: Error updating crypto hash; rc = [%d]\n",
129 __FUNCTION__, rc); 129 __func__, rc);
130 goto out; 130 goto out;
131 } 131 }
132 rc = crypto_hash_final(&desc, dst); 132 rc = crypto_hash_final(&desc, dst);
133 if (rc) { 133 if (rc) {
134 printk(KERN_ERR 134 printk(KERN_ERR
135 "%s: Error finalizing crypto hash; rc = [%d]\n", 135 "%s: Error finalizing crypto hash; rc = [%d]\n",
136 __FUNCTION__, rc); 136 __func__, rc);
137 goto out; 137 goto out;
138 } 138 }
139out: 139out:
@@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
437 if (rc < 0) { 437 if (rc < 0) {
438 printk(KERN_ERR "%s: Error attempting to encrypt page with " 438 printk(KERN_ERR "%s: Error attempting to encrypt page with "
439 "page->index = [%ld], extent_offset = [%ld]; " 439 "page->index = [%ld], extent_offset = [%ld]; "
440 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 440 "rc = [%d]\n", __func__, page->index, extent_offset,
441 rc); 441 rc);
442 goto out; 442 goto out;
443 } 443 }
@@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page)
487 0, PAGE_CACHE_SIZE); 487 0, PAGE_CACHE_SIZE);
488 if (rc) 488 if (rc)
489 printk(KERN_ERR "%s: Error attempting to copy " 489 printk(KERN_ERR "%s: Error attempting to copy "
490 "page at index [%ld]\n", __FUNCTION__, 490 "page at index [%ld]\n", __func__,
491 page->index); 491 page->index);
492 goto out; 492 goto out;
493 } 493 }
@@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page)
508 extent_offset); 508 extent_offset);
509 if (rc) { 509 if (rc) {
510 printk(KERN_ERR "%s: Error encrypting extent; " 510 printk(KERN_ERR "%s: Error encrypting extent; "
511 "rc = [%d]\n", __FUNCTION__, rc); 511 "rc = [%d]\n", __func__, rc);
512 goto out; 512 goto out;
513 } 513 }
514 ecryptfs_lower_offset_for_extent( 514 ecryptfs_lower_offset_for_extent(
@@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page,
569 if (rc < 0) { 569 if (rc < 0) {
570 printk(KERN_ERR "%s: Error attempting to decrypt to page with " 570 printk(KERN_ERR "%s: Error attempting to decrypt to page with "
571 "page->index = [%ld], extent_offset = [%ld]; " 571 "page->index = [%ld], extent_offset = [%ld]; "
572 "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, 572 "rc = [%d]\n", __func__, page->index, extent_offset,
573 rc); 573 rc);
574 goto out; 574 goto out;
575 } 575 }
@@ -622,7 +622,7 @@ int ecryptfs_decrypt_page(struct page *page)
622 ecryptfs_inode); 622 ecryptfs_inode);
623 if (rc) 623 if (rc)
624 printk(KERN_ERR "%s: Error attempting to copy " 624 printk(KERN_ERR "%s: Error attempting to copy "
625 "page at index [%ld]\n", __FUNCTION__, 625 "page at index [%ld]\n", __func__,
626 page->index); 626 page->index);
627 goto out; 627 goto out;
628 } 628 }
@@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page)
656 extent_offset); 656 extent_offset);
657 if (rc) { 657 if (rc) {
658 printk(KERN_ERR "%s: Error encrypting extent; " 658 printk(KERN_ERR "%s: Error encrypting extent; "
659 "rc = [%d]\n", __FUNCTION__, rc); 659 "rc = [%d]\n", __func__, rc);
660 goto out; 660 goto out;
661 } 661 }
662 } 662 }
@@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data,
1215 ecryptfs_inode); 1215 ecryptfs_inode);
1216 if (rc) { 1216 if (rc) {
1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", 1217 printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
1218 __FUNCTION__, rc); 1218 __func__, rc);
1219 goto out; 1219 goto out;
1220 } 1220 }
1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { 1221 if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt,
1246 (*written) = 6; 1246 (*written) = 6;
1247} 1247}
1248 1248
1249struct kmem_cache *ecryptfs_header_cache_0;
1250struct kmem_cache *ecryptfs_header_cache_1; 1249struct kmem_cache *ecryptfs_header_cache_1;
1251struct kmem_cache *ecryptfs_header_cache_2; 1250struct kmem_cache *ecryptfs_header_cache_2;
1252 1251
@@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
1320 0, crypt_stat->num_header_bytes_at_front); 1319 0, crypt_stat->num_header_bytes_at_front);
1321 if (rc) 1320 if (rc)
1322 printk(KERN_ERR "%s: Error attempting to write header " 1321 printk(KERN_ERR "%s: Error attempting to write header "
1323 "information to lower file; rc = [%d]\n", __FUNCTION__, 1322 "information to lower file; rc = [%d]\n", __func__,
1324 rc); 1323 rc);
1325 return rc; 1324 return rc;
1326} 1325}
@@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1365 } 1364 }
1366 } else { 1365 } else {
1367 printk(KERN_WARNING "%s: Encrypted flag not set\n", 1366 printk(KERN_WARNING "%s: Encrypted flag not set\n",
1368 __FUNCTION__); 1367 __func__);
1369 rc = -EINVAL; 1368 rc = -EINVAL;
1370 goto out; 1369 goto out;
1371 } 1370 }
1372 /* Released in this function */ 1371 /* Released in this function */
1373 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); 1372 virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL);
1374 if (!virt) { 1373 if (!virt) {
1375 printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); 1374 printk(KERN_ERR "%s: Out of memory\n", __func__);
1376 rc = -ENOMEM; 1375 rc = -ENOMEM;
1377 goto out; 1376 goto out;
1378 } 1377 }
@@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1380 ecryptfs_dentry); 1379 ecryptfs_dentry);
1381 if (unlikely(rc)) { 1380 if (unlikely(rc)) {
1382 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", 1381 printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n",
1383 __FUNCTION__, rc); 1382 __func__, rc);
1384 goto out_free; 1383 goto out_free;
1385 } 1384 }
1386 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 1385 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
@@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
1391 ecryptfs_dentry, virt); 1390 ecryptfs_dentry, virt);
1392 if (rc) { 1391 if (rc) {
1393 printk(KERN_ERR "%s: Error writing metadata out to lower file; " 1392 printk(KERN_ERR "%s: Error writing metadata out to lower file; "
1394 "rc = [%d]\n", __FUNCTION__, rc); 1393 "rc = [%d]\n", __func__, rc);
1395 goto out_free; 1394 goto out_free;
1396 } 1395 }
1397out_free: 1396out_free:
@@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
1585 if (!page_virt) { 1584 if (!page_virt) {
1586 rc = -ENOMEM; 1585 rc = -ENOMEM;
1587 printk(KERN_ERR "%s: Unable to allocate page_virt\n", 1586 printk(KERN_ERR "%s: Unable to allocate page_virt\n",
1588 __FUNCTION__); 1587 __func__);
1589 goto out; 1588 goto out;
1590 } 1589 }
1591 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, 1590 rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 5007f788da01..951ee33a022d 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,7 +4,7 @@
4 * 4 *
5 * Copyright (C) 1997-2003 Erez Zadok 5 * Copyright (C) 1997-2003 Erez Zadok
6 * Copyright (C) 2001-2003 Stony Brook University 6 * Copyright (C) 2001-2003 Stony Brook University
7 * Copyright (C) 2004-2007 International Business Machines Corp. 7 * Copyright (C) 2004-2008 International Business Machines Corp.
8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> 8 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
9 * Trevor S. Highland <trevor.highland@gmail.com> 9 * Trevor S. Highland <trevor.highland@gmail.com>
10 * Tyler Hicks <tyhicks@ou.edu> 10 * Tyler Hicks <tyhicks@ou.edu>
@@ -34,6 +34,7 @@
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/nsproxy.h>
37 38
38/* Version verification for shared data structures w/ userspace */ 39/* Version verification for shared data structures w/ userspace */
39#define ECRYPTFS_VERSION_MAJOR 0x00 40#define ECRYPTFS_VERSION_MAJOR 0x00
@@ -49,11 +50,13 @@
49#define ECRYPTFS_VERSIONING_POLICY 0x00000008 50#define ECRYPTFS_VERSIONING_POLICY 0x00000008
50#define ECRYPTFS_VERSIONING_XATTR 0x00000010 51#define ECRYPTFS_VERSIONING_XATTR 0x00000010
51#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 52#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020
53#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040
52#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ 54#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
53 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ 55 | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
54 | ECRYPTFS_VERSIONING_PUBKEY \ 56 | ECRYPTFS_VERSIONING_PUBKEY \
55 | ECRYPTFS_VERSIONING_XATTR \ 57 | ECRYPTFS_VERSIONING_XATTR \
56 | ECRYPTFS_VERSIONING_MULTKEY) 58 | ECRYPTFS_VERSIONING_MULTKEY \
59 | ECRYPTFS_VERSIONING_DEVMISC)
57#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 60#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
58#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH 61#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
59#define ECRYPTFS_SALT_SIZE 8 62#define ECRYPTFS_SALT_SIZE 8
@@ -73,17 +76,14 @@
73#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 76#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
74#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ 77#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
75#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) 78#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
76#define ECRYPTFS_NLMSG_HELO 100
77#define ECRYPTFS_NLMSG_QUIT 101
78#define ECRYPTFS_NLMSG_REQUEST 102
79#define ECRYPTFS_NLMSG_RESPONSE 103
80#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 79#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
81#define ECRYPTFS_DEFAULT_NUM_USERS 4 80#define ECRYPTFS_DEFAULT_NUM_USERS 4
82#define ECRYPTFS_MAX_NUM_USERS 32768 81#define ECRYPTFS_MAX_NUM_USERS 32768
83#define ECRYPTFS_TRANSPORT_NETLINK 0 82#define ECRYPTFS_TRANSPORT_NETLINK 0
84#define ECRYPTFS_TRANSPORT_CONNECTOR 1 83#define ECRYPTFS_TRANSPORT_CONNECTOR 1
85#define ECRYPTFS_TRANSPORT_RELAYFS 2 84#define ECRYPTFS_TRANSPORT_RELAYFS 2
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK 85#define ECRYPTFS_TRANSPORT_MISCDEV 3
86#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV
87#define ECRYPTFS_XATTR_NAME "user.ecryptfs" 87#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
88 88
89#define RFC2440_CIPHER_DES3_EDE 0x02 89#define RFC2440_CIPHER_DES3_EDE 0x02
@@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item {
366}; 366};
367 367
368struct ecryptfs_message { 368struct ecryptfs_message {
369 /* Can never be greater than ecryptfs_message_buf_len */
370 /* Used to find the parent msg_ctx */
371 /* Inherits from msg_ctx->index */
369 u32 index; 372 u32 index;
370 u32 data_len; 373 u32 data_len;
371 u8 data[]; 374 u8 data[];
372}; 375};
373 376
374struct ecryptfs_msg_ctx { 377struct ecryptfs_msg_ctx {
375#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 378#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01
376#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 379#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02
377#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 380#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03
378 u32 state; 381#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04
379 unsigned int index; 382 u8 state;
380 unsigned int counter; 383#define ECRYPTFS_MSG_HELO 100
384#define ECRYPTFS_MSG_QUIT 101
385#define ECRYPTFS_MSG_REQUEST 102
386#define ECRYPTFS_MSG_RESPONSE 103
387 u8 type;
388 u32 index;
389 /* Counter converts to a sequence number. Each message sent
390 * out for which we expect a response has an associated
391 * sequence number. The response must have the same sequence
392 * number as the counter for the msg_stc for the message to be
393 * valid. */
394 u32 counter;
395 size_t msg_size;
381 struct ecryptfs_message *msg; 396 struct ecryptfs_message *msg;
382 struct task_struct *task; 397 struct task_struct *task;
383 struct list_head node; 398 struct list_head node;
399 struct list_head daemon_out_list;
384 struct mutex mux; 400 struct mutex mux;
385}; 401};
386 402
387extern unsigned int ecryptfs_transport; 403extern unsigned int ecryptfs_transport;
388 404
389struct ecryptfs_daemon_id { 405struct ecryptfs_daemon;
390 pid_t pid; 406
391 uid_t uid; 407struct ecryptfs_daemon {
392 struct hlist_node id_chain; 408#define ECRYPTFS_DAEMON_IN_READ 0x00000001
409#define ECRYPTFS_DAEMON_IN_POLL 0x00000002
410#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004
411#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008
412 u32 flags;
413 u32 num_queued_msg_ctx;
414 struct pid *pid;
415 uid_t euid;
416 struct user_namespace *user_ns;
417 struct task_struct *task;
418 struct mutex mux;
419 struct list_head msg_ctx_out_queue;
420 wait_queue_head_t wait;
421 struct hlist_node euid_chain;
393}; 422};
394 423
424extern struct mutex ecryptfs_daemon_hash_mux;
425
395static inline struct ecryptfs_file_info * 426static inline struct ecryptfs_file_info *
396ecryptfs_file_to_private(struct file *file) 427ecryptfs_file_to_private(struct file *file)
397{ 428{
@@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt)
500} 531}
501 532
502#define ecryptfs_printk(type, fmt, arg...) \ 533#define ecryptfs_printk(type, fmt, arg...) \
503 __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); 534 __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
504void __ecryptfs_printk(const char *fmt, ...); 535void __ecryptfs_printk(const char *fmt, ...);
505 536
506extern const struct file_operations ecryptfs_main_fops; 537extern const struct file_operations ecryptfs_main_fops;
@@ -581,10 +612,13 @@ int
581ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 612ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
582 size_t size, int flags); 613 size_t size, int flags);
583int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); 614int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
584int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); 615int ecryptfs_process_helo(unsigned int transport, uid_t euid,
585int ecryptfs_process_quit(uid_t uid, pid_t pid); 616 struct user_namespace *user_ns, struct pid *pid);
586int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 617int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
587 pid_t pid, u32 seq); 618 struct pid *pid);
619int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
620 struct user_namespace *user_ns, struct pid *pid,
621 u32 seq);
588int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 622int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
589 struct ecryptfs_msg_ctx **msg_ctx); 623 struct ecryptfs_msg_ctx **msg_ctx);
590int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 624int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
@@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport);
593void ecryptfs_release_messaging(unsigned int transport); 627void ecryptfs_release_messaging(unsigned int transport);
594 628
595int ecryptfs_send_netlink(char *data, int data_len, 629int ecryptfs_send_netlink(char *data, int data_len,
596 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 630 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
597 u16 msg_flags, pid_t daemon_pid); 631 u16 msg_flags, struct pid *daemon_pid);
598int ecryptfs_init_netlink(void); 632int ecryptfs_init_netlink(void);
599void ecryptfs_release_netlink(void); 633void ecryptfs_release_netlink(void);
600 634
601int ecryptfs_send_connector(char *data, int data_len, 635int ecryptfs_send_connector(char *data, int data_len,
602 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 636 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
603 u16 msg_flags, pid_t daemon_pid); 637 u16 msg_flags, struct pid *daemon_pid);
604int ecryptfs_init_connector(void); 638int ecryptfs_init_connector(void);
605void ecryptfs_release_connector(void); 639void ecryptfs_release_connector(void);
606void 640void
@@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
642 size_t offset_in_page, size_t size, 676 size_t offset_in_page, size_t size,
643 struct inode *ecryptfs_inode); 677 struct inode *ecryptfs_inode);
644struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); 678struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
679int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
680int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
681 struct user_namespace *user_ns);
682int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
683 size_t *length_size);
684int ecryptfs_write_packet_length(char *dest, size_t size,
685 size_t *packet_size_length);
686int ecryptfs_init_ecryptfs_miscdev(void);
687void ecryptfs_destroy_ecryptfs_miscdev(void);
688int ecryptfs_send_miscdev(char *data, size_t data_size,
689 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
690 u16 msg_flags, struct ecryptfs_daemon *daemon);
691void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
692int
693ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
694 struct user_namespace *user_ns, struct pid *pid);
645 695
646#endif /* #ifndef ECRYPTFS_KERNEL_H */ 696#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b8f5ed4adea..2258b8f654a6 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
195 file, ecryptfs_inode_to_private(inode)->lower_file); 195 file, ecryptfs_inode_to_private(inode)->lower_file);
196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 196 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); 197 ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
198 mutex_lock(&crypt_stat->cs_mutex);
198 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 199 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
200 mutex_unlock(&crypt_stat->cs_mutex);
199 rc = 0; 201 rc = 0;
200 goto out; 202 goto out;
201 } 203 }
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23861152101..0a1397335a8e 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -111,7 +111,7 @@ ecryptfs_do_create(struct inode *directory_inode,
111 111
112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 112 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
113 lower_dir_dentry = lock_parent(lower_dentry); 113 lower_dir_dentry = lock_parent(lower_dentry);
114 if (unlikely(IS_ERR(lower_dir_dentry))) { 114 if (IS_ERR(lower_dir_dentry)) {
115 ecryptfs_printk(KERN_ERR, "Error locking directory of " 115 ecryptfs_printk(KERN_ERR, "Error locking directory of "
116 "dentry\n"); 116 "dentry\n");
117 rc = PTR_ERR(lower_dir_dentry); 117 rc = PTR_ERR(lower_dir_dentry);
@@ -121,7 +121,7 @@ ecryptfs_do_create(struct inode *directory_inode,
121 ecryptfs_dentry, mode, nd); 121 ecryptfs_dentry, mode, nd);
122 if (rc) { 122 if (rc) {
123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 123 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
124 "rc = [%d]\n", __FUNCTION__, rc); 124 "rc = [%d]\n", __func__, rc);
125 goto out_lock; 125 goto out_lock;
126 } 126 }
127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 127 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
@@ -908,7 +908,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 908 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
909 ia->ia_valid &= ~ATTR_MODE; 909 ia->ia_valid &= ~ATTR_MODE;
910 910
911 mutex_lock(&lower_dentry->d_inode->i_mutex);
911 rc = notify_change(lower_dentry, ia); 912 rc = notify_change(lower_dentry, ia);
913 mutex_unlock(&lower_dentry->d_inode->i_mutex);
912out: 914out:
913 fsstack_copy_attr_all(inode, lower_inode, NULL); 915 fsstack_copy_attr_all(inode, lower_inode, NULL);
914 return rc; 916 return rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 682b1b2482c2..e82b457180be 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -65,7 +65,7 @@ static int process_request_key_err(long err_code)
65} 65}
66 66
67/** 67/**
68 * parse_packet_length 68 * ecryptfs_parse_packet_length
69 * @data: Pointer to memory containing length at offset 69 * @data: Pointer to memory containing length at offset
70 * @size: This function writes the decoded size to this memory 70 * @size: This function writes the decoded size to this memory
71 * address; zero on error 71 * address; zero on error
@@ -73,8 +73,8 @@ static int process_request_key_err(long err_code)
73 * 73 *
74 * Returns zero on success; non-zero on error 74 * Returns zero on success; non-zero on error
75 */ 75 */
76static int parse_packet_length(unsigned char *data, size_t *size, 76int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
77 size_t *length_size) 77 size_t *length_size)
78{ 78{
79 int rc = 0; 79 int rc = 0;
80 80
@@ -105,7 +105,7 @@ out:
105} 105}
106 106
107/** 107/**
108 * write_packet_length 108 * ecryptfs_write_packet_length
109 * @dest: The byte array target into which to write the length. Must 109 * @dest: The byte array target into which to write the length. Must
110 * have at least 5 bytes allocated. 110 * have at least 5 bytes allocated.
111 * @size: The length to write. 111 * @size: The length to write.
@@ -114,8 +114,8 @@ out:
114 * 114 *
115 * Returns zero on success; non-zero on error. 115 * Returns zero on success; non-zero on error.
116 */ 116 */
117static int write_packet_length(char *dest, size_t size, 117int ecryptfs_write_packet_length(char *dest, size_t size,
118 size_t *packet_size_length) 118 size_t *packet_size_length)
119{ 119{
120 int rc = 0; 120 int rc = 0;
121 121
@@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
162 goto out; 162 goto out;
163 } 163 }
164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; 164 message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
165 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 165 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
166 &packet_size_len); 166 &packet_size_len);
167 if (rc) { 167 if (rc) {
168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 168 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
169 "header; cannot generate packet length\n"); 169 "header; cannot generate packet length\n");
@@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
172 i += packet_size_len; 172 i += packet_size_len;
173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 173 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
174 i += ECRYPTFS_SIG_SIZE_HEX; 174 i += ECRYPTFS_SIG_SIZE_HEX;
175 rc = write_packet_length(&message[i], session_key->encrypted_key_size, 175 rc = ecryptfs_write_packet_length(&message[i],
176 &packet_size_len); 176 session_key->encrypted_key_size,
177 &packet_size_len);
177 if (rc) { 178 if (rc) {
178 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " 179 ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
179 "header; cannot generate packet length\n"); 180 "header; cannot generate packet length\n");
@@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code,
225 rc = -EIO; 226 rc = -EIO;
226 goto out; 227 goto out;
227 } 228 }
228 rc = parse_packet_length(&data[i], &m_size, &data_len); 229 rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len);
229 if (rc) { 230 if (rc) {
230 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 231 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
231 "rc = [%d]\n", rc); 232 "rc = [%d]\n", rc);
@@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
304 goto out; 305 goto out;
305 } 306 }
306 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; 307 message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
307 rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, 308 rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
308 &packet_size_len); 309 &packet_size_len);
309 if (rc) { 310 if (rc) {
310 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 311 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
311 "header; cannot generate packet length\n"); 312 "header; cannot generate packet length\n");
@@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code,
315 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); 316 memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
316 i += ECRYPTFS_SIG_SIZE_HEX; 317 i += ECRYPTFS_SIG_SIZE_HEX;
317 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ 318 /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
318 rc = write_packet_length(&message[i], crypt_stat->key_size + 3, 319 rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3,
319 &packet_size_len); 320 &packet_size_len);
320 if (rc) { 321 if (rc) {
321 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " 322 ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
322 "header; cannot generate packet length\n"); 323 "header; cannot generate packet length\n");
@@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
357 /* verify that everything through the encrypted FEK size is present */ 358 /* verify that everything through the encrypted FEK size is present */
358 if (message_len < 4) { 359 if (message_len < 4) {
359 rc = -EIO; 360 rc = -EIO;
361 printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable "
362 "message length is [%d]\n", __func__, message_len, 4);
360 goto out; 363 goto out;
361 } 364 }
362 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { 365 if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
363 ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
364 rc = -EIO; 366 rc = -EIO;
367 printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n",
368 __func__);
365 goto out; 369 goto out;
366 } 370 }
367 if (data[i++]) { 371 if (data[i++]) {
368 ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
369 " [%d]\n", data[i-1]);
370 rc = -EIO; 372 rc = -EIO;
373 printk(KERN_ERR "%s: Status indicator has non zero "
374 "value [%d]\n", __func__, data[i-1]);
375
371 goto out; 376 goto out;
372 } 377 }
373 rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); 378 rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size,
379 &data_len);
374 if (rc) { 380 if (rc) {
375 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " 381 ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
376 "rc = [%d]\n", rc); 382 "rc = [%d]\n", rc);
@@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
378 } 384 }
379 i += data_len; 385 i += data_len;
380 if (message_len < (i + key_rec->enc_key_size)) { 386 if (message_len < (i + key_rec->enc_key_size)) {
381 ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n",
382 message_len, (i + key_rec->enc_key_size));
383 rc = -EIO; 387 rc = -EIO;
388 printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n",
389 __func__, message_len, (i + key_rec->enc_key_size));
384 goto out; 390 goto out;
385 } 391 }
386 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { 392 if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
387 ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than "
388 "the maximum key size [%d]\n",
389 key_rec->enc_key_size,
390 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
391 rc = -EIO; 393 rc = -EIO;
394 printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than "
395 "the maximum key size [%d]\n", __func__,
396 key_rec->enc_key_size,
397 ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
392 goto out; 398 goto out;
393 } 399 }
394 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); 400 memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
@@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
445 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), 451 rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
446 &netlink_message, &netlink_message_length); 452 &netlink_message, &netlink_message_length);
447 if (rc) { 453 if (rc) {
448 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); 454 ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
449 goto out; 455 goto out;
450 } 456 }
451 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, 457 rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
@@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
570 goto out; 576 goto out;
571 } 577 }
572 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 578 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
573 rc = parse_packet_length(&data[(*packet_size)], &body_size, 579 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
574 &length_size); 580 &length_size);
575 if (rc) { 581 if (rc) {
576 printk(KERN_WARNING "Error parsing packet length; " 582 printk(KERN_WARNING "Error parsing packet length; "
577 "rc = [%d]\n", rc); 583 "rc = [%d]\n", rc);
@@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
704 goto out; 710 goto out;
705 } 711 }
706 (*new_auth_tok) = &auth_tok_list_item->auth_tok; 712 (*new_auth_tok) = &auth_tok_list_item->auth_tok;
707 rc = parse_packet_length(&data[(*packet_size)], &body_size, 713 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
708 &length_size); 714 &length_size);
709 if (rc) { 715 if (rc) {
710 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", 716 printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
711 rc); 717 rc);
@@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
852 rc = -EINVAL; 858 rc = -EINVAL;
853 goto out; 859 goto out;
854 } 860 }
855 rc = parse_packet_length(&data[(*packet_size)], &body_size, 861 rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size,
856 &length_size); 862 &length_size);
857 if (rc) { 863 if (rc) {
858 printk(KERN_WARNING "Invalid tag 11 packet format\n"); 864 printk(KERN_WARNING "Invalid tag 11 packet format\n");
859 goto out; 865 goto out;
@@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes,
1405 auth_tok->token.private_key.key_size; 1411 auth_tok->token.private_key.key_size;
1406 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); 1412 rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
1407 if (rc) { 1413 if (rc) {
1408 ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " 1414 printk(KERN_ERR "Failed to encrypt session key via a key "
1409 "via a pki"); 1415 "module; rc = [%d]\n", rc);
1410 goto out; 1416 goto out;
1411 } 1417 }
1412 if (ecryptfs_verbosity > 0) { 1418 if (ecryptfs_verbosity > 0) {
@@ -1430,8 +1436,9 @@ encrypted_session_key_set:
1430 goto out; 1436 goto out;
1431 } 1437 }
1432 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; 1438 dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
1433 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1439 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1434 &packet_size_length); 1440 (max_packet_size - 4),
1441 &packet_size_length);
1435 if (rc) { 1442 if (rc) {
1436 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " 1443 ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
1437 "header; cannot generate packet length\n"); 1444 "header; cannot generate packet length\n");
@@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
1489 goto out; 1496 goto out;
1490 } 1497 }
1491 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; 1498 dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
1492 rc = write_packet_length(&dest[(*packet_length)], 1499 rc = ecryptfs_write_packet_length(&dest[(*packet_length)],
1493 (max_packet_size - 4), &packet_size_length); 1500 (max_packet_size - 4),
1501 &packet_size_length);
1494 if (rc) { 1502 if (rc) {
1495 printk(KERN_ERR "Error generating tag 11 packet header; cannot " 1503 printk(KERN_ERR "Error generating tag 11 packet header; cannot "
1496 "generate packet length. rc = [%d]\n", rc); 1504 "generate packet length. rc = [%d]\n", rc);
@@ -1682,8 +1690,9 @@ encrypted_session_key_set:
1682 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; 1690 dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
1683 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) 1691 /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
1684 * to get the number of octets in the actual Tag 3 packet */ 1692 * to get the number of octets in the actual Tag 3 packet */
1685 rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), 1693 rc = ecryptfs_write_packet_length(&dest[(*packet_size)],
1686 &packet_size_length); 1694 (max_packet_size - 4),
1695 &packet_size_length);
1687 if (rc) { 1696 if (rc) {
1688 printk(KERN_ERR "Error generating tag 3 packet header; cannot " 1697 printk(KERN_ERR "Error generating tag 3 packet header; cannot "
1689 "generate packet length. rc = [%d]\n", rc); 1698 "generate packet length. rc = [%d]\n", rc);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d25ac9500a92..d603631601eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
219 if (rc) { 219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the " 220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; " 221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); 222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out; 223 goto out;
224 } 224 }
225out: 225out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 9cc2aec27b0d..1b5c20058acb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -1,7 +1,7 @@
1/** 1/**
2 * eCryptfs: Linux filesystem encryption layer 2 * eCryptfs: Linux filesystem encryption layer
3 * 3 *
4 * Copyright (C) 2004-2006 International Business Machines Corp. 4 * Copyright (C) 2004-2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> 5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 * Tyler Hicks <tyhicks@ou.edu> 6 * Tyler Hicks <tyhicks@ou.edu>
7 * 7 *
@@ -20,19 +20,21 @@
20 * 02111-1307, USA. 20 * 02111-1307, USA.
21 */ 21 */
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/user_namespace.h>
24#include <linux/nsproxy.h>
23#include "ecryptfs_kernel.h" 25#include "ecryptfs_kernel.h"
24 26
25static LIST_HEAD(ecryptfs_msg_ctx_free_list); 27static LIST_HEAD(ecryptfs_msg_ctx_free_list);
26static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); 28static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
27static struct mutex ecryptfs_msg_ctx_lists_mux; 29static struct mutex ecryptfs_msg_ctx_lists_mux;
28 30
29static struct hlist_head *ecryptfs_daemon_id_hash; 31static struct hlist_head *ecryptfs_daemon_hash;
30static struct mutex ecryptfs_daemon_id_hash_mux; 32struct mutex ecryptfs_daemon_hash_mux;
31static int ecryptfs_hash_buckets; 33static int ecryptfs_hash_buckets;
32#define ecryptfs_uid_hash(uid) \ 34#define ecryptfs_uid_hash(uid) \
33 hash_long((unsigned long)uid, ecryptfs_hash_buckets) 35 hash_long((unsigned long)uid, ecryptfs_hash_buckets)
34 36
35static unsigned int ecryptfs_msg_counter; 37static u32 ecryptfs_msg_counter;
36static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; 38static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
37 39
38/** 40/**
@@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
40 * @msg_ctx: The context that was acquired from the free list 42 * @msg_ctx: The context that was acquired from the free list
41 * 43 *
42 * Acquires a context element from the free list and locks the mutex 44 * Acquires a context element from the free list and locks the mutex
43 * on the context. Returns zero on success; non-zero on error or upon 45 * on the context. Sets the msg_ctx task to current. Returns zero on
44 * failure to acquire a free context element. Be sure to lock the 46 * success; non-zero on error or upon failure to acquire a free
45 * list mutex before calling. 47 * context element. Must be called with ecryptfs_msg_ctx_lists_mux
48 * held.
46 */ 49 */
47static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) 50static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
48{ 51{
@@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
50 int rc; 53 int rc;
51 54
52 if (list_empty(&ecryptfs_msg_ctx_free_list)) { 55 if (list_empty(&ecryptfs_msg_ctx_free_list)) {
53 ecryptfs_printk(KERN_WARNING, "The eCryptfs free " 56 printk(KERN_WARNING "%s: The eCryptfs free "
54 "context list is empty. It may be helpful to " 57 "context list is empty. It may be helpful to "
55 "specify the ecryptfs_message_buf_len " 58 "specify the ecryptfs_message_buf_len "
56 "parameter to be greater than the current " 59 "parameter to be greater than the current "
57 "value of [%d]\n", ecryptfs_message_buf_len); 60 "value of [%d]\n", __func__, ecryptfs_message_buf_len);
58 rc = -ENOMEM; 61 rc = -ENOMEM;
59 goto out; 62 goto out;
60 } 63 }
@@ -75,8 +78,7 @@ out:
75 * ecryptfs_msg_ctx_free_to_alloc 78 * ecryptfs_msg_ctx_free_to_alloc
76 * @msg_ctx: The context to move from the free list to the alloc list 79 * @msg_ctx: The context to move from the free list to the alloc list
77 * 80 *
78 * Be sure to lock the list mutex and the context mutex before 81 * Must be called with ecryptfs_msg_ctx_lists_mux held.
79 * calling.
80 */ 82 */
81static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) 83static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
82{ 84{
@@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
89 * ecryptfs_msg_ctx_alloc_to_free 91 * ecryptfs_msg_ctx_alloc_to_free
90 * @msg_ctx: The context to move from the alloc list to the free list 92 * @msg_ctx: The context to move from the alloc list to the free list
91 * 93 *
92 * Be sure to lock the list mutex and the context mutex before 94 * Must be called with ecryptfs_msg_ctx_lists_mux held.
93 * calling.
94 */ 95 */
95static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) 96void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
96{ 97{
97 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); 98 list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
98 if (msg_ctx->msg) 99 if (msg_ctx->msg)
99 kfree(msg_ctx->msg); 100 kfree(msg_ctx->msg);
101 msg_ctx->msg = NULL;
100 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; 102 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
101} 103}
102 104
103/** 105/**
104 * ecryptfs_find_daemon_id 106 * ecryptfs_find_daemon_by_euid
105 * @uid: The user id which maps to the desired daemon id 107 * @euid: The effective user id which maps to the desired daemon id
106 * @id: If return value is zero, points to the desired daemon id 108 * @user_ns: The namespace in which @euid applies
107 * pointer 109 * @daemon: If return value is zero, points to the desired daemon pointer
108 * 110 *
109 * Search the hash list for the given user id. Returns zero if the 111 * Must be called with ecryptfs_daemon_hash_mux held.
110 * user id exists in the list; non-zero otherwise. The daemon id hash 112 *
111 * mutex should be held before calling this function. 113 * Search the hash list for the given user id.
114 *
115 * Returns zero if the user id exists in the list; non-zero otherwise.
112 */ 116 */
113static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) 117int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
118 struct user_namespace *user_ns)
114{ 119{
115 struct hlist_node *elem; 120 struct hlist_node *elem;
116 int rc; 121 int rc;
117 122
118 hlist_for_each_entry(*id, elem, 123 hlist_for_each_entry(*daemon, elem,
119 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], 124 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)],
120 id_chain) { 125 euid_chain) {
121 if ((*id)->uid == uid) { 126 if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) {
122 rc = 0; 127 rc = 0;
123 goto out; 128 goto out;
124 } 129 }
@@ -128,181 +133,325 @@ out:
128 return rc; 133 return rc;
129} 134}
130 135
131static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, 136static int
132 pid_t pid) 137ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
138 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @transport: Transport type
143 * @msg_type: Message type
144 * @daemon: Daemon struct for recipient of message
145 *
146 * A raw message is one that does not include an ecryptfs_message
147 * struct. It simply has a type.
148 *
149 * Must be called with ecryptfs_daemon_hash_mux held.
150 *
151 * Returns zero on success; non-zero otherwise
152 */
153static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type,
154 struct ecryptfs_daemon *daemon)
133{ 155{
156 struct ecryptfs_msg_ctx *msg_ctx;
134 int rc; 157 int rc;
135 158
136 switch(transport) { 159 switch(transport) {
137 case ECRYPTFS_TRANSPORT_NETLINK: 160 case ECRYPTFS_TRANSPORT_NETLINK:
138 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); 161 rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0,
162 daemon->pid);
163 break;
164 case ECRYPTFS_TRANSPORT_MISCDEV:
165 rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type,
166 &msg_ctx);
167 if (rc) {
168 printk(KERN_ERR "%s: Error whilst attempting to send "
169 "message via procfs; rc = [%d]\n", __func__, rc);
170 goto out;
171 }
172 /* Raw messages are logically context-free (e.g., no
173 * reply is expected), so we set the state of the
174 * ecryptfs_msg_ctx object to indicate that it should
175 * be freed as soon as the transport sends out the message. */
176 mutex_lock(&msg_ctx->mux);
177 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
178 mutex_unlock(&msg_ctx->mux);
139 break; 179 break;
140 case ECRYPTFS_TRANSPORT_CONNECTOR: 180 case ECRYPTFS_TRANSPORT_CONNECTOR:
141 case ECRYPTFS_TRANSPORT_RELAYFS: 181 case ECRYPTFS_TRANSPORT_RELAYFS:
142 default: 182 default:
143 rc = -ENOSYS; 183 rc = -ENOSYS;
144 } 184 }
185out:
186 return rc;
187}
188
189/**
190 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
191 * @daemon: Pointer to set to newly allocated daemon struct
192 * @euid: Effective user id for the daemon
193 * @user_ns: The namespace in which @euid applies
194 * @pid: Process id for the daemon
195 *
196 * Must be called ceremoniously while in possession of
197 * ecryptfs_sacred_daemon_hash_mux
198 *
199 * Returns zero on success; non-zero otherwise
200 */
201int
202ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
203 struct user_namespace *user_ns, struct pid *pid)
204{
205 int rc = 0;
206
207 (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL);
208 if (!(*daemon)) {
209 rc = -ENOMEM;
210 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
211 "GFP_KERNEL memory\n", __func__, sizeof(**daemon));
212 goto out;
213 }
214 (*daemon)->euid = euid;
215 (*daemon)->user_ns = get_user_ns(user_ns);
216 (*daemon)->pid = get_pid(pid);
217 (*daemon)->task = current;
218 mutex_init(&(*daemon)->mux);
219 INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue);
220 init_waitqueue_head(&(*daemon)->wait);
221 (*daemon)->num_queued_msg_ctx = 0;
222 hlist_add_head(&(*daemon)->euid_chain,
223 &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]);
224out:
145 return rc; 225 return rc;
146} 226}
147 227
148/** 228/**
149 * ecryptfs_process_helo 229 * ecryptfs_process_helo
150 * @transport: The underlying transport (netlink, etc.) 230 * @transport: The underlying transport (netlink, etc.)
151 * @uid: The user ID owner of the message 231 * @euid: The user ID owner of the message
232 * @user_ns: The namespace in which @euid applies
152 * @pid: The process ID for the userspace program that sent the 233 * @pid: The process ID for the userspace program that sent the
153 * message 234 * message
154 * 235 *
155 * Adds the uid and pid values to the daemon id hash. If a uid 236 * Adds the euid and pid values to the daemon euid hash. If an euid
156 * already has a daemon pid registered, the daemon will be 237 * already has a daemon pid registered, the daemon will be
157 * unregistered before the new daemon id is put into the hash list. 238 * unregistered before the new daemon is put into the hash list.
158 * Returns zero after adding a new daemon id to the hash list; 239 * Returns zero after adding a new daemon to the hash list;
159 * non-zero otherwise. 240 * non-zero otherwise.
160 */ 241 */
161int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) 242int ecryptfs_process_helo(unsigned int transport, uid_t euid,
243 struct user_namespace *user_ns, struct pid *pid)
162{ 244{
163 struct ecryptfs_daemon_id *new_id; 245 struct ecryptfs_daemon *new_daemon;
164 struct ecryptfs_daemon_id *old_id; 246 struct ecryptfs_daemon *old_daemon;
165 int rc; 247 int rc;
166 248
167 mutex_lock(&ecryptfs_daemon_id_hash_mux); 249 mutex_lock(&ecryptfs_daemon_hash_mux);
168 new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); 250 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
169 if (!new_id) { 251 if (rc != 0) {
170 rc = -ENOMEM;
171 ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
172 "to register daemon [%d] for user [%d]\n",
173 pid, uid);
174 goto unlock;
175 }
176 if (!ecryptfs_find_daemon_id(uid, &old_id)) {
177 printk(KERN_WARNING "Received request from user [%d] " 252 printk(KERN_WARNING "Received request from user [%d] "
178 "to register daemon [%d]; unregistering daemon " 253 "to register daemon [0x%p]; unregistering daemon "
179 "[%d]\n", uid, pid, old_id->pid); 254 "[0x%p]\n", euid, pid, old_daemon->pid);
180 hlist_del(&old_id->id_chain); 255 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT,
181 rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, 256 old_daemon);
182 old_id->pid);
183 if (rc) 257 if (rc)
184 printk(KERN_WARNING "Failed to send QUIT " 258 printk(KERN_WARNING "Failed to send QUIT "
185 "message to daemon [%d]; rc = [%d]\n", 259 "message to daemon [0x%p]; rc = [%d]\n",
186 old_id->pid, rc); 260 old_daemon->pid, rc);
187 kfree(old_id); 261 hlist_del(&old_daemon->euid_chain);
262 kfree(old_daemon);
188 } 263 }
189 new_id->uid = uid; 264 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
190 new_id->pid = pid; 265 if (rc)
191 hlist_add_head(&new_id->id_chain, 266 printk(KERN_ERR "%s: The gods are displeased with this attempt "
192 &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); 267 "to create a new daemon object for euid [%d]; pid "
193 rc = 0; 268 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
194unlock: 269 mutex_unlock(&ecryptfs_daemon_hash_mux);
195 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 270 return rc;
271}
272
273/**
274 * ecryptfs_exorcise_daemon - Destroy the daemon struct
275 *
276 * Must be called ceremoniously while in possession of
277 * ecryptfs_daemon_hash_mux and the daemon's own mux.
278 */
279int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
280{
281 struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp;
282 int rc = 0;
283
284 mutex_lock(&daemon->mux);
285 if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ)
286 || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) {
287 rc = -EBUSY;
288 printk(KERN_WARNING "%s: Attempt to destroy daemon with pid "
289 "[0x%p], but it is in the midst of a read or a poll\n",
290 __func__, daemon->pid);
291 mutex_unlock(&daemon->mux);
292 goto out;
293 }
294 list_for_each_entry_safe(msg_ctx, msg_ctx_tmp,
295 &daemon->msg_ctx_out_queue, daemon_out_list) {
296 list_del(&msg_ctx->daemon_out_list);
297 daemon->num_queued_msg_ctx--;
298 printk(KERN_WARNING "%s: Warning: dropping message that is in "
299 "the out queue of a dying daemon\n", __func__);
300 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
301 }
302 hlist_del(&daemon->euid_chain);
303 if (daemon->task)
304 wake_up_process(daemon->task);
305 if (daemon->pid)
306 put_pid(daemon->pid);
307 if (daemon->user_ns)
308 put_user_ns(daemon->user_ns);
309 mutex_unlock(&daemon->mux);
310 memset(daemon, 0, sizeof(*daemon));
311 kfree(daemon);
312out:
196 return rc; 313 return rc;
197} 314}
198 315
199/** 316/**
200 * ecryptfs_process_quit 317 * ecryptfs_process_quit
201 * @uid: The user ID owner of the message 318 * @euid: The user ID owner of the message
319 * @user_ns: The namespace in which @euid applies
202 * @pid: The process ID for the userspace program that sent the 320 * @pid: The process ID for the userspace program that sent the
203 * message 321 * message
204 * 322 *
205 * Deletes the corresponding daemon id for the given uid and pid, if 323 * Deletes the corresponding daemon for the given euid and pid, if
206 * it is the registered that is requesting the deletion. Returns zero 324 * it is the registered that is requesting the deletion. Returns zero
207 * after deleting the desired daemon id; non-zero otherwise. 325 * after deleting the desired daemon; non-zero otherwise.
208 */ 326 */
209int ecryptfs_process_quit(uid_t uid, pid_t pid) 327int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
328 struct pid *pid)
210{ 329{
211 struct ecryptfs_daemon_id *id; 330 struct ecryptfs_daemon *daemon;
212 int rc; 331 int rc;
213 332
214 mutex_lock(&ecryptfs_daemon_id_hash_mux); 333 mutex_lock(&ecryptfs_daemon_hash_mux);
215 if (ecryptfs_find_daemon_id(uid, &id)) { 334 rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns);
335 if (rc || !daemon) {
216 rc = -EINVAL; 336 rc = -EINVAL;
217 ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " 337 printk(KERN_ERR "Received request from user [%d] to "
218 "unregister unrecognized daemon [%d]\n", uid, 338 "unregister unrecognized daemon [0x%p]\n", euid, pid);
219 pid); 339 goto out_unlock;
220 goto unlock;
221 } 340 }
222 if (id->pid != pid) { 341 rc = ecryptfs_exorcise_daemon(daemon);
223 rc = -EINVAL; 342out_unlock:
224 ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " 343 mutex_unlock(&ecryptfs_daemon_hash_mux);
225 "with pid [%d] to unregister daemon [%d]\n",
226 uid, pid, id->pid);
227 goto unlock;
228 }
229 hlist_del(&id->id_chain);
230 kfree(id);
231 rc = 0;
232unlock:
233 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
234 return rc; 344 return rc;
235} 345}
236 346
237/** 347/**
238 * ecryptfs_process_reponse 348 * ecryptfs_process_reponse
239 * @msg: The ecryptfs message received; the caller should sanity check 349 * @msg: The ecryptfs message received; the caller should sanity check
240 * msg->data_len 350 * msg->data_len and free the memory
241 * @pid: The process ID of the userspace application that sent the 351 * @pid: The process ID of the userspace application that sent the
242 * message 352 * message
243 * @seq: The sequence number of the message 353 * @seq: The sequence number of the message; must match the sequence
354 * number for the existing message context waiting for this
355 * response
356 *
357 * Processes a response message after sending an operation request to
358 * userspace. Some other process is awaiting this response. Before
359 * sending out its first communications, the other process allocated a
360 * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The
361 * response message contains this index so that we can copy over the
362 * response message into the msg_ctx that the process holds a
363 * reference to. The other process is going to wake up, check to see
364 * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then
365 * proceed to read off and process the response message. Returns zero
366 * upon delivery to desired context element; non-zero upon delivery
367 * failure or error.
244 * 368 *
245 * Processes a response message after sending a operation request to 369 * Returns zero on success; non-zero otherwise
246 * userspace. Returns zero upon delivery to desired context element;
247 * non-zero upon delivery failure or error.
248 */ 370 */
249int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, 371int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
250 pid_t pid, u32 seq) 372 struct user_namespace *user_ns, struct pid *pid,
373 u32 seq)
251{ 374{
252 struct ecryptfs_daemon_id *id; 375 struct ecryptfs_daemon *daemon;
253 struct ecryptfs_msg_ctx *msg_ctx; 376 struct ecryptfs_msg_ctx *msg_ctx;
254 int msg_size; 377 size_t msg_size;
378 struct nsproxy *nsproxy;
379 struct user_namespace *current_user_ns;
255 int rc; 380 int rc;
256 381
257 if (msg->index >= ecryptfs_message_buf_len) { 382 if (msg->index >= ecryptfs_message_buf_len) {
258 rc = -EINVAL; 383 rc = -EINVAL;
259 ecryptfs_printk(KERN_ERR, "Attempt to reference " 384 printk(KERN_ERR "%s: Attempt to reference "
260 "context buffer at index [%d]; maximum " 385 "context buffer at index [%d]; maximum "
261 "allowable is [%d]\n", msg->index, 386 "allowable is [%d]\n", __func__, msg->index,
262 (ecryptfs_message_buf_len - 1)); 387 (ecryptfs_message_buf_len - 1));
263 goto out; 388 goto out;
264 } 389 }
265 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; 390 msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
266 mutex_lock(&msg_ctx->mux); 391 mutex_lock(&msg_ctx->mux);
267 if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { 392 mutex_lock(&ecryptfs_daemon_hash_mux);
393 rcu_read_lock();
394 nsproxy = task_nsproxy(msg_ctx->task);
395 if (nsproxy == NULL) {
268 rc = -EBADMSG; 396 rc = -EBADMSG;
269 ecryptfs_printk(KERN_WARNING, "User [%d] received a " 397 printk(KERN_ERR "%s: Receiving process is a zombie. Dropping "
270 "message response from process [%d] but does " 398 "message.\n", __func__);
271 "not have a registered daemon\n", 399 rcu_read_unlock();
272 msg_ctx->task->euid, pid); 400 mutex_unlock(&ecryptfs_daemon_hash_mux);
273 goto wake_up; 401 goto wake_up;
274 } 402 }
275 if (msg_ctx->task->euid != uid) { 403 current_user_ns = nsproxy->user_ns;
404 rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid,
405 current_user_ns);
406 rcu_read_unlock();
407 mutex_unlock(&ecryptfs_daemon_hash_mux);
408 if (rc) {
409 rc = -EBADMSG;
410 printk(KERN_WARNING "%s: User [%d] received a "
411 "message response from process [0x%p] but does "
412 "not have a registered daemon\n", __func__,
413 msg_ctx->task->euid, pid);
414 goto wake_up;
415 }
416 if (msg_ctx->task->euid != euid) {
276 rc = -EBADMSG; 417 rc = -EBADMSG;
277 ecryptfs_printk(KERN_WARNING, "Received message from user " 418 printk(KERN_WARNING "%s: Received message from user "
278 "[%d]; expected message from user [%d]\n", 419 "[%d]; expected message from user [%d]\n", __func__,
279 uid, msg_ctx->task->euid); 420 euid, msg_ctx->task->euid);
280 goto unlock; 421 goto unlock;
281 } 422 }
282 if (id->pid != pid) { 423 if (current_user_ns != user_ns) {
283 rc = -EBADMSG; 424 rc = -EBADMSG;
284 ecryptfs_printk(KERN_ERR, "User [%d] received a " 425 printk(KERN_WARNING "%s: Received message from user_ns "
285 "message response from an unrecognized " 426 "[0x%p]; expected message from user_ns [0x%p]\n",
286 "process [%d]\n", msg_ctx->task->euid, pid); 427 __func__, user_ns, nsproxy->user_ns);
428 goto unlock;
429 }
430 if (daemon->pid != pid) {
431 rc = -EBADMSG;
432 printk(KERN_ERR "%s: User [%d] sent a message response "
433 "from an unrecognized process [0x%p]\n",
434 __func__, msg_ctx->task->euid, pid);
287 goto unlock; 435 goto unlock;
288 } 436 }
289 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { 437 if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
290 rc = -EINVAL; 438 rc = -EINVAL;
291 ecryptfs_printk(KERN_WARNING, "Desired context element is not " 439 printk(KERN_WARNING "%s: Desired context element is not "
292 "pending a response\n"); 440 "pending a response\n", __func__);
293 goto unlock; 441 goto unlock;
294 } else if (msg_ctx->counter != seq) { 442 } else if (msg_ctx->counter != seq) {
295 rc = -EINVAL; 443 rc = -EINVAL;
296 ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " 444 printk(KERN_WARNING "%s: Invalid message sequence; "
297 "expected [%d]; received [%d]\n", 445 "expected [%d]; received [%d]\n", __func__,
298 msg_ctx->counter, seq); 446 msg_ctx->counter, seq);
299 goto unlock; 447 goto unlock;
300 } 448 }
301 msg_size = sizeof(*msg) + msg->data_len; 449 msg_size = (sizeof(*msg) + msg->data_len);
302 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); 450 msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
303 if (!msg_ctx->msg) { 451 if (!msg_ctx->msg) {
304 rc = -ENOMEM; 452 rc = -ENOMEM;
305 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 453 printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of "
454 "GFP_KERNEL memory\n", __func__, msg_size);
306 goto unlock; 455 goto unlock;
307 } 456 }
308 memcpy(msg_ctx->msg, msg, msg_size); 457 memcpy(msg_ctx->msg, msg, msg_size);
@@ -317,34 +466,38 @@ out:
317} 466}
318 467
319/** 468/**
320 * ecryptfs_send_message 469 * ecryptfs_send_message_locked
321 * @transport: The transport over which to send the message (i.e., 470 * @transport: The transport over which to send the message (i.e.,
322 * netlink) 471 * netlink)
323 * @data: The data to send 472 * @data: The data to send
324 * @data_len: The length of data 473 * @data_len: The length of data
325 * @msg_ctx: The message context allocated for the send 474 * @msg_ctx: The message context allocated for the send
475 *
476 * Must be called with ecryptfs_daemon_hash_mux held.
477 *
478 * Returns zero on success; non-zero otherwise
326 */ 479 */
327int ecryptfs_send_message(unsigned int transport, char *data, int data_len, 480static int
328 struct ecryptfs_msg_ctx **msg_ctx) 481ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len,
482 u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx)
329{ 483{
330 struct ecryptfs_daemon_id *id; 484 struct ecryptfs_daemon *daemon;
331 int rc; 485 int rc;
332 486
333 mutex_lock(&ecryptfs_daemon_id_hash_mux); 487 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
334 if (ecryptfs_find_daemon_id(current->euid, &id)) { 488 current->nsproxy->user_ns);
335 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 489 if (rc || !daemon) {
336 rc = -ENOTCONN; 490 rc = -ENOTCONN;
337 ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " 491 printk(KERN_ERR "%s: User [%d] does not have a daemon "
338 "registered\n", current->euid); 492 "registered\n", __func__, current->euid);
339 goto out; 493 goto out;
340 } 494 }
341 mutex_unlock(&ecryptfs_daemon_id_hash_mux);
342 mutex_lock(&ecryptfs_msg_ctx_lists_mux); 495 mutex_lock(&ecryptfs_msg_ctx_lists_mux);
343 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); 496 rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
344 if (rc) { 497 if (rc) {
345 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 498 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
346 ecryptfs_printk(KERN_WARNING, "Could not claim a free " 499 printk(KERN_WARNING "%s: Could not claim a free "
347 "context element\n"); 500 "context element\n", __func__);
348 goto out; 501 goto out;
349 } 502 }
350 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); 503 ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
@@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
352 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 505 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
353 switch (transport) { 506 switch (transport) {
354 case ECRYPTFS_TRANSPORT_NETLINK: 507 case ECRYPTFS_TRANSPORT_NETLINK:
355 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, 508 rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type,
356 ECRYPTFS_NLMSG_REQUEST, 0, id->pid); 509 0, daemon->pid);
510 break;
511 case ECRYPTFS_TRANSPORT_MISCDEV:
512 rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type,
513 0, daemon);
357 break; 514 break;
358 case ECRYPTFS_TRANSPORT_CONNECTOR: 515 case ECRYPTFS_TRANSPORT_CONNECTOR:
359 case ECRYPTFS_TRANSPORT_RELAYFS: 516 case ECRYPTFS_TRANSPORT_RELAYFS:
360 default: 517 default:
361 rc = -ENOSYS; 518 rc = -ENOSYS;
362 } 519 }
363 if (rc) { 520 if (rc)
364 printk(KERN_ERR "Error attempting to send message to userspace " 521 printk(KERN_ERR "%s: Error attempting to send message to "
365 "daemon; rc = [%d]\n", rc); 522 "userspace daemon; rc = [%d]\n", __func__, rc);
366 }
367out: 523out:
368 return rc; 524 return rc;
369} 525}
370 526
371/** 527/**
528 * ecryptfs_send_message
529 * @transport: The transport over which to send the message (i.e.,
530 * netlink)
531 * @data: The data to send
532 * @data_len: The length of data
533 * @msg_ctx: The message context allocated for the send
534 *
535 * Grabs ecryptfs_daemon_hash_mux.
536 *
537 * Returns zero on success; non-zero otherwise
538 */
539int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
540 struct ecryptfs_msg_ctx **msg_ctx)
541{
542 int rc;
543
544 mutex_lock(&ecryptfs_daemon_hash_mux);
545 rc = ecryptfs_send_message_locked(transport, data, data_len,
546 ECRYPTFS_MSG_REQUEST, msg_ctx);
547 mutex_unlock(&ecryptfs_daemon_hash_mux);
548 return rc;
549}
550
551/**
372 * ecryptfs_wait_for_response 552 * ecryptfs_wait_for_response
373 * @msg_ctx: The context that was assigned when sending a message 553 * @msg_ctx: The context that was assigned when sending a message
374 * @msg: The incoming message from userspace; not set if rc != 0 554 * @msg: The incoming message from userspace; not set if rc != 0
@@ -377,7 +557,7 @@ out:
377 * of time exceeds ecryptfs_message_wait_timeout. If zero is 557 * of time exceeds ecryptfs_message_wait_timeout. If zero is
378 * returned, msg will point to a valid message from userspace; a 558 * returned, msg will point to a valid message from userspace; a
379 * non-zero value is returned upon failure to receive a message or an 559 * non-zero value is returned upon failure to receive a message or an
380 * error occurs. 560 * error occurs. Callee must free @msg on success.
381 */ 561 */
382int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, 562int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
383 struct ecryptfs_message **msg) 563 struct ecryptfs_message **msg)
@@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport)
413 593
414 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { 594 if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
415 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; 595 ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
416 ecryptfs_printk(KERN_WARNING, "Specified number of users is " 596 printk(KERN_WARNING "%s: Specified number of users is "
417 "too large, defaulting to [%d] users\n", 597 "too large, defaulting to [%d] users\n", __func__,
418 ecryptfs_number_of_users); 598 ecryptfs_number_of_users);
419 } 599 }
420 mutex_init(&ecryptfs_daemon_id_hash_mux); 600 mutex_init(&ecryptfs_daemon_hash_mux);
421 mutex_lock(&ecryptfs_daemon_id_hash_mux); 601 mutex_lock(&ecryptfs_daemon_hash_mux);
422 ecryptfs_hash_buckets = 1; 602 ecryptfs_hash_buckets = 1;
423 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) 603 while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
424 ecryptfs_hash_buckets++; 604 ecryptfs_hash_buckets++;
425 ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) 605 ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head)
426 * ecryptfs_hash_buckets, GFP_KERNEL); 606 * ecryptfs_hash_buckets), GFP_KERNEL);
427 if (!ecryptfs_daemon_id_hash) { 607 if (!ecryptfs_daemon_hash) {
428 rc = -ENOMEM; 608 rc = -ENOMEM;
429 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 609 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
430 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 610 mutex_unlock(&ecryptfs_daemon_hash_mux);
431 goto out; 611 goto out;
432 } 612 }
433 for (i = 0; i < ecryptfs_hash_buckets; i++) 613 for (i = 0; i < ecryptfs_hash_buckets; i++)
434 INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); 614 INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]);
435 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 615 mutex_unlock(&ecryptfs_daemon_hash_mux);
436
437 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) 616 ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
438 * ecryptfs_message_buf_len), GFP_KERNEL); 617 * ecryptfs_message_buf_len),
618 GFP_KERNEL);
439 if (!ecryptfs_msg_ctx_arr) { 619 if (!ecryptfs_msg_ctx_arr) {
440 rc = -ENOMEM; 620 rc = -ENOMEM;
441 ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); 621 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
442 goto out; 622 goto out;
443 } 623 }
444 mutex_init(&ecryptfs_msg_ctx_lists_mux); 624 mutex_init(&ecryptfs_msg_ctx_lists_mux);
@@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport)
446 ecryptfs_msg_counter = 0; 626 ecryptfs_msg_counter = 0;
447 for (i = 0; i < ecryptfs_message_buf_len; i++) { 627 for (i = 0; i < ecryptfs_message_buf_len; i++) {
448 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); 628 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
629 INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list);
449 mutex_init(&ecryptfs_msg_ctx_arr[i].mux); 630 mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
450 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); 631 mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
451 ecryptfs_msg_ctx_arr[i].index = i; 632 ecryptfs_msg_ctx_arr[i].index = i;
@@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport)
464 if (rc) 645 if (rc)
465 ecryptfs_release_messaging(transport); 646 ecryptfs_release_messaging(transport);
466 break; 647 break;
648 case ECRYPTFS_TRANSPORT_MISCDEV:
649 rc = ecryptfs_init_ecryptfs_miscdev();
650 if (rc)
651 ecryptfs_release_messaging(transport);
652 break;
467 case ECRYPTFS_TRANSPORT_CONNECTOR: 653 case ECRYPTFS_TRANSPORT_CONNECTOR:
468 case ECRYPTFS_TRANSPORT_RELAYFS: 654 case ECRYPTFS_TRANSPORT_RELAYFS:
469 default: 655 default:
@@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport)
488 kfree(ecryptfs_msg_ctx_arr); 674 kfree(ecryptfs_msg_ctx_arr);
489 mutex_unlock(&ecryptfs_msg_ctx_lists_mux); 675 mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
490 } 676 }
491 if (ecryptfs_daemon_id_hash) { 677 if (ecryptfs_daemon_hash) {
492 struct hlist_node *elem; 678 struct hlist_node *elem;
493 struct ecryptfs_daemon_id *id; 679 struct ecryptfs_daemon *daemon;
494 int i; 680 int i;
495 681
496 mutex_lock(&ecryptfs_daemon_id_hash_mux); 682 mutex_lock(&ecryptfs_daemon_hash_mux);
497 for (i = 0; i < ecryptfs_hash_buckets; i++) { 683 for (i = 0; i < ecryptfs_hash_buckets; i++) {
498 hlist_for_each_entry(id, elem, 684 int rc;
499 &ecryptfs_daemon_id_hash[i], 685
500 id_chain) { 686 hlist_for_each_entry(daemon, elem,
501 hlist_del(elem); 687 &ecryptfs_daemon_hash[i],
502 kfree(id); 688 euid_chain) {
689 rc = ecryptfs_exorcise_daemon(daemon);
690 if (rc)
691 printk(KERN_ERR "%s: Error whilst "
692 "attempting to destroy daemon; "
693 "rc = [%d]. Dazed and confused, "
694 "but trying to continue.\n",
695 __func__, rc);
503 } 696 }
504 } 697 }
505 kfree(ecryptfs_daemon_id_hash); 698 kfree(ecryptfs_daemon_hash);
506 mutex_unlock(&ecryptfs_daemon_id_hash_mux); 699 mutex_unlock(&ecryptfs_daemon_hash_mux);
507 } 700 }
508 switch(transport) { 701 switch(transport) {
509 case ECRYPTFS_TRANSPORT_NETLINK: 702 case ECRYPTFS_TRANSPORT_NETLINK:
510 ecryptfs_release_netlink(); 703 ecryptfs_release_netlink();
511 break; 704 break;
705 case ECRYPTFS_TRANSPORT_MISCDEV:
706 ecryptfs_destroy_ecryptfs_miscdev();
707 break;
512 case ECRYPTFS_TRANSPORT_CONNECTOR: 708 case ECRYPTFS_TRANSPORT_CONNECTOR:
513 case ECRYPTFS_TRANSPORT_RELAYFS: 709 case ECRYPTFS_TRANSPORT_RELAYFS:
514 default: 710 default:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
new file mode 100644
index 000000000000..788995efd1d3
--- /dev/null
+++ b/fs/ecryptfs/miscdev.c
@@ -0,0 +1,598 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19 * 02111-1307, USA.
20 */
21
22#include <linux/fs.h>
23#include <linux/hash.h>
24#include <linux/random.h>
25#include <linux/miscdevice.h>
26#include <linux/poll.h>
27#include <linux/wait.h>
28#include <linux/module.h>
29#include "ecryptfs_kernel.h"
30
31static atomic_t ecryptfs_num_miscdev_opens;
32
33/**
34 * ecryptfs_miscdev_poll
35 * @file: dev file (ignored)
36 * @pt: dev poll table (ignored)
37 *
38 * Returns the poll mask
39 */
40static unsigned int
41ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
42{
43 struct ecryptfs_daemon *daemon;
44 unsigned int mask = 0;
45 int rc;
46
47 mutex_lock(&ecryptfs_daemon_hash_mux);
48 /* TODO: Just use file->private_data? */
49 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
50 current->nsproxy->user_ns);
51 BUG_ON(rc || !daemon);
52 mutex_lock(&daemon->mux);
53 mutex_unlock(&ecryptfs_daemon_hash_mux);
54 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
55 printk(KERN_WARNING "%s: Attempt to poll on zombified "
56 "daemon\n", __func__);
57 goto out_unlock_daemon;
58 }
59 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ)
60 goto out_unlock_daemon;
61 if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)
62 goto out_unlock_daemon;
63 daemon->flags |= ECRYPTFS_DAEMON_IN_POLL;
64 mutex_unlock(&daemon->mux);
65 poll_wait(file, &daemon->wait, pt);
66 mutex_lock(&daemon->mux);
67 if (!list_empty(&daemon->msg_ctx_out_queue))
68 mask |= POLLIN | POLLRDNORM;
69out_unlock_daemon:
70 daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL;
71 mutex_unlock(&daemon->mux);
72 return mask;
73}
74
75/**
76 * ecryptfs_miscdev_open
77 * @inode: inode of miscdev handle (ignored)
78 * @file: file for miscdev handle (ignored)
79 *
80 * Returns zero on success; non-zero otherwise
81 */
82static int
83ecryptfs_miscdev_open(struct inode *inode, struct file *file)
84{
85 struct ecryptfs_daemon *daemon = NULL;
86 int rc;
87
88 mutex_lock(&ecryptfs_daemon_hash_mux);
89 rc = try_module_get(THIS_MODULE);
90 if (rc == 0) {
91 rc = -EIO;
92 printk(KERN_ERR "%s: Error attempting to increment module use "
93 "count; rc = [%d]\n", __func__, rc);
94 goto out_unlock_daemon_list;
95 }
96 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
97 current->nsproxy->user_ns);
98 if (rc || !daemon) {
99 rc = ecryptfs_spawn_daemon(&daemon, current->euid,
100 current->nsproxy->user_ns,
101 task_pid(current));
102 if (rc) {
103 printk(KERN_ERR "%s: Error attempting to spawn daemon; "
104 "rc = [%d]\n", __func__, rc);
105 goto out_module_put_unlock_daemon_list;
106 }
107 }
108 mutex_lock(&daemon->mux);
109 if (daemon->pid != task_pid(current)) {
110 rc = -EINVAL;
111 printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], "
112 "but pid [0x%p] has attempted to open the handle "
113 "instead\n", __func__, daemon->pid, daemon->euid,
114 task_pid(current));
115 goto out_unlock_daemon;
116 }
117 if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
118 rc = -EBUSY;
119 printk(KERN_ERR "%s: Miscellaneous device handle may only be "
120 "opened once per daemon; pid [0x%p] already has this "
121 "handle open\n", __func__, daemon->pid);
122 goto out_unlock_daemon;
123 }
124 daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
125 atomic_inc(&ecryptfs_num_miscdev_opens);
126out_unlock_daemon:
127 mutex_unlock(&daemon->mux);
128out_module_put_unlock_daemon_list:
129 if (rc)
130 module_put(THIS_MODULE);
131out_unlock_daemon_list:
132 mutex_unlock(&ecryptfs_daemon_hash_mux);
133 return rc;
134}
135
136/**
137 * ecryptfs_miscdev_release
138 * @inode: inode of fs/ecryptfs/euid handle (ignored)
139 * @file: file for fs/ecryptfs/euid handle (ignored)
140 *
141 * This keeps the daemon registered until the daemon sends another
142 * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters.
143 *
144 * Returns zero on success; non-zero otherwise
145 */
146static int
147ecryptfs_miscdev_release(struct inode *inode, struct file *file)
148{
149 struct ecryptfs_daemon *daemon = NULL;
150 int rc;
151
152 mutex_lock(&ecryptfs_daemon_hash_mux);
153 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
154 current->nsproxy->user_ns);
155 BUG_ON(rc || !daemon);
156 mutex_lock(&daemon->mux);
157 BUG_ON(daemon->pid != task_pid(current));
158 BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
159 daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
160 atomic_dec(&ecryptfs_num_miscdev_opens);
161 mutex_unlock(&daemon->mux);
162 rc = ecryptfs_exorcise_daemon(daemon);
163 if (rc) {
164 printk(KERN_CRIT "%s: Fatal error whilst attempting to "
165 "shut down daemon; rc = [%d]. Please report this "
166 "bug.\n", __func__, rc);
167 BUG();
168 }
169 module_put(THIS_MODULE);
170 mutex_unlock(&ecryptfs_daemon_hash_mux);
171 return rc;
172}
173
174/**
175 * ecryptfs_send_miscdev
176 * @data: Data to send to daemon; may be NULL
177 * @data_size: Amount of data to send to daemon
178 * @msg_ctx: Message context, which is used to handle the reply. If
179 * this is NULL, then we do not expect a reply.
180 * @msg_type: Type of message
181 * @msg_flags: Flags for message
182 * @daemon: eCryptfs daemon object
183 *
184 * Add msg_ctx to queue and then, if it exists, notify the blocked
185 * miscdevess about the data being available. Must be called with
186 * ecryptfs_daemon_hash_mux held.
187 *
188 * Returns zero on success; non-zero otherwise
189 */
190int ecryptfs_send_miscdev(char *data, size_t data_size,
191 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
192 u16 msg_flags, struct ecryptfs_daemon *daemon)
193{
194 int rc = 0;
195
196 mutex_lock(&msg_ctx->mux);
197 if (data) {
198 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
199 GFP_KERNEL);
200 if (!msg_ctx->msg) {
201 rc = -ENOMEM;
202 printk(KERN_ERR "%s: Out of memory whilst attempting "
203 "to kmalloc(%Zd, GFP_KERNEL)\n", __func__,
204 (sizeof(*msg_ctx->msg) + data_size));
205 goto out_unlock;
206 }
207 } else
208 msg_ctx->msg = NULL;
209 msg_ctx->msg->index = msg_ctx->index;
210 msg_ctx->msg->data_len = data_size;
211 msg_ctx->type = msg_type;
212 if (data) {
213 memcpy(msg_ctx->msg->data, data, data_size);
214 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
215 } else
216 msg_ctx->msg_size = 0;
217 mutex_lock(&daemon->mux);
218 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
219 daemon->num_queued_msg_ctx++;
220 wake_up_interruptible(&daemon->wait);
221 mutex_unlock(&daemon->mux);
222out_unlock:
223 mutex_unlock(&msg_ctx->mux);
224 return rc;
225}
226
227/**
228 * ecryptfs_miscdev_read - format and send message from queue
229 * @file: fs/ecryptfs/euid miscdevfs handle (ignored)
230 * @buf: User buffer into which to copy the next message on the daemon queue
231 * @count: Amount of space available in @buf
232 * @ppos: Offset in file (ignored)
233 *
234 * Pulls the most recent message from the daemon queue, formats it for
235 * being sent via a miscdevfs handle, and copies it into @buf
236 *
237 * Returns the number of bytes copied into the user buffer
238 */
239static ssize_t
240ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
241 loff_t *ppos)
242{
243 struct ecryptfs_daemon *daemon;
244 struct ecryptfs_msg_ctx *msg_ctx;
245 size_t packet_length_size;
246 u32 counter_nbo;
247 char packet_length[3];
248 size_t i;
249 size_t total_length;
250 int rc;
251
252 mutex_lock(&ecryptfs_daemon_hash_mux);
253 /* TODO: Just use file->private_data? */
254 rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid,
255 current->nsproxy->user_ns);
256 BUG_ON(rc || !daemon);
257 mutex_lock(&daemon->mux);
258 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
259 rc = 0;
260 printk(KERN_WARNING "%s: Attempt to read from zombified "
261 "daemon\n", __func__);
262 goto out_unlock_daemon;
263 }
264 if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) {
265 rc = 0;
266 goto out_unlock_daemon;
267 }
268 /* This daemon will not go away so long as this flag is set */
269 daemon->flags |= ECRYPTFS_DAEMON_IN_READ;
270 mutex_unlock(&ecryptfs_daemon_hash_mux);
271check_list:
272 if (list_empty(&daemon->msg_ctx_out_queue)) {
273 mutex_unlock(&daemon->mux);
274 rc = wait_event_interruptible(
275 daemon->wait, !list_empty(&daemon->msg_ctx_out_queue));
276 mutex_lock(&daemon->mux);
277 if (rc < 0) {
278 rc = 0;
279 goto out_unlock_daemon;
280 }
281 }
282 if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
283 rc = 0;
284 goto out_unlock_daemon;
285 }
286 if (list_empty(&daemon->msg_ctx_out_queue)) {
287 /* Something else jumped in since the
288 * wait_event_interruptable() and removed the
289 * message from the queue; try again */
290 goto check_list;
291 }
292 BUG_ON(current->euid != daemon->euid);
293 BUG_ON(current->nsproxy->user_ns != daemon->user_ns);
294 BUG_ON(task_pid(current) != daemon->pid);
295 msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
296 struct ecryptfs_msg_ctx, daemon_out_list);
297 BUG_ON(!msg_ctx);
298 mutex_lock(&msg_ctx->mux);
299 if (msg_ctx->msg) {
300 rc = ecryptfs_write_packet_length(packet_length,
301 msg_ctx->msg_size,
302 &packet_length_size);
303 if (rc) {
304 rc = 0;
305 printk(KERN_WARNING "%s: Error writing packet length; "
306 "rc = [%d]\n", __func__, rc);
307 goto out_unlock_msg_ctx;
308 }
309 } else {
310 packet_length_size = 0;
311 msg_ctx->msg_size = 0;
312 }
313 /* miscdevfs packet format:
314 * Octet 0: Type
315 * Octets 1-4: network byte order msg_ctx->counter
316 * Octets 5-N0: Size of struct ecryptfs_message to follow
317 * Octets N0-N1: struct ecryptfs_message (including data)
318 *
319 * Octets 5-N1 not written if the packet type does not
320 * include a message */
321 total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size);
322 if (count < total_length) {
323 rc = 0;
324 printk(KERN_WARNING "%s: Only given user buffer of "
325 "size [%Zd], but we need [%Zd] to read the "
326 "pending message\n", __func__, count, total_length);
327 goto out_unlock_msg_ctx;
328 }
329 i = 0;
330 buf[i++] = msg_ctx->type;
331 counter_nbo = cpu_to_be32(msg_ctx->counter);
332 memcpy(&buf[i], (char *)&counter_nbo, 4);
333 i += 4;
334 if (msg_ctx->msg) {
335 memcpy(&buf[i], packet_length, packet_length_size);
336 i += packet_length_size;
337 rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size);
338 if (rc) {
339 printk(KERN_ERR "%s: copy_to_user returned error "
340 "[%d]\n", __func__, rc);
341 goto out_unlock_msg_ctx;
342 }
343 i += msg_ctx->msg_size;
344 }
345 rc = i;
346 list_del(&msg_ctx->daemon_out_list);
347 kfree(msg_ctx->msg);
348 msg_ctx->msg = NULL;
349 /* We do not expect a reply from the userspace daemon for any
350 * message type other than ECRYPTFS_MSG_REQUEST */
351 if (msg_ctx->type != ECRYPTFS_MSG_REQUEST)
352 ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
353out_unlock_msg_ctx:
354 mutex_unlock(&msg_ctx->mux);
355out_unlock_daemon:
356 daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ;
357 mutex_unlock(&daemon->mux);
358 return rc;
359}
360
361/**
362 * ecryptfs_miscdev_helo
363 * @euid: effective user id of miscdevess sending helo packet
364 * @user_ns: The namespace in which @euid applies
365 * @pid: miscdevess id of miscdevess sending helo packet
366 *
367 * Returns zero on success; non-zero otherwise
368 */
369static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
370 struct pid *pid)
371{
372 int rc;
373
374 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
375 pid);
376 if (rc)
377 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
378 return rc;
379}
380
381/**
382 * ecryptfs_miscdev_quit
383 * @euid: effective user id of miscdevess sending quit packet
384 * @user_ns: The namespace in which @euid applies
385 * @pid: miscdevess id of miscdevess sending quit packet
386 *
387 * Returns zero on success; non-zero otherwise
388 */
389static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
390 struct pid *pid)
391{
392 int rc;
393
394 rc = ecryptfs_process_quit(euid, user_ns, pid);
395 if (rc)
396 printk(KERN_WARNING
397 "Error processing QUIT message; rc = [%d]\n", rc);
398 return rc;
399}
400
401/**
402 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
403 * @data: Bytes comprising struct ecryptfs_message
404 * @data_size: sizeof(struct ecryptfs_message) + data len
405 * @euid: Effective user id of miscdevess sending the miscdev response
406 * @user_ns: The namespace in which @euid applies
407 * @pid: Miscdevess id of miscdevess sending the miscdev response
408 * @seq: Sequence number for miscdev response packet
409 *
410 * Returns zero on success; non-zero otherwise
411 */
412static int ecryptfs_miscdev_response(char *data, size_t data_size,
413 uid_t euid, struct user_namespace *user_ns,
414 struct pid *pid, u32 seq)
415{
416 struct ecryptfs_message *msg = (struct ecryptfs_message *)data;
417 int rc;
418
419 if ((sizeof(*msg) + msg->data_len) != data_size) {
420 printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = "
421 "[%Zd]; data_size = [%Zd]. Invalid packet.\n", __func__,
422 (sizeof(*msg) + msg->data_len), data_size);
423 rc = -EINVAL;
424 goto out;
425 }
426 rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq);
427 if (rc)
428 printk(KERN_ERR
429 "Error processing response message; rc = [%d]\n", rc);
430out:
431 return rc;
432}
433
434/**
435 * ecryptfs_miscdev_write - handle write to daemon miscdev handle
436 * @file: File for misc dev handle (ignored)
437 * @buf: Buffer containing user data
438 * @count: Amount of data in @buf
439 * @ppos: Pointer to offset in file (ignored)
440 *
441 * miscdevfs packet format:
442 * Octet 0: Type
443 * Octets 1-4: network byte order msg_ctx->counter (0's for non-response)
444 * Octets 5-N0: Size of struct ecryptfs_message to follow
445 * Octets N0-N1: struct ecryptfs_message (including data)
446 *
447 * Returns the number of bytes read from @buf
448 */
449static ssize_t
450ecryptfs_miscdev_write(struct file *file, const char __user *buf,
451 size_t count, loff_t *ppos)
452{
453 u32 counter_nbo, seq;
454 size_t packet_size, packet_size_length, i;
455 ssize_t sz = 0;
456 char *data;
457 int rc;
458
459 if (count == 0)
460 goto out;
461 data = kmalloc(count, GFP_KERNEL);
462 if (!data) {
463 printk(KERN_ERR "%s: Out of memory whilst attempting to "
464 "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count);
465 goto out;
466 }
467 rc = copy_from_user(data, buf, count);
468 if (rc) {
469 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
470 __func__, rc);
471 goto out_free;
472 }
473 sz = count;
474 i = 0;
475 switch (data[i++]) {
476 case ECRYPTFS_MSG_RESPONSE:
477 if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) {
478 printk(KERN_WARNING "%s: Minimum acceptable packet "
479 "size is [%Zd], but amount of data written is "
480 "only [%Zd]. Discarding response packet.\n",
481 __func__,
482 (1 + 4 + 1 + sizeof(struct ecryptfs_message)),
483 count);
484 goto out_free;
485 }
486 memcpy((char *)&counter_nbo, &data[i], 4);
487 seq = be32_to_cpu(counter_nbo);
488 i += 4;
489 rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
490 &packet_size_length);
491 if (rc) {
492 printk(KERN_WARNING "%s: Error parsing packet length; "
493 "rc = [%d]\n", __func__, rc);
494 goto out_free;
495 }
496 i += packet_size_length;
497 if ((1 + 4 + packet_size_length + packet_size) != count) {
498 printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])"
499 " + packet_size([%Zd]))([%Zd]) != "
500 "count([%Zd]). Invalid packet format.\n",
501 __func__, packet_size_length, packet_size,
502 (1 + packet_size_length + packet_size), count);
503 goto out_free;
504 }
505 rc = ecryptfs_miscdev_response(&data[i], packet_size,
506 current->euid,
507 current->nsproxy->user_ns,
508 task_pid(current), seq);
509 if (rc)
510 printk(KERN_WARNING "%s: Failed to deliver miscdev "
511 "response to requesting operation; rc = [%d]\n",
512 __func__, rc);
513 break;
514 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break;
536 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
538 "message of unrecognized type [%d]\n",
539 data[0]);
540 break;
541 }
542out_free:
543 kfree(data);
544out:
545 return sz;
546}
547
548
549static const struct file_operations ecryptfs_miscdev_fops = {
550 .open = ecryptfs_miscdev_open,
551 .poll = ecryptfs_miscdev_poll,
552 .read = ecryptfs_miscdev_read,
553 .write = ecryptfs_miscdev_write,
554 .release = ecryptfs_miscdev_release,
555};
556
557static struct miscdevice ecryptfs_miscdev = {
558 .minor = MISC_DYNAMIC_MINOR,
559 .name = "ecryptfs",
560 .fops = &ecryptfs_miscdev_fops
561};
562
563/**
564 * ecryptfs_init_ecryptfs_miscdev
565 *
566 * Messages sent to the userspace daemon from the kernel are placed on
567 * a queue associated with the daemon. The next read against the
568 * miscdev handle by that daemon will return the oldest message placed
569 * on the message queue for the daemon.
570 *
571 * Returns zero on success; non-zero otherwise
572 */
573int ecryptfs_init_ecryptfs_miscdev(void)
574{
575 int rc;
576
577 atomic_set(&ecryptfs_num_miscdev_opens, 0);
578 mutex_lock(&ecryptfs_daemon_hash_mux);
579 rc = misc_register(&ecryptfs_miscdev);
580 if (rc)
581 printk(KERN_ERR "%s: Failed to register miscellaneous device "
582 "for communications with userspace daemons; rc = [%d]\n",
583 __func__, rc);
584 mutex_unlock(&ecryptfs_daemon_hash_mux);
585 return rc;
586}
587
588/**
589 * ecryptfs_destroy_ecryptfs_miscdev
590 *
591 * All of the daemons must be exorcised prior to calling this
592 * function.
593 */
594void ecryptfs_destroy_ecryptfs_miscdev(void)
595{
596 BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0);
597 misc_deregister(&ecryptfs_miscdev);
598}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 6df1debdccce..2b6fe1e6e8ba 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
153 flush_dcache_page(page); 153 flush_dcache_page(page);
154 if (rc) { 154 if (rc) {
155 printk(KERN_ERR "%s: Error reading xattr " 155 printk(KERN_ERR "%s: Error reading xattr "
156 "region; rc = [%d]\n", __FUNCTION__, rc); 156 "region; rc = [%d]\n", __func__, rc);
157 goto out; 157 goto out;
158 } 158 }
159 } else { 159 } else {
@@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
169 if (rc) { 169 if (rc) {
170 printk(KERN_ERR "%s: Error attempting to read " 170 printk(KERN_ERR "%s: Error attempting to read "
171 "extent at offset [%lld] in the lower " 171 "extent at offset [%lld] in the lower "
172 "file; rc = [%d]\n", __FUNCTION__, 172 "file; rc = [%d]\n", __func__,
173 lower_offset, rc); 173 lower_offset, rc);
174 goto out; 174 goto out;
175 } 175 }
@@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
212 "the encrypted content from the lower " 212 "the encrypted content from the lower "
213 "file whilst inserting the metadata " 213 "file whilst inserting the metadata "
214 "from the xattr into the header; rc = " 214 "from the xattr into the header; rc = "
215 "[%d]\n", __FUNCTION__, rc); 215 "[%d]\n", __func__, rc);
216 goto out; 216 goto out;
217 } 217 }
218 218
@@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
293 if (rc) { 293 if (rc) {
294 printk(KERN_ERR "%s: Error attemping to read " 294 printk(KERN_ERR "%s: Error attemping to read "
295 "lower page segment; rc = [%d]\n", 295 "lower page segment; rc = [%d]\n",
296 __FUNCTION__, rc); 296 __func__, rc);
297 ClearPageUptodate(page); 297 ClearPageUptodate(page);
298 goto out; 298 goto out;
299 } else 299 } else
@@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
308 "from the lower file whilst " 308 "from the lower file whilst "
309 "inserting the metadata from " 309 "inserting the metadata from "
310 "the xattr into the header; rc " 310 "the xattr into the header; rc "
311 "= [%d]\n", __FUNCTION__, rc); 311 "= [%d]\n", __func__, rc);
312 ClearPageUptodate(page); 312 ClearPageUptodate(page);
313 goto out; 313 goto out;
314 } 314 }
@@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
320 if (rc) { 320 if (rc) {
321 printk(KERN_ERR "%s: Error reading " 321 printk(KERN_ERR "%s: Error reading "
322 "page; rc = [%d]\n", 322 "page; rc = [%d]\n",
323 __FUNCTION__, rc); 323 __func__, rc);
324 ClearPageUptodate(page); 324 ClearPageUptodate(page);
325 goto out; 325 goto out;
326 } 326 }
@@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
331 if (rc) { 331 if (rc) {
332 printk(KERN_ERR "%s: Error decrypting page " 332 printk(KERN_ERR "%s: Error decrypting page "
333 "at index [%ld]; rc = [%d]\n", 333 "at index [%ld]; rc = [%d]\n",
334 __FUNCTION__, page->index, rc); 334 __func__, page->index, rc);
335 ClearPageUptodate(page); 335 ClearPageUptodate(page);
336 goto out; 336 goto out;
337 } 337 }
@@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
348 if (rc) { 348 if (rc) {
349 printk(KERN_ERR "%s: Error on attempt to " 349 printk(KERN_ERR "%s: Error on attempt to "
350 "truncate to (higher) offset [%lld];" 350 "truncate to (higher) offset [%lld];"
351 " rc = [%d]\n", __FUNCTION__, 351 " rc = [%d]\n", __func__,
352 prev_page_end_size, rc); 352 prev_page_end_size, rc);
353 goto out; 353 goto out;
354 } 354 }
@@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
389 kfree(file_size_virt); 389 kfree(file_size_virt);
390 if (rc) 390 if (rc)
391 printk(KERN_ERR "%s: Error writing file size to header; " 391 printk(KERN_ERR "%s: Error writing file size to header; "
392 "rc = [%d]\n", __FUNCTION__, rc); 392 "rc = [%d]\n", __func__, rc);
393out: 393out:
394 return rc; 394 return rc;
395} 395}
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index f638a698dc52..e0abad62b395 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock;
44 * upon sending the message; non-zero upon error. 44 * upon sending the message; non-zero upon error.
45 */ 45 */
46int ecryptfs_send_netlink(char *data, int data_len, 46int ecryptfs_send_netlink(char *data, int data_len,
47 struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, 47 struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
48 u16 msg_flags, pid_t daemon_pid) 48 u16 msg_flags, struct pid *daemon_pid)
49{ 49{
50 struct sk_buff *skb; 50 struct sk_buff *skb;
51 struct nlmsghdr *nlh; 51 struct nlmsghdr *nlh;
@@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); 60 ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
61 goto out; 61 goto out;
62 } 62 }
63 nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, 63 nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0,
64 msg_type, payload_len); 64 msg_type, payload_len);
65 nlh->nlmsg_flags = msg_flags; 65 nlh->nlmsg_flags = msg_flags;
66 if (msg_ctx && payload_len) { 66 if (msg_ctx && payload_len) {
@@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len,
69 msg->data_len = data_len; 69 msg->data_len = data_len;
70 memcpy(msg->data, data, data_len); 70 memcpy(msg->data, data, data_len);
71 } 71 }
72 rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); 72 rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0);
73 if (rc < 0) { 73 if (rc < 0) {
74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " 74 ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
75 "message; rc = [%d]\n", rc); 75 "message; rc = [%d]\n", rc);
@@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
99{ 99{
100 struct nlmsghdr *nlh = nlmsg_hdr(skb); 100 struct nlmsghdr *nlh = nlmsg_hdr(skb);
101 struct ecryptfs_message *msg = NLMSG_DATA(nlh); 101 struct ecryptfs_message *msg = NLMSG_DATA(nlh);
102 struct pid *pid;
102 int rc; 103 int rc;
103 104
104 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { 105 if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
@@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb)
107 "incorrectly specified data length\n"); 108 "incorrectly specified data length\n");
108 goto out; 109 goto out;
109 } 110 }
110 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, 111 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
111 NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); 112 rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL,
113 pid, nlh->nlmsg_seq);
114 put_pid(pid);
112 if (rc) 115 if (rc)
113 printk(KERN_ERR 116 printk(KERN_ERR
114 "Error processing response message; rc = [%d]\n", rc); 117 "Error processing response message; rc = [%d]\n", rc);
@@ -126,11 +129,13 @@ out:
126 */ 129 */
127static int ecryptfs_process_nl_helo(struct sk_buff *skb) 130static int ecryptfs_process_nl_helo(struct sk_buff *skb)
128{ 131{
132 struct pid *pid;
129 int rc; 133 int rc;
130 134
135 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
131 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, 136 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
132 NETLINK_CREDS(skb)->uid, 137 NETLINK_CREDS(skb)->uid, NULL, pid);
133 NETLINK_CREDS(skb)->pid); 138 put_pid(pid);
134 if (rc) 139 if (rc)
135 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); 140 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
136 return rc; 141 return rc;
@@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb)
147 */ 152 */
148static int ecryptfs_process_nl_quit(struct sk_buff *skb) 153static int ecryptfs_process_nl_quit(struct sk_buff *skb)
149{ 154{
155 struct pid *pid;
150 int rc; 156 int rc;
151 157
152 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, 158 pid = find_get_pid(NETLINK_CREDS(skb)->pid);
153 NETLINK_CREDS(skb)->pid); 159 rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid);
160 put_pid(pid);
154 if (rc) 161 if (rc)
155 printk(KERN_WARNING 162 printk(KERN_WARNING
156 "Error processing QUIT message; rc = [%d]\n", rc); 163 "Error processing QUIT message; rc = [%d]\n", rc);
@@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb)
176 goto free; 183 goto free;
177 } 184 }
178 switch (nlh->nlmsg_type) { 185 switch (nlh->nlmsg_type) {
179 case ECRYPTFS_NLMSG_RESPONSE: 186 case ECRYPTFS_MSG_RESPONSE:
180 if (ecryptfs_process_nl_response(skb)) { 187 if (ecryptfs_process_nl_response(skb)) {
181 ecryptfs_printk(KERN_WARNING, "Failed to " 188 ecryptfs_printk(KERN_WARNING, "Failed to "
182 "deliver netlink response to " 189 "deliver netlink response to "
183 "requesting operation\n"); 190 "requesting operation\n");
184 } 191 }
185 break; 192 break;
186 case ECRYPTFS_NLMSG_HELO: 193 case ECRYPTFS_MSG_HELO:
187 if (ecryptfs_process_nl_helo(skb)) { 194 if (ecryptfs_process_nl_helo(skb)) {
188 ecryptfs_printk(KERN_WARNING, "Failed to " 195 ecryptfs_printk(KERN_WARNING, "Failed to "
189 "fulfill HELO request\n"); 196 "fulfill HELO request\n");
190 } 197 }
191 break; 198 break;
192 case ECRYPTFS_NLMSG_QUIT: 199 case ECRYPTFS_MSG_QUIT:
193 if (ecryptfs_process_nl_quit(skb)) { 200 if (ecryptfs_process_nl_quit(skb)) {
194 ecryptfs_printk(KERN_WARNING, "Failed to " 201 ecryptfs_printk(KERN_WARNING, "Failed to "
195 "fulfill QUIT request\n"); 202 "fulfill QUIT request\n");
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 0c4928623bbc..ebf55150be56 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
55 set_fs(fs_save); 55 set_fs(fs_save);
56 if (octets_written < 0) { 56 if (octets_written < 0) {
57 printk(KERN_ERR "%s: octets_written = [%td]; " 57 printk(KERN_ERR "%s: octets_written = [%td]; "
58 "expected [%td]\n", __FUNCTION__, octets_written, size); 58 "expected [%td]\n", __func__, octets_written, size);
59 rc = -EINVAL; 59 rc = -EINVAL;
60 } 60 }
61 mutex_unlock(&inode_info->lower_file_mutex); 61 mutex_unlock(&inode_info->lower_file_mutex);
@@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
153 rc = PTR_ERR(ecryptfs_page); 153 rc = PTR_ERR(ecryptfs_page);
154 printk(KERN_ERR "%s: Error getting page at " 154 printk(KERN_ERR "%s: Error getting page at "
155 "index [%ld] from eCryptfs inode " 155 "index [%ld] from eCryptfs inode "
156 "mapping; rc = [%d]\n", __FUNCTION__, 156 "mapping; rc = [%d]\n", __func__,
157 ecryptfs_page_idx, rc); 157 ecryptfs_page_idx, rc);
158 goto out; 158 goto out;
159 } 159 }
@@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
165 if (rc) { 165 if (rc) {
166 printk(KERN_ERR "%s: Error decrypting " 166 printk(KERN_ERR "%s: Error decrypting "
167 "page; rc = [%d]\n", 167 "page; rc = [%d]\n",
168 __FUNCTION__, rc); 168 __func__, rc);
169 ClearPageUptodate(ecryptfs_page); 169 ClearPageUptodate(ecryptfs_page);
170 page_cache_release(ecryptfs_page); 170 page_cache_release(ecryptfs_page);
171 goto out; 171 goto out;
@@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
202 page_cache_release(ecryptfs_page); 202 page_cache_release(ecryptfs_page);
203 if (rc) { 203 if (rc) {
204 printk(KERN_ERR "%s: Error encrypting " 204 printk(KERN_ERR "%s: Error encrypting "
205 "page; rc = [%d]\n", __FUNCTION__, rc); 205 "page; rc = [%d]\n", __func__, rc);
206 goto out; 206 goto out;
207 } 207 }
208 pos += num_bytes; 208 pos += num_bytes;
@@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
254 set_fs(fs_save); 254 set_fs(fs_save);
255 if (octets_read < 0) { 255 if (octets_read < 0) {
256 printk(KERN_ERR "%s: octets_read = [%td]; " 256 printk(KERN_ERR "%s: octets_read = [%td]; "
257 "expected [%td]\n", __FUNCTION__, octets_read, size); 257 "expected [%td]\n", __func__, octets_read, size);
258 rc = -EINVAL; 258 rc = -EINVAL;
259 } 259 }
260 mutex_unlock(&inode_info->lower_file_mutex); 260 mutex_unlock(&inode_info->lower_file_mutex);
@@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
327 printk(KERN_ERR "%s: Attempt to read data past the end of the " 327 printk(KERN_ERR "%s: Attempt to read data past the end of the "
328 "file; offset = [%lld]; size = [%td]; " 328 "file; offset = [%lld]; size = [%td]; "
329 "ecryptfs_file_size = [%lld]\n", 329 "ecryptfs_file_size = [%lld]\n",
330 __FUNCTION__, offset, size, ecryptfs_file_size); 330 __func__, offset, size, ecryptfs_file_size);
331 goto out; 331 goto out;
332 } 332 }
333 pos = offset; 333 pos = offset;
@@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size,
345 rc = PTR_ERR(ecryptfs_page); 345 rc = PTR_ERR(ecryptfs_page);
346 printk(KERN_ERR "%s: Error getting page at " 346 printk(KERN_ERR "%s: Error getting page at "
347 "index [%ld] from eCryptfs inode " 347 "index [%ld] from eCryptfs inode "
348 "mapping; rc = [%d]\n", __FUNCTION__, 348 "mapping; rc = [%d]\n", __func__,
349 ecryptfs_page_idx, rc); 349 ecryptfs_page_idx, rc);
350 goto out; 350 goto out;
351 } 351 }
352 rc = ecryptfs_decrypt_page(ecryptfs_page); 352 rc = ecryptfs_decrypt_page(ecryptfs_page);
353 if (rc) { 353 if (rc) {
354 printk(KERN_ERR "%s: Error decrypting " 354 printk(KERN_ERR "%s: Error decrypting "
355 "page; rc = [%d]\n", __FUNCTION__, rc); 355 "page; rc = [%d]\n", __func__, rc);
356 ClearPageUptodate(ecryptfs_page); 356 ClearPageUptodate(ecryptfs_page);
357 page_cache_release(ecryptfs_page); 357 page_cache_release(ecryptfs_page);
358 goto out; 358 goto out;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130cd50ac..343942deeec1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd)
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd(unsigned int count)
202{ 202{
203 int error, fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 struct file *file;
206 struct inode *inode;
207 205
208 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
209 if (!ctx) 207 if (!ctx)
@@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count)
216 * When we call this, the initialization must be complete, since 214 * When we call this, the initialization must be complete, since
217 * anon_inode_getfd() will install the fd. 215 * anon_inode_getfd() will install the fd.
218 */ 216 */
219 error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]", 217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
220 &eventfd_fops, ctx); 218 if (fd < 0)
221 if (!error) 219 kfree(ctx);
222 return fd; 220 return fd;
223
224 kfree(ctx);
225 return error;
226} 221}
227 222
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a415f42d32cf..990c01d2d66b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
257 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
258} 258}
259 259
260/* Special initialization for the RB tree node to detect linkage */
261static inline void ep_rb_initnode(struct rb_node *n)
262{
263 rb_set_parent(n, n);
264}
265
266/* Removes a node from the RB tree and marks it for a fast is-linked check */
267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
268{
269 rb_erase(n, r);
270 rb_set_parent(n, n);
271}
272
273/* Fast check to verify that the item is linked to the main RB tree */
274static inline int ep_rb_linked(struct rb_node *n)
275{
276 return rb_parent(n) != n;
277}
278
279/* Tells us if the item is currently linked */ 260/* Tells us if the item is currently linked */
280static inline int ep_is_linked(struct list_head *p) 261static inline int ep_is_linked(struct list_head *p)
281{ 262{
@@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p)
283} 264}
284 265
285/* Get the "struct epitem" from a wait queue pointer */ 266/* Get the "struct epitem" from a wait queue pointer */
286static inline struct epitem * ep_item_from_wait(wait_queue_t *p) 267static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
287{ 268{
288 return container_of(p, struct eppoll_entry, wait)->base; 269 return container_of(p, struct eppoll_entry, wait)->base;
289} 270}
290 271
291/* Get the "struct epitem" from an epoll queue wrapper */ 272/* Get the "struct epitem" from an epoll queue wrapper */
292static inline struct epitem * ep_item_from_epqueue(poll_table *p) 273static inline struct epitem *ep_item_from_epqueue(poll_table *p)
293{ 274{
294 return container_of(p, struct ep_pqueue, pt)->epi; 275 return container_of(p, struct ep_pqueue, pt)->epi;
295} 276}
@@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
411 list_del_init(&epi->fllink); 392 list_del_init(&epi->fllink);
412 spin_unlock(&file->f_ep_lock); 393 spin_unlock(&file->f_ep_lock);
413 394
414 if (ep_rb_linked(&epi->rbn)) 395 rb_erase(&epi->rbn, &ep->rbr);
415 ep_rb_erase(&epi->rbn, &ep->rbr);
416 396
417 spin_lock_irqsave(&ep->lock, flags); 397 spin_lock_irqsave(&ep->lock, flags);
418 if (ep_is_linked(&epi->rdllink)) 398 if (ep_is_linked(&epi->rdllink))
@@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
728 goto error_return; 708 goto error_return;
729 709
730 /* Item initialization follow here ... */ 710 /* Item initialization follow here ... */
731 ep_rb_initnode(&epi->rbn);
732 INIT_LIST_HEAD(&epi->rdllink); 711 INIT_LIST_HEAD(&epi->rdllink);
733 INIT_LIST_HEAD(&epi->fllink); 712 INIT_LIST_HEAD(&epi->fllink);
734 INIT_LIST_HEAD(&epi->pwqlist); 713 INIT_LIST_HEAD(&epi->pwqlist);
@@ -1071,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size)
1071{ 1050{
1072 int error, fd = -1; 1051 int error, fd = -1;
1073 struct eventpoll *ep; 1052 struct eventpoll *ep;
1074 struct inode *inode;
1075 struct file *file;
1076 1053
1077 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1078 current, size)); 1055 current, size));
@@ -1082,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size)
1082 * structure ( "struct eventpoll" ). 1059 * structure ( "struct eventpoll" ).
1083 */ 1060 */
1084 error = -EINVAL; 1061 error = -EINVAL;
1085 if (size <= 0 || (error = ep_alloc(&ep)) != 0) 1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
1063 fd = error;
1086 goto error_return; 1064 goto error_return;
1065 }
1087 1066
1088 /* 1067 /*
1089 * Creates all the items needed to setup an eventpoll file. That is, 1068 * Creates all the items needed to setup an eventpoll file. That is,
1090 * a file structure, and inode and a free file descriptor. 1069 * a file structure and a free file descriptor.
1091 */ 1070 */
1092 error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]", 1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
1093 &eventpoll_fops, ep); 1072 if (fd < 0)
1094 if (error) 1073 ep_free(ep);
1095 goto error_free;
1096 1074
1075error_return:
1097 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1098 current, size, fd)); 1077 current, size, fd));
1099 1078
1100 return fd; 1079 return fd;
1101
1102error_free:
1103 ep_free(ep);
1104error_return:
1105 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1106 current, size, error));
1107 return error;
1108} 1080}
1109 1081
1110/* 1082/*
@@ -1262,7 +1234,7 @@ error_return:
1262 return error; 1234 return error;
1263} 1235}
1264 1236
1265#ifdef TIF_RESTORE_SIGMASK 1237#ifdef HAVE_SET_RESTORE_SIGMASK
1266 1238
1267/* 1239/*
1268 * Implement the event wait interface for the eventpoll file. It is the kernel 1240 * Implement the event wait interface for the eventpoll file. It is the kernel
@@ -1300,7 +1272,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1300 if (error == -EINTR) { 1272 if (error == -EINTR) {
1301 memcpy(&current->saved_sigmask, &sigsaved, 1273 memcpy(&current->saved_sigmask, &sigsaved,
1302 sizeof(sigsaved)); 1274 sizeof(sigsaved));
1303 set_thread_flag(TIF_RESTORE_SIGMASK); 1275 set_restore_sigmask();
1304 } else 1276 } else
1305 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1277 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1306 } 1278 }
@@ -1308,7 +1280,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1308 return error; 1280 return error;
1309} 1281}
1310 1282
1311#endif /* #ifdef TIF_RESTORE_SIGMASK */ 1283#endif /* HAVE_SET_RESTORE_SIGMASK */
1312 1284
1313static int __init eventpoll_init(void) 1285static int __init eventpoll_init(void)
1314{ 1286{
@@ -1330,4 +1302,3 @@ static int __init eventpoll_init(void)
1330 return 0; 1302 return 0;
1331} 1303}
1332fs_initcall(eventpoll_init); 1304fs_initcall(eventpoll_init);
1333
diff --git a/fs/exec.c b/fs/exec.c
index b152029f18f6..aeaa9791d8be 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h>
27#include <linux/mman.h> 28#include <linux/mman.h>
28#include <linux/a.out.h> 29#include <linux/a.out.h>
29#include <linux/stat.h> 30#include <linux/stat.h>
@@ -735,6 +736,7 @@ static int exec_mmap(struct mm_struct *mm)
735 tsk->active_mm = mm; 736 tsk->active_mm = mm;
736 activate_mm(active_mm, mm); 737 activate_mm(active_mm, mm);
737 task_unlock(tsk); 738 task_unlock(tsk);
739 mm_update_next_owner(mm);
738 arch_pick_mmap_layout(mm); 740 arch_pick_mmap_layout(mm);
739 if (old_mm) { 741 if (old_mm) {
740 up_read(&old_mm->mmap_sem); 742 up_read(&old_mm->mmap_sem);
@@ -765,9 +767,7 @@ static int de_thread(struct task_struct *tsk)
765 767
766 /* 768 /*
767 * Kill all other threads in the thread group. 769 * Kill all other threads in the thread group.
768 * We must hold tasklist_lock to call zap_other_threads.
769 */ 770 */
770 read_lock(&tasklist_lock);
771 spin_lock_irq(lock); 771 spin_lock_irq(lock);
772 if (signal_group_exit(sig)) { 772 if (signal_group_exit(sig)) {
773 /* 773 /*
@@ -775,21 +775,10 @@ static int de_thread(struct task_struct *tsk)
775 * return so that the signal is processed. 775 * return so that the signal is processed.
776 */ 776 */
777 spin_unlock_irq(lock); 777 spin_unlock_irq(lock);
778 read_unlock(&tasklist_lock);
779 return -EAGAIN; 778 return -EAGAIN;
780 } 779 }
781
782 /*
783 * child_reaper ignores SIGKILL, change it now.
784 * Reparenting needs write_lock on tasklist_lock,
785 * so it is safe to do it under read_lock.
786 */
787 if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
788 task_active_pid_ns(tsk)->child_reaper = tsk;
789
790 sig->group_exit_task = tsk; 780 sig->group_exit_task = tsk;
791 zap_other_threads(tsk); 781 zap_other_threads(tsk);
792 read_unlock(&tasklist_lock);
793 782
794 /* Account for the thread group leader hanging around: */ 783 /* Account for the thread group leader hanging around: */
795 count = thread_group_leader(tsk) ? 1 : 2; 784 count = thread_group_leader(tsk) ? 1 : 2;
@@ -810,7 +799,7 @@ static int de_thread(struct task_struct *tsk)
810 if (!thread_group_leader(tsk)) { 799 if (!thread_group_leader(tsk)) {
811 leader = tsk->group_leader; 800 leader = tsk->group_leader;
812 801
813 sig->notify_count = -1; 802 sig->notify_count = -1; /* for exit_notify() */
814 for (;;) { 803 for (;;) {
815 write_lock_irq(&tasklist_lock); 804 write_lock_irq(&tasklist_lock);
816 if (likely(leader->exit_state)) 805 if (likely(leader->exit_state))
@@ -820,6 +809,8 @@ static int de_thread(struct task_struct *tsk)
820 schedule(); 809 schedule();
821 } 810 }
822 811
812 if (unlikely(task_child_reaper(tsk) == leader))
813 task_active_pid_ns(tsk)->child_reaper = tsk;
823 /* 814 /*
824 * The only record we have of the real-time age of a 815 * The only record we have of the real-time age of a
825 * process, regardless of execs it's done, is start_time. 816 * process, regardless of execs it's done, is start_time.
@@ -963,6 +954,8 @@ int flush_old_exec(struct linux_binprm * bprm)
963 if (retval) 954 if (retval)
964 goto out; 955 goto out;
965 956
957 set_mm_exe_file(bprm->mm, bprm->file);
958
966 /* 959 /*
967 * Release all of the old mmap stuff 960 * Release all of the old mmap stuff
968 */ 961 */
@@ -1268,7 +1261,6 @@ int do_execve(char * filename,
1268{ 1261{
1269 struct linux_binprm *bprm; 1262 struct linux_binprm *bprm;
1270 struct file *file; 1263 struct file *file;
1271 unsigned long env_p;
1272 struct files_struct *displaced; 1264 struct files_struct *displaced;
1273 int retval; 1265 int retval;
1274 1266
@@ -1321,11 +1313,9 @@ int do_execve(char * filename,
1321 if (retval < 0) 1313 if (retval < 0)
1322 goto out; 1314 goto out;
1323 1315
1324 env_p = bprm->p;
1325 retval = copy_strings(bprm->argc, argv, bprm); 1316 retval = copy_strings(bprm->argc, argv, bprm);
1326 if (retval < 0) 1317 if (retval < 0)
1327 goto out; 1318 goto out;
1328 bprm->argv_len = env_p - bprm->p;
1329 1319
1330 retval = search_binary_handler(bprm,regs); 1320 retval = search_binary_handler(bprm,regs);
1331 if (retval >= 0) { 1321 if (retval >= 0) {
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 109ab5e44eca..cc91227d3bb8 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
150 if (IS_ERR(ppd)) { 150 if (IS_ERR(ppd)) {
151 err = PTR_ERR(ppd); 151 err = PTR_ERR(ppd);
152 dprintk("%s: get_parent of %ld failed, err %d\n", 152 dprintk("%s: get_parent of %ld failed, err %d\n",
153 __FUNCTION__, pd->d_inode->i_ino, err); 153 __func__, pd->d_inode->i_ino, err);
154 dput(pd); 154 dput(pd);
155 break; 155 break;
156 } 156 }
157 157
158 dprintk("%s: find name of %lu in %lu\n", __FUNCTION__, 158 dprintk("%s: find name of %lu in %lu\n", __func__,
159 pd->d_inode->i_ino, ppd->d_inode->i_ino); 159 pd->d_inode->i_ino, ppd->d_inode->i_ino);
160 err = exportfs_get_name(mnt, ppd, nbuf, pd); 160 err = exportfs_get_name(mnt, ppd, nbuf, pd);
161 if (err) { 161 if (err) {
@@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
168 continue; 168 continue;
169 break; 169 break;
170 } 170 }
171 dprintk("%s: found name: %s\n", __FUNCTION__, nbuf); 171 dprintk("%s: found name: %s\n", __func__, nbuf);
172 mutex_lock(&ppd->d_inode->i_mutex); 172 mutex_lock(&ppd->d_inode->i_mutex);
173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); 173 npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
174 mutex_unlock(&ppd->d_inode->i_mutex); 174 mutex_unlock(&ppd->d_inode->i_mutex);
175 if (IS_ERR(npd)) { 175 if (IS_ERR(npd)) {
176 err = PTR_ERR(npd); 176 err = PTR_ERR(npd);
177 dprintk("%s: lookup failed: %d\n", 177 dprintk("%s: lookup failed: %d\n",
178 __FUNCTION__, err); 178 __func__, err);
179 dput(ppd); 179 dput(ppd);
180 dput(pd); 180 dput(pd);
181 break; 181 break;
@@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
188 if (npd == pd) 188 if (npd == pd)
189 noprogress = 0; 189 noprogress = 0;
190 else 190 else
191 printk("%s: npd != pd\n", __FUNCTION__); 191 printk("%s: npd != pd\n", __func__);
192 dput(npd); 192 dput(npd);
193 dput(ppd); 193 dput(ppd);
194 if (IS_ROOT(pd)) { 194 if (IS_ROOT(pd)) {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cc47b76091bf..6ae4ecf3ce40 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
1261 new_i_size = pos + copied; 1261 new_i_size = pos + copied;
1262 if (new_i_size > EXT3_I(inode)->i_disksize) 1262 if (new_i_size > EXT3_I(inode)->i_disksize)
1263 EXT3_I(inode)->i_disksize = new_i_size; 1263 EXT3_I(inode)->i_disksize = new_i_size;
1264 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1264 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1265 page, fsdata); 1265 page, fsdata);
1266 if (copied < 0) 1266 copied = ret2;
1267 ret = copied; 1267 if (ret2 < 0)
1268 ret = ret2;
1268 } 1269 }
1269 ret2 = ext3_journal_stop(handle); 1270 ret2 = ext3_journal_stop(handle);
1270 if (!ret) 1271 if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
1289 if (new_i_size > EXT3_I(inode)->i_disksize) 1290 if (new_i_size > EXT3_I(inode)->i_disksize)
1290 EXT3_I(inode)->i_disksize = new_i_size; 1291 EXT3_I(inode)->i_disksize = new_i_size;
1291 1292
1292 copied = ext3_generic_write_end(file, mapping, pos, len, copied, 1293 ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
1293 page, fsdata); 1294 page, fsdata);
1294 if (copied < 0) 1295 copied = ret2;
1295 ret = copied; 1296 if (ret2 < 0)
1297 ret = ret2;
1296 1298
1297 ret2 = ext3_journal_stop(handle); 1299 ret2 = ext3_journal_stop(handle);
1298 if (!ret) 1300 if (!ret)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5d..3c8dab880d91 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15#include "acl.h" 15#include "acl.h"
16 16
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
37 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
38 if (count == 0) 38 if (count == 0)
39 return NULL; 39 return NULL;
40 acl = posix_acl_alloc(count, GFP_KERNEL); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
91 91
92 *size = ext4_acl_size(acl->a_count); 92 *size = ext4_acl_size(acl->a_count);
93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * 93 ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
94 sizeof(ext4_acl_entry), GFP_KERNEL); 94 sizeof(ext4_acl_entry), GFP_NOFS);
95 if (!ext_acl) 95 if (!ext_acl)
96 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
190 value = kmalloc(retval, GFP_KERNEL); 190 value = kmalloc(retval, GFP_NOFS);
191 if (!value) 191 if (!value)
192 return ERR_PTR(-ENOMEM); 192 return ERR_PTR(-ENOMEM);
193 retval = ext4_xattr_get(inode, name_index, "", value, retval); 193 retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
335 if (error) 335 if (error)
336 goto cleanup; 336 goto cleanup;
337 } 337 }
338 clone = posix_acl_clone(acl, GFP_KERNEL); 338 clone = posix_acl_clone(acl, GFP_NOFS);
339 error = -ENOMEM; 339 error = -ENOMEM;
340 if (!clone) 340 if (!clone)
341 goto cleanup; 341 goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3dd..da994374ec3b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/quotaops.h> 18#include <linux/quotaops.h>
21#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
22 20#include "ext4.h"
21#include "ext4_jbd2.h"
23#include "group.h" 22#include "group.h"
23
24/* 24/*
25 * balloc.c contains the blocks allocation and deallocation routines 25 * balloc.c contains the blocks allocation and deallocation routines
26 */ 26 */
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 48unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
49 ext4_group_t block_group, struct ext4_group_desc *gdp) 49 ext4_group_t block_group, struct ext4_group_desc *gdp)
50{ 50{
51 unsigned long start;
52 int bit, bit_max; 51 int bit, bit_max;
53 unsigned free_blocks, group_blocks; 52 unsigned free_blocks, group_blocks;
54 struct ext4_sb_info *sbi = EXT4_SB(sb); 53 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
59 /* If checksum is bad mark all blocks used to prevent allocation 58 /* If checksum is bad mark all blocks used to prevent allocation
60 * essentially implementing a per-group read-only flag. */ 59 * essentially implementing a per-group read-only flag. */
61 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 60 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
62 ext4_error(sb, __FUNCTION__, 61 ext4_error(sb, __func__,
63 "Checksum bad for group %lu\n", block_group); 62 "Checksum bad for group %lu\n", block_group);
64 gdp->bg_free_blocks_count = 0; 63 gdp->bg_free_blocks_count = 0;
65 gdp->bg_free_inodes_count = 0; 64 gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
106 free_blocks = group_blocks - bit_max; 105 free_blocks = group_blocks - bit_max;
107 106
108 if (bh) { 107 if (bh) {
108 ext4_fsblk_t start;
109
109 for (bit = 0; bit < bit_max; bit++) 110 for (bit = 0; bit < bit_max; bit++)
110 ext4_set_bit(bit, bh->b_data); 111 ext4_set_bit(bit, bh->b_data);
111 112
112 start = block_group * EXT4_BLOCKS_PER_GROUP(sb) + 113 start = ext4_group_first_block_no(sb, block_group);
113 le32_to_cpu(sbi->s_es->s_first_data_block);
114 114
115 /* Set bits for block and inode bitmaps, and inode table */ 115 /* Set bits for block and inode bitmaps, and inode table */
116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); 116 ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
235 return 1; 235 return 1;
236 236
237err_out: 237err_out:
238 ext4_error(sb, __FUNCTION__, 238 ext4_error(sb, __func__,
239 "Invalid block bitmap - " 239 "Invalid block bitmap - "
240 "block_group = %d, block = %llu", 240 "block_group = %d, block = %llu",
241 block_group, bitmap_blk); 241 block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
264 bitmap_blk = ext4_block_bitmap(sb, desc); 264 bitmap_blk = ext4_block_bitmap(sb, desc);
265 bh = sb_getblk(sb, bitmap_blk); 265 bh = sb_getblk(sb, bitmap_blk);
266 if (unlikely(!bh)) { 266 if (unlikely(!bh)) {
267 ext4_error(sb, __FUNCTION__, 267 ext4_error(sb, __func__,
268 "Cannot read block bitmap - " 268 "Cannot read block bitmap - "
269 "block_group = %d, block_bitmap = %llu", 269 "block_group = %d, block_bitmap = %llu",
270 (int)block_group, (unsigned long long)bitmap_blk); 270 (int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
281 } 281 }
282 if (bh_submit_read(bh) < 0) { 282 if (bh_submit_read(bh) < 0) {
283 put_bh(bh); 283 put_bh(bh);
284 ext4_error(sb, __FUNCTION__, 284 ext4_error(sb, __func__,
285 "Cannot read block bitmap - " 285 "Cannot read block bitmap - "
286 "block_group = %d, block_bitmap = %llu", 286 "block_group = %d, block_bitmap = %llu",
287 (int)block_group, (unsigned long long)bitmap_blk); 287 (int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ restart:
360 BUG(); 360 BUG();
361} 361}
362#define rsv_window_dump(root, verbose) \ 362#define rsv_window_dump(root, verbose) \
363 __rsv_window_dump((root), (verbose), __FUNCTION__) 363 __rsv_window_dump((root), (verbose), __func__)
364#else 364#else
365#define rsv_window_dump(root, verbose) do {} while (0) 365#define rsv_window_dump(root, verbose) do {} while (0)
366#endif 366#endif
@@ -740,7 +740,7 @@ do_more:
740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 740 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
741 bit + i, bitmap_bh->b_data)) { 741 bit + i, bitmap_bh->b_data)) {
742 jbd_unlock_bh_state(bitmap_bh); 742 jbd_unlock_bh_state(bitmap_bh);
743 ext4_error(sb, __FUNCTION__, 743 ext4_error(sb, __func__,
744 "bit already cleared for block %llu", 744 "bit already cleared for block %llu",
745 (ext4_fsblk_t)(block + i)); 745 (ext4_fsblk_t)(block + i));
746 jbd_lock_bh_state(bitmap_bh); 746 jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
752 jbd_unlock_bh_state(bitmap_bh); 752 jbd_unlock_bh_state(bitmap_bh);
753 753
754 spin_lock(sb_bgl_lock(sbi, block_group)); 754 spin_lock(sb_bgl_lock(sbi, block_group));
755 desc->bg_free_blocks_count = 755 le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
756 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
757 group_freed);
758 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 756 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
759 spin_unlock(sb_bgl_lock(sbi, block_group)); 757 spin_unlock(sb_bgl_lock(sbi, block_group));
760 percpu_counter_add(&sbi->s_freeblocks_counter, count); 758 percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1798,7 +1796,7 @@ allocated:
1798 if (ext4_test_bit(grp_alloc_blk+i, 1796 if (ext4_test_bit(grp_alloc_blk+i,
1799 bh2jh(bitmap_bh)->b_committed_data)) { 1797 bh2jh(bitmap_bh)->b_committed_data)) {
1800 printk("%s: block was unexpectedly set in " 1798 printk("%s: block was unexpectedly set in "
1801 "b_committed_data\n", __FUNCTION__); 1799 "b_committed_data\n", __func__);
1802 } 1800 }
1803 } 1801 }
1804 } 1802 }
@@ -1823,8 +1821,7 @@ allocated:
1823 spin_lock(sb_bgl_lock(sbi, group_no)); 1821 spin_lock(sb_bgl_lock(sbi, group_no));
1824 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) 1822 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1825 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 1823 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1826 gdp->bg_free_blocks_count = 1824 le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1827 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
1828 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); 1825 gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 spin_unlock(sb_bgl_lock(sbi, group_no)); 1826 spin_unlock(sb_bgl_lock(sbi, group_no));
1830 percpu_counter_sub(&sbi->s_freeblocks_counter, num); 1827 percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79d..d37ea6750454 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
9 9
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13 13
14#ifdef EXT4FS_DEBUG 14#ifdef EXT4FS_DEBUG
15 15
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa6..2bf0331ea194 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/jbd2.h> 25#include <linux/jbd2.h>
26#include <linux/ext4_fs.h>
27#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
28#include <linux/slab.h> 27#include <linux/slab.h>
29#include <linux/rbtree.h> 28#include <linux/rbtree.h>
29#include "ext4.h"
30 30
31static unsigned char ext4_filetype_table[] = { 31static unsigned char ext4_filetype_table[] = {
32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 32 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
42 .llseek = generic_file_llseek, 42 .llseek = generic_file_llseek,
43 .read = generic_read_dir, 43 .read = generic_read_dir,
44 .readdir = ext4_readdir, /* we take BKL. needed?*/ 44 .readdir = ext4_readdir, /* we take BKL. needed?*/
45 .ioctl = ext4_ioctl, /* BKL held */ 45 .unlocked_ioctl = ext4_ioctl,
46#ifdef CONFIG_COMPAT 46#ifdef CONFIG_COMPAT
47 .compat_ioctl = ext4_compat_ioctl, 47 .compat_ioctl = ext4_compat_ioctl,
48#endif 48#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
new file mode 100644
index 000000000000..8158083f7ac0
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +1,1205 @@
1/*
2 * ext4.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_H
17#define _EXT4_H
18
19#include <linux/types.h>
20#include <linux/blkdev.h>
21#include <linux/magic.h>
22#include "ext4_i.h"
23
24/*
25 * The second extended filesystem constants/structures
26 */
27
28/*
29 * Define EXT4FS_DEBUG to produce debug messages
30 */
31#undef EXT4FS_DEBUG
32
33/*
34 * Define EXT4_RESERVATION to reserve data blocks for expanding files
35 */
36#define EXT4_DEFAULT_RESERVE_BLOCKS 8
37/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
38#define EXT4_MAX_RESERVE_BLOCKS 1027
39#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
40
41/*
42 * Debug code
43 */
44#ifdef EXT4FS_DEBUG
45#define ext4_debug(f, a...) \
46 do { \
47 printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
48 __FILE__, __LINE__, __FUNCTION__); \
49 printk (KERN_DEBUG f, ## a); \
50 } while (0)
51#else
52#define ext4_debug(f, a...) do {} while (0)
53#endif
54
55#define EXT4_MULTIBLOCK_ALLOCATOR 1
56
57/* prefer goal again. length */
58#define EXT4_MB_HINT_MERGE 1
59/* blocks already reserved */
60#define EXT4_MB_HINT_RESERVED 2
61/* metadata is being allocated */
62#define EXT4_MB_HINT_METADATA 4
63/* first blocks in the file */
64#define EXT4_MB_HINT_FIRST 8
65/* search for the best chunk */
66#define EXT4_MB_HINT_BEST 16
67/* data is being allocated */
68#define EXT4_MB_HINT_DATA 32
69/* don't preallocate (for tails) */
70#define EXT4_MB_HINT_NOPREALLOC 64
71/* allocate for locality group */
72#define EXT4_MB_HINT_GROUP_ALLOC 128
73/* allocate goal blocks or none */
74#define EXT4_MB_HINT_GOAL_ONLY 256
75/* goal is meaningful */
76#define EXT4_MB_HINT_TRY_GOAL 512
77
78struct ext4_allocation_request {
79 /* target inode for block we're allocating */
80 struct inode *inode;
81 /* logical block in target inode */
82 ext4_lblk_t logical;
83 /* phys. target (a hint) */
84 ext4_fsblk_t goal;
85 /* the closest logical allocated block to the left */
86 ext4_lblk_t lleft;
87 /* phys. block for ^^^ */
88 ext4_fsblk_t pleft;
89 /* the closest logical allocated block to the right */
90 ext4_lblk_t lright;
91 /* phys. block for ^^^ */
92 ext4_fsblk_t pright;
93 /* how many blocks we want to allocate */
94 unsigned long len;
95 /* flags. see above EXT4_MB_HINT_* */
96 unsigned long flags;
97};
98
99/*
100 * Special inodes numbers
101 */
102#define EXT4_BAD_INO 1 /* Bad blocks inode */
103#define EXT4_ROOT_INO 2 /* Root inode */
104#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
105#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
106#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
107#define EXT4_JOURNAL_INO 8 /* Journal inode */
108
109/* First non-reserved inode for old ext4 filesystems */
110#define EXT4_GOOD_OLD_FIRST_INO 11
111
112/*
113 * Maximal count of links to a file
114 */
115#define EXT4_LINK_MAX 65000
116
117/*
118 * Macro-instructions used to manage several block sizes
119 */
120#define EXT4_MIN_BLOCK_SIZE 1024
121#define EXT4_MAX_BLOCK_SIZE 65536
122#define EXT4_MIN_BLOCK_LOG_SIZE 10
123#ifdef __KERNEL__
124# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
125#else
126# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
127#endif
128#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
129#ifdef __KERNEL__
130# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
131#else
132# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
133#endif
134#ifdef __KERNEL__
135#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
136#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
137#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
138#else
139#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
140 EXT4_GOOD_OLD_INODE_SIZE : \
141 (s)->s_inode_size)
142#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
143 EXT4_GOOD_OLD_FIRST_INO : \
144 (s)->s_first_ino)
145#endif
146#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
147
148/*
149 * Structure of a blocks group descriptor
150 */
151struct ext4_group_desc
152{
153 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
154 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
155 __le32 bg_inode_table_lo; /* Inodes table block */
156 __le16 bg_free_blocks_count; /* Free blocks count */
157 __le16 bg_free_inodes_count; /* Free inodes count */
158 __le16 bg_used_dirs_count; /* Directories count */
159 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
160 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
161 __le16 bg_itable_unused; /* Unused inodes count */
162 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
163 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
164 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
165 __le32 bg_inode_table_hi; /* Inodes table block MSB */
166 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
167 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
168 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
169 __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
170 __u32 bg_reserved2[3];
171};
172
173#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
174#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
175#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
176
177#ifdef __KERNEL__
178#include "ext4_sb.h"
179#endif
180/*
181 * Macro-instructions used to manage group descriptors
182 */
183#define EXT4_MIN_DESC_SIZE 32
184#define EXT4_MIN_DESC_SIZE_64BIT 64
185#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
186#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
187#ifdef __KERNEL__
188# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
189# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
190# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
191# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
192#else
193# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
194# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
195# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
196#endif
197
198/*
199 * Constants relative to the data blocks
200 */
201#define EXT4_NDIR_BLOCKS 12
202#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
203#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
204#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
205#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
206
207/*
208 * Inode flags
209 */
210#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
211#define EXT4_UNRM_FL 0x00000002 /* Undelete */
212#define EXT4_COMPR_FL 0x00000004 /* Compress file */
213#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
214#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
215#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
216#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
217#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
218/* Reserved for compression usage... */
219#define EXT4_DIRTY_FL 0x00000100
220#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
221#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
222#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
223/* End compression flags --- maybe not all used */
224#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
225#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
226#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
227#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
228#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
229#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
230#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
231#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
232#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
233#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
234
235#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
236#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
237
238/*
239 * Inode dynamic state flags
240 */
241#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
242#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
243#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
244#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
245
246/* Used to pass group descriptor data when online resize is done */
247struct ext4_new_group_input {
248 __u32 group; /* Group number for this data */
249 __u64 block_bitmap; /* Absolute block number of block bitmap */
250 __u64 inode_bitmap; /* Absolute block number of inode bitmap */
251 __u64 inode_table; /* Absolute block number of inode table start */
252 __u32 blocks_count; /* Total number of blocks in this group */
253 __u16 reserved_blocks; /* Number of reserved blocks in this group */
254 __u16 unused;
255};
256
257/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
258struct ext4_new_group_data {
259 __u32 group;
260 __u64 block_bitmap;
261 __u64 inode_bitmap;
262 __u64 inode_table;
263 __u32 blocks_count;
264 __u16 reserved_blocks;
265 __u16 unused;
266 __u32 free_blocks_count;
267};
268
269/*
270 * Following is used by preallocation code to tell get_blocks() that we
271 * want uninitialzed extents.
272 */
273#define EXT4_CREATE_UNINITIALIZED_EXT 2
274
275/*
276 * ioctl commands
277 */
278#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
279#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
280#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
281#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
282#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
283#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
284#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
285#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
286#ifdef CONFIG_JBD2_DEBUG
287#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
288#endif
289#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
290#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
291#define EXT4_IOC_MIGRATE _IO('f', 7)
292
293/*
294 * ioctl commands in 32 bit emulation
295 */
296#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
297#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
298#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
299#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
300#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
301#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
302#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
303#ifdef CONFIG_JBD2_DEBUG
304#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
305#endif
306#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
307#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
308
309
310/*
311 * Mount options
312 */
313struct ext4_mount_options {
314 unsigned long s_mount_opt;
315 uid_t s_resuid;
316 gid_t s_resgid;
317 unsigned long s_commit_interval;
318#ifdef CONFIG_QUOTA
319 int s_jquota_fmt;
320 char *s_qf_names[MAXQUOTAS];
321#endif
322};
323
324/*
325 * Structure of an inode on the disk
326 */
327struct ext4_inode {
328 __le16 i_mode; /* File mode */
329 __le16 i_uid; /* Low 16 bits of Owner Uid */
330 __le32 i_size_lo; /* Size in bytes */
331 __le32 i_atime; /* Access time */
332 __le32 i_ctime; /* Inode Change time */
333 __le32 i_mtime; /* Modification time */
334 __le32 i_dtime; /* Deletion Time */
335 __le16 i_gid; /* Low 16 bits of Group Id */
336 __le16 i_links_count; /* Links count */
337 __le32 i_blocks_lo; /* Blocks count */
338 __le32 i_flags; /* File flags */
339 union {
340 struct {
341 __le32 l_i_version;
342 } linux1;
343 struct {
344 __u32 h_i_translator;
345 } hurd1;
346 struct {
347 __u32 m_i_reserved1;
348 } masix1;
349 } osd1; /* OS dependent 1 */
350 __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
351 __le32 i_generation; /* File version (for NFS) */
352 __le32 i_file_acl_lo; /* File ACL */
353 __le32 i_size_high;
354 __le32 i_obso_faddr; /* Obsoleted fragment address */
355 union {
356 struct {
357 __le16 l_i_blocks_high; /* were l_i_reserved1 */
358 __le16 l_i_file_acl_high;
359 __le16 l_i_uid_high; /* these 2 fields */
360 __le16 l_i_gid_high; /* were reserved2[0] */
361 __u32 l_i_reserved2;
362 } linux2;
363 struct {
364 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
365 __u16 h_i_mode_high;
366 __u16 h_i_uid_high;
367 __u16 h_i_gid_high;
368 __u32 h_i_author;
369 } hurd2;
370 struct {
371 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
372 __le16 m_i_file_acl_high;
373 __u32 m_i_reserved2[2];
374 } masix2;
375 } osd2; /* OS dependent 2 */
376 __le16 i_extra_isize;
377 __le16 i_pad1;
378 __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
379 __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
380 __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
381 __le32 i_crtime; /* File Creation time */
382 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
383 __le32 i_version_hi; /* high 32 bits for 64-bit version */
384};
385
386
387#define EXT4_EPOCH_BITS 2
388#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
389#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
390
391/*
392 * Extended fields will fit into an inode if the filesystem was formatted
393 * with large inodes (-I 256 or larger) and there are not currently any EAs
394 * consuming all of the available space. For new inodes we always reserve
395 * enough space for the kernel's known extended fields, but for inodes
396 * created with an old kernel this might not have been the case. None of
397 * the extended inode fields is critical for correct filesystem operation.
398 * This macro checks if a certain field fits in the inode. Note that
399 * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
400 */
401#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
402 ((offsetof(typeof(*ext4_inode), field) + \
403 sizeof((ext4_inode)->field)) \
404 <= (EXT4_GOOD_OLD_INODE_SIZE + \
405 (einode)->i_extra_isize)) \
406
407static inline __le32 ext4_encode_extra_time(struct timespec *time)
408{
409 return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
410 time->tv_sec >> 32 : 0) |
411 ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
412}
413
414static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
415{
416 if (sizeof(time->tv_sec) > 4)
417 time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
418 << 32;
419 time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
420}
421
422#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
423do { \
424 (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
425 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
426 (raw_inode)->xtime ## _extra = \
427 ext4_encode_extra_time(&(inode)->xtime); \
428} while (0)
429
430#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
431do { \
432 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
433 (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
434 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
435 (raw_inode)->xtime ## _extra = \
436 ext4_encode_extra_time(&(einode)->xtime); \
437} while (0)
438
439#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
440do { \
441 (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
442 if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
443 ext4_decode_extra_time(&(inode)->xtime, \
444 raw_inode->xtime ## _extra); \
445} while (0)
446
447#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
448do { \
449 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
450 (einode)->xtime.tv_sec = \
451 (signed)le32_to_cpu((raw_inode)->xtime); \
452 if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
453 ext4_decode_extra_time(&(einode)->xtime, \
454 raw_inode->xtime ## _extra); \
455} while (0)
456
457#define i_disk_version osd1.linux1.l_i_version
458
459#if defined(__KERNEL__) || defined(__linux__)
460#define i_reserved1 osd1.linux1.l_i_reserved1
461#define i_file_acl_high osd2.linux2.l_i_file_acl_high
462#define i_blocks_high osd2.linux2.l_i_blocks_high
463#define i_uid_low i_uid
464#define i_gid_low i_gid
465#define i_uid_high osd2.linux2.l_i_uid_high
466#define i_gid_high osd2.linux2.l_i_gid_high
467#define i_reserved2 osd2.linux2.l_i_reserved2
468
469#elif defined(__GNU__)
470
471#define i_translator osd1.hurd1.h_i_translator
472#define i_uid_high osd2.hurd2.h_i_uid_high
473#define i_gid_high osd2.hurd2.h_i_gid_high
474#define i_author osd2.hurd2.h_i_author
475
476#elif defined(__masix__)
477
478#define i_reserved1 osd1.masix1.m_i_reserved1
479#define i_file_acl_high osd2.masix2.m_i_file_acl_high
480#define i_reserved2 osd2.masix2.m_i_reserved2
481
482#endif /* defined(__KERNEL__) || defined(__linux__) */
483
484/*
485 * File system states
486 */
487#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
488#define EXT4_ERROR_FS 0x0002 /* Errors detected */
489#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
490
491/*
492 * Misc. filesystem flags
493 */
494#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
495#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
496#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
497
498/*
499 * Mount flags
500 */
501#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
502#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
503#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
504#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
505#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
506#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
507#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
508#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
509#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
510#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
511#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
512#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
513#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
514#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
515#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
516#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
517#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
518#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
519#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
520#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
521#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
522#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
523#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
524#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
525#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
526#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
527#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
528#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
529#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
530/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
531#ifndef _LINUX_EXT2_FS_H
532#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
533#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
534#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
535 EXT4_MOUNT_##opt)
536#else
537#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
538#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
539#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
540#endif
541
542#define ext4_set_bit ext2_set_bit
543#define ext4_set_bit_atomic ext2_set_bit_atomic
544#define ext4_clear_bit ext2_clear_bit
545#define ext4_clear_bit_atomic ext2_clear_bit_atomic
546#define ext4_test_bit ext2_test_bit
547#define ext4_find_first_zero_bit ext2_find_first_zero_bit
548#define ext4_find_next_zero_bit ext2_find_next_zero_bit
549#define ext4_find_next_bit ext2_find_next_bit
550
551/*
552 * Maximal mount counts between two filesystem checks
553 */
554#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
555#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
556
557/*
558 * Behaviour when detecting errors
559 */
560#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
561#define EXT4_ERRORS_RO 2 /* Remount fs read-only */
562#define EXT4_ERRORS_PANIC 3 /* Panic */
563#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
564
565/*
566 * Structure of the super block
567 */
568struct ext4_super_block {
569/*00*/ __le32 s_inodes_count; /* Inodes count */
570 __le32 s_blocks_count_lo; /* Blocks count */
571 __le32 s_r_blocks_count_lo; /* Reserved blocks count */
572 __le32 s_free_blocks_count_lo; /* Free blocks count */
573/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
574 __le32 s_first_data_block; /* First Data Block */
575 __le32 s_log_block_size; /* Block size */
576 __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
577/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
578 __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
579 __le32 s_inodes_per_group; /* # Inodes per group */
580 __le32 s_mtime; /* Mount time */
581/*30*/ __le32 s_wtime; /* Write time */
582 __le16 s_mnt_count; /* Mount count */
583 __le16 s_max_mnt_count; /* Maximal mount count */
584 __le16 s_magic; /* Magic signature */
585 __le16 s_state; /* File system state */
586 __le16 s_errors; /* Behaviour when detecting errors */
587 __le16 s_minor_rev_level; /* minor revision level */
588/*40*/ __le32 s_lastcheck; /* time of last check */
589 __le32 s_checkinterval; /* max. time between checks */
590 __le32 s_creator_os; /* OS */
591 __le32 s_rev_level; /* Revision level */
592/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
593 __le16 s_def_resgid; /* Default gid for reserved blocks */
594 /*
595 * These fields are for EXT4_DYNAMIC_REV superblocks only.
596 *
597 * Note: the difference between the compatible feature set and
598 * the incompatible feature set is that if there is a bit set
599 * in the incompatible feature set that the kernel doesn't
600 * know about, it should refuse to mount the filesystem.
601 *
602 * e2fsck's requirements are more strict; if it doesn't know
603 * about a feature in either the compatible or incompatible
604 * feature set, it must abort and not try to meddle with
605 * things it doesn't understand...
606 */
607 __le32 s_first_ino; /* First non-reserved inode */
608 __le16 s_inode_size; /* size of inode structure */
609 __le16 s_block_group_nr; /* block group # of this superblock */
610 __le32 s_feature_compat; /* compatible feature set */
611/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
612 __le32 s_feature_ro_compat; /* readonly-compatible feature set */
613/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
614/*78*/ char s_volume_name[16]; /* volume name */
615/*88*/ char s_last_mounted[64]; /* directory where last mounted */
616/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
617 /*
618 * Performance hints. Directory preallocation should only
619 * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
620 */
621 __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
622 __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
623 __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
624 /*
625 * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
626 */
627/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
628/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
629 __le32 s_journal_dev; /* device number of journal file */
630 __le32 s_last_orphan; /* start of list of inodes to delete */
631 __le32 s_hash_seed[4]; /* HTREE hash seed */
632 __u8 s_def_hash_version; /* Default hash version to use */
633 __u8 s_reserved_char_pad;
634 __le16 s_desc_size; /* size of group descriptor */
635/*100*/ __le32 s_default_mount_opts;
636 __le32 s_first_meta_bg; /* First metablock block group */
637 __le32 s_mkfs_time; /* When the filesystem was created */
638 __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
639 /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
640/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
641 __le32 s_r_blocks_count_hi; /* Reserved blocks count */
642 __le32 s_free_blocks_count_hi; /* Free blocks count */
643 __le16 s_min_extra_isize; /* All inodes have at least # bytes */
644 __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
645 __le32 s_flags; /* Miscellaneous flags */
646 __le16 s_raid_stride; /* RAID stride */
647 __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
648 __le64 s_mmp_block; /* Block for multi-mount protection */
649 __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
650 __u32 s_reserved[163]; /* Padding to the end of the block */
651};
652
653#ifdef __KERNEL__
654static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
655{
656 return sb->s_fs_info;
657}
658static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
659{
660 return container_of(inode, struct ext4_inode_info, vfs_inode);
661}
662
663static inline struct timespec ext4_current_time(struct inode *inode)
664{
665 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
666 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
667}
668
669
670static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
671{
672 return ino == EXT4_ROOT_INO ||
673 ino == EXT4_JOURNAL_INO ||
674 ino == EXT4_RESIZE_INO ||
675 (ino >= EXT4_FIRST_INO(sb) &&
676 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
677}
678#else
679/* Assume that user mode programs are passing in an ext4fs superblock, not
680 * a kernel struct super_block. This will allow us to call the feature-test
681 * macros from user land. */
682#define EXT4_SB(sb) (sb)
683#endif
684
685#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
686
687/*
688 * Codes for operating systems
689 */
690#define EXT4_OS_LINUX 0
691#define EXT4_OS_HURD 1
692#define EXT4_OS_MASIX 2
693#define EXT4_OS_FREEBSD 3
694#define EXT4_OS_LITES 4
695
696/*
697 * Revision levels
698 */
699#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
700#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
701
702#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
703#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
704
705#define EXT4_GOOD_OLD_INODE_SIZE 128
706
707/*
708 * Feature set definitions
709 */
710
711#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
712 ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
713#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
714 ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
715#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
716 ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
717#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
718 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
719#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
720 EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
721#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
722 EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
723#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
724 EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
725#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
726 EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
727#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
728 EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
729
730#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
731#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
732#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
733#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
734#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
735#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
736
737#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
738#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
739#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
740#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
741#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
742#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
743#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
744
745#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
746#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
747#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
748#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
749#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
750#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
751#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
752#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
753#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
754
755#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
756#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
757 EXT4_FEATURE_INCOMPAT_RECOVER| \
758 EXT4_FEATURE_INCOMPAT_META_BG| \
759 EXT4_FEATURE_INCOMPAT_EXTENTS| \
760 EXT4_FEATURE_INCOMPAT_64BIT| \
761 EXT4_FEATURE_INCOMPAT_FLEX_BG)
762#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
763 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
764 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
765 EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
766 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
767 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
768 EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
769
770/*
771 * Default values for user and/or group using reserved blocks
772 */
773#define EXT4_DEF_RESUID 0
774#define EXT4_DEF_RESGID 0
775
776/*
777 * Default mount options
778 */
779#define EXT4_DEFM_DEBUG 0x0001
780#define EXT4_DEFM_BSDGROUPS 0x0002
781#define EXT4_DEFM_XATTR_USER 0x0004
782#define EXT4_DEFM_ACL 0x0008
783#define EXT4_DEFM_UID16 0x0010
784#define EXT4_DEFM_JMODE 0x0060
785#define EXT4_DEFM_JMODE_DATA 0x0020
786#define EXT4_DEFM_JMODE_ORDERED 0x0040
787#define EXT4_DEFM_JMODE_WBACK 0x0060
788
789/*
790 * Structure of a directory entry
791 */
792#define EXT4_NAME_LEN 255
793
794struct ext4_dir_entry {
795 __le32 inode; /* Inode number */
796 __le16 rec_len; /* Directory entry length */
797 __le16 name_len; /* Name length */
798 char name[EXT4_NAME_LEN]; /* File name */
799};
800
801/*
802 * The new version of the directory entry. Since EXT4 structures are
803 * stored in intel byte order, and the name_len field could never be
804 * bigger than 255 chars, it's safe to reclaim the extra byte for the
805 * file_type field.
806 */
807struct ext4_dir_entry_2 {
808 __le32 inode; /* Inode number */
809 __le16 rec_len; /* Directory entry length */
810 __u8 name_len; /* Name length */
811 __u8 file_type;
812 char name[EXT4_NAME_LEN]; /* File name */
813};
814
815/*
816 * Ext4 directory file types. Only the low 3 bits are used. The
817 * other bits are reserved for now.
818 */
819#define EXT4_FT_UNKNOWN 0
820#define EXT4_FT_REG_FILE 1
821#define EXT4_FT_DIR 2
822#define EXT4_FT_CHRDEV 3
823#define EXT4_FT_BLKDEV 4
824#define EXT4_FT_FIFO 5
825#define EXT4_FT_SOCK 6
826#define EXT4_FT_SYMLINK 7
827
828#define EXT4_FT_MAX 8
829
830/*
831 * EXT4_DIR_PAD defines the directory entries boundaries
832 *
833 * NOTE: It must be a multiple of 4
834 */
835#define EXT4_DIR_PAD 4
836#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
837#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
838 ~EXT4_DIR_ROUND)
839#define EXT4_MAX_REC_LEN ((1<<16)-1)
840
841static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
842{
843 unsigned len = le16_to_cpu(dlen);
844
845 if (len == EXT4_MAX_REC_LEN)
846 return 1 << 16;
847 return len;
848}
849
850static inline __le16 ext4_rec_len_to_disk(unsigned len)
851{
852 if (len == (1 << 16))
853 return cpu_to_le16(EXT4_MAX_REC_LEN);
854 else if (len > (1 << 16))
855 BUG();
856 return cpu_to_le16(len);
857}
858
859/*
860 * Hash Tree Directory indexing
861 * (c) Daniel Phillips, 2001
862 */
863
864#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \
865 EXT4_FEATURE_COMPAT_DIR_INDEX) && \
866 (EXT4_I(dir)->i_flags & EXT4_INDEX_FL))
867#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
868#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
869
870/* Legal values for the dx_root hash_version field: */
871
872#define DX_HASH_LEGACY 0
873#define DX_HASH_HALF_MD4 1
874#define DX_HASH_TEA 2
875
876#ifdef __KERNEL__
877
878/* hash info structure used by the directory hash */
879struct dx_hash_info
880{
881 u32 hash;
882 u32 minor_hash;
883 int hash_version;
884 u32 *seed;
885};
886
887#define EXT4_HTREE_EOF 0x7fffffff
888
889/*
890 * Control parameters used by ext4_htree_next_block
891 */
892#define HASH_NB_ALWAYS 1
893
894
895/*
896 * Describe an inode's exact location on disk and in memory
897 */
898struct ext4_iloc
899{
900 struct buffer_head *bh;
901 unsigned long offset;
902 ext4_group_t block_group;
903};
904
905static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
906{
907 return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
908}
909
910/*
911 * This structure is stuffed into the struct file's private_data field
912 * for directories. It is where we put information so that we can do
913 * readdir operations in hash tree order.
914 */
915struct dir_private_info {
916 struct rb_root root;
917 struct rb_node *curr_node;
918 struct fname *extra_fname;
919 loff_t last_pos;
920 __u32 curr_hash;
921 __u32 curr_minor_hash;
922 __u32 next_hash;
923};
924
925/* calculate the first block number of the group */
926static inline ext4_fsblk_t
927ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
928{
929 return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
930 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
931}
932
933/*
934 * Special error return code only used by dx_probe() and its callers.
935 */
936#define ERR_BAD_DX_DIR -75000
937
938void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
939 unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
940
941/*
942 * Function prototypes
943 */
944
945/*
946 * Ok, these declarations are also in <linux/kernel.h> but none of the
947 * ext4 source programs needs to include it so they are duplicated here.
948 */
949# define NORET_TYPE /**/
950# define ATTRIB_NORET __attribute__((noreturn))
951# define NORET_AND noreturn,
952
953/* balloc.c */
954extern unsigned int ext4_block_group(struct super_block *sb,
955 ext4_fsblk_t blocknr);
956extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
957 ext4_fsblk_t blocknr);
958extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
959extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
960 ext4_group_t group);
961extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
962 ext4_fsblk_t goal, int *errp);
963extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
964 ext4_fsblk_t goal, unsigned long *count, int *errp);
965extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
966 ext4_fsblk_t goal, unsigned long *count, int *errp);
967extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
968 ext4_fsblk_t block, unsigned long count, int metadata);
969extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
970 ext4_fsblk_t block, unsigned long count,
971 unsigned long *pdquot_freed_blocks);
972extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
973extern void ext4_check_blocks_bitmap (struct super_block *);
974extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
975 ext4_group_t block_group,
976 struct buffer_head ** bh);
977extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
978extern void ext4_init_block_alloc_info(struct inode *);
979extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
980
981/* dir.c */
982extern int ext4_check_dir_entry(const char *, struct inode *,
983 struct ext4_dir_entry_2 *,
984 struct buffer_head *, unsigned long);
985extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
986 __u32 minor_hash,
987 struct ext4_dir_entry_2 *dirent);
988extern void ext4_htree_free_dir_info(struct dir_private_info *p);
989
990/* fsync.c */
991extern int ext4_sync_file (struct file *, struct dentry *, int);
992
993/* hash.c */
994extern int ext4fs_dirhash(const char *name, int len, struct
995 dx_hash_info *hinfo);
996
997/* ialloc.c */
998extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
999extern void ext4_free_inode (handle_t *, struct inode *);
1000extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
1001extern unsigned long ext4_count_free_inodes (struct super_block *);
1002extern unsigned long ext4_count_dirs (struct super_block *);
1003extern void ext4_check_inodes_bitmap (struct super_block *);
1004extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
1005
1006/* mballoc.c */
1007extern long ext4_mb_stats;
1008extern long ext4_mb_max_to_scan;
1009extern int ext4_mb_init(struct super_block *, int);
1010extern int ext4_mb_release(struct super_block *);
1011extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
1012 struct ext4_allocation_request *, int *);
1013extern int ext4_mb_reserve_blocks(struct super_block *, int);
1014extern void ext4_mb_discard_inode_preallocations(struct inode *);
1015extern int __init init_ext4_mballoc(void);
1016extern void exit_ext4_mballoc(void);
1017extern void ext4_mb_free_blocks(handle_t *, struct inode *,
1018 unsigned long, unsigned long, int, unsigned long *);
1019
1020
1021/* inode.c */
1022int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1023 struct buffer_head *bh, ext4_fsblk_t blocknr);
1024struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1025 ext4_lblk_t, int, int *);
1026struct buffer_head *ext4_bread(handle_t *, struct inode *,
1027 ext4_lblk_t, int, int *);
1028int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
1029 ext4_lblk_t iblock, unsigned long maxblocks,
1030 struct buffer_head *bh_result,
1031 int create, int extend_disksize);
1032
1033extern struct inode *ext4_iget(struct super_block *, unsigned long);
1034extern int ext4_write_inode (struct inode *, int);
1035extern int ext4_setattr (struct dentry *, struct iattr *);
1036extern void ext4_delete_inode (struct inode *);
1037extern int ext4_sync_inode (handle_t *, struct inode *);
1038extern void ext4_discard_reservation (struct inode *);
1039extern void ext4_dirty_inode(struct inode *);
1040extern int ext4_change_inode_journal_flag(struct inode *, int);
1041extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
1042extern void ext4_truncate (struct inode *);
1043extern void ext4_set_inode_flags(struct inode *);
1044extern void ext4_get_inode_flags(struct ext4_inode_info *);
1045extern void ext4_set_aops(struct inode *inode);
1046extern int ext4_writepage_trans_blocks(struct inode *);
1047extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
1048 struct address_space *mapping, loff_t from);
1049
1050/* ioctl.c */
1051extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
1052extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
1053
1054/* migrate.c */
1055extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
1056 unsigned long);
1057/* namei.c */
1058extern int ext4_orphan_add(handle_t *, struct inode *);
1059extern int ext4_orphan_del(handle_t *, struct inode *);
1060extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1061 __u32 start_minor_hash, __u32 *next_hash);
1062
1063/* resize.c */
1064extern int ext4_group_add(struct super_block *sb,
1065 struct ext4_new_group_data *input);
1066extern int ext4_group_extend(struct super_block *sb,
1067 struct ext4_super_block *es,
1068 ext4_fsblk_t n_blocks_count);
1069
1070/* super.c */
1071extern void ext4_error (struct super_block *, const char *, const char *, ...)
1072 __attribute__ ((format (printf, 3, 4)));
1073extern void __ext4_std_error (struct super_block *, const char *, int);
1074extern void ext4_abort (struct super_block *, const char *, const char *, ...)
1075 __attribute__ ((format (printf, 3, 4)));
1076extern void ext4_warning (struct super_block *, const char *, const char *, ...)
1077 __attribute__ ((format (printf, 3, 4)));
1078extern void ext4_update_dynamic_rev (struct super_block *sb);
1079extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1080 __u32 compat);
1081extern int ext4_update_rocompat_feature(handle_t *handle,
1082 struct super_block *sb, __u32 rocompat);
1083extern int ext4_update_incompat_feature(handle_t *handle,
1084 struct super_block *sb, __u32 incompat);
1085extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
1086 struct ext4_group_desc *bg);
1087extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
1088 struct ext4_group_desc *bg);
1089extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
1090 struct ext4_group_desc *bg);
1091extern void ext4_block_bitmap_set(struct super_block *sb,
1092 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1093extern void ext4_inode_bitmap_set(struct super_block *sb,
1094 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1095extern void ext4_inode_table_set(struct super_block *sb,
1096 struct ext4_group_desc *bg, ext4_fsblk_t blk);
1097
1098static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
1099{
1100 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
1101 le32_to_cpu(es->s_blocks_count_lo);
1102}
1103
1104static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
1105{
1106 return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
1107 le32_to_cpu(es->s_r_blocks_count_lo);
1108}
1109
1110static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
1111{
1112 return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
1113 le32_to_cpu(es->s_free_blocks_count_lo);
1114}
1115
1116static inline void ext4_blocks_count_set(struct ext4_super_block *es,
1117 ext4_fsblk_t blk)
1118{
1119 es->s_blocks_count_lo = cpu_to_le32((u32)blk);
1120 es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
1121}
1122
1123static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
1124 ext4_fsblk_t blk)
1125{
1126 es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
1127 es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
1128}
1129
1130static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
1131 ext4_fsblk_t blk)
1132{
1133 es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
1134 es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
1135}
1136
1137static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
1138{
1139 return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
1140 le32_to_cpu(raw_inode->i_size_lo);
1141}
1142
1143static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
1144{
1145 raw_inode->i_size_lo = cpu_to_le32(i_size);
1146 raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
1147}
1148
1149static inline
1150struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
1151 ext4_group_t group)
1152{
1153 struct ext4_group_info ***grp_info;
1154 long indexv, indexh;
1155 grp_info = EXT4_SB(sb)->s_group_info;
1156 indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
1157 indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
1158 return grp_info[indexv][indexh];
1159}
1160
1161
1162#define ext4_std_error(sb, errno) \
1163do { \
1164 if ((errno)) \
1165 __ext4_std_error((sb), __FUNCTION__, (errno)); \
1166} while (0)
1167
1168/*
1169 * Inodes and files operations
1170 */
1171
1172/* dir.c */
1173extern const struct file_operations ext4_dir_operations;
1174
1175/* file.c */
1176extern const struct inode_operations ext4_file_inode_operations;
1177extern const struct file_operations ext4_file_operations;
1178
1179/* namei.c */
1180extern const struct inode_operations ext4_dir_inode_operations;
1181extern const struct inode_operations ext4_special_inode_operations;
1182
1183/* symlink.c */
1184extern const struct inode_operations ext4_symlink_inode_operations;
1185extern const struct inode_operations ext4_fast_symlink_inode_operations;
1186
1187/* extents.c */
1188extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1189extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1190extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1191 ext4_lblk_t iblock,
1192 unsigned long max_blocks, struct buffer_head *bh_result,
1193 int create, int extend_disksize);
1194extern void ext4_ext_truncate(struct inode *, struct page *);
1195extern void ext4_ext_init(struct super_block *);
1196extern void ext4_ext_release(struct super_block *);
1197extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
1198 loff_t len);
1199extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
1200 sector_t block, unsigned long max_blocks,
1201 struct buffer_head *bh, int create,
1202 int extend_disksize);
1203#endif /* __KERNEL__ */
1204
1205#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
new file mode 100644
index 000000000000..75333b595fab
--- /dev/null
+++ b/fs/ext4/ext4_extents.h
@@ -0,0 +1,232 @@
1/*
2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
3 * Written by Alex Tomas <alex@clusterfs.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public Licens
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
17 */
18
19#ifndef _EXT4_EXTENTS
20#define _EXT4_EXTENTS
21
22#include "ext4.h"
23
24/*
25 * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
26 * becomes very small, so index split, in-depth growing and
27 * other hard changes happen much more often.
28 * This is for debug purposes only.
29 */
30#define AGGRESSIVE_TEST_
31
32/*
33 * With EXTENTS_STATS defined, the number of blocks and extents
34 * are collected in the truncate path. They'll be shown at
35 * umount time.
36 */
37#define EXTENTS_STATS__
38
39/*
40 * If CHECK_BINSEARCH is defined, then the results of the binary search
41 * will also be checked by linear search.
42 */
43#define CHECK_BINSEARCH__
44
45/*
46 * If EXT_DEBUG is defined you can use the 'extdebug' mount option
47 * to get lots of info about what's going on.
48 */
49#define EXT_DEBUG__
50#ifdef EXT_DEBUG
51#define ext_debug(a...) printk(a)
52#else
53#define ext_debug(a...)
54#endif
55
56/*
57 * If EXT_STATS is defined then stats numbers are collected.
58 * These number will be displayed at umount time.
59 */
60#define EXT_STATS_
61
62
63/*
64 * ext4_inode has i_block array (60 bytes total).
65 * The first 12 bytes store ext4_extent_header;
66 * the remainder stores an array of ext4_extent.
67 */
68
69/*
70 * This is the extent on-disk structure.
71 * It's used at the bottom of the tree.
72 */
73struct ext4_extent {
74 __le32 ee_block; /* first logical block extent covers */
75 __le16 ee_len; /* number of blocks covered by extent */
76 __le16 ee_start_hi; /* high 16 bits of physical block */
77 __le32 ee_start_lo; /* low 32 bits of physical block */
78};
79
80/*
81 * This is index on-disk structure.
82 * It's used at all the levels except the bottom.
83 */
84struct ext4_extent_idx {
85 __le32 ei_block; /* index covers logical blocks from 'block' */
86 __le32 ei_leaf_lo; /* pointer to the physical block of the next *
87 * level. leaf or next index could be there */
88 __le16 ei_leaf_hi; /* high 16 bits of physical block */
89 __u16 ei_unused;
90};
91
92/*
93 * Each block (leaves and indexes), even inode-stored has header.
94 */
95struct ext4_extent_header {
96 __le16 eh_magic; /* probably will support different formats */
97 __le16 eh_entries; /* number of valid entries */
98 __le16 eh_max; /* capacity of store in entries */
99 __le16 eh_depth; /* has tree real underlying blocks? */
100 __le32 eh_generation; /* generation of the tree */
101};
102
103#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
104
105/*
106 * Array of ext4_ext_path contains path to some extent.
107 * Creation/lookup routines use it for traversal/splitting/etc.
108 * Truncate uses it to simulate recursive walking.
109 */
110struct ext4_ext_path {
111 ext4_fsblk_t p_block;
112 __u16 p_depth;
113 struct ext4_extent *p_ext;
114 struct ext4_extent_idx *p_idx;
115 struct ext4_extent_header *p_hdr;
116 struct buffer_head *p_bh;
117};
118
119/*
120 * structure for external API
121 */
122
123#define EXT4_EXT_CACHE_NO 0
124#define EXT4_EXT_CACHE_GAP 1
125#define EXT4_EXT_CACHE_EXTENT 2
126
127
128#define EXT_MAX_BLOCK 0xffffffff
129
130/*
131 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
132 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
133 * MSB of ee_len field in the extent datastructure to signify if this
134 * particular extent is an initialized extent or an uninitialized (i.e.
135 * preallocated).
136 * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
137 * uninitialized extent.
138 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
139 * uninitialized one. In other words, if MSB of ee_len is set, it is an
140 * uninitialized extent with only one special scenario when ee_len = 0x8000.
141 * In this case we can not have an uninitialized extent of zero length and
142 * thus we make it as a special case of initialized extent with 0x8000 length.
143 * This way we get better extent-to-group alignment for initialized extents.
144 * Hence, the maximum number of blocks we can have in an *initialized*
145 * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
146 */
147#define EXT_INIT_MAX_LEN (1UL << 15)
148#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
149
150
151#define EXT_FIRST_EXTENT(__hdr__) \
152 ((struct ext4_extent *) (((char *) (__hdr__)) + \
153 sizeof(struct ext4_extent_header)))
154#define EXT_FIRST_INDEX(__hdr__) \
155 ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
156 sizeof(struct ext4_extent_header)))
157#define EXT_HAS_FREE_INDEX(__path__) \
158 (le16_to_cpu((__path__)->p_hdr->eh_entries) \
159 < le16_to_cpu((__path__)->p_hdr->eh_max))
160#define EXT_LAST_EXTENT(__hdr__) \
161 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
162#define EXT_LAST_INDEX(__hdr__) \
163 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
164#define EXT_MAX_EXTENT(__hdr__) \
165 (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
166#define EXT_MAX_INDEX(__hdr__) \
167 (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
168
169static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
170{
171 return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
172}
173
174static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
175{
176 return (struct ext4_extent_header *) bh->b_data;
177}
178
179static inline unsigned short ext_depth(struct inode *inode)
180{
181 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
182}
183
184static inline void ext4_ext_tree_changed(struct inode *inode)
185{
186 EXT4_I(inode)->i_ext_generation++;
187}
188
189static inline void
190ext4_ext_invalidate_cache(struct inode *inode)
191{
192 EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
193}
194
195static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
196{
197 /* We can not have an uninitialized extent of zero length! */
198 BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
199 ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
200}
201
202static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
203{
204 /* Extent with ee_len of 0x8000 is treated as an initialized extent */
205 return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
206}
207
208static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
209{
210 return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
211 le16_to_cpu(ext->ee_len) :
212 (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
213}
214
215extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
216extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
217extern int ext4_extent_tree_init(handle_t *, struct inode *);
218extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
219extern int ext4_ext_try_to_merge(struct inode *inode,
220 struct ext4_ext_path *path,
221 struct ext4_extent *);
222extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
223extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
224extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
225 struct ext4_ext_path *);
226extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
227 ext4_lblk_t *, ext4_fsblk_t *);
228extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
229 ext4_lblk_t *, ext4_fsblk_t *);
230extern void ext4_ext_drop_refs(struct ext4_ext_path *);
231#endif /* _EXT4_EXTENTS */
232
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
new file mode 100644
index 000000000000..26a4ae255d79
--- /dev/null
+++ b/fs/ext4/ext4_i.h
@@ -0,0 +1,167 @@
1/*
2 * ext4_i.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_i.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_I
17#define _EXT4_I
18
19#include <linux/rwsem.h>
20#include <linux/rbtree.h>
21#include <linux/seqlock.h>
22#include <linux/mutex.h>
23
24/* data type for block offset of block group */
25typedef int ext4_grpblk_t;
26
27/* data type for filesystem-wide blocks number */
28typedef unsigned long long ext4_fsblk_t;
29
30/* data type for file logical block number */
31typedef __u32 ext4_lblk_t;
32
33/* data type for block group number */
34typedef unsigned long ext4_group_t;
35
36struct ext4_reserve_window {
37 ext4_fsblk_t _rsv_start; /* First byte reserved */
38 ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
39};
40
41struct ext4_reserve_window_node {
42 struct rb_node rsv_node;
43 __u32 rsv_goal_size;
44 __u32 rsv_alloc_hit;
45 struct ext4_reserve_window rsv_window;
46};
47
48struct ext4_block_alloc_info {
49 /* information about reservation window */
50 struct ext4_reserve_window_node rsv_window_node;
51 /*
52 * was i_next_alloc_block in ext4_inode_info
53 * is the logical (file-relative) number of the
54 * most-recently-allocated block in this file.
55 * We use this for detecting linearly ascending allocation requests.
56 */
57 ext4_lblk_t last_alloc_logical_block;
58 /*
59 * Was i_next_alloc_goal in ext4_inode_info
60 * is the *physical* companion to i_next_alloc_block.
61 * it the physical block number of the block which was most-recentl
62 * allocated to this file. This give us the goal (target) for the next
63 * allocation when we detect linearly ascending requests.
64 */
65 ext4_fsblk_t last_alloc_physical_block;
66};
67
68#define rsv_start rsv_window._rsv_start
69#define rsv_end rsv_window._rsv_end
70
71/*
72 * storage for cached extent
73 */
74struct ext4_ext_cache {
75 ext4_fsblk_t ec_start;
76 ext4_lblk_t ec_block;
77 __u32 ec_len; /* must be 32bit to return holes */
78 __u32 ec_type;
79};
80
81/*
82 * third extended file system inode data in memory
83 */
84struct ext4_inode_info {
85 __le32 i_data[15]; /* unconverted */
86 __u32 i_flags;
87 ext4_fsblk_t i_file_acl;
88 __u32 i_dtime;
89
90 /*
91 * i_block_group is the number of the block group which contains
92 * this file's inode. Constant across the lifetime of the inode,
93 * it is ued for making block allocation decisions - we try to
94 * place a file's data blocks near its inode block, and new inodes
95 * near to their parent directory's inode.
96 */
97 ext4_group_t i_block_group;
98 __u32 i_state; /* Dynamic state flags for ext4 */
99
100 /* block reservation info */
101 struct ext4_block_alloc_info *i_block_alloc_info;
102
103 ext4_lblk_t i_dir_start_lookup;
104#ifdef CONFIG_EXT4DEV_FS_XATTR
105 /*
106 * Extended attributes can be read independently of the main file
107 * data. Taking i_mutex even when reading would cause contention
108 * between readers of EAs and writers of regular file data, so
109 * instead we synchronize on xattr_sem when reading or changing
110 * EAs.
111 */
112 struct rw_semaphore xattr_sem;
113#endif
114#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
115 struct posix_acl *i_acl;
116 struct posix_acl *i_default_acl;
117#endif
118
119 struct list_head i_orphan; /* unlinked but open inodes */
120
121 /*
122 * i_disksize keeps track of what the inode size is ON DISK, not
123 * in memory. During truncate, i_size is set to the new size by
124 * the VFS prior to calling ext4_truncate(), but the filesystem won't
125 * set i_disksize to 0 until the truncate is actually under way.
126 *
127 * The intent is that i_disksize always represents the blocks which
128 * are used by this file. This allows recovery to restart truncate
129 * on orphans if we crash during truncate. We actually write i_disksize
130 * into the on-disk inode when writing inodes out, instead of i_size.
131 *
132 * The only time when i_disksize and i_size may be different is when
133 * a truncate is in progress. The only things which change i_disksize
134 * are ext4_get_block (growth) and ext4_truncate (shrinkth).
135 */
136 loff_t i_disksize;
137
138 /* on-disk additional length */
139 __u16 i_extra_isize;
140
141 /*
142 * i_data_sem is for serialising ext4_truncate() against
143 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
144 * data tree are chopped off during truncate. We can't do that in
145 * ext4 because whenever we perform intermediate commits during
146 * truncate, the inode and all the metadata blocks *must* be in a
147 * consistent state which allows truncation of the orphans to restart
148 * during recovery. Hence we must fix the get_block-vs-truncate race
149 * by other means, so we have i_data_sem.
150 */
151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode;
153
154 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent;
156 /*
157 * File creation time. Its function is same as that of
158 * struct timespec i_{a,c,m}time in the generic inode.
159 */
160 struct timespec i_crtime;
161
162 /* mballoc */
163 struct list_head i_prealloc_list;
164 spinlock_t i_prealloc_lock;
165};
166
167#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e27340..c75384b34f2c 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
2 * Interface between ext4 and JBD 2 * Interface between ext4 and JBD
3 */ 3 */
4 4
5#include <linux/ext4_jbd2.h> 5#include "ext4_jbd2.h"
6 6
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 7int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 8 struct buffer_head *bh)
9{ 9{
10 int err = jbd2_journal_get_undo_access(handle, bh); 10 int err = jbd2_journal_get_undo_access(handle, bh);
11 if (err) 11 if (err)
12 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 12 ext4_journal_abort_handle(where, __func__, bh, handle, err);
13 return err; 13 return err;
14} 14}
15 15
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
18{ 18{
19 int err = jbd2_journal_get_write_access(handle, bh); 19 int err = jbd2_journal_get_write_access(handle, bh);
20 if (err) 20 if (err)
21 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 21 ext4_journal_abort_handle(where, __func__, bh, handle, err);
22 return err; 22 return err;
23} 23}
24 24
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
27{ 27{
28 int err = jbd2_journal_forget(handle, bh); 28 int err = jbd2_journal_forget(handle, bh);
29 if (err) 29 if (err)
30 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 30 ext4_journal_abort_handle(where, __func__, bh, handle, err);
31 return err; 31 return err;
32} 32}
33 33
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
36{ 36{
37 int err = jbd2_journal_revoke(handle, blocknr, bh); 37 int err = jbd2_journal_revoke(handle, blocknr, bh);
38 if (err) 38 if (err)
39 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 39 ext4_journal_abort_handle(where, __func__, bh, handle, err);
40 return err; 40 return err;
41} 41}
42 42
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
45{ 45{
46 int err = jbd2_journal_get_create_access(handle, bh); 46 int err = jbd2_journal_get_create_access(handle, bh);
47 if (err) 47 if (err)
48 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 48 ext4_journal_abort_handle(where, __func__, bh, handle, err);
49 return err; 49 return err;
50} 50}
51 51
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
54{ 54{
55 int err = jbd2_journal_dirty_metadata(handle, bh); 55 int err = jbd2_journal_dirty_metadata(handle, bh);
56 if (err) 56 if (err)
57 ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); 57 ext4_journal_abort_handle(where, __func__, bh, handle, err);
58 return err; 58 return err;
59} 59}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
new file mode 100644
index 000000000000..9255a7d28b24
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.h
@@ -0,0 +1,231 @@
1/*
2 * ext4_jbd2.h
3 *
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
5 *
6 * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
7 *
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
11 *
12 * Ext4-specific journaling extensions.
13 */
14
15#ifndef _EXT4_JBD2_H
16#define _EXT4_JBD2_H
17
18#include <linux/fs.h>
19#include <linux/jbd2.h>
20#include "ext4.h"
21
22#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
23
24/* Define the number of blocks we need to account to a transaction to
25 * modify one block of data.
26 *
27 * We may have to touch one inode, one bitmap buffer, up to three
28 * indirection blocks, the group and superblock summaries, and the data
29 * block to complete the transaction.
30 *
31 * For extents-enabled fs we may have to allocate and modify up to
32 * 5 levels of tree + root which are stored in the inode. */
33
34#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
35 (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
36 || test_opt(sb, EXTENTS) ? 27U : 8U)
37
38/* Extended attribute operations touch at most two data buffers,
39 * two bitmap buffers, and two group summaries, in addition to the inode
40 * and the superblock, which are already accounted for. */
41
42#define EXT4_XATTR_TRANS_BLOCKS 6U
43
44/* Define the minimum size for a transaction which modifies data. This
45 * needs to take into account the fact that we may end up modifying two
46 * quota files too (one for the group, one for the user quota). The
47 * superblock only gets updated once, of course, so don't bother
48 * counting that again for the quota updates. */
49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53
54/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */
57
58#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
59
60/* Define an arbitrary limit for the amount of data we will anticipate
61 * writing to any given transaction. For unbounded transactions such as
62 * write(2) and truncate(2) we can write more than this, but we always
63 * start off at the maximum transaction size and grow the transaction
64 * optimistically as we go. */
65
66#define EXT4_MAX_TRANS_DATA 64U
67
68/* We break up a large truncate or write transaction once the handle's
69 * buffer credits gets this low, we need either to extend the
70 * transaction or to start a new one. Reserve enough space here for
71 * inode, bitmap, superblock, group and indirection updates for at least
72 * one block, plus two quota updates. Quota allocations are not
73 * needed. */
74
75#define EXT4_RESERVE_TRANS_BLOCKS 12U
76
77#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
78
79#ifdef CONFIG_QUOTA
80/* Amount of blocks needed for quota update - we know that the structure was
81 * allocated so we need to update only inode+data */
82#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
83/* Amount of blocks needed for quota insert/delete - we do some block writes
84 * but inode, sb and group updates are done only once */
85#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
86 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
87#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
88 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
89#else
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
91#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
92#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
93#endif
94
95int
96ext4_mark_iloc_dirty(handle_t *handle,
97 struct inode *inode,
98 struct ext4_iloc *iloc);
99
100/*
101 * On success, We end up with an outstanding reference count against
102 * iloc->bh. This _must_ be cleaned up later.
103 */
104
105int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
106 struct ext4_iloc *iloc);
107
108int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
109
110/*
111 * Wrapper functions with which ext4 calls into JBD. The intent here is
112 * to allow these to be turned into appropriate stubs so ext4 can control
113 * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
114 * been done yet.
115 */
116
117static inline void ext4_journal_release_buffer(handle_t *handle,
118 struct buffer_head *bh)
119{
120 jbd2_journal_release_buffer(handle, bh);
121}
122
123void ext4_journal_abort_handle(const char *caller, const char *err_fn,
124 struct buffer_head *bh, handle_t *handle, int err);
125
126int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
127 struct buffer_head *bh);
128
129int __ext4_journal_get_write_access(const char *where, handle_t *handle,
130 struct buffer_head *bh);
131
132int __ext4_journal_forget(const char *where, handle_t *handle,
133 struct buffer_head *bh);
134
135int __ext4_journal_revoke(const char *where, handle_t *handle,
136 ext4_fsblk_t blocknr, struct buffer_head *bh);
137
138int __ext4_journal_get_create_access(const char *where,
139 handle_t *handle, struct buffer_head *bh);
140
141int __ext4_journal_dirty_metadata(const char *where,
142 handle_t *handle, struct buffer_head *bh);
143
144#define ext4_journal_get_undo_access(handle, bh) \
145 __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
146#define ext4_journal_get_write_access(handle, bh) \
147 __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
148#define ext4_journal_revoke(handle, blocknr, bh) \
149 __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
150#define ext4_journal_get_create_access(handle, bh) \
151 __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
152#define ext4_journal_dirty_metadata(handle, bh) \
153 __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__FUNCTION__, (handle), (bh))
156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle);
161
162static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
163{
164 return ext4_journal_start_sb(inode->i_sb, nblocks);
165}
166
167#define ext4_journal_stop(handle) \
168 __ext4_journal_stop(__FUNCTION__, (handle))
169
170static inline handle_t *ext4_journal_current_handle(void)
171{
172 return journal_current_handle();
173}
174
175static inline int ext4_journal_extend(handle_t *handle, int nblocks)
176{
177 return jbd2_journal_extend(handle, nblocks);
178}
179
180static inline int ext4_journal_restart(handle_t *handle, int nblocks)
181{
182 return jbd2_journal_restart(handle, nblocks);
183}
184
185static inline int ext4_journal_blocks_per_page(struct inode *inode)
186{
187 return jbd2_journal_blocks_per_page(inode);
188}
189
190static inline int ext4_journal_force_commit(journal_t *journal)
191{
192 return jbd2_journal_force_commit(journal);
193}
194
195/* super.c */
196int ext4_force_commit(struct super_block *sb);
197
198static inline int ext4_should_journal_data(struct inode *inode)
199{
200 if (!S_ISREG(inode->i_mode))
201 return 1;
202 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
203 return 1;
204 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
205 return 1;
206 return 0;
207}
208
209static inline int ext4_should_order_data(struct inode *inode)
210{
211 if (!S_ISREG(inode->i_mode))
212 return 0;
213 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
214 return 0;
215 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
216 return 1;
217 return 0;
218}
219
220static inline int ext4_should_writeback_data(struct inode *inode)
221{
222 if (!S_ISREG(inode->i_mode))
223 return 0;
224 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
225 return 0;
226 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
227 return 1;
228 return 0;
229}
230
231#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
new file mode 100644
index 000000000000..5802e69f2191
--- /dev/null
+++ b/fs/ext4/ext4_sb.h
@@ -0,0 +1,148 @@
1/*
2 * ext4_sb.h
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/include/linux/minix_fs_sb.h
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 */
15
16#ifndef _EXT4_SB
17#define _EXT4_SB
18
19#ifdef __KERNEL__
20#include <linux/timer.h>
21#include <linux/wait.h>
22#include <linux/blockgroup_lock.h>
23#include <linux/percpu_counter.h>
24#endif
25#include <linux/rbtree.h>
26
27/*
28 * third extended-fs super-block data in memory
29 */
30struct ext4_sb_info {
31 unsigned long s_desc_size; /* Size of a group descriptor in bytes */
32 unsigned long s_inodes_per_block;/* Number of inodes per block */
33 unsigned long s_blocks_per_group;/* Number of blocks in a group */
34 unsigned long s_inodes_per_group;/* Number of inodes in a group */
35 unsigned long s_itb_per_group; /* Number of inode table blocks per group */
36 unsigned long s_gdb_count; /* Number of group descriptor blocks */
37 unsigned long s_desc_per_block; /* Number of group descriptors per block */
38 ext4_group_t s_groups_count; /* Number of groups in the fs */
39 unsigned long s_overhead_last; /* Last calculated overhead */
40 unsigned long s_blocks_last; /* Last seen block count */
41 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
42 struct buffer_head * s_sbh; /* Buffer containing the super block */
43 struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
44 struct buffer_head ** s_group_desc;
45 unsigned long s_mount_opt;
46 ext4_fsblk_t s_sb_block;
47 uid_t s_resuid;
48 gid_t s_resgid;
49 unsigned short s_mount_state;
50 unsigned short s_pad;
51 int s_addr_per_block_bits;
52 int s_desc_per_block_bits;
53 int s_inode_size;
54 int s_first_ino;
55 spinlock_t s_next_gen_lock;
56 u32 s_next_generation;
57 u32 s_hash_seed[4];
58 int s_def_hash_version;
59 struct percpu_counter s_freeblocks_counter;
60 struct percpu_counter s_freeinodes_counter;
61 struct percpu_counter s_dirs_counter;
62 struct blockgroup_lock s_blockgroup_lock;
63
64 /* root of the per fs reservation window tree */
65 spinlock_t s_rsv_window_lock;
66 struct rb_root s_rsv_window_root;
67 struct ext4_reserve_window_node s_rsv_window_head;
68
69 /* Journaling */
70 struct inode * s_journal_inode;
71 struct journal_s * s_journal;
72 struct list_head s_orphan;
73 unsigned long s_commit_interval;
74 struct block_device *journal_bdev;
75#ifdef CONFIG_JBD2_DEBUG
76 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
77 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
78#endif
79#ifdef CONFIG_QUOTA
80 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
81 int s_jquota_fmt; /* Format of quota to use */
82#endif
83 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
84
85#ifdef EXTENTS_STATS
86 /* ext4 extents stats */
87 unsigned long s_ext_min;
88 unsigned long s_ext_max;
89 unsigned long s_depth_max;
90 spinlock_t s_ext_stats_lock;
91 unsigned long s_ext_blocks;
92 unsigned long s_ext_extents;
93#endif
94
95 /* for buddy allocator */
96 struct ext4_group_info ***s_group_info;
97 struct inode *s_buddy_cache;
98 long s_blocks_reserved;
99 spinlock_t s_reserve_lock;
100 struct list_head s_active_transaction;
101 struct list_head s_closed_transaction;
102 struct list_head s_committed_transaction;
103 spinlock_t s_md_lock;
104 tid_t s_last_transaction;
105 unsigned short *s_mb_offsets, *s_mb_maxs;
106
107 /* tunables */
108 unsigned long s_stripe;
109 unsigned long s_mb_stream_request;
110 unsigned long s_mb_max_to_scan;
111 unsigned long s_mb_min_to_scan;
112 unsigned long s_mb_stats;
113 unsigned long s_mb_order2_reqs;
114 unsigned long s_mb_group_prealloc;
115 /* where last allocation was done - for stream allocation */
116 unsigned long s_mb_last_group;
117 unsigned long s_mb_last_start;
118
119 /* history to debug policy */
120 struct ext4_mb_history *s_mb_history;
121 int s_mb_history_cur;
122 int s_mb_history_max;
123 int s_mb_history_num;
124 struct proc_dir_entry *s_mb_proc;
125 spinlock_t s_mb_history_lock;
126 int s_mb_history_filter;
127
128 /* stats for buddy allocator */
129 spinlock_t s_mb_pa_lock;
130 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
131 atomic_t s_bal_success; /* we found long enough chunks */
132 atomic_t s_bal_allocated; /* in blocks */
133 atomic_t s_bal_ex_scanned; /* total extents scanned */
134 atomic_t s_bal_goals; /* goal hits */
135 atomic_t s_bal_breaks; /* too long searches */
136 atomic_t s_bal_2orders; /* 2^order hits */
137 spinlock_t s_bal_lock;
138 unsigned long s_mb_buddies_generated;
139 unsigned long long s_mb_generation_time;
140 atomic_t s_mb_lost_chunks;
141 atomic_t s_mb_preallocated;
142 atomic_t s_mb_discarded;
143
144 /* locality groups */
145 struct ext4_locality_group *s_locality_groups;
146};
147
148#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090cd..47929c4e3dae 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/fs.h> 33#include <linux/fs.h>
34#include <linux/time.h> 34#include <linux/time.h>
35#include <linux/ext4_jbd2.h>
36#include <linux/jbd2.h> 35#include <linux/jbd2.h>
37#include <linux/highuid.h> 36#include <linux/highuid.h>
38#include <linux/pagemap.h> 37#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
42#include <linux/falloc.h> 41#include <linux/falloc.h>
43#include <linux/ext4_fs_extents.h>
44#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include "ext4_jbd2.h"
44#include "ext4_extents.h"
45 45
46 46
47/* 47/*
@@ -308,7 +308,7 @@ corrupted:
308} 308}
309 309
310#define ext4_ext_check_header(inode, eh, depth) \ 310#define ext4_ext_check_header(inode, eh, depth) \
311 __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) 311 __ext4_ext_check_header(__func__, inode, eh, depth)
312 312
313#ifdef EXT_DEBUG 313#ifdef EXT_DEBUG
314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 314static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
614 614
615 ix->ei_block = cpu_to_le32(logical); 615 ix->ei_block = cpu_to_le32(logical);
616 ext4_idx_store_pblock(ix, ptr); 616 ext4_idx_store_pblock(ix, ptr);
617 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 617 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
618 618
619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 619 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
620 > le16_to_cpu(curp->p_hdr->eh_max)); 620 > le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
736 } 736 }
737 if (m) { 737 if (m) {
738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 738 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
739 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 739 le16_add_cpu(&neh->eh_entries, m);
740 } 740 }
741 741
742 set_buffer_uptodate(bh); 742 set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
753 err = ext4_ext_get_access(handle, inode, path + depth); 753 err = ext4_ext_get_access(handle, inode, path + depth);
754 if (err) 754 if (err)
755 goto cleanup; 755 goto cleanup;
756 path[depth].p_hdr->eh_entries = 756 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
757 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
758 err = ext4_ext_dirty(handle, inode, path + depth); 757 err = ext4_ext_dirty(handle, inode, path + depth);
759 if (err) 758 if (err)
760 goto cleanup; 759 goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
817 if (m) { 816 if (m) {
818 memmove(++fidx, path[i].p_idx - m, 817 memmove(++fidx, path[i].p_idx - m,
819 sizeof(struct ext4_extent_idx) * m); 818 sizeof(struct ext4_extent_idx) * m);
820 neh->eh_entries = 819 le16_add_cpu(&neh->eh_entries, m);
821 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
822 } 820 }
823 set_buffer_uptodate(bh); 821 set_buffer_uptodate(bh);
824 unlock_buffer(bh); 822 unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
834 err = ext4_ext_get_access(handle, inode, path + i); 832 err = ext4_ext_get_access(handle, inode, path + i);
835 if (err) 833 if (err)
836 goto cleanup; 834 goto cleanup;
837 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 835 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
838 err = ext4_ext_dirty(handle, inode, path + i); 836 err = ext4_ext_dirty(handle, inode, path + i);
839 if (err) 837 if (err)
840 goto cleanup; 838 goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
1369 * sizeof(struct ext4_extent); 1367 * sizeof(struct ext4_extent);
1370 memmove(ex + 1, ex + 2, len); 1368 memmove(ex + 1, ex + 2, len);
1371 } 1369 }
1372 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); 1370 le16_add_cpu(&eh->eh_entries, -1);
1373 merge_done = 1; 1371 merge_done = 1;
1374 WARN_ON(eh->eh_entries == 0); 1372 WARN_ON(eh->eh_entries == 0);
1375 if (!eh->eh_entries) 1373 if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
1560 path[depth].p_ext = nearex; 1558 path[depth].p_ext = nearex;
1561 } 1559 }
1562 1560
1563 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1561 le16_add_cpu(&eh->eh_entries, 1);
1564 nearex = path[depth].p_ext; 1562 nearex = path[depth].p_ext;
1565 nearex->ee_block = newext->ee_block; 1563 nearex->ee_block = newext->ee_block;
1566 ext4_ext_store_pblock(nearex, ext_pblock(newext)); 1564 ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1699 err = ext4_ext_get_access(handle, inode, path); 1697 err = ext4_ext_get_access(handle, inode, path);
1700 if (err) 1698 if (err)
1701 return err; 1699 return err;
1702 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1700 le16_add_cpu(&path->p_hdr->eh_entries, -1);
1703 err = ext4_ext_dirty(handle, inode, path); 1701 err = ext4_ext_dirty(handle, inode, path);
1704 if (err) 1702 if (err)
1705 return err; 1703 return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1902 if (num == 0) { 1900 if (num == 0) {
1903 /* this extent is removed; mark slot entirely unused */ 1901 /* this extent is removed; mark slot entirely unused */
1904 ext4_ext_store_pblock(ex, 0); 1902 ext4_ext_store_pblock(ex, 0);
1905 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1903 le16_add_cpu(&eh->eh_entries, -1);
1906 } 1904 }
1907 1905
1908 ex->ee_block = cpu_to_le32(block); 1906 ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
1979 * We start scanning from right side, freeing all the blocks 1977 * We start scanning from right side, freeing all the blocks
1980 * after i_size and walking into the tree depth-wise. 1978 * after i_size and walking into the tree depth-wise.
1981 */ 1979 */
1982 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1980 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
1983 if (path == NULL) { 1981 if (path == NULL) {
1984 ext4_journal_stop(handle); 1982 ext4_journal_stop(handle);
1985 return -ENOMEM; 1983 return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
2138#endif 2136#endif
2139} 2137}
2140 2138
2139static void bi_complete(struct bio *bio, int error)
2140{
2141 complete((struct completion *)bio->bi_private);
2142}
2143
2144/* FIXME!! we need to try to merge to left or right after zero-out */
2145static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2146{
2147 int ret = -EIO;
2148 struct bio *bio;
2149 int blkbits, blocksize;
2150 sector_t ee_pblock;
2151 struct completion event;
2152 unsigned int ee_len, len, done, offset;
2153
2154
2155 blkbits = inode->i_blkbits;
2156 blocksize = inode->i_sb->s_blocksize;
2157 ee_len = ext4_ext_get_actual_len(ex);
2158 ee_pblock = ext_pblock(ex);
2159
2160 /* convert ee_pblock to 512 byte sectors */
2161 ee_pblock = ee_pblock << (blkbits - 9);
2162
2163 while (ee_len > 0) {
2164
2165 if (ee_len > BIO_MAX_PAGES)
2166 len = BIO_MAX_PAGES;
2167 else
2168 len = ee_len;
2169
2170 bio = bio_alloc(GFP_NOIO, len);
2171 if (!bio)
2172 return -ENOMEM;
2173 bio->bi_sector = ee_pblock;
2174 bio->bi_bdev = inode->i_sb->s_bdev;
2175
2176 done = 0;
2177 offset = 0;
2178 while (done < len) {
2179 ret = bio_add_page(bio, ZERO_PAGE(0),
2180 blocksize, offset);
2181 if (ret != blocksize) {
2182 /*
2183 * We can't add any more pages because of
2184 * hardware limitations. Start a new bio.
2185 */
2186 break;
2187 }
2188 done++;
2189 offset += blocksize;
2190 if (offset >= PAGE_CACHE_SIZE)
2191 offset = 0;
2192 }
2193
2194 init_completion(&event);
2195 bio->bi_private = &event;
2196 bio->bi_end_io = bi_complete;
2197 submit_bio(WRITE, bio);
2198 wait_for_completion(&event);
2199
2200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
2201 ret = 0;
2202 else {
2203 ret = -EIO;
2204 break;
2205 }
2206 bio_put(bio);
2207 ee_len -= done;
2208 ee_pblock += done << (blkbits - 9);
2209 }
2210 return ret;
2211}
2212
2213#define EXT4_EXT_ZERO_LEN 7
2214
2141/* 2215/*
2142 * This function is called by ext4_ext_get_blocks() if someone tries to write 2216 * This function is called by ext4_ext_get_blocks() if someone tries to write
2143 * to an uninitialized extent. It may result in splitting the uninitialized 2217 * to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2154 ext4_lblk_t iblock, 2228 ext4_lblk_t iblock,
2155 unsigned long max_blocks) 2229 unsigned long max_blocks)
2156{ 2230{
2157 struct ext4_extent *ex, newex; 2231 struct ext4_extent *ex, newex, orig_ex;
2158 struct ext4_extent *ex1 = NULL; 2232 struct ext4_extent *ex1 = NULL;
2159 struct ext4_extent *ex2 = NULL; 2233 struct ext4_extent *ex2 = NULL;
2160 struct ext4_extent *ex3 = NULL; 2234 struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2173 allocated = ee_len - (iblock - ee_block); 2247 allocated = ee_len - (iblock - ee_block);
2174 newblock = iblock - ee_block + ext_pblock(ex); 2248 newblock = iblock - ee_block + ext_pblock(ex);
2175 ex2 = ex; 2249 ex2 = ex;
2250 orig_ex.ee_block = ex->ee_block;
2251 orig_ex.ee_len = cpu_to_le16(ee_len);
2252 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2176 2253
2177 err = ext4_ext_get_access(handle, inode, path + depth); 2254 err = ext4_ext_get_access(handle, inode, path + depth);
2178 if (err) 2255 if (err)
2179 goto out; 2256 goto out;
2257 /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
2258 if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2259 err = ext4_ext_zeroout(inode, &orig_ex);
2260 if (err)
2261 goto fix_extent_len;
2262 /* update the extent length and mark as initialized */
2263 ex->ee_block = orig_ex.ee_block;
2264 ex->ee_len = orig_ex.ee_len;
2265 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2266 ext4_ext_dirty(handle, inode, path + depth);
2267 /* zeroed the full extent */
2268 return allocated;
2269 }
2180 2270
2181 /* ex1: ee_block to iblock - 1 : uninitialized */ 2271 /* ex1: ee_block to iblock - 1 : uninitialized */
2182 if (iblock > ee_block) { 2272 if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2195 /* ex3: to ee_block + ee_len : uninitialised */ 2285 /* ex3: to ee_block + ee_len : uninitialised */
2196 if (allocated > max_blocks) { 2286 if (allocated > max_blocks) {
2197 unsigned int newdepth; 2287 unsigned int newdepth;
2288 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2289 if (allocated <= EXT4_EXT_ZERO_LEN) {
2290 /* Mark first half uninitialized.
2291 * Mark second half initialized and zero out the
2292 * initialized extent
2293 */
2294 ex->ee_block = orig_ex.ee_block;
2295 ex->ee_len = cpu_to_le16(ee_len - allocated);
2296 ext4_ext_mark_uninitialized(ex);
2297 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2298 ext4_ext_dirty(handle, inode, path + depth);
2299
2300 ex3 = &newex;
2301 ex3->ee_block = cpu_to_le32(iblock);
2302 ext4_ext_store_pblock(ex3, newblock);
2303 ex3->ee_len = cpu_to_le16(allocated);
2304 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2305 if (err == -ENOSPC) {
2306 err = ext4_ext_zeroout(inode, &orig_ex);
2307 if (err)
2308 goto fix_extent_len;
2309 ex->ee_block = orig_ex.ee_block;
2310 ex->ee_len = orig_ex.ee_len;
2311 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2312 ext4_ext_dirty(handle, inode, path + depth);
2313 /* zeroed the full extent */
2314 return allocated;
2315
2316 } else if (err)
2317 goto fix_extent_len;
2318
2319 /*
2320 * We need to zero out the second half because
2321 * an fallocate request can update file size and
2322 * converting the second half to initialized extent
2323 * implies that we can leak some junk data to user
2324 * space.
2325 */
2326 err = ext4_ext_zeroout(inode, ex3);
2327 if (err) {
2328 /*
2329 * We should actually mark the
2330 * second half as uninit and return error
2331 * Insert would have changed the extent
2332 */
2333 depth = ext_depth(inode);
2334 ext4_ext_drop_refs(path);
2335 path = ext4_ext_find_extent(inode,
2336 iblock, path);
2337 if (IS_ERR(path)) {
2338 err = PTR_ERR(path);
2339 return err;
2340 }
2341 ex = path[depth].p_ext;
2342 err = ext4_ext_get_access(handle, inode,
2343 path + depth);
2344 if (err)
2345 return err;
2346 ext4_ext_mark_uninitialized(ex);
2347 ext4_ext_dirty(handle, inode, path + depth);
2348 return err;
2349 }
2350
2351 /* zeroed the second half */
2352 return allocated;
2353 }
2198 ex3 = &newex; 2354 ex3 = &newex;
2199 ex3->ee_block = cpu_to_le32(iblock + max_blocks); 2355 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2200 ext4_ext_store_pblock(ex3, newblock + max_blocks); 2356 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2201 ex3->ee_len = cpu_to_le16(allocated - max_blocks); 2357 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2202 ext4_ext_mark_uninitialized(ex3); 2358 ext4_ext_mark_uninitialized(ex3);
2203 err = ext4_ext_insert_extent(handle, inode, path, ex3); 2359 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2204 if (err) 2360 if (err == -ENOSPC) {
2205 goto out; 2361 err = ext4_ext_zeroout(inode, &orig_ex);
2362 if (err)
2363 goto fix_extent_len;
2364 /* update the extent length and mark as initialized */
2365 ex->ee_block = orig_ex.ee_block;
2366 ex->ee_len = orig_ex.ee_len;
2367 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2368 ext4_ext_dirty(handle, inode, path + depth);
2369 /* zeroed the full extent */
2370 return allocated;
2371
2372 } else if (err)
2373 goto fix_extent_len;
2206 /* 2374 /*
2207 * The depth, and hence eh & ex might change 2375 * The depth, and hence eh & ex might change
2208 * as part of the insert above. 2376 * as part of the insert above.
2209 */ 2377 */
2210 newdepth = ext_depth(inode); 2378 newdepth = ext_depth(inode);
2379 /*
2380 * update the extent length after successfull insert of the
2381 * split extent
2382 */
2383 orig_ex.ee_len = cpu_to_le16(ee_len -
2384 ext4_ext_get_actual_len(ex3));
2211 if (newdepth != depth) { 2385 if (newdepth != depth) {
2212 depth = newdepth; 2386 depth = newdepth;
2213 ext4_ext_drop_refs(path); 2387 ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2226 goto out; 2400 goto out;
2227 } 2401 }
2228 allocated = max_blocks; 2402 allocated = max_blocks;
2403
2404 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
2405 * to insert a extent in the middle zerout directly
2406 * otherwise give the extent a chance to merge to left
2407 */
2408 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2409 iblock != ee_block) {
2410 err = ext4_ext_zeroout(inode, &orig_ex);
2411 if (err)
2412 goto fix_extent_len;
2413 /* update the extent length and mark as initialized */
2414 ex->ee_block = orig_ex.ee_block;
2415 ex->ee_len = orig_ex.ee_len;
2416 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417 ext4_ext_dirty(handle, inode, path + depth);
2418 /* zero out the first half */
2419 return allocated;
2420 }
2229 } 2421 }
2230 /* 2422 /*
2231 * If there was a change of depth as part of the 2423 * If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2282 goto out; 2474 goto out;
2283insert: 2475insert:
2284 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2476 err = ext4_ext_insert_extent(handle, inode, path, &newex);
2477 if (err == -ENOSPC) {
2478 err = ext4_ext_zeroout(inode, &orig_ex);
2479 if (err)
2480 goto fix_extent_len;
2481 /* update the extent length and mark as initialized */
2482 ex->ee_block = orig_ex.ee_block;
2483 ex->ee_len = orig_ex.ee_len;
2484 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2485 ext4_ext_dirty(handle, inode, path + depth);
2486 /* zero out the first half */
2487 return allocated;
2488 } else if (err)
2489 goto fix_extent_len;
2285out: 2490out:
2286 return err ? err : allocated; 2491 return err ? err : allocated;
2492
2493fix_extent_len:
2494 ex->ee_block = orig_ex.ee_block;
2495 ex->ee_len = orig_ex.ee_len;
2496 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2497 ext4_ext_mark_uninitialized(ex);
2498 ext4_ext_dirty(handle, inode, path + depth);
2499 return err;
2287} 2500}
2288 2501
2289/* 2502/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2393 } 2606 }
2394 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2607 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2395 goto out; 2608 goto out;
2396 if (!create) 2609 if (!create) {
2610 /*
2611 * We have blocks reserved already. We
2612 * return allocated blocks so that delalloc
2613 * won't do block reservation for us. But
2614 * the buffer head will be unmapped so that
2615 * a read from the block returns 0s.
2616 */
2617 if (allocated > max_blocks)
2618 allocated = max_blocks;
2619 /* mark the buffer unwritten */
2620 __set_bit(BH_Unwritten, &bh_result->b_state);
2397 goto out2; 2621 goto out2;
2622 }
2398 2623
2399 ret = ext4_ext_convert_to_initialized(handle, inode, 2624 ret = ext4_ext_convert_to_initialized(handle, inode,
2400 path, iblock, 2625 path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
2584 ext4_orphan_del(handle, inode); 2809 ext4_orphan_del(handle, inode);
2585 2810
2586 up_write(&EXT4_I(inode)->i_data_sem); 2811 up_write(&EXT4_I(inode)->i_data_sem);
2812 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2813 ext4_mark_inode_dirty(handle, inode);
2587 ext4_journal_stop(handle); 2814 ext4_journal_stop(handle);
2588} 2815}
2589 2816
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
2608 return needed; 2835 return needed;
2609} 2836}
2610 2837
2838static void ext4_falloc_update_inode(struct inode *inode,
2839 int mode, loff_t new_size, int update_ctime)
2840{
2841 struct timespec now;
2842
2843 if (update_ctime) {
2844 now = current_fs_time(inode->i_sb);
2845 if (!timespec_equal(&inode->i_ctime, &now))
2846 inode->i_ctime = now;
2847 }
2848 /*
2849 * Update only when preallocation was requested beyond
2850 * the file size.
2851 */
2852 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2853 new_size > i_size_read(inode)) {
2854 i_size_write(inode, new_size);
2855 EXT4_I(inode)->i_disksize = new_size;
2856 }
2857
2858}
2859
2611/* 2860/*
2612 * preallocate space for a file. This implements ext4's fallocate inode 2861 * preallocate space for a file. This implements ext4's fallocate inode
2613 * operation, which gets called from sys_fallocate system call. 2862 * operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2619{ 2868{
2620 handle_t *handle; 2869 handle_t *handle;
2621 ext4_lblk_t block; 2870 ext4_lblk_t block;
2871 loff_t new_size;
2622 unsigned long max_blocks; 2872 unsigned long max_blocks;
2623 ext4_fsblk_t nblocks = 0;
2624 int ret = 0; 2873 int ret = 0;
2625 int ret2 = 0; 2874 int ret2 = 0;
2626 int retries = 0; 2875 int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
2639 return -ENODEV; 2888 return -ENODEV;
2640 2889
2641 block = offset >> blkbits; 2890 block = offset >> blkbits;
2891 /*
2892 * We can't just convert len to max_blocks because
2893 * If blocksize = 4096 offset = 3072 and len = 2048
2894 */
2642 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 2895 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
2643 - block; 2896 - block;
2644
2645 /* 2897 /*
2646 * credits to insert 1 extent into extent tree + buffers to be able to 2898 * credits to insert 1 extent into extent tree + buffers to be able to
2647 * modify 1 super block, 1 block bitmap and 1 group descriptor. 2899 * modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2657,7 +2909,6 @@ retry:
2657 ret = PTR_ERR(handle); 2909 ret = PTR_ERR(handle);
2658 break; 2910 break;
2659 } 2911 }
2660
2661 ret = ext4_get_blocks_wrap(handle, inode, block, 2912 ret = ext4_get_blocks_wrap(handle, inode, block,
2662 max_blocks, &map_bh, 2913 max_blocks, &map_bh,
2663 EXT4_CREATE_UNINITIALIZED_EXT, 0); 2914 EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
2673 ret2 = ext4_journal_stop(handle); 2924 ret2 = ext4_journal_stop(handle);
2674 break; 2925 break;
2675 } 2926 }
2676 if (ret > 0) { 2927 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
2677 /* check wrap through sign-bit/zero here */ 2928 blkbits) >> blkbits))
2678 if ((block + ret) < 0 || (block + ret) < block) { 2929 new_size = offset + len;
2679 ret = -EIO; 2930 else
2680 ext4_mark_inode_dirty(handle, inode); 2931 new_size = (block + ret) << blkbits;
2681 ret2 = ext4_journal_stop(handle);
2682 break;
2683 }
2684 if (buffer_new(&map_bh) && ((block + ret) >
2685 (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
2686 >> blkbits)))
2687 nblocks = nblocks + ret;
2688 }
2689
2690 /* Update ctime if new blocks get allocated */
2691 if (nblocks) {
2692 struct timespec now;
2693
2694 now = current_fs_time(inode->i_sb);
2695 if (!timespec_equal(&inode->i_ctime, &now))
2696 inode->i_ctime = now;
2697 }
2698 2932
2933 ext4_falloc_update_inode(inode, mode, new_size,
2934 buffer_new(&map_bh));
2699 ext4_mark_inode_dirty(handle, inode); 2935 ext4_mark_inode_dirty(handle, inode);
2700 ret2 = ext4_journal_stop(handle); 2936 ret2 = ext4_journal_stop(handle);
2701 if (ret2) 2937 if (ret2)
2702 break; 2938 break;
2703 } 2939 }
2704 2940 if (ret == -ENOSPC &&
2705 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2941 ext4_should_retry_alloc(inode->i_sb, &retries)) {
2942 ret = 0;
2706 goto retry; 2943 goto retry;
2707
2708 /*
2709 * Time to update the file size.
2710 * Update only when preallocation was requested beyond the file size.
2711 */
2712 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
2713 (offset + len) > i_size_read(inode)) {
2714 if (ret > 0) {
2715 /*
2716 * if no error, we assume preallocation succeeded
2717 * completely
2718 */
2719 i_size_write(inode, offset + len);
2720 EXT4_I(inode)->i_disksize = i_size_read(inode);
2721 } else if (ret < 0 && nblocks) {
2722 /* Handle partial allocation scenario */
2723 loff_t newsize;
2724
2725 newsize = (nblocks << blkbits) + i_size_read(inode);
2726 i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
2727 EXT4_I(inode)->i_disksize = i_size_read(inode);
2728 }
2729 } 2944 }
2730
2731 mutex_unlock(&inode->i_mutex); 2945 mutex_unlock(&inode->i_mutex);
2732 return ret > 0 ? ret2 : ret; 2946 return ret > 0 ? ret2 : ret;
2733} 2947}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db55..4159be6366ab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
21#include <linux/time.h> 21#include <linux/time.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h> 24#include "ext4.h"
25#include <linux/ext4_jbd2.h> 25#include "ext4_jbd2.h"
26#include "xattr.h" 26#include "xattr.h"
27#include "acl.h" 27#include "acl.h"
28 28
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
129 .write = do_sync_write, 129 .write = do_sync_write,
130 .aio_read = generic_file_aio_read, 130 .aio_read = generic_file_aio_read,
131 .aio_write = ext4_file_write, 131 .aio_write = ext4_file_write,
132 .ioctl = ext4_ioctl, 132 .unlocked_ioctl = ext4_ioctl,
133#ifdef CONFIG_COMPAT 133#ifdef CONFIG_COMPAT
134 .compat_ioctl = ext4_compat_ioctl, 134 .compat_ioctl = ext4_compat_ioctl,
135#endif 135#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2c..1c8ba48d4f8d 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/ext4_fs.h> 30#include "ext4.h"
31#include <linux/ext4_jbd2.h> 31#include "ext4_jbd2.h"
32 32
33/* 33/*
34 * akpm: A new design for ext4_sync_file(). 34 * akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
72 goto out; 72 goto out;
73 } 73 }
74 74
75 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
76 goto out;
77
75 /* 78 /*
76 * The VFS has written the file data. If the inode is unaltered 79 * The VFS has written the file data. If the inode is unaltered
77 * then we need not start a commit. 80 * then we need not start a commit.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b36..1d6329dbe390 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/jbd2.h> 13#include <linux/jbd2.h>
14#include <linux/ext4_fs.h>
15#include <linux/cryptohash.h> 14#include <linux/cryptohash.h>
15#include "ext4.h"
16 16
17#define DELTA 0x9E3779B9 17#define DELTA 0x9E3779B9
18 18
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918d..c6efbab0c801 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/jbd2.h> 17#include <linux/jbd2.h>
18#include <linux/ext4_fs.h>
19#include <linux/ext4_jbd2.h>
20#include <linux/stat.h> 18#include <linux/stat.h>
21#include <linux/string.h> 19#include <linux/string.h>
22#include <linux/quotaops.h> 20#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
25#include <linux/bitops.h> 23#include <linux/bitops.h>
26#include <linux/blkdev.h> 24#include <linux/blkdev.h>
27#include <asm/byteorder.h> 25#include <asm/byteorder.h>
28 26#include "ext4.h"
27#include "ext4_jbd2.h"
29#include "xattr.h" 28#include "xattr.h"
30#include "acl.h" 29#include "acl.h"
31#include "group.h" 30#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
75 /* If checksum is bad mark all blocks and inodes use to prevent 74 /* If checksum is bad mark all blocks and inodes use to prevent
76 * allocation, essentially implementing a per-group read-only flag. */ 75 * allocation, essentially implementing a per-group read-only flag. */
77 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 76 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
78 ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n", 77 ext4_error(sb, __func__, "Checksum bad for group %lu\n",
79 block_group); 78 block_group);
80 gdp->bg_free_blocks_count = 0; 79 gdp->bg_free_blocks_count = 0;
81 gdp->bg_free_inodes_count = 0; 80 gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
223 222
224 if (gdp) { 223 if (gdp) {
225 spin_lock(sb_bgl_lock(sbi, block_group)); 224 spin_lock(sb_bgl_lock(sbi, block_group));
226 gdp->bg_free_inodes_count = cpu_to_le16( 225 le16_add_cpu(&gdp->bg_free_inodes_count, 1);
227 le16_to_cpu(gdp->bg_free_inodes_count) + 1);
228 if (is_directory) 226 if (is_directory)
229 gdp->bg_used_dirs_count = cpu_to_le16( 227 le16_add_cpu(&gdp->bg_used_dirs_count, -1);
230 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
231 gdp->bg_checksum = ext4_group_desc_csum(sbi, 228 gdp->bg_checksum = ext4_group_desc_csum(sbi,
232 block_group, gdp); 229 block_group, gdp);
233 spin_unlock(sb_bgl_lock(sbi, block_group)); 230 spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -588,7 +585,7 @@ got:
588 ino++; 585 ino++;
589 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || 586 if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
590 ino > EXT4_INODES_PER_GROUP(sb)) { 587 ino > EXT4_INODES_PER_GROUP(sb)) {
591 ext4_error(sb, __FUNCTION__, 588 ext4_error(sb, __func__,
592 "reserved inode or inode > inodes count - " 589 "reserved inode or inode > inodes count - "
593 "block_group = %lu, inode=%lu", group, 590 "block_group = %lu, inode=%lu", group,
594 ino + group * EXT4_INODES_PER_GROUP(sb)); 591 ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
664 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); 661 cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
665 } 662 }
666 663
667 gdp->bg_free_inodes_count = 664 le16_add_cpu(&gdp->bg_free_inodes_count, -1);
668 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
669 if (S_ISDIR(mode)) { 665 if (S_ISDIR(mode)) {
670 gdp->bg_used_dirs_count = 666 le16_add_cpu(&gdp->bg_used_dirs_count, 1);
671 cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
672 } 667 }
673 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 668 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
674 spin_unlock(sb_bgl_lock(sbi, group)); 669 spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
744 if (err) 739 if (err)
745 goto fail_free_drop; 740 goto fail_free_drop;
746 741
747 err = ext4_mark_inode_dirty(handle, inode);
748 if (err) {
749 ext4_std_error(sb, err);
750 goto fail_free_drop;
751 }
752 if (test_opt(sb, EXTENTS)) { 742 if (test_opt(sb, EXTENTS)) {
753 /* set extent flag only for directory and file */ 743 /* set extent flag only for diretory, file and normal symlink*/
754 if (S_ISDIR(mode) || S_ISREG(mode)) { 744 if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
755 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; 745 EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
756 ext4_ext_tree_init(handle, inode); 746 ext4_ext_tree_init(handle, inode);
757 err = ext4_update_incompat_feature(handle, sb, 747 err = ext4_update_incompat_feature(handle, sb,
758 EXT4_FEATURE_INCOMPAT_EXTENTS); 748 EXT4_FEATURE_INCOMPAT_EXTENTS);
759 if (err) 749 if (err)
760 goto fail; 750 goto fail_free_drop;
761 } 751 }
762 } 752 }
763 753
754 err = ext4_mark_inode_dirty(handle, inode);
755 if (err) {
756 ext4_std_error(sb, err);
757 goto fail_free_drop;
758 }
759
764 ext4_debug("allocating inode %lu\n", inode->i_ino); 760 ext4_debug("allocating inode %lu\n", inode->i_ino);
765 goto really_out; 761 goto really_out;
766fail: 762fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
796 792
797 /* Error cases - e2fsck has already cleaned up for us */ 793 /* Error cases - e2fsck has already cleaned up for us */
798 if (ino > max_ino) { 794 if (ino > max_ino) {
799 ext4_warning(sb, __FUNCTION__, 795 ext4_warning(sb, __func__,
800 "bad orphan ino %lu! e2fsck was run?", ino); 796 "bad orphan ino %lu! e2fsck was run?", ino);
801 goto error; 797 goto error;
802 } 798 }
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
805 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 801 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
806 bitmap_bh = read_inode_bitmap(sb, block_group); 802 bitmap_bh = read_inode_bitmap(sb, block_group);
807 if (!bitmap_bh) { 803 if (!bitmap_bh) {
808 ext4_warning(sb, __FUNCTION__, 804 ext4_warning(sb, __func__,
809 "inode bitmap error for orphan %lu", ino); 805 "inode bitmap error for orphan %lu", ino);
810 goto error; 806 goto error;
811 } 807 }
@@ -830,7 +826,7 @@ iget_failed:
830 err = PTR_ERR(inode); 826 err = PTR_ERR(inode);
831 inode = NULL; 827 inode = NULL;
832bad_orphan: 828bad_orphan:
833 ext4_warning(sb, __FUNCTION__, 829 ext4_warning(sb, __func__,
834 "bad orphan inode %lu! e2fsck was run?", ino); 830 "bad orphan inode %lu! e2fsck was run?", ino);
835 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", 831 printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
836 bit, (unsigned long long)bitmap_bh->b_blocknr, 832 bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05f..8d9707746413 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/ext4_jbd2.h>
29#include <linux/jbd2.h> 28#include <linux/jbd2.h>
30#include <linux/highuid.h> 29#include <linux/highuid.h>
31#include <linux/pagemap.h> 30#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
36#include <linux/mpage.h> 35#include <linux/mpage.h>
37#include <linux/uio.h> 36#include <linux/uio.h>
38#include <linux/bio.h> 37#include <linux/bio.h>
38#include "ext4_jbd2.h"
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
93 BUFFER_TRACE(bh, "call ext4_journal_revoke"); 93 BUFFER_TRACE(bh, "call ext4_journal_revoke");
94 err = ext4_journal_revoke(handle, blocknr, bh); 94 err = ext4_journal_revoke(handle, blocknr, bh);
95 if (err) 95 if (err)
96 ext4_abort(inode->i_sb, __FUNCTION__, 96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err); 97 "error %d when attempting revoke", err);
98 BUFFER_TRACE(bh, "exit"); 98 BUFFER_TRACE(bh, "exit");
99 return err; 99 return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
985 } else { 985 } else {
986 retval = ext4_get_blocks_handle(handle, inode, block, 986 retval = ext4_get_blocks_handle(handle, inode, block,
987 max_blocks, bh, create, extend_disksize); 987 max_blocks, bh, create, extend_disksize);
988
989 if (retval > 0 && buffer_new(bh)) {
990 /*
991 * We allocated new blocks which will result in
992 * i_data's format changing. Force the migrate
993 * to fail by clearing migrate flags
994 */
995 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
996 ~EXT4_EXT_MIGRATE;
997 }
988 } 998 }
989 up_write((&EXT4_I(inode)->i_data_sem)); 999 up_write((&EXT4_I(inode)->i_data_sem));
990 return retval; 1000 return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1230{ 1240{
1231 int err = jbd2_journal_dirty_data(handle, bh); 1241 int err = jbd2_journal_dirty_data(handle, bh);
1232 if (err) 1242 if (err)
1233 ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, 1243 ext4_journal_abort_handle(__func__, __func__,
1234 bh, handle, err); 1244 bh, handle, err);
1235 return err; 1245 return err;
1236} 1246}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
1301 new_i_size = pos + copied; 1311 new_i_size = pos + copied;
1302 if (new_i_size > EXT4_I(inode)->i_disksize) 1312 if (new_i_size > EXT4_I(inode)->i_disksize)
1303 EXT4_I(inode)->i_disksize = new_i_size; 1313 EXT4_I(inode)->i_disksize = new_i_size;
1304 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1314 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1305 page, fsdata); 1315 page, fsdata);
1306 if (copied < 0) 1316 copied = ret2;
1307 ret = copied; 1317 if (ret2 < 0)
1318 ret = ret2;
1308 } 1319 }
1309 ret2 = ext4_journal_stop(handle); 1320 ret2 = ext4_journal_stop(handle);
1310 if (!ret) 1321 if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
1329 if (new_i_size > EXT4_I(inode)->i_disksize) 1340 if (new_i_size > EXT4_I(inode)->i_disksize)
1330 EXT4_I(inode)->i_disksize = new_i_size; 1341 EXT4_I(inode)->i_disksize = new_i_size;
1331 1342
1332 copied = ext4_generic_write_end(file, mapping, pos, len, copied, 1343 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1333 page, fsdata); 1344 page, fsdata);
1334 if (copied < 0) 1345 copied = ret2;
1335 ret = copied; 1346 if (ret2 < 0)
1347 ret = ret2;
1336 1348
1337 ret2 = ext4_journal_stop(handle); 1349 ret2 = ext4_journal_stop(handle);
1338 if (!ret) 1350 if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
2501static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, 2513static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2502 unsigned long ino, struct ext4_iloc *iloc) 2514 unsigned long ino, struct ext4_iloc *iloc)
2503{ 2515{
2504 unsigned long desc, group_desc;
2505 ext4_group_t block_group; 2516 ext4_group_t block_group;
2506 unsigned long offset; 2517 unsigned long offset;
2507 ext4_fsblk_t block; 2518 ext4_fsblk_t block;
2508 struct buffer_head *bh; 2519 struct ext4_group_desc *gdp;
2509 struct ext4_group_desc * gdp;
2510 2520
2511 if (!ext4_valid_inum(sb, ino)) { 2521 if (!ext4_valid_inum(sb, ino)) {
2512 /* 2522 /*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
2518 } 2528 }
2519 2529
2520 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 2530 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
2521 if (block_group >= EXT4_SB(sb)->s_groups_count) { 2531 gdp = ext4_get_group_desc(sb, block_group, NULL);
2522 ext4_error(sb,"ext4_get_inode_block","group >= groups count"); 2532 if (!gdp)
2523 return 0; 2533 return 0;
2524 }
2525 smp_rmb();
2526 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
2527 desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2528 bh = EXT4_SB(sb)->s_group_desc[group_desc];
2529 if (!bh) {
2530 ext4_error (sb, "ext4_get_inode_block",
2531 "Descriptor not loaded");
2532 return 0;
2533 }
2534 2534
2535 gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
2536 desc * EXT4_DESC_SIZE(sb));
2537 /* 2535 /*
2538 * Figure out the offset within the block group inode table 2536 * Figure out the offset within the block group inode table
2539 */ 2537 */
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
2976 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 2974 if (ext4_inode_blocks_set(handle, raw_inode, ei))
2977 goto out_brelse; 2975 goto out_brelse;
2978 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 2976 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
2979 raw_inode->i_flags = cpu_to_le32(ei->i_flags); 2977 /* clear the migrate flag in the raw_inode */
2978 raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
2980 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 2979 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
2981 cpu_to_le32(EXT4_OS_HURD)) 2980 cpu_to_le32(EXT4_OS_HURD))
2982 raw_inode->i_file_acl_high = 2981 raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
3374 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; 3373 EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
3375 if (mnt_count != 3374 if (mnt_count !=
3376 le16_to_cpu(sbi->s_es->s_mnt_count)) { 3375 le16_to_cpu(sbi->s_es->s_mnt_count)) {
3377 ext4_warning(inode->i_sb, __FUNCTION__, 3376 ext4_warning(inode->i_sb, __func__,
3378 "Unable to expand inode %lu. Delete" 3377 "Unable to expand inode %lu. Delete"
3379 " some EAs or run e2fsck.", 3378 " some EAs or run e2fsck.",
3380 inode->i_ino); 3379 inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
3415 current_handle->h_transaction != handle->h_transaction) { 3414 current_handle->h_transaction != handle->h_transaction) {
3416 /* This task has a transaction open against a different fs */ 3415 /* This task has a transaction open against a different fs */
3417 printk(KERN_EMERG "%s: transactions do not match!\n", 3416 printk(KERN_EMERG "%s: transactions do not match!\n",
3418 __FUNCTION__); 3417 __func__);
3419 } else { 3418 } else {
3420 jbd_debug(5, "marking dirty. outer handle=%p\n", 3419 jbd_debug(5, "marking dirty. outer handle=%p\n",
3421 current_handle); 3420 current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede8086..7a6c2f1faba6 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/jbd2.h> 11#include <linux/jbd2.h>
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext4_fs.h>
14#include <linux/ext4_jbd2.h>
15#include <linux/time.h> 13#include <linux/time.h>
16#include <linux/compat.h> 14#include <linux/compat.h>
17#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
18#include <linux/mount.h> 16#include <linux/mount.h>
19#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include "ext4_jbd2.h"
19#include "ext4.h"
20 20
21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22 unsigned long arg)
23{ 22{
23 struct inode *inode = filp->f_dentry->d_inode;
24 struct ext4_inode_info *ei = EXT4_I(inode); 24 struct ext4_inode_info *ei = EXT4_I(inode);
25 unsigned int flags; 25 unsigned int flags;
26 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
277#ifdef CONFIG_COMPAT 277#ifdef CONFIG_COMPAT
278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 278long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
279{ 279{
280 struct inode *inode = file->f_path.dentry->d_inode;
281 int ret;
282
283 /* These are just misnamed, they actually get/put from/to user an int */ 280 /* These are just misnamed, they actually get/put from/to user an int */
284 switch (cmd) { 281 switch (cmd) {
285 case EXT4_IOC32_GETFLAGS: 282 case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
319 default: 316 default:
320 return -ENOIOCTLCMD; 317 return -ENOIOCTLCMD;
321 } 318 }
322 lock_kernel(); 319 return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
323 ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
324 unlock_kernel();
325 return ret;
326} 320}
327#endif 321#endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index ef97f19c2f9d..fbec2ef93797 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include <linux/time.h> 24#include "mballoc.h"
25#include <linux/fs.h>
26#include <linux/namei.h>
27#include <linux/ext4_jbd2.h>
28#include <linux/ext4_fs.h>
29#include <linux/quotaops.h>
30#include <linux/buffer_head.h>
31#include <linux/module.h>
32#include <linux/swap.h>
33#include <linux/proc_fs.h>
34#include <linux/pagemap.h>
35#include <linux/seq_file.h>
36#include <linux/version.h>
37#include "group.h"
38
39/* 25/*
40 * MUSTDO: 26 * MUSTDO:
41 * - test ext4_ext_search_left() and ext4_ext_search_right() 27 * - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
345 * 331 *
346 */ 332 */
347 333
348/*
349 * with AGGRESSIVE_CHECK allocator runs consistency checks over
350 * structures. these checks slow things down a lot
351 */
352#define AGGRESSIVE_CHECK__
353
354/*
355 * with DOUBLE_CHECK defined mballoc creates persistent in-core
356 * bitmaps, maintains and uses them to check for double allocations
357 */
358#define DOUBLE_CHECK__
359
360/*
361 */
362#define MB_DEBUG__
363#ifdef MB_DEBUG
364#define mb_debug(fmt, a...) printk(fmt, ##a)
365#else
366#define mb_debug(fmt, a...)
367#endif
368
369/*
370 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
371 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
372 */
373#define EXT4_MB_HISTORY
374#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
375#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
376#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
377#define EXT4_MB_HISTORY_FREE 8 /* free */
378
379#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
380 EXT4_MB_HISTORY_PREALLOC)
381
382/*
383 * How long mballoc can look for a best extent (in found extents)
384 */
385#define MB_DEFAULT_MAX_TO_SCAN 200
386
387/*
388 * How long mballoc must look for a best extent
389 */
390#define MB_DEFAULT_MIN_TO_SCAN 10
391
392/*
393 * How many groups mballoc will scan looking for the best chunk
394 */
395#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
396
397/*
398 * with 'ext4_mb_stats' allocator will collect stats that will be
399 * shown at umount. The collecting costs though!
400 */
401#define MB_DEFAULT_STATS 1
402
403/*
404 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
405 * by the stream allocator, which purpose is to pack requests
406 * as close each to other as possible to produce smooth I/O traffic
407 * We use locality group prealloc space for stream request.
408 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
409 */
410#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
411
412/*
413 * for which requests use 2^N search using buddies
414 */
415#define MB_DEFAULT_ORDER2_REQS 2
416
417/*
418 * default group prealloc size 512 blocks
419 */
420#define MB_DEFAULT_GROUP_PREALLOC 512
421
422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
424
425#ifdef EXT4_BB_MAX_BLOCKS
426#undef EXT4_BB_MAX_BLOCKS
427#endif
428#define EXT4_BB_MAX_BLOCKS 30
429
430struct ext4_free_metadata {
431 ext4_group_t group;
432 unsigned short num;
433 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
434 struct list_head list;
435};
436
437struct ext4_group_info {
438 unsigned long bb_state;
439 unsigned long bb_tid;
440 struct ext4_free_metadata *bb_md_cur;
441 unsigned short bb_first_free;
442 unsigned short bb_free;
443 unsigned short bb_fragments;
444 struct list_head bb_prealloc_list;
445#ifdef DOUBLE_CHECK
446 void *bb_bitmap;
447#endif
448 unsigned short bb_counters[];
449};
450
451#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
452#define EXT4_GROUP_INFO_LOCKED_BIT 1
453
454#define EXT4_MB_GRP_NEED_INIT(grp) \
455 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
456
457
458struct ext4_prealloc_space {
459 struct list_head pa_inode_list;
460 struct list_head pa_group_list;
461 union {
462 struct list_head pa_tmp_list;
463 struct rcu_head pa_rcu;
464 } u;
465 spinlock_t pa_lock;
466 atomic_t pa_count;
467 unsigned pa_deleted;
468 ext4_fsblk_t pa_pstart; /* phys. block */
469 ext4_lblk_t pa_lstart; /* log. block */
470 unsigned short pa_len; /* len of preallocated chunk */
471 unsigned short pa_free; /* how many blocks are free */
472 unsigned short pa_linear; /* consumed in one direction
473 * strictly, for grp prealloc */
474 spinlock_t *pa_obj_lock;
475 struct inode *pa_inode; /* hack, for history only */
476};
477
478
479struct ext4_free_extent {
480 ext4_lblk_t fe_logical;
481 ext4_grpblk_t fe_start;
482 ext4_group_t fe_group;
483 int fe_len;
484};
485
486/*
487 * Locality group:
488 * we try to group all related changes together
489 * so that writeback can flush/allocate them together as well
490 */
491struct ext4_locality_group {
492 /* for allocator */
493 struct mutex lg_mutex; /* to serialize allocates */
494 struct list_head lg_prealloc_list;/* list of preallocations */
495 spinlock_t lg_prealloc_lock;
496};
497
498struct ext4_allocation_context {
499 struct inode *ac_inode;
500 struct super_block *ac_sb;
501
502 /* original request */
503 struct ext4_free_extent ac_o_ex;
504
505 /* goal request (after normalization) */
506 struct ext4_free_extent ac_g_ex;
507
508 /* the best found extent */
509 struct ext4_free_extent ac_b_ex;
510
511 /* copy of the bext found extent taken before preallocation efforts */
512 struct ext4_free_extent ac_f_ex;
513
514 /* number of iterations done. we have to track to limit searching */
515 unsigned long ac_ex_scanned;
516 __u16 ac_groups_scanned;
517 __u16 ac_found;
518 __u16 ac_tail;
519 __u16 ac_buddy;
520 __u16 ac_flags; /* allocation hints */
521 __u8 ac_status;
522 __u8 ac_criteria;
523 __u8 ac_repeats;
524 __u8 ac_2order; /* if request is to allocate 2^N blocks and
525 * N > 0, the field stores N, otherwise 0 */
526 __u8 ac_op; /* operation, for history only */
527 struct page *ac_bitmap_page;
528 struct page *ac_buddy_page;
529 struct ext4_prealloc_space *ac_pa;
530 struct ext4_locality_group *ac_lg;
531};
532
533#define AC_STATUS_CONTINUE 1
534#define AC_STATUS_FOUND 2
535#define AC_STATUS_BREAK 3
536
537struct ext4_mb_history {
538 struct ext4_free_extent orig; /* orig allocation */
539 struct ext4_free_extent goal; /* goal allocation */
540 struct ext4_free_extent result; /* result allocation */
541 unsigned pid;
542 unsigned ino;
543 __u16 found; /* how many extents have been found */
544 __u16 groups; /* how many groups have been scanned */
545 __u16 tail; /* what tail broke some buddy */
546 __u16 buddy; /* buddy the tail ^^^ broke */
547 __u16 flags;
548 __u8 cr:3; /* which phase the result extent was found at */
549 __u8 op:4;
550 __u8 merged:1;
551};
552
553struct ext4_buddy {
554 struct page *bd_buddy_page;
555 void *bd_buddy;
556 struct page *bd_bitmap_page;
557 void *bd_bitmap;
558 struct ext4_group_info *bd_info;
559 struct super_block *bd_sb;
560 __u16 bd_blkbits;
561 ext4_group_t bd_group;
562};
563#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
564#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
565
566#ifndef EXT4_MB_HISTORY
567static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
568{
569 return;
570}
571#else
572static void ext4_mb_store_history(struct ext4_allocation_context *ac);
573#endif
574
575#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
576
577static struct proc_dir_entry *proc_root_ext4;
578struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
579ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
580 ext4_fsblk_t goal, unsigned long *count, int *errp);
581
582static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
583 ext4_group_t group);
584static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
585static void ext4_mb_free_committed_blocks(struct super_block *);
586static void ext4_mb_return_to_preallocation(struct inode *inode,
587 struct ext4_buddy *e4b, sector_t block,
588 int count);
589static void ext4_mb_put_pa(struct ext4_allocation_context *,
590 struct super_block *, struct ext4_prealloc_space *pa);
591static int ext4_mb_init_per_dev_proc(struct super_block *sb);
592static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
593
594
595static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
596{
597 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
598
599 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
600}
601
602static inline void ext4_unlock_group(struct super_block *sb,
603 ext4_group_t group)
604{
605 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
606
607 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
608}
609
610static inline int ext4_is_group_locked(struct super_block *sb,
611 ext4_group_t group)
612{
613 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
614
615 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
616 &(grinfo->bb_state));
617}
618
619static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
620 struct ext4_free_extent *fex)
621{
622 ext4_fsblk_t block;
623
624 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
625 + fex->fe_start
626 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
627 return block;
628}
629
630static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 334static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
631{ 335{
632#if BITS_PER_LONG == 64 336#if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
736 blocknr += 440 blocknr +=
737 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 441 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
738 442
739 ext4_error(sb, __FUNCTION__, "double-free of inode" 443 ext4_error(sb, __func__, "double-free of inode"
740 " %lu's block %llu(bit %u in group %lu)\n", 444 " %lu's block %llu(bit %u in group %lu)\n",
741 inode ? inode->i_ino : 0, blocknr, 445 inode ? inode->i_ino : 0, blocknr,
742 first + i, e4b->bd_group); 446 first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
898 list_for_each(cur, &grp->bb_prealloc_list) { 602 list_for_each(cur, &grp->bb_prealloc_list) {
899 ext4_group_t groupnr; 603 ext4_group_t groupnr;
900 struct ext4_prealloc_space *pa; 604 struct ext4_prealloc_space *pa;
901 pa = list_entry(cur, struct ext4_prealloc_space, group_list); 605 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
902 ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k); 606 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
903 MB_CHECK_ASSERT(groupnr == e4b->bd_group); 607 MB_CHECK_ASSERT(groupnr == e4b->bd_group);
904 for (i = 0; i < pa->len; i++) 608 for (i = 0; i < pa->pa_len; i++)
905 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy)); 609 MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
906 } 610 }
907 return 0; 611 return 0;
908} 612}
909#undef MB_CHECK_ASSERT 613#undef MB_CHECK_ASSERT
910#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \ 614#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
911 __FILE__, __FUNCTION__, __LINE__) 615 __FILE__, __func__, __LINE__)
912#else 616#else
913#define mb_check_buddy(e4b) 617#define mb_check_buddy(e4b)
914#endif 618#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
982 grp->bb_fragments = fragments; 686 grp->bb_fragments = fragments;
983 687
984 if (free != grp->bb_free) { 688 if (free != grp->bb_free) {
985 ext4_error(sb, __FUNCTION__, 689 ext4_error(sb, __func__,
986 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 690 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
987 group, free, grp->bb_free); 691 group, free, grp->bb_free);
988 /* 692 /*
@@ -1168,8 +872,9 @@ out:
1168 return err; 872 return err;
1169} 873}
1170 874
1171static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 875static noinline_for_stack int
1172 struct ext4_buddy *e4b) 876ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
877 struct ext4_buddy *e4b)
1173{ 878{
1174 struct ext4_sb_info *sbi = EXT4_SB(sb); 879 struct ext4_sb_info *sbi = EXT4_SB(sb);
1175 struct inode *inode = sbi->s_buddy_cache; 880 struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1367 blocknr += 1072 blocknr +=
1368 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1073 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1369 1074
1370 ext4_error(sb, __FUNCTION__, "double-free of inode" 1075 ext4_error(sb, __func__, "double-free of inode"
1371 " %lu's block %llu(bit %u in group %lu)\n", 1076 " %lu's block %llu(bit %u in group %lu)\n",
1372 inode ? inode->i_ino : 0, blocknr, block, 1077 inode ? inode->i_ino : 0, blocknr, block,
1373 e4b->bd_group); 1078 e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1848 * free blocks even though group info says we 1553 * free blocks even though group info says we
1849 * we have free blocks 1554 * we have free blocks
1850 */ 1555 */
1851 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1556 ext4_error(sb, __func__, "%d free blocks as per "
1852 "group info. But bitmap says 0\n", 1557 "group info. But bitmap says 0\n",
1853 free); 1558 free);
1854 break; 1559 break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1857 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1562 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1858 BUG_ON(ex.fe_len <= 0); 1563 BUG_ON(ex.fe_len <= 0);
1859 if (free < ex.fe_len) { 1564 if (free < ex.fe_len) {
1860 ext4_error(sb, __FUNCTION__, "%d free blocks as per " 1565 ext4_error(sb, __func__, "%d free blocks as per "
1861 "group info. But got %d blocks\n", 1566 "group info. But got %d blocks\n",
1862 free, ex.fe_len); 1567 free, ex.fe_len);
1863 /* 1568 /*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1965 return 0; 1670 return 0;
1966} 1671}
1967 1672
1968static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1673static noinline_for_stack int
1674ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1969{ 1675{
1970 ext4_group_t group; 1676 ext4_group_t group;
1971 ext4_group_t i; 1677 ext4_group_t i;
@@ -2449,17 +2155,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2449 int i; 2155 int i;
2450 2156
2451 if (sbi->s_mb_proc != NULL) { 2157 if (sbi->s_mb_proc != NULL) {
2452 struct proc_dir_entry *p; 2158 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
2453 p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); 2159 &ext4_mb_seq_history_fops, sb);
2454 if (p) { 2160 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
2455 p->proc_fops = &ext4_mb_seq_history_fops; 2161 &ext4_mb_seq_groups_fops, sb);
2456 p->data = sb;
2457 }
2458 p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
2459 if (p) {
2460 p->proc_fops = &ext4_mb_seq_groups_fops;
2461 p->data = sb;
2462 }
2463 } 2162 }
2464 2163
2465 sbi->s_mb_history_max = 1000; 2164 sbi->s_mb_history_max = 1000;
@@ -2472,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
2472 /* if we can't allocate history, then we simple won't use it */ 2171 /* if we can't allocate history, then we simple won't use it */
2473} 2172}
2474 2173
2475static void ext4_mb_store_history(struct ext4_allocation_context *ac) 2174static noinline_for_stack void
2175ext4_mb_store_history(struct ext4_allocation_context *ac)
2476{ 2176{
2477 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2177 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2478 struct ext4_mb_history h; 2178 struct ext4_mb_history h;
@@ -2572,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
2572 meta_group_info[j] = kzalloc(len, GFP_KERNEL); 2272 meta_group_info[j] = kzalloc(len, GFP_KERNEL);
2573 if (meta_group_info[j] == NULL) { 2273 if (meta_group_info[j] == NULL) {
2574 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); 2274 printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
2575 i--;
2576 goto err_freebuddy; 2275 goto err_freebuddy;
2577 } 2276 }
2578 desc = ext4_get_group_desc(sb, i, NULL); 2277 desc = ext4_get_group_desc(sb, i, NULL);
2579 if (desc == NULL) { 2278 if (desc == NULL) {
2580 printk(KERN_ERR 2279 printk(KERN_ERR
2581 "EXT4-fs: can't read descriptor %lu\n", i); 2280 "EXT4-fs: can't read descriptor %lu\n", i);
2281 i++;
2582 goto err_freebuddy; 2282 goto err_freebuddy;
2583 } 2283 }
2584 memset(meta_group_info[j], 0, len); 2284 memset(meta_group_info[j], 0, len);
@@ -2618,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
2618 return 0; 2318 return 0;
2619 2319
2620err_freebuddy: 2320err_freebuddy:
2621 while (i >= 0) { 2321 while (i-- > 0)
2622 kfree(ext4_get_group_info(sb, i)); 2322 kfree(ext4_get_group_info(sb, i));
2623 i--;
2624 }
2625 i = num_meta_group_infos; 2323 i = num_meta_group_infos;
2626err_freemeta: 2324err_freemeta:
2627 while (--i >= 0) 2325 while (i-- > 0)
2628 kfree(sbi->s_group_info[i]); 2326 kfree(sbi->s_group_info[i]);
2629 iput(sbi->s_buddy_cache); 2327 iput(sbi->s_buddy_cache);
2630err_freesgi: 2328err_freesgi:
@@ -2808,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
2808 return 0; 2506 return 0;
2809} 2507}
2810 2508
2811static void ext4_mb_free_committed_blocks(struct super_block *sb) 2509static noinline_for_stack void
2510ext4_mb_free_committed_blocks(struct super_block *sb)
2812{ 2511{
2813 struct ext4_sb_info *sbi = EXT4_SB(sb); 2512 struct ext4_sb_info *sbi = EXT4_SB(sb);
2814 int err; 2513 int err;
@@ -2867,7 +2566,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
2867 mb_debug("freed %u blocks in %u structures\n", count, count2); 2566 mb_debug("freed %u blocks in %u structures\n", count, count2);
2868} 2567}
2869 2568
2870#define EXT4_ROOT "ext4"
2871#define EXT4_MB_STATS_NAME "stats" 2569#define EXT4_MB_STATS_NAME "stats"
2872#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" 2570#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
2873#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" 2571#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
@@ -3007,9 +2705,9 @@ int __init init_ext4_mballoc(void)
3007 return -ENOMEM; 2705 return -ENOMEM;
3008 } 2706 }
3009#ifdef CONFIG_PROC_FS 2707#ifdef CONFIG_PROC_FS
3010 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2708 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
3011 if (proc_root_ext4 == NULL) 2709 if (proc_root_ext4 == NULL)
3012 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2710 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
3013#endif 2711#endif
3014 return 0; 2712 return 0;
3015} 2713}
@@ -3020,7 +2718,7 @@ void exit_ext4_mballoc(void)
3020 kmem_cache_destroy(ext4_pspace_cachep); 2718 kmem_cache_destroy(ext4_pspace_cachep);
3021 kmem_cache_destroy(ext4_ac_cachep); 2719 kmem_cache_destroy(ext4_ac_cachep);
3022#ifdef CONFIG_PROC_FS 2720#ifdef CONFIG_PROC_FS
3023 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2721 remove_proc_entry("fs/ext4", NULL);
3024#endif 2722#endif
3025} 2723}
3026 2724
@@ -3029,7 +2727,8 @@ void exit_ext4_mballoc(void)
3029 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps 2727 * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
3030 * Returns 0 if success or error code 2728 * Returns 0 if success or error code
3031 */ 2729 */
3032static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2730static noinline_for_stack int
2731ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3033 handle_t *handle) 2732 handle_t *handle)
3034{ 2733{
3035 struct buffer_head *bitmap_bh = NULL; 2734 struct buffer_head *bitmap_bh = NULL;
@@ -3078,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3078 in_range(block, ext4_inode_table(sb, gdp), 2777 in_range(block, ext4_inode_table(sb, gdp),
3079 EXT4_SB(sb)->s_itb_per_group)) { 2778 EXT4_SB(sb)->s_itb_per_group)) {
3080 2779
3081 ext4_error(sb, __FUNCTION__, 2780 ext4_error(sb, __func__,
3082 "Allocating block in system zone - block = %llu", 2781 "Allocating block in system zone - block = %llu",
3083 block); 2782 block);
3084 } 2783 }
@@ -3102,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3102 ac->ac_b_ex.fe_group, 2801 ac->ac_b_ex.fe_group,
3103 gdp)); 2802 gdp));
3104 } 2803 }
3105 gdp->bg_free_blocks_count = 2804 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
3106 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
3107 - ac->ac_b_ex.fe_len);
3108 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2805 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
3109 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2806 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
3110 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); 2807 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3138,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3138 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2835 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3139 else 2836 else
3140 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2837 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3141 mb_debug("#%u: goal %lu blocks for locality group\n", 2838 mb_debug("#%u: goal %u blocks for locality group\n",
3142 current->pid, ac->ac_g_ex.fe_len); 2839 current->pid, ac->ac_g_ex.fe_len);
3143} 2840}
3144 2841
@@ -3146,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3146 * Normalization means making request better in terms of 2843 * Normalization means making request better in terms of
3147 * size and alignment 2844 * size and alignment
3148 */ 2845 */
3149static void ext4_mb_normalize_request(struct ext4_allocation_context *ac, 2846static noinline_for_stack void
2847ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3150 struct ext4_allocation_request *ar) 2848 struct ext4_allocation_request *ar)
3151{ 2849{
3152 int bsbits, max; 2850 int bsbits, max;
3153 ext4_lblk_t end; 2851 ext4_lblk_t end;
3154 struct list_head *cur;
3155 loff_t size, orig_size, start_off; 2852 loff_t size, orig_size, start_off;
3156 ext4_lblk_t start, orig_start; 2853 ext4_lblk_t start, orig_start;
3157 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2854 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2855 struct ext4_prealloc_space *pa;
3158 2856
3159 /* do normalize only data requests, metadata requests 2857 /* do normalize only data requests, metadata requests
3160 do not need preallocation */ 2858 do not need preallocation */
@@ -3240,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3240 2938
3241 /* check we don't cross already preallocated blocks */ 2939 /* check we don't cross already preallocated blocks */
3242 rcu_read_lock(); 2940 rcu_read_lock();
3243 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2941 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3244 struct ext4_prealloc_space *pa;
3245 unsigned long pa_end; 2942 unsigned long pa_end;
3246 2943
3247 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3248
3249 if (pa->pa_deleted) 2944 if (pa->pa_deleted)
3250 continue; 2945 continue;
3251 spin_lock(&pa->pa_lock); 2946 spin_lock(&pa->pa_lock);
@@ -3287,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3287 2982
3288 /* XXX: extra loop to check we really don't overlap preallocations */ 2983 /* XXX: extra loop to check we really don't overlap preallocations */
3289 rcu_read_lock(); 2984 rcu_read_lock();
3290 list_for_each_rcu(cur, &ei->i_prealloc_list) { 2985 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3291 struct ext4_prealloc_space *pa;
3292 unsigned long pa_end; 2986 unsigned long pa_end;
3293 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3294 spin_lock(&pa->pa_lock); 2987 spin_lock(&pa->pa_lock);
3295 if (pa->pa_deleted == 0) { 2988 if (pa->pa_deleted == 0) {
3296 pa_end = pa->pa_lstart + pa->pa_len; 2989 pa_end = pa->pa_lstart + pa->pa_len;
@@ -3382,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3382 BUG_ON(pa->pa_free < len); 3075 BUG_ON(pa->pa_free < len);
3383 pa->pa_free -= len; 3076 pa->pa_free -= len;
3384 3077
3385 mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa); 3078 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
3386} 3079}
3387 3080
3388/* 3081/*
@@ -3412,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3412/* 3105/*
3413 * search goal blocks in preallocated space 3106 * search goal blocks in preallocated space
3414 */ 3107 */
3415static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3108static noinline_for_stack int
3109ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3416{ 3110{
3417 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3111 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3418 struct ext4_locality_group *lg; 3112 struct ext4_locality_group *lg;
3419 struct ext4_prealloc_space *pa; 3113 struct ext4_prealloc_space *pa;
3420 struct list_head *cur;
3421 3114
3422 /* only data can be preallocated */ 3115 /* only data can be preallocated */
3423 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3116 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3425,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3425 3118
3426 /* first, try per-file preallocation */ 3119 /* first, try per-file preallocation */
3427 rcu_read_lock(); 3120 rcu_read_lock();
3428 list_for_each_rcu(cur, &ei->i_prealloc_list) { 3121 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
3429 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3430 3122
3431 /* all fields in this condition don't change, 3123 /* all fields in this condition don't change,
3432 * so we can skip locking for them */ 3124 * so we can skip locking for them */
@@ -3458,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3458 return 0; 3150 return 0;
3459 3151
3460 rcu_read_lock(); 3152 rcu_read_lock();
3461 list_for_each_rcu(cur, &lg->lg_prealloc_list) { 3153 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
3462 pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
3463 spin_lock(&pa->pa_lock); 3154 spin_lock(&pa->pa_lock);
3464 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3155 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
3465 atomic_inc(&pa->pa_count); 3156 atomic_inc(&pa->pa_count);
@@ -3579,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
3579/* 3270/*
3580 * creates new preallocated space for given inode 3271 * creates new preallocated space for given inode
3581 */ 3272 */
3582static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) 3273static noinline_for_stack int
3274ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3583{ 3275{
3584 struct super_block *sb = ac->ac_sb; 3276 struct super_block *sb = ac->ac_sb;
3585 struct ext4_prealloc_space *pa; 3277 struct ext4_prealloc_space *pa;
@@ -3666,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3666/* 3358/*
3667 * creates new preallocated space for locality group inodes belongs to 3359 * creates new preallocated space for locality group inodes belongs to
3668 */ 3360 */
3669static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac) 3361static noinline_for_stack int
3362ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3670{ 3363{
3671 struct super_block *sb = ac->ac_sb; 3364 struct super_block *sb = ac->ac_sb;
3672 struct ext4_locality_group *lg; 3365 struct ext4_locality_group *lg;
@@ -3739,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
3739 * the caller MUST hold group/inode locks. 3432 * the caller MUST hold group/inode locks.
3740 * TODO: optimize the case when there are no in-core structures yet 3433 * TODO: optimize the case when there are no in-core structures yet
3741 */ 3434 */
3742static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b, 3435static noinline_for_stack int
3743 struct buffer_head *bitmap_bh, 3436ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3744 struct ext4_prealloc_space *pa) 3437 struct ext4_prealloc_space *pa,
3438 struct ext4_allocation_context *ac)
3745{ 3439{
3746 struct ext4_allocation_context *ac;
3747 struct super_block *sb = e4b->bd_sb; 3440 struct super_block *sb = e4b->bd_sb;
3748 struct ext4_sb_info *sbi = EXT4_SB(sb); 3441 struct ext4_sb_info *sbi = EXT4_SB(sb);
3749 unsigned long end; 3442 unsigned long end;
@@ -3759,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3759 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3452 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3760 end = bit + pa->pa_len; 3453 end = bit + pa->pa_len;
3761 3454
3762 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3763
3764 if (ac) { 3455 if (ac) {
3765 ac->ac_sb = sb; 3456 ac->ac_sb = sb;
3766 ac->ac_inode = pa->pa_inode; 3457 ac->ac_inode = pa->pa_inode;
@@ -3797,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3797 pa, (unsigned long) pa->pa_lstart, 3488 pa, (unsigned long) pa->pa_lstart,
3798 (unsigned long) pa->pa_pstart, 3489 (unsigned long) pa->pa_pstart,
3799 (unsigned long) pa->pa_len); 3490 (unsigned long) pa->pa_len);
3800 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n", 3491 ext4_error(sb, __func__, "free %u, pa_free %u\n",
3801 free, pa->pa_free); 3492 free, pa->pa_free);
3802 /* 3493 /*
3803 * pa is already deleted so we use the value obtained 3494 * pa is already deleted so we use the value obtained
@@ -3805,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3805 */ 3496 */
3806 } 3497 }
3807 atomic_add(free, &sbi->s_mb_discarded); 3498 atomic_add(free, &sbi->s_mb_discarded);
3808 if (ac)
3809 kmem_cache_free(ext4_ac_cachep, ac);
3810 3499
3811 return err; 3500 return err;
3812} 3501}
3813 3502
3814static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3503static noinline_for_stack int
3815 struct ext4_prealloc_space *pa) 3504ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3505 struct ext4_prealloc_space *pa,
3506 struct ext4_allocation_context *ac)
3816{ 3507{
3817 struct ext4_allocation_context *ac;
3818 struct super_block *sb = e4b->bd_sb; 3508 struct super_block *sb = e4b->bd_sb;
3819 ext4_group_t group; 3509 ext4_group_t group;
3820 ext4_grpblk_t bit; 3510 ext4_grpblk_t bit;
3821 3511
3822 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3823
3824 if (ac) 3512 if (ac)
3825 ac->ac_op = EXT4_MB_HISTORY_DISCARD; 3513 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3826 3514
@@ -3838,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3838 ac->ac_b_ex.fe_len = pa->pa_len; 3526 ac->ac_b_ex.fe_len = pa->pa_len;
3839 ac->ac_b_ex.fe_logical = 0; 3527 ac->ac_b_ex.fe_logical = 0;
3840 ext4_mb_store_history(ac); 3528 ext4_mb_store_history(ac);
3841 kmem_cache_free(ext4_ac_cachep, ac);
3842 } 3529 }
3843 3530
3844 return 0; 3531 return 0;
@@ -3853,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3853 * - how many do we discard 3540 * - how many do we discard
3854 * 1) how many requested 3541 * 1) how many requested
3855 */ 3542 */
3856static int ext4_mb_discard_group_preallocations(struct super_block *sb, 3543static noinline_for_stack int
3544ext4_mb_discard_group_preallocations(struct super_block *sb,
3857 ext4_group_t group, int needed) 3545 ext4_group_t group, int needed)
3858{ 3546{
3859 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3547 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
3860 struct buffer_head *bitmap_bh = NULL; 3548 struct buffer_head *bitmap_bh = NULL;
3861 struct ext4_prealloc_space *pa, *tmp; 3549 struct ext4_prealloc_space *pa, *tmp;
3550 struct ext4_allocation_context *ac;
3862 struct list_head list; 3551 struct list_head list;
3863 struct ext4_buddy e4b; 3552 struct ext4_buddy e4b;
3864 int err; 3553 int err;
@@ -3886,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
3886 grp = ext4_get_group_info(sb, group); 3575 grp = ext4_get_group_info(sb, group);
3887 INIT_LIST_HEAD(&list); 3576 INIT_LIST_HEAD(&list);
3888 3577
3578 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3889repeat: 3579repeat:
3890 ext4_lock_group(sb, group); 3580 ext4_lock_group(sb, group);
3891 list_for_each_entry_safe(pa, tmp, 3581 list_for_each_entry_safe(pa, tmp,
@@ -3940,9 +3630,9 @@ repeat:
3940 spin_unlock(pa->pa_obj_lock); 3630 spin_unlock(pa->pa_obj_lock);
3941 3631
3942 if (pa->pa_linear) 3632 if (pa->pa_linear)
3943 ext4_mb_release_group_pa(&e4b, pa); 3633 ext4_mb_release_group_pa(&e4b, pa, ac);
3944 else 3634 else
3945 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3635 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
3946 3636
3947 list_del(&pa->u.pa_tmp_list); 3637 list_del(&pa->u.pa_tmp_list);
3948 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3638 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3950,6 +3640,8 @@ repeat:
3950 3640
3951out: 3641out:
3952 ext4_unlock_group(sb, group); 3642 ext4_unlock_group(sb, group);
3643 if (ac)
3644 kmem_cache_free(ext4_ac_cachep, ac);
3953 ext4_mb_release_desc(&e4b); 3645 ext4_mb_release_desc(&e4b);
3954 put_bh(bitmap_bh); 3646 put_bh(bitmap_bh);
3955 return free; 3647 return free;
@@ -3970,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3970 struct super_block *sb = inode->i_sb; 3662 struct super_block *sb = inode->i_sb;
3971 struct buffer_head *bitmap_bh = NULL; 3663 struct buffer_head *bitmap_bh = NULL;
3972 struct ext4_prealloc_space *pa, *tmp; 3664 struct ext4_prealloc_space *pa, *tmp;
3665 struct ext4_allocation_context *ac;
3973 ext4_group_t group = 0; 3666 ext4_group_t group = 0;
3974 struct list_head list; 3667 struct list_head list;
3975 struct ext4_buddy e4b; 3668 struct ext4_buddy e4b;
@@ -3984,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3984 3677
3985 INIT_LIST_HEAD(&list); 3678 INIT_LIST_HEAD(&list);
3986 3679
3680 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3987repeat: 3681repeat:
3988 /* first, collect all pa's in the inode */ 3682 /* first, collect all pa's in the inode */
3989 spin_lock(&ei->i_prealloc_lock); 3683 spin_lock(&ei->i_prealloc_lock);
@@ -4048,7 +3742,7 @@ repeat:
4048 3742
4049 ext4_lock_group(sb, group); 3743 ext4_lock_group(sb, group);
4050 list_del(&pa->pa_group_list); 3744 list_del(&pa->pa_group_list);
4051 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); 3745 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
4052 ext4_unlock_group(sb, group); 3746 ext4_unlock_group(sb, group);
4053 3747
4054 ext4_mb_release_desc(&e4b); 3748 ext4_mb_release_desc(&e4b);
@@ -4057,6 +3751,8 @@ repeat:
4057 list_del(&pa->u.pa_tmp_list); 3751 list_del(&pa->u.pa_tmp_list);
4058 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); 3752 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4059 } 3753 }
3754 if (ac)
3755 kmem_cache_free(ext4_ac_cachep, ac);
4060} 3756}
4061 3757
4062/* 3758/*
@@ -4116,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4116 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3812 printk(KERN_ERR "PA:%lu:%d:%u \n", i,
4117 start, pa->pa_len); 3813 start, pa->pa_len);
4118 } 3814 }
4119 ext4_lock_group(sb, i); 3815 ext4_unlock_group(sb, i);
4120 3816
4121 if (grp->bb_free == 0) 3817 if (grp->bb_free == 0)
4122 continue; 3818 continue;
@@ -4175,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4175 mutex_lock(&ac->ac_lg->lg_mutex); 3871 mutex_lock(&ac->ac_lg->lg_mutex);
4176} 3872}
4177 3873
4178static int ext4_mb_initialize_context(struct ext4_allocation_context *ac, 3874static noinline_for_stack int
3875ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4179 struct ext4_allocation_request *ar) 3876 struct ext4_allocation_request *ar)
4180{ 3877{
4181 struct super_block *sb = ar->inode->i_sb; 3878 struct super_block *sb = ar->inode->i_sb;
@@ -4406,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4406 ext4_mb_free_committed_blocks(sb); 4103 ext4_mb_free_committed_blocks(sb);
4407} 4104}
4408 4105
4409static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4106static noinline_for_stack int
4107ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4410 ext4_group_t group, ext4_grpblk_t block, int count) 4108 ext4_group_t group, ext4_grpblk_t block, int count)
4411{ 4109{
4412 struct ext4_group_info *db = e4b->bd_info; 4110 struct ext4_group_info *db = e4b->bd_info;
@@ -4497,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4497 if (block < le32_to_cpu(es->s_first_data_block) || 4195 if (block < le32_to_cpu(es->s_first_data_block) ||
4498 block + count < block || 4196 block + count < block ||
4499 block + count > ext4_blocks_count(es)) { 4197 block + count > ext4_blocks_count(es)) {
4500 ext4_error(sb, __FUNCTION__, 4198 ext4_error(sb, __func__,
4501 "Freeing blocks not in datazone - " 4199 "Freeing blocks not in datazone - "
4502 "block = %lu, count = %lu", block, count); 4200 "block = %lu, count = %lu", block, count);
4503 goto error_return; 4201 goto error_return;
@@ -4538,7 +4236,7 @@ do_more:
4538 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4236 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4539 EXT4_SB(sb)->s_itb_per_group)) { 4237 EXT4_SB(sb)->s_itb_per_group)) {
4540 4238
4541 ext4_error(sb, __FUNCTION__, 4239 ext4_error(sb, __func__,
4542 "Freeing blocks in system zone - " 4240 "Freeing blocks in system zone - "
4543 "Block = %lu, count = %lu", block, count); 4241 "Block = %lu, count = %lu", block, count);
4544 } 4242 }
@@ -4596,8 +4294,7 @@ do_more:
4596 } 4294 }
4597 4295
4598 spin_lock(sb_bgl_lock(sbi, block_group)); 4296 spin_lock(sb_bgl_lock(sbi, block_group));
4599 gdp->bg_free_blocks_count = 4297 le16_add_cpu(&gdp->bg_free_blocks_count, count);
4600 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
4601 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4298 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4602 spin_unlock(sb_bgl_lock(sbi, block_group)); 4299 spin_unlock(sb_bgl_lock(sbi, block_group));
4603 percpu_counter_add(&sbi->s_freeblocks_counter, count); 4300 percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 000000000000..bfe6add46bcf
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
1/*
2 * fs/ext4/mballoc.h
3 *
4 * Written by: Alex Tomas <alex@clusterfs.com>
5 *
6 */
7#ifndef _EXT4_MBALLOC_H
8#define _EXT4_MBALLOC_H
9
10#include <linux/time.h>
11#include <linux/fs.h>
12#include <linux/namei.h>
13#include <linux/quotaops.h>
14#include <linux/buffer_head.h>
15#include <linux/module.h>
16#include <linux/swap.h>
17#include <linux/proc_fs.h>
18#include <linux/pagemap.h>
19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include "ext4_jbd2.h"
22#include "ext4.h"
23#include "group.h"
24
25/*
26 * with AGGRESSIVE_CHECK allocator runs consistency checks over
27 * structures. these checks slow things down a lot
28 */
29#define AGGRESSIVE_CHECK__
30
31/*
32 * with DOUBLE_CHECK defined mballoc creates persistent in-core
33 * bitmaps, maintains and uses them to check for double allocations
34 */
35#define DOUBLE_CHECK__
36
37/*
38 */
39#define MB_DEBUG__
40#ifdef MB_DEBUG
41#define mb_debug(fmt, a...) printk(fmt, ##a)
42#else
43#define mb_debug(fmt, a...)
44#endif
45
46/*
47 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
48 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
49 */
50#define EXT4_MB_HISTORY
51#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
52#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
53#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
54#define EXT4_MB_HISTORY_FREE 8 /* free */
55
56#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
57 EXT4_MB_HISTORY_PREALLOC)
58
59/*
60 * How long mballoc can look for a best extent (in found extents)
61 */
62#define MB_DEFAULT_MAX_TO_SCAN 200
63
64/*
65 * How long mballoc must look for a best extent
66 */
67#define MB_DEFAULT_MIN_TO_SCAN 10
68
69/*
70 * How many groups mballoc will scan looking for the best chunk
71 */
72#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
73
74/*
75 * with 'ext4_mb_stats' allocator will collect stats that will be
76 * shown at umount. The collecting costs though!
77 */
78#define MB_DEFAULT_STATS 1
79
80/*
81 * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
82 * by the stream allocator, which purpose is to pack requests
83 * as close each to other as possible to produce smooth I/O traffic
84 * We use locality group prealloc space for stream request.
85 * We can tune the same via /proc/fs/ext4/<parition>/stream_req
86 */
87#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
88
89/*
90 * for which requests use 2^N search using buddies
91 */
92#define MB_DEFAULT_ORDER2_REQS 2
93
94/*
95 * default group prealloc size 512 blocks
96 */
97#define MB_DEFAULT_GROUP_PREALLOC 512
98
99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep;
101
102#ifdef EXT4_BB_MAX_BLOCKS
103#undef EXT4_BB_MAX_BLOCKS
104#endif
105#define EXT4_BB_MAX_BLOCKS 30
106
107struct ext4_free_metadata {
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list;
112};
113
114struct ext4_group_info {
115 unsigned long bb_state;
116 unsigned long bb_tid;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free;
119 unsigned short bb_free;
120 unsigned short bb_fragments;
121 struct list_head bb_prealloc_list;
122#ifdef DOUBLE_CHECK
123 void *bb_bitmap;
124#endif
125 unsigned short bb_counters[];
126};
127
128#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
129#define EXT4_GROUP_INFO_LOCKED_BIT 1
130
131#define EXT4_MB_GRP_NEED_INIT(grp) \
132 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
133
134
135struct ext4_prealloc_space {
136 struct list_head pa_inode_list;
137 struct list_head pa_group_list;
138 union {
139 struct list_head pa_tmp_list;
140 struct rcu_head pa_rcu;
141 } u;
142 spinlock_t pa_lock;
143 atomic_t pa_count;
144 unsigned pa_deleted;
145 ext4_fsblk_t pa_pstart; /* phys. block */
146 ext4_lblk_t pa_lstart; /* log. block */
147 unsigned short pa_len; /* len of preallocated chunk */
148 unsigned short pa_free; /* how many blocks are free */
149 unsigned short pa_linear; /* consumed in one direction
150 * strictly, for grp prealloc */
151 spinlock_t *pa_obj_lock;
152 struct inode *pa_inode; /* hack, for history only */
153};
154
155
156struct ext4_free_extent {
157 ext4_lblk_t fe_logical;
158 ext4_grpblk_t fe_start;
159 ext4_group_t fe_group;
160 int fe_len;
161};
162
163/*
164 * Locality group:
165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well
167 */
168struct ext4_locality_group {
169 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */
172 spinlock_t lg_prealloc_lock;
173};
174
175struct ext4_allocation_context {
176 struct inode *ac_inode;
177 struct super_block *ac_sb;
178
179 /* original request */
180 struct ext4_free_extent ac_o_ex;
181
182 /* goal request (after normalization) */
183 struct ext4_free_extent ac_g_ex;
184
185 /* the best found extent */
186 struct ext4_free_extent ac_b_ex;
187
188 /* copy of the bext found extent taken before preallocation efforts */
189 struct ext4_free_extent ac_f_ex;
190
191 /* number of iterations done. we have to track to limit searching */
192 unsigned long ac_ex_scanned;
193 __u16 ac_groups_scanned;
194 __u16 ac_found;
195 __u16 ac_tail;
196 __u16 ac_buddy;
197 __u16 ac_flags; /* allocation hints */
198 __u8 ac_status;
199 __u8 ac_criteria;
200 __u8 ac_repeats;
201 __u8 ac_2order; /* if request is to allocate 2^N blocks and
202 * N > 0, the field stores N, otherwise 0 */
203 __u8 ac_op; /* operation, for history only */
204 struct page *ac_bitmap_page;
205 struct page *ac_buddy_page;
206 struct ext4_prealloc_space *ac_pa;
207 struct ext4_locality_group *ac_lg;
208};
209
210#define AC_STATUS_CONTINUE 1
211#define AC_STATUS_FOUND 2
212#define AC_STATUS_BREAK 3
213
214struct ext4_mb_history {
215 struct ext4_free_extent orig; /* orig allocation */
216 struct ext4_free_extent goal; /* goal allocation */
217 struct ext4_free_extent result; /* result allocation */
218 unsigned pid;
219 unsigned ino;
220 __u16 found; /* how many extents have been found */
221 __u16 groups; /* how many groups have been scanned */
222 __u16 tail; /* what tail broke some buddy */
223 __u16 buddy; /* buddy the tail ^^^ broke */
224 __u16 flags;
225 __u8 cr:3; /* which phase the result extent was found at */
226 __u8 op:4;
227 __u8 merged:1;
228};
229
230struct ext4_buddy {
231 struct page *bd_buddy_page;
232 void *bd_buddy;
233 struct page *bd_bitmap_page;
234 void *bd_bitmap;
235 struct ext4_group_info *bd_info;
236 struct super_block *bd_sb;
237 __u16 bd_blkbits;
238 ext4_group_t bd_group;
239};
240#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
241#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
242
243#ifndef EXT4_MB_HISTORY
244static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
245{
246 return;
247}
248#else
249static void ext4_mb_store_history(struct ext4_allocation_context *ac);
250#endif
251
252#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
253
254static struct proc_dir_entry *proc_root_ext4;
255struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
256
257static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
258 ext4_group_t group);
259static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
260static void ext4_mb_free_committed_blocks(struct super_block *);
261static void ext4_mb_return_to_preallocation(struct inode *inode,
262 struct ext4_buddy *e4b, sector_t block,
263 int count);
264static void ext4_mb_put_pa(struct ext4_allocation_context *,
265 struct super_block *, struct ext4_prealloc_space *pa);
266static int ext4_mb_init_per_dev_proc(struct super_block *sb);
267static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
268
269
270static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
271{
272 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
273
274 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
275}
276
277static inline void ext4_unlock_group(struct super_block *sb,
278 ext4_group_t group)
279{
280 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
281
282 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
283}
284
285static inline int ext4_is_group_locked(struct super_block *sb,
286 ext4_group_t group)
287{
288 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
289
290 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
291 &(grinfo->bb_state));
292}
293
294static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
295 struct ext4_free_extent *fex)
296{
297 ext4_fsblk_t block;
298
299 block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
300 + fex->fe_start
301 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
302 return block;
303}
304#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de7755..b9e077ba07e9 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
13 */ 13 */
14 14
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/ext4_jbd2.h> 16#include "ext4_jbd2.h"
17#include <linux/ext4_fs_extents.h> 17#include "ext4_extents.h"
18 18
19/* 19/*
20 * The contiguous blocks details which can be 20 * The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
327} 327}
328 328
329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, 329static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
330 struct inode *tmp_inode) 330 struct inode *tmp_inode)
331{ 331{
332 int retval; 332 int retval;
333 __le32 i_data[3]; 333 __le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
339 * i_data field of the original inode 339 * i_data field of the original inode
340 */ 340 */
341 retval = ext4_journal_extend(handle, 1); 341 retval = ext4_journal_extend(handle, 1);
342 if (retval != 0) { 342 if (retval) {
343 retval = ext4_journal_restart(handle, 1); 343 retval = ext4_journal_restart(handle, 1);
344 if (retval) 344 if (retval)
345 goto err_out; 345 goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
351 351
352 down_write(&EXT4_I(inode)->i_data_sem); 352 down_write(&EXT4_I(inode)->i_data_sem);
353 /* 353 /*
354 * if EXT4_EXT_MIGRATE is cleared a block allocation
355 * happened after we started the migrate. We need to
356 * fail the migrate
357 */
358 if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
359 retval = -EAGAIN;
360 up_write(&EXT4_I(inode)->i_data_sem);
361 goto err_out;
362 } else
363 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
364 ~EXT4_EXT_MIGRATE;
365 /*
354 * We have the extent map build with the tmp inode. 366 * We have the extent map build with the tmp inode.
355 * Now copy the i_data across 367 * Now copy the i_data across
356 */ 368 */
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
508 * switch the inode format to prevent read. 520 * switch the inode format to prevent read.
509 */ 521 */
510 mutex_lock(&(inode->i_mutex)); 522 mutex_lock(&(inode->i_mutex));
523 /*
524 * Even though we take i_mutex we can still cause block allocation
525 * via mmap write to holes. If we have allocated new blocks we fail
526 * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
527 * The flag is updated with i_data_sem held to prevent racing with
528 * block allocation.
529 */
530 down_read((&EXT4_I(inode)->i_data_sem));
531 EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
532 up_read((&EXT4_I(inode)->i_data_sem));
533
511 handle = ext4_journal_start(inode, 1); 534 handle = ext4_journal_start(inode, 1);
512 535
513 ei = EXT4_I(inode); 536 ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
559 * tmp_inode 582 * tmp_inode
560 */ 583 */
561 free_ext_block(handle, tmp_inode); 584 free_ext_block(handle, tmp_inode);
562 else 585 else {
563 retval = ext4_ext_swap_inode_data(handle, inode, 586 retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
564 tmp_inode); 587 if (retval)
588 /*
589 * if we fail to swap inode data free the extent
590 * details of the tmp inode
591 */
592 free_ext_block(handle, tmp_inode);
593 }
565 594
566 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */ 595 /* We mark the tmp_inode dirty via ext4_ext_tree_init. */
567 if (ext4_journal_extend(handle, 1) != 0) 596 if (ext4_journal_extend(handle, 1) != 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297e..ab16beaa830d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/jbd2.h> 29#include <linux/jbd2.h>
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/ext4_fs.h>
32#include <linux/ext4_jbd2.h>
33#include <linux/fcntl.h> 31#include <linux/fcntl.h>
34#include <linux/stat.h> 32#include <linux/stat.h>
35#include <linux/string.h> 33#include <linux/string.h>
36#include <linux/quotaops.h> 34#include <linux/quotaops.h>
37#include <linux/buffer_head.h> 35#include <linux/buffer_head.h>
38#include <linux/bio.h> 36#include <linux/bio.h>
37#include "ext4.h"
38#include "ext4_jbd2.h"
39 39
40#include "namei.h" 40#include "namei.h"
41#include "xattr.h" 41#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
57 57
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 59
60 if ((bh = ext4_bread(handle, inode, *block, 1, err))) { 60 bh = ext4_bread(handle, inode, *block, 1, err);
61 if (bh) {
61 inode->i_size += inode->i_sb->s_blocksize; 62 inode->i_size += inode->i_sb->s_blocksize;
62 EXT4_I(inode)->i_disksize = inode->i_size; 63 EXT4_I(inode)->i_disksize = inode->i_size;
63 ext4_journal_get_write_access(handle,bh); 64 *err = ext4_journal_get_write_access(handle, bh);
65 if (*err) {
66 brelse(bh);
67 bh = NULL;
68 }
64 } 69 }
65 return bh; 70 return bh;
66} 71}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
348 if (root->info.hash_version != DX_HASH_TEA && 353 if (root->info.hash_version != DX_HASH_TEA &&
349 root->info.hash_version != DX_HASH_HALF_MD4 && 354 root->info.hash_version != DX_HASH_HALF_MD4 &&
350 root->info.hash_version != DX_HASH_LEGACY) { 355 root->info.hash_version != DX_HASH_LEGACY) {
351 ext4_warning(dir->i_sb, __FUNCTION__, 356 ext4_warning(dir->i_sb, __func__,
352 "Unrecognised inode hash code %d", 357 "Unrecognised inode hash code %d",
353 root->info.hash_version); 358 root->info.hash_version);
354 brelse(bh); 359 brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
362 hash = hinfo->hash; 367 hash = hinfo->hash;
363 368
364 if (root->info.unused_flags & 1) { 369 if (root->info.unused_flags & 1) {
365 ext4_warning(dir->i_sb, __FUNCTION__, 370 ext4_warning(dir->i_sb, __func__,
366 "Unimplemented inode hash flags: %#06x", 371 "Unimplemented inode hash flags: %#06x",
367 root->info.unused_flags); 372 root->info.unused_flags);
368 brelse(bh); 373 brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
371 } 376 }
372 377
373 if ((indirect = root->info.indirect_levels) > 1) { 378 if ((indirect = root->info.indirect_levels) > 1) {
374 ext4_warning(dir->i_sb, __FUNCTION__, 379 ext4_warning(dir->i_sb, __func__,
375 "Unimplemented inode hash depth: %#06x", 380 "Unimplemented inode hash depth: %#06x",
376 root->info.indirect_levels); 381 root->info.indirect_levels);
377 brelse(bh); 382 brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
384 389
385 if (dx_get_limit(entries) != dx_root_limit(dir, 390 if (dx_get_limit(entries) != dx_root_limit(dir,
386 root->info.info_length)) { 391 root->info.info_length)) {
387 ext4_warning(dir->i_sb, __FUNCTION__, 392 ext4_warning(dir->i_sb, __func__,
388 "dx entry: limit != root limit"); 393 "dx entry: limit != root limit");
389 brelse(bh); 394 brelse(bh);
390 *err = ERR_BAD_DX_DIR; 395 *err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
396 { 401 {
397 count = dx_get_count(entries); 402 count = dx_get_count(entries);
398 if (!count || count > dx_get_limit(entries)) { 403 if (!count || count > dx_get_limit(entries)) {
399 ext4_warning(dir->i_sb, __FUNCTION__, 404 ext4_warning(dir->i_sb, __func__,
400 "dx entry: no count or count > limit"); 405 "dx entry: no count or count > limit");
401 brelse(bh); 406 brelse(bh);
402 *err = ERR_BAD_DX_DIR; 407 *err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
441 goto fail2; 446 goto fail2;
442 at = entries = ((struct dx_node *) bh->b_data)->entries; 447 at = entries = ((struct dx_node *) bh->b_data)->entries;
443 if (dx_get_limit(entries) != dx_node_limit (dir)) { 448 if (dx_get_limit(entries) != dx_node_limit (dir)) {
444 ext4_warning(dir->i_sb, __FUNCTION__, 449 ext4_warning(dir->i_sb, __func__,
445 "dx entry: limit != node limit"); 450 "dx entry: limit != node limit");
446 brelse(bh); 451 brelse(bh);
447 *err = ERR_BAD_DX_DIR; 452 *err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
457 } 462 }
458fail: 463fail:
459 if (*err == ERR_BAD_DX_DIR) 464 if (*err == ERR_BAD_DX_DIR)
460 ext4_warning(dir->i_sb, __FUNCTION__, 465 ext4_warning(dir->i_sb, __func__,
461 "Corrupt dir inode %ld, running e2fsck is " 466 "Corrupt dir inode %ld, running e2fsck is "
462 "recommended.", dir->i_ino); 467 "recommended.", dir->i_ino);
463 return NULL; 468 return NULL;
@@ -914,7 +919,7 @@ restart:
914 wait_on_buffer(bh); 919 wait_on_buffer(bh);
915 if (!buffer_uptodate(bh)) { 920 if (!buffer_uptodate(bh)) {
916 /* read error, skip block & hope for the best */ 921 /* read error, skip block & hope for the best */
917 ext4_error(sb, __FUNCTION__, "reading directory #%lu " 922 ext4_error(sb, __func__, "reading directory #%lu "
918 "offset %lu", dir->i_ino, 923 "offset %lu", dir->i_ino,
919 (unsigned long)block); 924 (unsigned long)block);
920 brelse(bh); 925 brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
1007 retval = ext4_htree_next_block(dir, hash, frame, 1012 retval = ext4_htree_next_block(dir, hash, frame,
1008 frames, NULL); 1013 frames, NULL);
1009 if (retval < 0) { 1014 if (retval < 0) {
1010 ext4_warning(sb, __FUNCTION__, 1015 ext4_warning(sb, __func__,
1011 "error reading index page in directory #%lu", 1016 "error reading index page in directory #%lu",
1012 dir->i_ino); 1017 dir->i_ino);
1013 *err = retval; 1018 *err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1532 1537
1533 if (levels && (dx_get_count(frames->entries) == 1538 if (levels && (dx_get_count(frames->entries) ==
1534 dx_get_limit(frames->entries))) { 1539 dx_get_limit(frames->entries))) {
1535 ext4_warning(sb, __FUNCTION__, 1540 ext4_warning(sb, __func__,
1536 "Directory index full!"); 1541 "Directory index full!");
1537 err = -ENOSPC; 1542 err = -ENOSPC;
1538 goto cleanup; 1543 goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
1860 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || 1865 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
1861 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { 1866 !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
1862 if (err) 1867 if (err)
1863 ext4_error(inode->i_sb, __FUNCTION__, 1868 ext4_error(inode->i_sb, __func__,
1864 "error %d reading directory #%lu offset 0", 1869 "error %d reading directory #%lu offset 0",
1865 err, inode->i_ino); 1870 err, inode->i_ino);
1866 else 1871 else
1867 ext4_warning(inode->i_sb, __FUNCTION__, 1872 ext4_warning(inode->i_sb, __func__,
1868 "bad directory (dir #%lu) - no data block", 1873 "bad directory (dir #%lu) - no data block",
1869 inode->i_ino); 1874 inode->i_ino);
1870 return 1; 1875 return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
1893 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); 1898 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
1894 if (!bh) { 1899 if (!bh) {
1895 if (err) 1900 if (err)
1896 ext4_error(sb, __FUNCTION__, 1901 ext4_error(sb, __func__,
1897 "error %d reading directory" 1902 "error %d reading directory"
1898 " #%lu offset %lu", 1903 " #%lu offset %lu",
1899 err, inode->i_ino, offset); 1904 err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
2217 goto out_stop; 2222 goto out_stop;
2218 } 2223 }
2219 } else { 2224 } else {
2225 /* clear the extent format for fast symlink */
2226 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
2220 inode->i_op = &ext4_fast_symlink_inode_operations; 2227 inode->i_op = &ext4_fast_symlink_inode_operations;
2221 memcpy((char*)&EXT4_I(inode)->i_data,symname,l); 2228 memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
2222 inode->i_size = l-1; 2229 inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
2347 EXT4_FEATURE_INCOMPAT_FILETYPE)) 2354 EXT4_FEATURE_INCOMPAT_FILETYPE))
2348 new_de->file_type = old_de->file_type; 2355 new_de->file_type = old_de->file_type;
2349 new_dir->i_version++; 2356 new_dir->i_version++;
2357 new_dir->i_ctime = new_dir->i_mtime =
2358 ext4_current_time(new_dir);
2359 ext4_mark_inode_dirty(handle, new_dir);
2350 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); 2360 BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
2351 ext4_journal_dirty_metadata(handle, new_bh); 2361 ext4_journal_dirty_metadata(handle, new_bh);
2352 brelse(new_bh); 2362 brelse(new_bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d62..9f086a6a472b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
11 11
12#define EXT4FS_DEBUG 12#define EXT4FS_DEBUG
13 13
14#include <linux/ext4_jbd2.h>
15
16#include <linux/errno.h> 14#include <linux/errno.h>
17#include <linux/slab.h> 15#include <linux/slab.h>
18 16
17#include "ext4_jbd2.h"
19#include "group.h" 18#include "group.h"
20 19
21#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 20#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
50 49
51 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 50 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
52 if (group != sbi->s_groups_count) 51 if (group != sbi->s_groups_count)
53 ext4_warning(sb, __FUNCTION__, 52 ext4_warning(sb, __func__,
54 "Cannot add at group %u (only %lu groups)", 53 "Cannot add at group %u (only %lu groups)",
55 input->group, sbi->s_groups_count); 54 input->group, sbi->s_groups_count);
56 else if (offset != 0) 55 else if (offset != 0)
57 ext4_warning(sb, __FUNCTION__, "Last group not full"); 56 ext4_warning(sb, __func__, "Last group not full");
58 else if (input->reserved_blocks > input->blocks_count / 5) 57 else if (input->reserved_blocks > input->blocks_count / 5)
59 ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", 58 ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
60 input->reserved_blocks); 59 input->reserved_blocks);
61 else if (free_blocks_count < 0) 60 else if (free_blocks_count < 0)
62 ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", 61 ext4_warning(sb, __func__, "Bad blocks count %u",
63 input->blocks_count); 62 input->blocks_count);
64 else if (!(bh = sb_bread(sb, end - 1))) 63 else if (!(bh = sb_bread(sb, end - 1)))
65 ext4_warning(sb, __FUNCTION__, 64 ext4_warning(sb, __func__,
66 "Cannot read last block (%llu)", 65 "Cannot read last block (%llu)",
67 end - 1); 66 end - 1);
68 else if (outside(input->block_bitmap, start, end)) 67 else if (outside(input->block_bitmap, start, end))
69 ext4_warning(sb, __FUNCTION__, 68 ext4_warning(sb, __func__,
70 "Block bitmap not in group (block %llu)", 69 "Block bitmap not in group (block %llu)",
71 (unsigned long long)input->block_bitmap); 70 (unsigned long long)input->block_bitmap);
72 else if (outside(input->inode_bitmap, start, end)) 71 else if (outside(input->inode_bitmap, start, end))
73 ext4_warning(sb, __FUNCTION__, 72 ext4_warning(sb, __func__,
74 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
75 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
76 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
77 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
78 ext4_warning(sb, __FUNCTION__, 77 ext4_warning(sb, __func__,
79 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
80 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
81 else if (input->inode_bitmap == input->block_bitmap) 80 else if (input->inode_bitmap == input->block_bitmap)
82 ext4_warning(sb, __FUNCTION__, 81 ext4_warning(sb, __func__,
83 "Block bitmap same as inode bitmap (%llu)", 82 "Block bitmap same as inode bitmap (%llu)",
84 (unsigned long long)input->block_bitmap); 83 (unsigned long long)input->block_bitmap);
85 else if (inside(input->block_bitmap, input->inode_table, itend)) 84 else if (inside(input->block_bitmap, input->inode_table, itend))
86 ext4_warning(sb, __FUNCTION__, 85 ext4_warning(sb, __func__,
87 "Block bitmap (%llu) in inode table (%llu-%llu)", 86 "Block bitmap (%llu) in inode table (%llu-%llu)",
88 (unsigned long long)input->block_bitmap, 87 (unsigned long long)input->block_bitmap,
89 (unsigned long long)input->inode_table, itend - 1); 88 (unsigned long long)input->inode_table, itend - 1);
90 else if (inside(input->inode_bitmap, input->inode_table, itend)) 89 else if (inside(input->inode_bitmap, input->inode_table, itend))
91 ext4_warning(sb, __FUNCTION__, 90 ext4_warning(sb, __func__,
92 "Inode bitmap (%llu) in inode table (%llu-%llu)", 91 "Inode bitmap (%llu) in inode table (%llu-%llu)",
93 (unsigned long long)input->inode_bitmap, 92 (unsigned long long)input->inode_bitmap,
94 (unsigned long long)input->inode_table, itend - 1); 93 (unsigned long long)input->inode_table, itend - 1);
95 else if (inside(input->block_bitmap, start, metaend)) 94 else if (inside(input->block_bitmap, start, metaend))
96 ext4_warning(sb, __FUNCTION__, 95 ext4_warning(sb, __func__,
97 "Block bitmap (%llu) in GDT table" 96 "Block bitmap (%llu) in GDT table"
98 " (%llu-%llu)", 97 " (%llu-%llu)",
99 (unsigned long long)input->block_bitmap, 98 (unsigned long long)input->block_bitmap,
100 start, metaend - 1); 99 start, metaend - 1);
101 else if (inside(input->inode_bitmap, start, metaend)) 100 else if (inside(input->inode_bitmap, start, metaend))
102 ext4_warning(sb, __FUNCTION__, 101 ext4_warning(sb, __func__,
103 "Inode bitmap (%llu) in GDT table" 102 "Inode bitmap (%llu) in GDT table"
104 " (%llu-%llu)", 103 " (%llu-%llu)",
105 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
106 start, metaend - 1); 105 start, metaend - 1);
107 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
108 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
109 ext4_warning(sb, __FUNCTION__, 108 ext4_warning(sb, __func__,
110 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
111 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
112 (unsigned long long)input->inode_table, 111 (unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
368 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 367 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
369 if (le32_to_cpu(*p++) != 368 if (le32_to_cpu(*p++) !=
370 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 369 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
371 ext4_warning(sb, __FUNCTION__, 370 ext4_warning(sb, __func__,
372 "reserved GDT %llu" 371 "reserved GDT %llu"
373 " missing grp %d (%llu)", 372 " missing grp %d (%llu)",
374 blk, grp, 373 blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
424 */ 423 */
425 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
426 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
427 ext4_warning(sb, __FUNCTION__, 426 ext4_warning(sb, __func__,
428 "won't resize using backup superblock at %llu", 427 "won't resize using backup superblock at %llu",
429 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 428 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
430 return -EPERM; 429 return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
448 447
449 data = (__le32 *)dind->b_data; 448 data = (__le32 *)dind->b_data;
450 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 449 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
451 ext4_warning(sb, __FUNCTION__, 450 ext4_warning(sb, __func__,
452 "new group %u GDT block %llu not reserved", 451 "new group %u GDT block %llu not reserved",
453 input->group, gdblock); 452 input->group, gdblock);
454 err = -EINVAL; 453 err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
469 goto exit_dindj; 468 goto exit_dindj;
470 469
471 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 470 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
472 GFP_KERNEL); 471 GFP_NOFS);
473 if (!n_group_desc) { 472 if (!n_group_desc) {
474 err = -ENOMEM; 473 err = -ENOMEM;
475 ext4_warning (sb, __FUNCTION__, 474 ext4_warning(sb, __func__,
476 "not enough memory for %lu groups", gdb_num + 1); 475 "not enough memory for %lu groups", gdb_num + 1);
477 goto exit_inode; 476 goto exit_inode;
478 } 477 }
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
502 EXT4_SB(sb)->s_gdb_count++; 501 EXT4_SB(sb)->s_gdb_count++;
503 kfree(o_group_desc); 502 kfree(o_group_desc);
504 503
505 es->s_reserved_gdt_blocks = 504 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
506 cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
507 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); 505 ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
508 506
509 return 0; 507 return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
553 int res, i; 551 int res, i;
554 int err; 552 int err;
555 553
556 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); 554 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
557 if (!primary) 555 if (!primary)
558 return -ENOMEM; 556 return -ENOMEM;
559 557
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
571 /* Get each reserved primary GDT block and verify it holds backups */ 569 /* Get each reserved primary GDT block and verify it holds backups */
572 for (res = 0; res < reserved_gdb; res++, blk++) { 570 for (res = 0; res < reserved_gdb; res++, blk++) {
573 if (le32_to_cpu(*data) != blk) { 571 if (le32_to_cpu(*data) != blk) {
574 ext4_warning(sb, __FUNCTION__, 572 ext4_warning(sb, __func__,
575 "reserved block %llu" 573 "reserved block %llu"
576 " not at offset %ld", 574 " not at offset %ld",
577 blk, 575 blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
715 */ 713 */
716exit_err: 714exit_err:
717 if (err) { 715 if (err) {
718 ext4_warning(sb, __FUNCTION__, 716 ext4_warning(sb, __func__,
719 "can't update backup for group %lu (err %d), " 717 "can't update backup for group %lu (err %d), "
720 "forcing fsck on next reboot", group, err); 718 "forcing fsck on next reboot", group, err);
721 sbi->s_mount_state &= ~EXT4_VALID_FS; 719 sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
755 753
756 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 754 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
757 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 755 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
758 ext4_warning(sb, __FUNCTION__, 756 ext4_warning(sb, __func__,
759 "Can't resize non-sparse filesystem further"); 757 "Can't resize non-sparse filesystem further");
760 return -EPERM; 758 return -EPERM;
761 } 759 }
762 760
763 if (ext4_blocks_count(es) + input->blocks_count < 761 if (ext4_blocks_count(es) + input->blocks_count <
764 ext4_blocks_count(es)) { 762 ext4_blocks_count(es)) {
765 ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); 763 ext4_warning(sb, __func__, "blocks_count overflow\n");
766 return -EINVAL; 764 return -EINVAL;
767 } 765 }
768 766
769 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 767 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
770 le32_to_cpu(es->s_inodes_count)) { 768 le32_to_cpu(es->s_inodes_count)) {
771 ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); 769 ext4_warning(sb, __func__, "inodes_count overflow\n");
772 return -EINVAL; 770 return -EINVAL;
773 } 771 }
774 772
775 if (reserved_gdb || gdb_off == 0) { 773 if (reserved_gdb || gdb_off == 0) {
776 if (!EXT4_HAS_COMPAT_FEATURE(sb, 774 if (!EXT4_HAS_COMPAT_FEATURE(sb,
777 EXT4_FEATURE_COMPAT_RESIZE_INODE)){ 775 EXT4_FEATURE_COMPAT_RESIZE_INODE)){
778 ext4_warning(sb, __FUNCTION__, 776 ext4_warning(sb, __func__,
779 "No reserved GDT blocks, can't resize"); 777 "No reserved GDT blocks, can't resize");
780 return -EPERM; 778 return -EPERM;
781 } 779 }
782 inode = ext4_iget(sb, EXT4_RESIZE_INO); 780 inode = ext4_iget(sb, EXT4_RESIZE_INO);
783 if (IS_ERR(inode)) { 781 if (IS_ERR(inode)) {
784 ext4_warning(sb, __FUNCTION__, 782 ext4_warning(sb, __func__,
785 "Error opening resize inode"); 783 "Error opening resize inode");
786 return PTR_ERR(inode); 784 return PTR_ERR(inode);
787 } 785 }
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
810 808
811 lock_super(sb); 809 lock_super(sb);
812 if (input->group != sbi->s_groups_count) { 810 if (input->group != sbi->s_groups_count) {
813 ext4_warning(sb, __FUNCTION__, 811 ext4_warning(sb, __func__,
814 "multiple resizers run on filesystem!"); 812 "multiple resizers run on filesystem!");
815 err = -EBUSY; 813 err = -EBUSY;
816 goto exit_journal; 814 goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
877 */ 875 */
878 ext4_blocks_count_set(es, ext4_blocks_count(es) + 876 ext4_blocks_count_set(es, ext4_blocks_count(es) +
879 input->blocks_count); 877 input->blocks_count);
880 es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + 878 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
881 EXT4_INODES_PER_GROUP(sb));
882 879
883 /* 880 /*
884 * We need to protect s_groups_count against other CPUs seeing 881 * We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 " too large to resize to %llu blocks safely\n", 974 " too large to resize to %llu blocks safely\n",
978 sb->s_id, n_blocks_count); 975 sb->s_id, n_blocks_count);
979 if (sizeof(sector_t) < 8) 976 if (sizeof(sector_t) < 8)
980 ext4_warning(sb, __FUNCTION__, 977 ext4_warning(sb, __func__,
981 "CONFIG_LBD not enabled\n"); 978 "CONFIG_LBD not enabled\n");
982 return -EINVAL; 979 return -EINVAL;
983 } 980 }
984 981
985 if (n_blocks_count < o_blocks_count) { 982 if (n_blocks_count < o_blocks_count) {
986 ext4_warning(sb, __FUNCTION__, 983 ext4_warning(sb, __func__,
987 "can't shrink FS - resize aborted"); 984 "can't shrink FS - resize aborted");
988 return -EBUSY; 985 return -EBUSY;
989 } 986 }
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
992 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); 989 ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
993 990
994 if (last == 0) { 991 if (last == 0) {
995 ext4_warning(sb, __FUNCTION__, 992 ext4_warning(sb, __func__,
996 "need to use ext2online to resize further"); 993 "need to use ext2online to resize further");
997 return -EPERM; 994 return -EPERM;
998 } 995 }
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1000 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 997 add = EXT4_BLOCKS_PER_GROUP(sb) - last;
1001 998
1002 if (o_blocks_count + add < o_blocks_count) { 999 if (o_blocks_count + add < o_blocks_count) {
1003 ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); 1000 ext4_warning(sb, __func__, "blocks_count overflow");
1004 return -EINVAL; 1001 return -EINVAL;
1005 } 1002 }
1006 1003
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1008 add = n_blocks_count - o_blocks_count; 1005 add = n_blocks_count - o_blocks_count;
1009 1006
1010 if (o_blocks_count + add < n_blocks_count) 1007 if (o_blocks_count + add < n_blocks_count)
1011 ext4_warning(sb, __FUNCTION__, 1008 ext4_warning(sb, __func__,
1012 "will only finish group (%llu" 1009 "will only finish group (%llu"
1013 " blocks, %u new)", 1010 " blocks, %u new)",
1014 o_blocks_count + add, add); 1011 o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1016 /* See if the device is actually as big as what was requested */ 1013 /* See if the device is actually as big as what was requested */
1017 bh = sb_bread(sb, o_blocks_count + add -1); 1014 bh = sb_bread(sb, o_blocks_count + add -1);
1018 if (!bh) { 1015 if (!bh) {
1019 ext4_warning(sb, __FUNCTION__, 1016 ext4_warning(sb, __func__,
1020 "can't read last block, resize aborted"); 1017 "can't read last block, resize aborted");
1021 return -ENOSPC; 1018 return -ENOSPC;
1022 } 1019 }
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1028 handle = ext4_journal_start_sb(sb, 3); 1025 handle = ext4_journal_start_sb(sb, 3);
1029 if (IS_ERR(handle)) { 1026 if (IS_ERR(handle)) {
1030 err = PTR_ERR(handle); 1027 err = PTR_ERR(handle);
1031 ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); 1028 ext4_warning(sb, __func__, "error %d on journal start", err);
1032 goto exit_put; 1029 goto exit_put;
1033 } 1030 }
1034 1031
1035 lock_super(sb); 1032 lock_super(sb);
1036 if (o_blocks_count != ext4_blocks_count(es)) { 1033 if (o_blocks_count != ext4_blocks_count(es)) {
1037 ext4_warning(sb, __FUNCTION__, 1034 ext4_warning(sb, __func__,
1038 "multiple resizers run on filesystem!"); 1035 "multiple resizers run on filesystem!");
1039 unlock_super(sb); 1036 unlock_super(sb);
1040 ext4_journal_stop(handle); 1037 ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1044 1041
1045 if ((err = ext4_journal_get_write_access(handle, 1042 if ((err = ext4_journal_get_write_access(handle,
1046 EXT4_SB(sb)->s_sbh))) { 1043 EXT4_SB(sb)->s_sbh))) {
1047 ext4_warning(sb, __FUNCTION__, 1044 ext4_warning(sb, __func__,
1048 "error %d on journal write access", err); 1045 "error %d on journal write access", err);
1049 unlock_super(sb); 1046 unlock_super(sb);
1050 ext4_journal_stop(handle); 1047 ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c81a8e759bad..52dd0679a4e2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/jbd2.h> 23#include <linux/jbd2.h>
24#include <linux/ext4_fs.h>
25#include <linux/ext4_jbd2.h>
26#include <linux/slab.h> 24#include <linux/slab.h>
27#include <linux/init.h> 25#include <linux/init.h>
28#include <linux/blkdev.h> 26#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
38#include <linux/seq_file.h> 36#include <linux/seq_file.h>
39#include <linux/log2.h> 37#include <linux/log2.h>
40#include <linux/crc16.h> 38#include <linux/crc16.h>
41
42#include <asm/uaccess.h> 39#include <asm/uaccess.h>
43 40
41#include "ext4.h"
42#include "ext4_jbd2.h"
44#include "xattr.h" 43#include "xattr.h"
45#include "acl.h" 44#include "acl.h"
46#include "namei.h" 45#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
135 * take the FS itself readonly cleanly. */ 134 * take the FS itself readonly cleanly. */
136 journal = EXT4_SB(sb)->s_journal; 135 journal = EXT4_SB(sb)->s_journal;
137 if (is_journal_aborted(journal)) { 136 if (is_journal_aborted(journal)) {
138 ext4_abort(sb, __FUNCTION__, 137 ext4_abort(sb, __func__,
139 "Detected aborted journal"); 138 "Detected aborted journal");
140 return ERR_PTR(-EROFS); 139 return ERR_PTR(-EROFS);
141 } 140 }
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
355 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) 354 if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
356 return; 355 return;
357 356
358 ext4_warning(sb, __FUNCTION__, 357 ext4_warning(sb, __func__,
359 "updating to rev %d because of new feature flag, " 358 "updating to rev %d because of new feature flag, "
360 "running e2fsck is recommended", 359 "running e2fsck is recommended",
361 EXT4_DYNAMIC_REV); 360 EXT4_DYNAMIC_REV);
@@ -945,8 +944,8 @@ static match_table_t tokens = {
945 {Opt_mballoc, "mballoc"}, 944 {Opt_mballoc, "mballoc"},
946 {Opt_nomballoc, "nomballoc"}, 945 {Opt_nomballoc, "nomballoc"},
947 {Opt_stripe, "stripe=%u"}, 946 {Opt_stripe, "stripe=%u"},
948 {Opt_err, NULL},
949 {Opt_resize, "resize"}, 947 {Opt_resize, "resize"},
948 {Opt_err, NULL},
950}; 949};
951 950
952static ext4_fsblk_t get_sb_block(void **data) 951static ext4_fsblk_t get_sb_block(void **data)
@@ -1388,11 +1387,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1388 * a plain journaled filesystem we can keep it set as 1387 * a plain journaled filesystem we can keep it set as
1389 * valid forever! :) 1388 * valid forever! :)
1390 */ 1389 */
1391 es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); 1390 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1392#endif 1391#endif
1393 if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 1392 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1394 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 1393 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1395 es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); 1394 le16_add_cpu(&es->s_mnt_count, 1);
1396 es->s_mtime = cpu_to_le32(get_seconds()); 1395 es->s_mtime = cpu_to_le32(get_seconds());
1397 ext4_update_dynamic_rev(sb); 1396 ext4_update_dynamic_rev(sb);
1398 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 1397 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1485,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb)
1485 block_bitmap = ext4_block_bitmap(sb, gdp); 1484 block_bitmap = ext4_block_bitmap(sb, gdp);
1486 if (block_bitmap < first_block || block_bitmap > last_block) 1485 if (block_bitmap < first_block || block_bitmap > last_block)
1487 { 1486 {
1488 ext4_error (sb, "ext4_check_descriptors", 1487 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1489 "Block bitmap for group %lu" 1488 "Block bitmap for group %lu not in group "
1490 " not in group (block %llu)!", 1489 "(block %llu)!", i, block_bitmap);
1491 i, block_bitmap);
1492 return 0; 1490 return 0;
1493 } 1491 }
1494 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1492 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1495 if (inode_bitmap < first_block || inode_bitmap > last_block) 1493 if (inode_bitmap < first_block || inode_bitmap > last_block)
1496 { 1494 {
1497 ext4_error (sb, "ext4_check_descriptors", 1495 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1498 "Inode bitmap for group %lu" 1496 "Inode bitmap for group %lu not in group "
1499 " not in group (block %llu)!", 1497 "(block %llu)!", i, inode_bitmap);
1500 i, inode_bitmap);
1501 return 0; 1498 return 0;
1502 } 1499 }
1503 inode_table = ext4_inode_table(sb, gdp); 1500 inode_table = ext4_inode_table(sb, gdp);
1504 if (inode_table < first_block || 1501 if (inode_table < first_block ||
1505 inode_table + sbi->s_itb_per_group - 1 > last_block) 1502 inode_table + sbi->s_itb_per_group - 1 > last_block)
1506 { 1503 {
1507 ext4_error (sb, "ext4_check_descriptors", 1504 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1508 "Inode table for group %lu" 1505 "Inode table for group %lu not in group "
1509 " not in group (block %llu)!", 1506 "(block %llu)!", i, inode_table);
1510 i, inode_table);
1511 return 0; 1507 return 0;
1512 } 1508 }
1513 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1509 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1514 ext4_error(sb, __FUNCTION__, 1510 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1515 "Checksum for group %lu failed (%u!=%u)\n", 1511 "Checksum for group %lu failed (%u!=%u)\n",
1516 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1512 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1517 gdp)), le16_to_cpu(gdp->bg_checksum)); 1513 gdp)), le16_to_cpu(gdp->bg_checksum));
1518 return 0; 1514 return 0;
1519 } 1515 }
1520 if (!flexbg_flag) 1516 if (!flexbg_flag)
@@ -1594,8 +1590,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1594 while (es->s_last_orphan) { 1590 while (es->s_last_orphan) {
1595 struct inode *inode; 1591 struct inode *inode;
1596 1592
1597 if (!(inode = 1593 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1598 ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { 1594 if (IS_ERR(inode)) {
1599 es->s_last_orphan = 0; 1595 es->s_last_orphan = 0;
1600 break; 1596 break;
1601 } 1597 }
@@ -1605,7 +1601,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1605 if (inode->i_nlink) { 1601 if (inode->i_nlink) {
1606 printk(KERN_DEBUG 1602 printk(KERN_DEBUG
1607 "%s: truncating inode %lu to %Ld bytes\n", 1603 "%s: truncating inode %lu to %Ld bytes\n",
1608 __FUNCTION__, inode->i_ino, inode->i_size); 1604 __func__, inode->i_ino, inode->i_size);
1609 jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 1605 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1610 inode->i_ino, inode->i_size); 1606 inode->i_ino, inode->i_size);
1611 ext4_truncate(inode); 1607 ext4_truncate(inode);
@@ -1613,7 +1609,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1613 } else { 1609 } else {
1614 printk(KERN_DEBUG 1610 printk(KERN_DEBUG
1615 "%s: deleting unreferenced inode %lu\n", 1611 "%s: deleting unreferenced inode %lu\n",
1616 __FUNCTION__, inode->i_ino); 1612 __func__, inode->i_ino);
1617 jbd_debug(2, "deleting unreferenced inode %lu\n", 1613 jbd_debug(2, "deleting unreferenced inode %lu\n",
1618 inode->i_ino); 1614 inode->i_ino);
1619 nr_orphans++; 1615 nr_orphans++;
@@ -2699,9 +2695,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
2699 char nbuf[16]; 2695 char nbuf[16];
2700 2696
2701 errstr = ext4_decode_error(sb, j_errno, nbuf); 2697 errstr = ext4_decode_error(sb, j_errno, nbuf);
2702 ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " 2698 ext4_warning(sb, __func__, "Filesystem error recorded "
2703 "from previous mount: %s", errstr); 2699 "from previous mount: %s", errstr);
2704 ext4_warning(sb, __FUNCTION__, "Marking fs in need of " 2700 ext4_warning(sb, __func__, "Marking fs in need of "
2705 "filesystem check."); 2701 "filesystem check.");
2706 2702
2707 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2703 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2828,7 +2824,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
2828 } 2824 }
2829 2825
2830 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) 2826 if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
2831 ext4_abort(sb, __FUNCTION__, "Abort forced by user"); 2827 ext4_abort(sb, __func__, "Abort forced by user");
2832 2828
2833 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2829 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2834 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 2830 ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3040,8 +3036,14 @@ static int ext4_dquot_drop(struct inode *inode)
3040 3036
3041 /* We may delete quota structure so we need to reserve enough blocks */ 3037 /* We may delete quota structure so we need to reserve enough blocks */
3042 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); 3038 handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3043 if (IS_ERR(handle)) 3039 if (IS_ERR(handle)) {
3040 /*
3041 * We call dquot_drop() anyway to at least release references
3042 * to quota structures so that umount does not hang.
3043 */
3044 dquot_drop(inode);
3044 return PTR_ERR(handle); 3045 return PTR_ERR(handle);
3046 }
3045 ret = dquot_drop(inode); 3047 ret = dquot_drop(inode);
3046 err = ext4_journal_stop(handle); 3048 err = ext4_journal_stop(handle);
3047 if (!ret) 3049 if (!ret)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c4..e9178643dc01 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/jbd2.h> 21#include <linux/jbd2.h>
22#include <linux/ext4_fs.h>
23#include <linux/namei.h> 22#include <linux/namei.h>
23#include "ext4.h"
24#include "xattr.h" 24#include "xattr.h"
25 25
26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) 26static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d93..3fbc2c6c3d0e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
53#include <linux/init.h> 53#include <linux/init.h>
54#include <linux/fs.h> 54#include <linux/fs.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/ext4_jbd2.h>
57#include <linux/ext4_fs.h>
58#include <linux/mbcache.h> 56#include <linux/mbcache.h>
59#include <linux/quotaops.h> 57#include <linux/quotaops.h>
60#include <linux/rwsem.h> 58#include <linux/rwsem.h>
59#include "ext4_jbd2.h"
60#include "ext4.h"
61#include "xattr.h" 61#include "xattr.h"
62#include "acl.h" 62#include "acl.h"
63 63
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
92 struct mb_cache_entry **); 92 struct mb_cache_entry **);
93static void ext4_xattr_rehash(struct ext4_xattr_header *, 93static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 struct ext4_xattr_entry *); 94 struct ext4_xattr_entry *);
95static int ext4_xattr_list(struct inode *inode, char *buffer,
96 size_t buffer_size);
95 97
96static struct mb_cache *ext4_xattr_cache; 98static struct mb_cache *ext4_xattr_cache;
97 99
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
225 ea_bdebug(bh, "b_count=%d, refcount=%d", 227 ea_bdebug(bh, "b_count=%d, refcount=%d",
226 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 228 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 if (ext4_xattr_check_block(bh)) { 229 if (ext4_xattr_check_block(bh)) {
228bad_block: ext4_error(inode->i_sb, __FUNCTION__, 230bad_block: ext4_error(inode->i_sb, __func__,
229 "inode %lu: bad block %llu", inode->i_ino, 231 "inode %lu: bad block %llu", inode->i_ino,
230 EXT4_I(inode)->i_file_acl); 232 EXT4_I(inode)->i_file_acl);
231 error = -EIO; 233 error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
367 ea_bdebug(bh, "b_count=%d, refcount=%d", 369 ea_bdebug(bh, "b_count=%d, refcount=%d",
368 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); 370 atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
369 if (ext4_xattr_check_block(bh)) { 371 if (ext4_xattr_check_block(bh)) {
370 ext4_error(inode->i_sb, __FUNCTION__, 372 ext4_error(inode->i_sb, __func__,
371 "inode %lu: bad block %llu", inode->i_ino, 373 "inode %lu: bad block %llu", inode->i_ino,
372 EXT4_I(inode)->i_file_acl); 374 EXT4_I(inode)->i_file_acl);
373 error = -EIO; 375 error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
420 * Returns a negative error number on failure, or the number of bytes 422 * Returns a negative error number on failure, or the number of bytes
421 * used / required on success. 423 * used / required on success.
422 */ 424 */
423int 425static int
424ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) 426ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
425{ 427{
426 int i_error, b_error; 428 int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
484 get_bh(bh); 486 get_bh(bh);
485 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
486 } else { 488 } else {
487 BHDR(bh)->h_refcount = cpu_to_le32( 489 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
488 le32_to_cpu(BHDR(bh)->h_refcount) - 1);
489 error = ext4_journal_dirty_metadata(handle, bh); 490 error = ext4_journal_dirty_metadata(handle, bh);
490 if (IS_SYNC(inode)) 491 if (IS_SYNC(inode))
491 handle->h_sync = 1; 492 handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
660 atomic_read(&(bs->bh->b_count)), 661 atomic_read(&(bs->bh->b_count)),
661 le32_to_cpu(BHDR(bs->bh)->h_refcount)); 662 le32_to_cpu(BHDR(bs->bh)->h_refcount));
662 if (ext4_xattr_check_block(bs->bh)) { 663 if (ext4_xattr_check_block(bs->bh)) {
663 ext4_error(sb, __FUNCTION__, 664 ext4_error(sb, __func__,
664 "inode %lu: bad block %llu", inode->i_ino, 665 "inode %lu: bad block %llu", inode->i_ino,
665 EXT4_I(inode)->i_file_acl); 666 EXT4_I(inode)->i_file_acl);
666 error = -EIO; 667 error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
738 ce = NULL; 739 ce = NULL;
739 } 740 }
740 ea_bdebug(bs->bh, "cloning"); 741 ea_bdebug(bs->bh, "cloning");
741 s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); 742 s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
742 error = -ENOMEM; 743 error = -ENOMEM;
743 if (s->base == NULL) 744 if (s->base == NULL)
744 goto cleanup; 745 goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
750 } 751 }
751 } else { 752 } else {
752 /* Allocate a buffer where we construct the new block. */ 753 /* Allocate a buffer where we construct the new block. */
753 s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); 754 s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
754 /* assert(header == s->base) */ 755 /* assert(header == s->base) */
755 error = -ENOMEM; 756 error = -ENOMEM;
756 if (s->base == NULL) 757 if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
789 if (error) 790 if (error)
790 goto cleanup_dquot; 791 goto cleanup_dquot;
791 lock_buffer(new_bh); 792 lock_buffer(new_bh);
792 BHDR(new_bh)->h_refcount = cpu_to_le32(1 + 793 le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
793 le32_to_cpu(BHDR(new_bh)->h_refcount));
794 ea_bdebug(new_bh, "reusing; refcount now=%d", 794 ea_bdebug(new_bh, "reusing; refcount now=%d",
795 le32_to_cpu(BHDR(new_bh)->h_refcount)); 795 le32_to_cpu(BHDR(new_bh)->h_refcount));
796 unlock_buffer(new_bh); 796 unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
808 get_bh(new_bh); 808 get_bh(new_bh);
809 } else { 809 } else {
810 /* We need to allocate a new block */ 810 /* We need to allocate a new block */
811 ext4_fsblk_t goal = le32_to_cpu( 811 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
812 EXT4_SB(sb)->s_es->s_first_data_block) + 812 EXT4_I(inode)->i_block_group);
813 (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
814 EXT4_BLOCKS_PER_GROUP(sb);
815 ext4_fsblk_t block = ext4_new_block(handle, inode, 813 ext4_fsblk_t block = ext4_new_block(handle, inode,
816 goal, &error); 814 goal, &error);
817 if (error) 815 if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
863 goto cleanup; 861 goto cleanup;
864 862
865bad_block: 863bad_block:
866 ext4_error(inode->i_sb, __FUNCTION__, 864 ext4_error(inode->i_sb, __func__,
867 "inode %lu: bad block %llu", inode->i_ino, 865 "inode %lu: bad block %llu", inode->i_ino,
868 EXT4_I(inode)->i_file_acl); 866 EXT4_I(inode)->i_file_acl);
869 goto cleanup; 867 goto cleanup;
@@ -1166,7 +1164,7 @@ retry:
1166 if (!bh) 1164 if (!bh)
1167 goto cleanup; 1165 goto cleanup;
1168 if (ext4_xattr_check_block(bh)) { 1166 if (ext4_xattr_check_block(bh)) {
1169 ext4_error(inode->i_sb, __FUNCTION__, 1167 ext4_error(inode->i_sb, __func__,
1170 "inode %lu: bad block %llu", inode->i_ino, 1168 "inode %lu: bad block %llu", inode->i_ino,
1171 EXT4_I(inode)->i_file_acl); 1169 EXT4_I(inode)->i_file_acl);
1172 error = -EIO; 1170 error = -EIO;
@@ -1341,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1341 goto cleanup; 1339 goto cleanup;
1342 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); 1340 bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1343 if (!bh) { 1341 if (!bh) {
1344 ext4_error(inode->i_sb, __FUNCTION__, 1342 ext4_error(inode->i_sb, __func__,
1345 "inode %lu: block %llu read error", inode->i_ino, 1343 "inode %lu: block %llu read error", inode->i_ino,
1346 EXT4_I(inode)->i_file_acl); 1344 EXT4_I(inode)->i_file_acl);
1347 goto cleanup; 1345 goto cleanup;
1348 } 1346 }
1349 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || 1347 if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1350 BHDR(bh)->h_blocks != cpu_to_le32(1)) { 1348 BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1351 ext4_error(inode->i_sb, __FUNCTION__, 1349 ext4_error(inode->i_sb, __func__,
1352 "inode %lu: bad block %llu", inode->i_ino, 1350 "inode %lu: bad block %llu", inode->i_ino,
1353 EXT4_I(inode)->i_file_acl); 1351 EXT4_I(inode)->i_file_acl);
1354 goto cleanup; 1352 goto cleanup;
@@ -1475,7 +1473,7 @@ again:
1475 } 1473 }
1476 bh = sb_bread(inode->i_sb, ce->e_block); 1474 bh = sb_bread(inode->i_sb, ce->e_block);
1477 if (!bh) { 1475 if (!bh) {
1478 ext4_error(inode->i_sb, __FUNCTION__, 1476 ext4_error(inode->i_sb, __func__,
1479 "inode %lu: block %lu read error", 1477 "inode %lu: block %lu read error",
1480 inode->i_ino, (unsigned long) ce->e_block); 1478 inode->i_ino, (unsigned long) ce->e_block);
1481 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= 1479 } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a12651..5992fe979bb9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); 74extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
75 75
76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); 76extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
77extern int ext4_xattr_list(struct inode *, char *, size_t);
78extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); 77extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
79extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); 78extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
80 79
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
99} 98}
100 99
101static inline int 100static inline int
102ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
103{
104 return -EOPNOTSUPP;
105}
106
107static inline int
108ext4_xattr_set(struct inode *inode, int name_index, const char *name, 101ext4_xattr_set(struct inode *inode, int name_index, const char *name,
109 const void *value, size_t size, int flags) 102 const void *value, size_t size, int flags)
110{ 103{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b9..ca5f89fc6cae 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ext4_jbd2.h>
10#include <linux/ext4_fs.h>
11#include <linux/security.h> 9#include <linux/security.h>
10#include "ext4_jbd2.h"
11#include "ext4.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14static size_t 14static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafec..fff33382cadc 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ext4_jbd2.h> 12#include "ext4_jbd2.h"
13#include <linux/ext4_fs.h> 13#include "ext4.h"
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted." 16#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf096..67be723fcc4e 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/ext4_jbd2.h> 11#include "ext4_jbd2.h"
12#include <linux/ext4_fs.h> 12#include "ext4.h"
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user." 15#define XATTR_USER_PREFIX "user."
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 639b3b4f86d1..fda25479af26 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
242 /* prevent the infinite loop of cluster chain */ 242 /* prevent the infinite loop of cluster chain */
243 if (*fclus > limit) { 243 if (*fclus > limit) {
244 fat_fs_panic(sb, "%s: detected the cluster chain loop" 244 fat_fs_panic(sb, "%s: detected the cluster chain loop"
245 " (i_pos %lld)", __FUNCTION__, 245 " (i_pos %lld)", __func__,
246 MSDOS_I(inode)->i_pos); 246 MSDOS_I(inode)->i_pos);
247 nr = -EIO; 247 nr = -EIO;
248 goto out; 248 goto out;
@@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
253 goto out; 253 goto out;
254 else if (nr == FAT_ENT_FREE) { 254 else if (nr == FAT_ENT_FREE) {
255 fat_fs_panic(sb, "%s: invalid cluster chain" 255 fat_fs_panic(sb, "%s: invalid cluster chain"
256 " (i_pos %lld)", __FUNCTION__, 256 " (i_pos %lld)", __func__,
257 MSDOS_I(inode)->i_pos); 257 MSDOS_I(inode)->i_pos);
258 nr = -EIO; 258 nr = -EIO;
259 goto out; 259 goto out;
@@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
286 return ret; 286 return ret;
287 else if (ret == FAT_ENT_EOF) { 287 else if (ret == FAT_ENT_EOF) {
288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)", 288 fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
289 __FUNCTION__, MSDOS_I(inode)->i_pos); 289 __func__, MSDOS_I(inode)->i_pos);
290 return -EIO; 290 return -EIO;
291 } 291 }
292 return dclus; 292 return dclus;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 13ab763cc510..302e95c4af7e 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -546,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
546 goto error; 546 goto error;
547 } else if (cluster == FAT_ENT_FREE) { 547 } else if (cluster == FAT_ENT_FREE) {
548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF", 548 fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
549 __FUNCTION__); 549 __func__);
550 err = -EIO; 550 err = -EIO;
551 goto error; 551 goto error;
552 } 552 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index d604bb132422..27cc1164ec36 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -208,7 +208,7 @@ static int fat_free(struct inode *inode, int skip)
208 } else if (ret == FAT_ENT_FREE) { 208 } else if (ret == FAT_ENT_FREE) {
209 fat_fs_panic(sb, 209 fat_fs_panic(sb,
210 "%s: invalid cluster chain (i_pos %lld)", 210 "%s: invalid cluster chain (i_pos %lld)",
211 __FUNCTION__, MSDOS_I(inode)->i_pos); 211 __func__, MSDOS_I(inode)->i_pos);
212 ret = -EIO; 212 ret = -EIO;
213 } else if (ret > 0) { 213 } else if (ret > 0) {
214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait); 214 err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 5f522a55b596..4e0a3dd9d677 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1222,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1222 brelse(bh); 1222 brelse(bh);
1223 goto out_invalid; 1223 goto out_invalid;
1224 } 1224 }
1225 logical_sector_size = 1225 logical_sector_size = get_unaligned_le16(&b->sector_size);
1226 le16_to_cpu(get_unaligned((__le16 *)&b->sector_size));
1227 if (!is_power_of_2(logical_sector_size) 1226 if (!is_power_of_2(logical_sector_size)
1228 || (logical_sector_size < 512) 1227 || (logical_sector_size < 512)
1229 || (logical_sector_size > 4096)) { 1228 || (logical_sector_size > 4096)) {
@@ -1322,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1322 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; 1321 sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
1323 1322
1324 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; 1323 sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
1325 sbi->dir_entries = 1324 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1326 le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries));
1327 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1325 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1328 if (!silent) 1326 if (!silent)
1329 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1327 printk(KERN_ERR "FAT: bogus directroy-entries per block"
@@ -1335,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1335 rootdir_sectors = sbi->dir_entries 1333 rootdir_sectors = sbi->dir_entries
1336 * sizeof(struct msdos_dir_entry) / sb->s_blocksize; 1334 * sizeof(struct msdos_dir_entry) / sb->s_blocksize;
1337 sbi->data_start = sbi->dir_start + rootdir_sectors; 1335 sbi->data_start = sbi->dir_start + rootdir_sectors;
1338 total_sectors = le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); 1336 total_sectors = get_unaligned_le16(&b->sectors);
1339 if (total_sectors == 0) 1337 if (total_sectors == 0)
1340 total_sectors = le32_to_cpu(b->total_sect); 1338 total_sectors = le32_to_cpu(b->total_sect);
1341 1339
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3f3ac630ccde..bfd776509a72 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/file.h> 11#include <linux/file.h>
12#include <linux/fdtable.h>
12#include <linux/capability.h> 13#include <linux/capability.h>
13#include <linux/dnotify.h> 14#include <linux/dnotify.h>
14#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9ef..4c6f0ea12c41 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/fdtable.h>
15#include <linux/bitops.h> 16#include <linux/bitops.h>
16#include <linux/interrupt.h> 17#include <linux/interrupt.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
149 nr /= (1024 / sizeof(struct file *)); 150 nr /= (1024 / sizeof(struct file *));
150 nr = roundup_pow_of_two(nr + 1); 151 nr = roundup_pow_of_two(nr + 1);
151 nr *= (1024 / sizeof(struct file *)); 152 nr *= (1024 / sizeof(struct file *));
152 if (nr > sysctl_nr_open) 153 /*
153 nr = sysctl_nr_open; 154 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
155 * had been set lower between the check in expand_files() and here. Deal
156 * with that in caller, it's cheaper that way.
157 *
158 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
159 * bitmaps handling below becomes unpleasant, to put it mildly...
160 */
161 if (unlikely(nr > sysctl_nr_open))
162 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
154 163
155 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); 164 fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
156 if (!fdt) 165 if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
199 if (!new_fdt) 208 if (!new_fdt)
200 return -ENOMEM; 209 return -ENOMEM;
201 /* 210 /*
211 * extremely unlikely race - sysctl_nr_open decreased between the check in
212 * caller and alloc_fdtable(). Cheaper to catch it here...
213 */
214 if (unlikely(new_fdt->max_fds <= nr)) {
215 free_fdarr(new_fdt);
216 free_fdset(new_fdt);
217 kfree(new_fdt);
218 return -EMFILE;
219 }
220 /*
202 * Check again since another task may have expanded the fd table while 221 * Check again since another task may have expanded the fd table while
203 * we dropped the lock 222 * we dropped the lock
204 */ 223 */
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b872251..83084225b4c3 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
8#include <linux/string.h> 8#include <linux/string.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/fdtable.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/fs.h> 14#include <linux/fs.h>
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 2b46064f66b2..50ab5eecb99b 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long);
50/* vxfs_fshead.c */ 50/* vxfs_fshead.c */
51extern int vxfs_read_fshead(struct super_block *); 51extern int vxfs_read_fshead(struct super_block *);
52 52
53/* vxfs_immed.c */
54extern const struct inode_operations vxfs_immed_symlink_iops;
55
53/* vxfs_inode.c */ 56/* vxfs_inode.c */
57extern const struct address_space_operations vxfs_immed_aops;
54extern struct kmem_cache *vxfs_inode_cachep; 58extern struct kmem_cache *vxfs_inode_cachep;
55extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); 59extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t);
56extern struct inode * vxfs_get_fake_inode(struct super_block *, 60extern struct inode * vxfs_get_fake_inode(struct super_block *,
@@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations;
69extern int vxfs_read_olt(struct super_block *, u_long); 73extern int vxfs_read_olt(struct super_block *, u_long);
70 74
71/* vxfs_subr.c */ 75/* vxfs_subr.c */
76extern const struct address_space_operations vxfs_aops;
72extern struct page * vxfs_get_page(struct address_space *, u_long); 77extern struct page * vxfs_get_page(struct address_space *, u_long);
73extern void vxfs_put_page(struct page *); 78extern void vxfs_put_page(struct page *);
74extern struct buffer_head * vxfs_bread(struct inode *, int); 79extern struct buffer_head * vxfs_bread(struct inode *, int);
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 8a5959a61ba9..c36aeaf92e41 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -35,6 +35,7 @@
35#include <linux/namei.h> 35#include <linux/namei.h>
36 36
37#include "vxfs.h" 37#include "vxfs.h"
38#include "vxfs_extern.h"
38#include "vxfs_inode.h" 39#include "vxfs_inode.h"
39 40
40 41
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ad88d2364bc2..9f3f2ceb73f0 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -41,11 +41,6 @@
41#include "vxfs_extern.h" 41#include "vxfs_extern.h"
42 42
43 43
44extern const struct address_space_operations vxfs_aops;
45extern const struct address_space_operations vxfs_immed_aops;
46
47extern const struct inode_operations vxfs_immed_symlink_iops;
48
49struct kmem_cache *vxfs_inode_cachep; 44struct kmem_cache *vxfs_inode_cachep;
50 45
51 46
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 06557679ca41..ae45f77765c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,6 +25,45 @@
25#include <linux/buffer_head.h> 25#include <linux/buffer_head.h>
26#include "internal.h" 26#include "internal.h"
27 27
28
29/**
30 * writeback_acquire - attempt to get exclusive writeback access to a device
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */
41static int writeback_acquire(struct backing_dev_info *bdi)
42{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state);
44}
45
46/**
47 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure.
49 *
50 * Determine whether there is writeback in progress against a backing device.
51 */
52int writeback_in_progress(struct backing_dev_info *bdi)
53{
54 return test_bit(BDI_pdflush, &bdi->state);
55}
56
57/**
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{
63 BUG_ON(!writeback_in_progress(bdi));
64 clear_bit(BDI_pdflush, &bdi->state);
65}
66
28/** 67/**
29 * __mark_inode_dirty - internal function 68 * __mark_inode_dirty - internal function
30 * @inode: inode to mark 69 * @inode: inode to mark
@@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int
747 786
748 return err; 787 return err;
749} 788}
750
751EXPORT_SYMBOL(generic_osync_inode); 789EXPORT_SYMBOL(generic_osync_inode);
752
753/**
754 * writeback_acquire - attempt to get exclusive writeback access to a device
755 * @bdi: the device's backing_dev_info structure
756 *
757 * It is a waste of resources to have more than one pdflush thread blocked on
758 * a single request queue. Exclusion at the request_queue level is obtained
759 * via a flag in the request_queue's backing_dev_info.state.
760 *
761 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
762 * unless they implement their own. Which is somewhat inefficient, as this
763 * may prevent concurrent writeback against multiple devices.
764 */
765int writeback_acquire(struct backing_dev_info *bdi)
766{
767 return !test_and_set_bit(BDI_pdflush, &bdi->state);
768}
769
770/**
771 * writeback_in_progress - determine whether there is writeback in progress
772 * @bdi: the device's backing_dev_info structure.
773 *
774 * Determine whether there is writeback in progress against a backing device.
775 */
776int writeback_in_progress(struct backing_dev_info *bdi)
777{
778 return test_bit(BDI_pdflush, &bdi->state);
779}
780
781/**
782 * writeback_release - relinquish exclusive writeback access against a device.
783 * @bdi: the device's backing_dev_info structure
784 */
785void writeback_release(struct backing_dev_info *bdi)
786{
787 BUG_ON(!writeback_in_progress(bdi));
788 clear_bit(BDI_pdflush, &bdi->state);
789}
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 105d4a271e07..4f3cab321415 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
117 117
118 parent = fuse_control_sb->s_root; 118 parent = fuse_control_sb->s_root;
119 inc_nlink(parent->d_inode); 119 inc_nlink(parent->d_inode);
120 sprintf(name, "%llu", (unsigned long long) fc->id); 120 sprintf(name, "%u", fc->dev);
121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, 121 parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
122 &simple_dir_inode_operations, 122 &simple_dir_inode_operations,
123 &simple_dir_operations); 123 &simple_dir_operations);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index af639807524e..87250b6a8682 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void)
47 return req; 47 return req;
48} 48}
49 49
50struct fuse_req *fuse_request_alloc_nofs(void)
51{
52 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
53 if (req)
54 fuse_request_init(req);
55 return req;
56}
57
50void fuse_request_free(struct fuse_req *req) 58void fuse_request_free(struct fuse_req *req)
51{ 59{
52 kmem_cache_free(fuse_req_cachep, req); 60 kmem_cache_free(fuse_req_cachep, req);
@@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
291 299
292static void wait_answer_interruptible(struct fuse_conn *fc, 300static void wait_answer_interruptible(struct fuse_conn *fc,
293 struct fuse_req *req) 301 struct fuse_req *req)
302 __releases(fc->lock) __acquires(fc->lock)
294{ 303{
295 if (signal_pending(current)) 304 if (signal_pending(current))
296 return; 305 return;
@@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
307 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 316 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
308} 317}
309 318
310/* Called with fc->lock held. Releases, and then reacquires it. */
311static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) 319static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
320 __releases(fc->lock) __acquires(fc->lock)
312{ 321{
313 if (!fc->no_interrupt) { 322 if (!fc->no_interrupt) {
314 /* Any signal may interrupt this */ 323 /* Any signal may interrupt this */
@@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
430} 439}
431 440
432/* 441/*
442 * Called under fc->lock
443 *
444 * fc->connected must have been checked previously
445 */
446void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
447{
448 req->isreply = 1;
449 request_send_nowait_locked(fc, req);
450}
451
452/*
433 * Lock the request. Up to the next unlock_request() there mustn't be 453 * Lock the request. Up to the next unlock_request() there mustn't be
434 * anything that could cause a page-fault. If the request was already 454 * anything that could cause a page-fault. If the request was already
435 * aborted bail out. 455 * aborted bail out.
@@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
968 * locked). 988 * locked).
969 */ 989 */
970static void end_io_requests(struct fuse_conn *fc) 990static void end_io_requests(struct fuse_conn *fc)
991 __releases(fc->lock) __acquires(fc->lock)
971{ 992{
972 while (!list_empty(&fc->io)) { 993 while (!list_empty(&fc->io)) {
973 struct fuse_req *req = 994 struct fuse_req *req =
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c4807b3fc8a3..2060bf06b906 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
132 req->out.args[0].value = outarg; 132 req->out.args[0].value = outarg;
133} 133}
134 134
135static u64 fuse_get_attr_version(struct fuse_conn *fc) 135u64 fuse_get_attr_version(struct fuse_conn *fc)
136{ 136{
137 u64 curr_version; 137 u64 curr_version;
138 138
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
1107} 1107}
1108 1108
1109/* 1109/*
1110 * Prevent concurrent writepages on inode
1111 *
1112 * This is done by adding a negative bias to the inode write counter
1113 * and waiting for all pending writes to finish.
1114 */
1115void fuse_set_nowrite(struct inode *inode)
1116{
1117 struct fuse_conn *fc = get_fuse_conn(inode);
1118 struct fuse_inode *fi = get_fuse_inode(inode);
1119
1120 BUG_ON(!mutex_is_locked(&inode->i_mutex));
1121
1122 spin_lock(&fc->lock);
1123 BUG_ON(fi->writectr < 0);
1124 fi->writectr += FUSE_NOWRITE;
1125 spin_unlock(&fc->lock);
1126 wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
1127}
1128
1129/*
1130 * Allow writepages on inode
1131 *
1132 * Remove the bias from the writecounter and send any queued
1133 * writepages.
1134 */
1135static void __fuse_release_nowrite(struct inode *inode)
1136{
1137 struct fuse_inode *fi = get_fuse_inode(inode);
1138
1139 BUG_ON(fi->writectr != FUSE_NOWRITE);
1140 fi->writectr = 0;
1141 fuse_flush_writepages(inode);
1142}
1143
1144void fuse_release_nowrite(struct inode *inode)
1145{
1146 struct fuse_conn *fc = get_fuse_conn(inode);
1147
1148 spin_lock(&fc->lock);
1149 __fuse_release_nowrite(inode);
1150 spin_unlock(&fc->lock);
1151}
1152
1153/*
1110 * Set attributes, and at the same time refresh them. 1154 * Set attributes, and at the same time refresh them.
1111 * 1155 *
1112 * Truncation is slightly complicated, because the 'truncate' request 1156 * Truncation is slightly complicated, because the 'truncate' request
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1122 struct fuse_req *req; 1166 struct fuse_req *req;
1123 struct fuse_setattr_in inarg; 1167 struct fuse_setattr_in inarg;
1124 struct fuse_attr_out outarg; 1168 struct fuse_attr_out outarg;
1169 bool is_truncate = false;
1170 loff_t oldsize;
1125 int err; 1171 int err;
1126 1172
1127 if (!fuse_allow_task(fc, current)) 1173 if (!fuse_allow_task(fc, current))
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1145 send_sig(SIGXFSZ, current, 0); 1191 send_sig(SIGXFSZ, current, 0);
1146 return -EFBIG; 1192 return -EFBIG;
1147 } 1193 }
1194 is_truncate = true;
1148 } 1195 }
1149 1196
1150 req = fuse_get_req(fc); 1197 req = fuse_get_req(fc);
1151 if (IS_ERR(req)) 1198 if (IS_ERR(req))
1152 return PTR_ERR(req); 1199 return PTR_ERR(req);
1153 1200
1201 if (is_truncate)
1202 fuse_set_nowrite(inode);
1203
1154 memset(&inarg, 0, sizeof(inarg)); 1204 memset(&inarg, 0, sizeof(inarg));
1155 memset(&outarg, 0, sizeof(outarg)); 1205 memset(&outarg, 0, sizeof(outarg));
1156 iattr_to_fattr(attr, &inarg); 1206 iattr_to_fattr(attr, &inarg);
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1181 if (err) { 1231 if (err) {
1182 if (err == -EINTR) 1232 if (err == -EINTR)
1183 fuse_invalidate_attr(inode); 1233 fuse_invalidate_attr(inode);
1184 return err; 1234 goto error;
1185 } 1235 }
1186 1236
1187 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { 1237 if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
1188 make_bad_inode(inode); 1238 make_bad_inode(inode);
1189 return -EIO; 1239 err = -EIO;
1240 goto error;
1241 }
1242
1243 spin_lock(&fc->lock);
1244 fuse_change_attributes_common(inode, &outarg.attr,
1245 attr_timeout(&outarg));
1246 oldsize = inode->i_size;
1247 i_size_write(inode, outarg.attr.size);
1248
1249 if (is_truncate) {
1250 /* NOTE: this may release/reacquire fc->lock */
1251 __fuse_release_nowrite(inode);
1252 }
1253 spin_unlock(&fc->lock);
1254
1255 /*
1256 * Only call invalidate_inode_pages2() after removing
1257 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
1258 */
1259 if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1260 if (outarg.attr.size < oldsize)
1261 fuse_truncate(inode->i_mapping, outarg.attr.size);
1262 invalidate_inode_pages2(inode->i_mapping);
1190 } 1263 }
1191 1264
1192 fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
1193 return 0; 1265 return 0;
1266
1267error:
1268 if (is_truncate)
1269 fuse_release_nowrite(inode);
1270
1271 return err;
1194} 1272}
1195 1273
1196static int fuse_setattr(struct dentry *entry, struct iattr *attr) 1274static int fuse_setattr(struct dentry *entry, struct iattr *attr)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 676b0bc8a86d..f28cf8b46f80 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
210 return (u64) v0 + ((u64) v1 << 32); 210 return (u64) v0 + ((u64) v1 << 32);
211} 211}
212 212
213/*
214 * Check if page is under writeback
215 *
216 * This is currently done by walking the list of writepage requests
217 * for the inode, which can be pretty inefficient.
218 */
219static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
220{
221 struct fuse_conn *fc = get_fuse_conn(inode);
222 struct fuse_inode *fi = get_fuse_inode(inode);
223 struct fuse_req *req;
224 bool found = false;
225
226 spin_lock(&fc->lock);
227 list_for_each_entry(req, &fi->writepages, writepages_entry) {
228 pgoff_t curr_index;
229
230 BUG_ON(req->inode != inode);
231 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
232 if (curr_index == index) {
233 found = true;
234 break;
235 }
236 }
237 spin_unlock(&fc->lock);
238
239 return found;
240}
241
242/*
243 * Wait for page writeback to be completed.
244 *
245 * Since fuse doesn't rely on the VM writeback tracking, this has to
246 * use some other means.
247 */
248static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
249{
250 struct fuse_inode *fi = get_fuse_inode(inode);
251
252 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
253 return 0;
254}
255
213static int fuse_flush(struct file *file, fl_owner_t id) 256static int fuse_flush(struct file *file, fl_owner_t id)
214{ 257{
215 struct inode *inode = file->f_path.dentry->d_inode; 258 struct inode *inode = file->f_path.dentry->d_inode;
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id)
245 return err; 288 return err;
246} 289}
247 290
291/*
292 * Wait for all pending writepages on the inode to finish.
293 *
294 * This is currently done by blocking further writes with FUSE_NOWRITE
295 * and waiting for all sent writes to complete.
296 *
297 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
298 * could conflict with truncation.
299 */
300static void fuse_sync_writes(struct inode *inode)
301{
302 fuse_set_nowrite(inode);
303 fuse_release_nowrite(inode);
304}
305
248int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, 306int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
249 int isdir) 307 int isdir)
250{ 308{
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
261 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 319 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
262 return 0; 320 return 0;
263 321
322 /*
323 * Start writeback against all dirty pages of the inode, then
324 * wait for all outstanding writes, before sending the FSYNC
325 * request.
326 */
327 err = write_inode_now(inode, 0);
328 if (err)
329 return err;
330
331 fuse_sync_writes(inode);
332
264 req = fuse_get_req(fc); 333 req = fuse_get_req(fc);
265 if (IS_ERR(req)) 334 if (IS_ERR(req))
266 return PTR_ERR(req); 335 return PTR_ERR(req);
@@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
294void fuse_read_fill(struct fuse_req *req, struct file *file, 363void fuse_read_fill(struct fuse_req *req, struct file *file,
295 struct inode *inode, loff_t pos, size_t count, int opcode) 364 struct inode *inode, loff_t pos, size_t count, int opcode)
296{ 365{
297 struct fuse_read_in *inarg = &req->misc.read_in; 366 struct fuse_read_in *inarg = &req->misc.read.in;
298 struct fuse_file *ff = file->private_data; 367 struct fuse_file *ff = file->private_data;
299 368
300 inarg->fh = ff->fh; 369 inarg->fh = ff->fh;
@@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
320 389
321 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 390 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
322 if (owner != NULL) { 391 if (owner != NULL) {
323 struct fuse_read_in *inarg = &req->misc.read_in; 392 struct fuse_read_in *inarg = &req->misc.read.in;
324 393
325 inarg->read_flags |= FUSE_READ_LOCKOWNER; 394 inarg->read_flags |= FUSE_READ_LOCKOWNER;
326 inarg->lock_owner = fuse_lock_owner_id(fc, owner); 395 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
@@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
329 return req->out.args[0].size; 398 return req->out.args[0].size;
330} 399}
331 400
401static void fuse_read_update_size(struct inode *inode, loff_t size,
402 u64 attr_ver)
403{
404 struct fuse_conn *fc = get_fuse_conn(inode);
405 struct fuse_inode *fi = get_fuse_inode(inode);
406
407 spin_lock(&fc->lock);
408 if (attr_ver == fi->attr_version && size < inode->i_size) {
409 fi->attr_version = ++fc->attr_version;
410 i_size_write(inode, size);
411 }
412 spin_unlock(&fc->lock);
413}
414
332static int fuse_readpage(struct file *file, struct page *page) 415static int fuse_readpage(struct file *file, struct page *page)
333{ 416{
334 struct inode *inode = page->mapping->host; 417 struct inode *inode = page->mapping->host;
335 struct fuse_conn *fc = get_fuse_conn(inode); 418 struct fuse_conn *fc = get_fuse_conn(inode);
336 struct fuse_req *req; 419 struct fuse_req *req;
420 size_t num_read;
421 loff_t pos = page_offset(page);
422 size_t count = PAGE_CACHE_SIZE;
423 u64 attr_ver;
337 int err; 424 int err;
338 425
339 err = -EIO; 426 err = -EIO;
340 if (is_bad_inode(inode)) 427 if (is_bad_inode(inode))
341 goto out; 428 goto out;
342 429
430 /*
431 * Page writeback can extend beyond the liftime of the
432 * page-cache page, so make sure we read a properly synced
433 * page.
434 */
435 fuse_wait_on_page_writeback(inode, page->index);
436
343 req = fuse_get_req(fc); 437 req = fuse_get_req(fc);
344 err = PTR_ERR(req); 438 err = PTR_ERR(req);
345 if (IS_ERR(req)) 439 if (IS_ERR(req))
346 goto out; 440 goto out;
347 441
442 attr_ver = fuse_get_attr_version(fc);
443
348 req->out.page_zeroing = 1; 444 req->out.page_zeroing = 1;
349 req->num_pages = 1; 445 req->num_pages = 1;
350 req->pages[0] = page; 446 req->pages[0] = page;
351 fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE, 447 num_read = fuse_send_read(req, file, inode, pos, count, NULL);
352 NULL);
353 err = req->out.h.error; 448 err = req->out.h.error;
354 fuse_put_request(fc, req); 449 fuse_put_request(fc, req);
355 if (!err) 450
451 if (!err) {
452 /*
453 * Short read means EOF. If file size is larger, truncate it
454 */
455 if (num_read < count)
456 fuse_read_update_size(inode, pos + num_read, attr_ver);
457
356 SetPageUptodate(page); 458 SetPageUptodate(page);
459 }
460
357 fuse_invalidate_attr(inode); /* atime changed */ 461 fuse_invalidate_attr(inode); /* atime changed */
358 out: 462 out:
359 unlock_page(page); 463 unlock_page(page);
@@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page)
363static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) 467static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
364{ 468{
365 int i; 469 int i;
470 size_t count = req->misc.read.in.size;
471 size_t num_read = req->out.args[0].size;
472 struct inode *inode = req->pages[0]->mapping->host;
473
474 /*
475 * Short read means EOF. If file size is larger, truncate it
476 */
477 if (!req->out.h.error && num_read < count) {
478 loff_t pos = page_offset(req->pages[0]) + num_read;
479 fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
480 }
366 481
367 fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ 482 fuse_invalidate_attr(inode); /* atime changed */
368 483
369 for (i = 0; i < req->num_pages; i++) { 484 for (i = 0; i < req->num_pages; i++) {
370 struct page *page = req->pages[i]; 485 struct page *page = req->pages[i];
@@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
387 size_t count = req->num_pages << PAGE_CACHE_SHIFT; 502 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
388 req->out.page_zeroing = 1; 503 req->out.page_zeroing = 1;
389 fuse_read_fill(req, file, inode, pos, count, FUSE_READ); 504 fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
505 req->misc.read.attr_ver = fuse_get_attr_version(fc);
390 if (fc->async_read) { 506 if (fc->async_read) {
391 struct fuse_file *ff = file->private_data; 507 struct fuse_file *ff = file->private_data;
392 req->ff = fuse_file_get(ff); 508 req->ff = fuse_file_get(ff);
@@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page)
411 struct inode *inode = data->inode; 527 struct inode *inode = data->inode;
412 struct fuse_conn *fc = get_fuse_conn(inode); 528 struct fuse_conn *fc = get_fuse_conn(inode);
413 529
530 fuse_wait_on_page_writeback(inode, page->index);
531
414 if (req->num_pages && 532 if (req->num_pages &&
415 (req->num_pages == FUSE_MAX_PAGES_PER_REQ || 533 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
416 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || 534 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
@@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
477} 595}
478 596
479static void fuse_write_fill(struct fuse_req *req, struct file *file, 597static void fuse_write_fill(struct fuse_req *req, struct file *file,
480 struct inode *inode, loff_t pos, size_t count, 598 struct fuse_file *ff, struct inode *inode,
481 int writepage) 599 loff_t pos, size_t count, int writepage)
482{ 600{
483 struct fuse_conn *fc = get_fuse_conn(inode); 601 struct fuse_conn *fc = get_fuse_conn(inode);
484 struct fuse_file *ff = file->private_data;
485 struct fuse_write_in *inarg = &req->misc.write.in; 602 struct fuse_write_in *inarg = &req->misc.write.in;
486 struct fuse_write_out *outarg = &req->misc.write.out; 603 struct fuse_write_out *outarg = &req->misc.write.out;
487 604
@@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
490 inarg->offset = pos; 607 inarg->offset = pos;
491 inarg->size = count; 608 inarg->size = count;
492 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; 609 inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
493 inarg->flags = file->f_flags; 610 inarg->flags = file ? file->f_flags : 0;
494 req->in.h.opcode = FUSE_WRITE; 611 req->in.h.opcode = FUSE_WRITE;
495 req->in.h.nodeid = get_node_id(inode); 612 req->in.h.nodeid = get_node_id(inode);
496 req->in.argpages = 1; 613 req->in.argpages = 1;
@@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
511 fl_owner_t owner) 628 fl_owner_t owner)
512{ 629{
513 struct fuse_conn *fc = get_fuse_conn(inode); 630 struct fuse_conn *fc = get_fuse_conn(inode);
514 fuse_write_fill(req, file, inode, pos, count, 0); 631 fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
515 if (owner != NULL) { 632 if (owner != NULL) {
516 struct fuse_write_in *inarg = &req->misc.write.in; 633 struct fuse_write_in *inarg = &req->misc.write.in;
517 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 634 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
533 return 0; 650 return 0;
534} 651}
535 652
653static void fuse_write_update_size(struct inode *inode, loff_t pos)
654{
655 struct fuse_conn *fc = get_fuse_conn(inode);
656 struct fuse_inode *fi = get_fuse_inode(inode);
657
658 spin_lock(&fc->lock);
659 fi->attr_version = ++fc->attr_version;
660 if (pos > inode->i_size)
661 i_size_write(inode, pos);
662 spin_unlock(&fc->lock);
663}
664
536static int fuse_buffered_write(struct file *file, struct inode *inode, 665static int fuse_buffered_write(struct file *file, struct inode *inode,
537 loff_t pos, unsigned count, struct page *page) 666 loff_t pos, unsigned count, struct page *page)
538{ 667{
539 int err; 668 int err;
540 size_t nres; 669 size_t nres;
541 struct fuse_conn *fc = get_fuse_conn(inode); 670 struct fuse_conn *fc = get_fuse_conn(inode);
542 struct fuse_inode *fi = get_fuse_inode(inode);
543 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 671 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
544 struct fuse_req *req; 672 struct fuse_req *req;
545 673
546 if (is_bad_inode(inode)) 674 if (is_bad_inode(inode))
547 return -EIO; 675 return -EIO;
548 676
677 /*
678 * Make sure writepages on the same page are not mixed up with
679 * plain writes.
680 */
681 fuse_wait_on_page_writeback(inode, page->index);
682
549 req = fuse_get_req(fc); 683 req = fuse_get_req(fc);
550 if (IS_ERR(req)) 684 if (IS_ERR(req))
551 return PTR_ERR(req); 685 return PTR_ERR(req);
@@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
560 err = -EIO; 694 err = -EIO;
561 if (!err) { 695 if (!err) {
562 pos += nres; 696 pos += nres;
563 spin_lock(&fc->lock); 697 fuse_write_update_size(inode, pos);
564 fi->attr_version = ++fc->attr_version;
565 if (pos > inode->i_size)
566 i_size_write(inode, pos);
567 spin_unlock(&fc->lock);
568
569 if (count == PAGE_CACHE_SIZE) 698 if (count == PAGE_CACHE_SIZE)
570 SetPageUptodate(page); 699 SetPageUptodate(page);
571 } 700 }
@@ -588,6 +717,198 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
588 return res; 717 return res;
589} 718}
590 719
720static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
721 struct inode *inode, loff_t pos,
722 size_t count)
723{
724 size_t res;
725 unsigned offset;
726 unsigned i;
727
728 for (i = 0; i < req->num_pages; i++)
729 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
730
731 res = fuse_send_write(req, file, inode, pos, count, NULL);
732
733 offset = req->page_offset;
734 count = res;
735 for (i = 0; i < req->num_pages; i++) {
736 struct page *page = req->pages[i];
737
738 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
739 SetPageUptodate(page);
740
741 if (count > PAGE_CACHE_SIZE - offset)
742 count -= PAGE_CACHE_SIZE - offset;
743 else
744 count = 0;
745 offset = 0;
746
747 unlock_page(page);
748 page_cache_release(page);
749 }
750
751 return res;
752}
753
754static ssize_t fuse_fill_write_pages(struct fuse_req *req,
755 struct address_space *mapping,
756 struct iov_iter *ii, loff_t pos)
757{
758 struct fuse_conn *fc = get_fuse_conn(mapping->host);
759 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
760 size_t count = 0;
761 int err;
762
763 req->page_offset = offset;
764
765 do {
766 size_t tmp;
767 struct page *page;
768 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
769 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
770 iov_iter_count(ii));
771
772 bytes = min_t(size_t, bytes, fc->max_write - count);
773
774 again:
775 err = -EFAULT;
776 if (iov_iter_fault_in_readable(ii, bytes))
777 break;
778
779 err = -ENOMEM;
780 page = __grab_cache_page(mapping, index);
781 if (!page)
782 break;
783
784 pagefault_disable();
785 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
786 pagefault_enable();
787 flush_dcache_page(page);
788
789 if (!tmp) {
790 unlock_page(page);
791 page_cache_release(page);
792 bytes = min(bytes, iov_iter_single_seg_count(ii));
793 goto again;
794 }
795
796 err = 0;
797 req->pages[req->num_pages] = page;
798 req->num_pages++;
799
800 iov_iter_advance(ii, tmp);
801 count += tmp;
802 pos += tmp;
803 offset += tmp;
804 if (offset == PAGE_CACHE_SIZE)
805 offset = 0;
806
807 } while (iov_iter_count(ii) && count < fc->max_write &&
808 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
809
810 return count > 0 ? count : err;
811}
812
813static ssize_t fuse_perform_write(struct file *file,
814 struct address_space *mapping,
815 struct iov_iter *ii, loff_t pos)
816{
817 struct inode *inode = mapping->host;
818 struct fuse_conn *fc = get_fuse_conn(inode);
819 int err = 0;
820 ssize_t res = 0;
821
822 if (is_bad_inode(inode))
823 return -EIO;
824
825 do {
826 struct fuse_req *req;
827 ssize_t count;
828
829 req = fuse_get_req(fc);
830 if (IS_ERR(req)) {
831 err = PTR_ERR(req);
832 break;
833 }
834
835 count = fuse_fill_write_pages(req, mapping, ii, pos);
836 if (count <= 0) {
837 err = count;
838 } else {
839 size_t num_written;
840
841 num_written = fuse_send_write_pages(req, file, inode,
842 pos, count);
843 err = req->out.h.error;
844 if (!err) {
845 res += num_written;
846 pos += num_written;
847
848 /* break out of the loop on short write */
849 if (num_written != count)
850 err = -EIO;
851 }
852 }
853 fuse_put_request(fc, req);
854 } while (!err && iov_iter_count(ii));
855
856 if (res > 0)
857 fuse_write_update_size(inode, pos);
858
859 fuse_invalidate_attr(inode);
860
861 return res > 0 ? res : err;
862}
863
864static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
865 unsigned long nr_segs, loff_t pos)
866{
867 struct file *file = iocb->ki_filp;
868 struct address_space *mapping = file->f_mapping;
869 size_t count = 0;
870 ssize_t written = 0;
871 struct inode *inode = mapping->host;
872 ssize_t err;
873 struct iov_iter i;
874
875 WARN_ON(iocb->ki_pos != pos);
876
877 err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
878 if (err)
879 return err;
880
881 mutex_lock(&inode->i_mutex);
882 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
883
884 /* We can write back this queue in page reclaim */
885 current->backing_dev_info = mapping->backing_dev_info;
886
887 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
888 if (err)
889 goto out;
890
891 if (count == 0)
892 goto out;
893
894 err = remove_suid(file->f_path.dentry);
895 if (err)
896 goto out;
897
898 file_update_time(file);
899
900 iov_iter_init(&i, iov, nr_segs, count, 0);
901 written = fuse_perform_write(file, mapping, &i, pos);
902 if (written >= 0)
903 iocb->ki_pos = pos + written;
904
905out:
906 current->backing_dev_info = NULL;
907 mutex_unlock(&inode->i_mutex);
908
909 return written ? written : err;
910}
911
591static void fuse_release_user_pages(struct fuse_req *req, int write) 912static void fuse_release_user_pages(struct fuse_req *req, int write)
592{ 913{
593 unsigned i; 914 unsigned i;
@@ -613,7 +934,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
613 934
614 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 935 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
615 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 936 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
616 npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); 937 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
617 down_read(&current->mm->mmap_sem); 938 down_read(&current->mm->mmap_sem);
618 npages = get_user_pages(current, current->mm, user_addr, npages, write, 939 npages = get_user_pages(current, current->mm, user_addr, npages, write,
619 0, req->pages, NULL); 940 0, req->pages, NULL);
@@ -645,14 +966,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
645 966
646 while (count) { 967 while (count) {
647 size_t nres; 968 size_t nres;
648 size_t nbytes = min(count, nmax); 969 size_t nbytes_limit = min(count, nmax);
649 int err = fuse_get_user_pages(req, buf, nbytes, !write); 970 size_t nbytes;
971 int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
650 if (err) { 972 if (err) {
651 res = err; 973 res = err;
652 break; 974 break;
653 } 975 }
654 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; 976 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
655 nbytes = min(count, nbytes); 977 nbytes = min(nbytes_limit, nbytes);
656 if (write) 978 if (write)
657 nres = fuse_send_write(req, file, inode, pos, nbytes, 979 nres = fuse_send_write(req, file, inode, pos, nbytes,
658 current->files); 980 current->files);
@@ -683,12 +1005,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
683 } 1005 }
684 fuse_put_request(fc, req); 1006 fuse_put_request(fc, req);
685 if (res > 0) { 1007 if (res > 0) {
686 if (write) { 1008 if (write)
687 spin_lock(&fc->lock); 1009 fuse_write_update_size(inode, pos);
688 if (pos > inode->i_size)
689 i_size_write(inode, pos);
690 spin_unlock(&fc->lock);
691 }
692 *ppos = pos; 1010 *ppos = pos;
693 } 1011 }
694 fuse_invalidate_attr(inode); 1012 fuse_invalidate_attr(inode);
@@ -716,21 +1034,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
716 return res; 1034 return res;
717} 1035}
718 1036
719static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 1037static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
720{ 1038{
721 if ((vma->vm_flags & VM_SHARED)) { 1039 __free_page(req->pages[0]);
722 if ((vma->vm_flags & VM_WRITE)) 1040 fuse_file_put(req->ff);
723 return -ENODEV; 1041 fuse_put_request(fc, req);
724 else 1042}
725 vma->vm_flags &= ~VM_MAYWRITE; 1043
1044static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1045{
1046 struct inode *inode = req->inode;
1047 struct fuse_inode *fi = get_fuse_inode(inode);
1048 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1049
1050 list_del(&req->writepages_entry);
1051 dec_bdi_stat(bdi, BDI_WRITEBACK);
1052 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
1053 bdi_writeout_inc(bdi);
1054 wake_up(&fi->page_waitq);
1055}
1056
1057/* Called under fc->lock, may release and reacquire it */
1058static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1059{
1060 struct fuse_inode *fi = get_fuse_inode(req->inode);
1061 loff_t size = i_size_read(req->inode);
1062 struct fuse_write_in *inarg = &req->misc.write.in;
1063
1064 if (!fc->connected)
1065 goto out_free;
1066
1067 if (inarg->offset + PAGE_CACHE_SIZE <= size) {
1068 inarg->size = PAGE_CACHE_SIZE;
1069 } else if (inarg->offset < size) {
1070 inarg->size = size & (PAGE_CACHE_SIZE - 1);
1071 } else {
1072 /* Got truncated off completely */
1073 goto out_free;
1074 }
1075
1076 req->in.args[1].size = inarg->size;
1077 fi->writectr++;
1078 request_send_background_locked(fc, req);
1079 return;
1080
1081 out_free:
1082 fuse_writepage_finish(fc, req);
1083 spin_unlock(&fc->lock);
1084 fuse_writepage_free(fc, req);
1085 spin_lock(&fc->lock);
1086}
1087
1088/*
1089 * If fi->writectr is positive (no truncate or fsync going on) send
1090 * all queued writepage requests.
1091 *
1092 * Called with fc->lock
1093 */
1094void fuse_flush_writepages(struct inode *inode)
1095{
1096 struct fuse_conn *fc = get_fuse_conn(inode);
1097 struct fuse_inode *fi = get_fuse_inode(inode);
1098 struct fuse_req *req;
1099
1100 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1101 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1102 list_del_init(&req->list);
1103 fuse_send_writepage(fc, req);
1104 }
1105}
1106
1107static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1108{
1109 struct inode *inode = req->inode;
1110 struct fuse_inode *fi = get_fuse_inode(inode);
1111
1112 mapping_set_error(inode->i_mapping, req->out.h.error);
1113 spin_lock(&fc->lock);
1114 fi->writectr--;
1115 fuse_writepage_finish(fc, req);
1116 spin_unlock(&fc->lock);
1117 fuse_writepage_free(fc, req);
1118}
1119
1120static int fuse_writepage_locked(struct page *page)
1121{
1122 struct address_space *mapping = page->mapping;
1123 struct inode *inode = mapping->host;
1124 struct fuse_conn *fc = get_fuse_conn(inode);
1125 struct fuse_inode *fi = get_fuse_inode(inode);
1126 struct fuse_req *req;
1127 struct fuse_file *ff;
1128 struct page *tmp_page;
1129
1130 set_page_writeback(page);
1131
1132 req = fuse_request_alloc_nofs();
1133 if (!req)
1134 goto err;
1135
1136 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1137 if (!tmp_page)
1138 goto err_free;
1139
1140 spin_lock(&fc->lock);
1141 BUG_ON(list_empty(&fi->write_files));
1142 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
1143 req->ff = fuse_file_get(ff);
1144 spin_unlock(&fc->lock);
1145
1146 fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
1147
1148 copy_highpage(tmp_page, page);
1149 req->num_pages = 1;
1150 req->pages[0] = tmp_page;
1151 req->page_offset = 0;
1152 req->end = fuse_writepage_end;
1153 req->inode = inode;
1154
1155 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
1156 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1157 end_page_writeback(page);
1158
1159 spin_lock(&fc->lock);
1160 list_add(&req->writepages_entry, &fi->writepages);
1161 list_add_tail(&req->list, &fi->queued_writes);
1162 fuse_flush_writepages(inode);
1163 spin_unlock(&fc->lock);
1164
1165 return 0;
1166
1167err_free:
1168 fuse_request_free(req);
1169err:
1170 end_page_writeback(page);
1171 return -ENOMEM;
1172}
1173
1174static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1175{
1176 int err;
1177
1178 err = fuse_writepage_locked(page);
1179 unlock_page(page);
1180
1181 return err;
1182}
1183
1184static int fuse_launder_page(struct page *page)
1185{
1186 int err = 0;
1187 if (clear_page_dirty_for_io(page)) {
1188 struct inode *inode = page->mapping->host;
1189 err = fuse_writepage_locked(page);
1190 if (!err)
1191 fuse_wait_on_page_writeback(inode, page->index);
726 } 1192 }
727 return generic_file_mmap(file, vma); 1193 return err;
728} 1194}
729 1195
730static int fuse_set_page_dirty(struct page *page) 1196/*
1197 * Write back dirty pages now, because there may not be any suitable
1198 * open files later
1199 */
1200static void fuse_vma_close(struct vm_area_struct *vma)
731{ 1201{
732 printk("fuse_set_page_dirty: should not happen\n"); 1202 filemap_write_and_wait(vma->vm_file->f_mapping);
733 dump_stack(); 1203}
1204
1205/*
1206 * Wait for writeback against this page to complete before allowing it
1207 * to be marked dirty again, and hence written back again, possibly
1208 * before the previous writepage completed.
1209 *
1210 * Block here, instead of in ->writepage(), so that the userspace fs
1211 * can only block processes actually operating on the filesystem.
1212 *
1213 * Otherwise unprivileged userspace fs would be able to block
1214 * unrelated:
1215 *
1216 * - page migration
1217 * - sync(2)
1218 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1219 */
1220static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1221{
1222 /*
1223 * Don't use page->mapping as it may become NULL from a
1224 * concurrent truncate.
1225 */
1226 struct inode *inode = vma->vm_file->f_mapping->host;
1227
1228 fuse_wait_on_page_writeback(inode, page->index);
1229 return 0;
1230}
1231
1232static struct vm_operations_struct fuse_file_vm_ops = {
1233 .close = fuse_vma_close,
1234 .fault = filemap_fault,
1235 .page_mkwrite = fuse_page_mkwrite,
1236};
1237
1238static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
1239{
1240 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1241 struct inode *inode = file->f_dentry->d_inode;
1242 struct fuse_conn *fc = get_fuse_conn(inode);
1243 struct fuse_inode *fi = get_fuse_inode(inode);
1244 struct fuse_file *ff = file->private_data;
1245 /*
1246 * file may be written through mmap, so chain it onto the
1247 * inodes's write_file list
1248 */
1249 spin_lock(&fc->lock);
1250 if (list_empty(&ff->write_entry))
1251 list_add(&ff->write_entry, &fi->write_files);
1252 spin_unlock(&fc->lock);
1253 }
1254 file_accessed(file);
1255 vma->vm_ops = &fuse_file_vm_ops;
734 return 0; 1256 return 0;
735} 1257}
736 1258
@@ -909,12 +1431,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
909 return err ? 0 : outarg.block; 1431 return err ? 0 : outarg.block;
910} 1432}
911 1433
1434static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1435{
1436 loff_t retval;
1437 struct inode *inode = file->f_path.dentry->d_inode;
1438
1439 mutex_lock(&inode->i_mutex);
1440 switch (origin) {
1441 case SEEK_END:
1442 offset += i_size_read(inode);
1443 break;
1444 case SEEK_CUR:
1445 offset += file->f_pos;
1446 }
1447 retval = -EINVAL;
1448 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
1449 if (offset != file->f_pos) {
1450 file->f_pos = offset;
1451 file->f_version = 0;
1452 }
1453 retval = offset;
1454 }
1455 mutex_unlock(&inode->i_mutex);
1456 return retval;
1457}
1458
912static const struct file_operations fuse_file_operations = { 1459static const struct file_operations fuse_file_operations = {
913 .llseek = generic_file_llseek, 1460 .llseek = fuse_file_llseek,
914 .read = do_sync_read, 1461 .read = do_sync_read,
915 .aio_read = fuse_file_aio_read, 1462 .aio_read = fuse_file_aio_read,
916 .write = do_sync_write, 1463 .write = do_sync_write,
917 .aio_write = generic_file_aio_write, 1464 .aio_write = fuse_file_aio_write,
918 .mmap = fuse_file_mmap, 1465 .mmap = fuse_file_mmap,
919 .open = fuse_open, 1466 .open = fuse_open,
920 .flush = fuse_flush, 1467 .flush = fuse_flush,
@@ -926,7 +1473,7 @@ static const struct file_operations fuse_file_operations = {
926}; 1473};
927 1474
928static const struct file_operations fuse_direct_io_file_operations = { 1475static const struct file_operations fuse_direct_io_file_operations = {
929 .llseek = generic_file_llseek, 1476 .llseek = fuse_file_llseek,
930 .read = fuse_direct_read, 1477 .read = fuse_direct_read,
931 .write = fuse_direct_write, 1478 .write = fuse_direct_write,
932 .open = fuse_open, 1479 .open = fuse_open,
@@ -940,10 +1487,12 @@ static const struct file_operations fuse_direct_io_file_operations = {
940 1487
941static const struct address_space_operations fuse_file_aops = { 1488static const struct address_space_operations fuse_file_aops = {
942 .readpage = fuse_readpage, 1489 .readpage = fuse_readpage,
1490 .writepage = fuse_writepage,
1491 .launder_page = fuse_launder_page,
943 .write_begin = fuse_write_begin, 1492 .write_begin = fuse_write_begin,
944 .write_end = fuse_write_end, 1493 .write_end = fuse_write_end,
945 .readpages = fuse_readpages, 1494 .readpages = fuse_readpages,
946 .set_page_dirty = fuse_set_page_dirty, 1495 .set_page_dirty = __set_page_dirty_nobuffers,
947 .bmap = fuse_bmap, 1496 .bmap = fuse_bmap,
948}; 1497};
949 1498
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 67aaf6ee38ea..dadffa21a206 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,7 @@
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/rwsem.h>
18 19
19/** Max number of pages that can be used in a single read request */ 20/** Max number of pages that can be used in a single read request */
20#define FUSE_MAX_PAGES_PER_REQ 32 21#define FUSE_MAX_PAGES_PER_REQ 32
@@ -25,6 +26,9 @@
25/** Congestion starts at 75% of maximum */ 26/** Congestion starts at 75% of maximum */
26#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) 27#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
27 28
29/** Bias for fi->writectr, meaning new writepages must not be sent */
30#define FUSE_NOWRITE INT_MIN
31
28/** It could be as large as PATH_MAX, but would that have any uses? */ 32/** It could be as large as PATH_MAX, but would that have any uses? */
29#define FUSE_NAME_MAX 1024 33#define FUSE_NAME_MAX 1024
30 34
@@ -73,6 +77,19 @@ struct fuse_inode {
73 77
74 /** Files usable in writepage. Protected by fc->lock */ 78 /** Files usable in writepage. Protected by fc->lock */
75 struct list_head write_files; 79 struct list_head write_files;
80
81 /** Writepages pending on truncate or fsync */
82 struct list_head queued_writes;
83
84 /** Number of sent writes, a negative bias (FUSE_NOWRITE)
85 * means more writes are blocked */
86 int writectr;
87
88 /** Waitq for writepage completion */
89 wait_queue_head_t page_waitq;
90
91 /** List of writepage requestst (pending or sent) */
92 struct list_head writepages;
76}; 93};
77 94
78/** FUSE specific file data */ 95/** FUSE specific file data */
@@ -222,7 +239,10 @@ struct fuse_req {
222 } release; 239 } release;
223 struct fuse_init_in init_in; 240 struct fuse_init_in init_in;
224 struct fuse_init_out init_out; 241 struct fuse_init_out init_out;
225 struct fuse_read_in read_in; 242 struct {
243 struct fuse_read_in in;
244 u64 attr_ver;
245 } read;
226 struct { 246 struct {
227 struct fuse_write_in in; 247 struct fuse_write_in in;
228 struct fuse_write_out out; 248 struct fuse_write_out out;
@@ -242,6 +262,12 @@ struct fuse_req {
242 /** File used in the request (or NULL) */ 262 /** File used in the request (or NULL) */
243 struct fuse_file *ff; 263 struct fuse_file *ff;
244 264
265 /** Inode used in the request or NULL */
266 struct inode *inode;
267
268 /** Link on fi->writepages */
269 struct list_head writepages_entry;
270
245 /** Request completion callback */ 271 /** Request completion callback */
246 void (*end)(struct fuse_conn *, struct fuse_req *); 272 void (*end)(struct fuse_conn *, struct fuse_req *);
247 273
@@ -390,8 +416,8 @@ struct fuse_conn {
390 /** Entry on the fuse_conn_list */ 416 /** Entry on the fuse_conn_list */
391 struct list_head entry; 417 struct list_head entry;
392 418
393 /** Unique ID */ 419 /** Device ID from super block */
394 u64 id; 420 dev_t dev;
395 421
396 /** Dentries in the control filesystem */ 422 /** Dentries in the control filesystem */
397 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; 423 struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
@@ -438,7 +464,7 @@ extern const struct file_operations fuse_dev_operations;
438/** 464/**
439 * Get a filled in inode 465 * Get a filled in inode
440 */ 466 */
441struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 467struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
442 int generation, struct fuse_attr *attr, 468 int generation, struct fuse_attr *attr,
443 u64 attr_valid, u64 attr_version); 469 u64 attr_valid, u64 attr_version);
444 470
@@ -446,7 +472,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
446 * Send FORGET command 472 * Send FORGET command
447 */ 473 */
448void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 474void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
449 unsigned long nodeid, u64 nlookup); 475 u64 nodeid, u64 nlookup);
450 476
451/** 477/**
452 * Initialize READ or READDIR request 478 * Initialize READ or READDIR request
@@ -504,6 +530,11 @@ void fuse_init_symlink(struct inode *inode);
504void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 530void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
505 u64 attr_valid, u64 attr_version); 531 u64 attr_valid, u64 attr_version);
506 532
533void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
534 u64 attr_valid);
535
536void fuse_truncate(struct address_space *mapping, loff_t offset);
537
507/** 538/**
508 * Initialize the client device 539 * Initialize the client device
509 */ 540 */
@@ -522,6 +553,8 @@ void fuse_ctl_cleanup(void);
522 */ 553 */
523struct fuse_req *fuse_request_alloc(void); 554struct fuse_req *fuse_request_alloc(void);
524 555
556struct fuse_req *fuse_request_alloc_nofs(void);
557
525/** 558/**
526 * Free a request 559 * Free a request
527 */ 560 */
@@ -558,6 +591,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
558 */ 591 */
559void request_send_background(struct fuse_conn *fc, struct fuse_req *req); 592void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
560 593
594void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
595
561/* Abort all requests */ 596/* Abort all requests */
562void fuse_abort_conn(struct fuse_conn *fc); 597void fuse_abort_conn(struct fuse_conn *fc);
563 598
@@ -600,3 +635,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
600 635
601int fuse_update_attributes(struct inode *inode, struct kstat *stat, 636int fuse_update_attributes(struct inode *inode, struct kstat *stat,
602 struct file *file, bool *refreshed); 637 struct file *file, bool *refreshed);
638
639void fuse_flush_writepages(struct inode *inode);
640
641void fuse_set_nowrite(struct inode *inode);
642void fuse_release_nowrite(struct inode *inode);
643
644u64 fuse_get_attr_version(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4df34da2284a..79b615873838 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
59 fi->nodeid = 0; 59 fi->nodeid = 0;
60 fi->nlookup = 0; 60 fi->nlookup = 0;
61 fi->attr_version = 0; 61 fi->attr_version = 0;
62 fi->writectr = 0;
62 INIT_LIST_HEAD(&fi->write_files); 63 INIT_LIST_HEAD(&fi->write_files);
64 INIT_LIST_HEAD(&fi->queued_writes);
65 INIT_LIST_HEAD(&fi->writepages);
66 init_waitqueue_head(&fi->page_waitq);
63 fi->forget_req = fuse_request_alloc(); 67 fi->forget_req = fuse_request_alloc();
64 if (!fi->forget_req) { 68 if (!fi->forget_req) {
65 kmem_cache_free(fuse_inode_cachep, inode); 69 kmem_cache_free(fuse_inode_cachep, inode);
@@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode)
73{ 77{
74 struct fuse_inode *fi = get_fuse_inode(inode); 78 struct fuse_inode *fi = get_fuse_inode(inode);
75 BUG_ON(!list_empty(&fi->write_files)); 79 BUG_ON(!list_empty(&fi->write_files));
80 BUG_ON(!list_empty(&fi->queued_writes));
76 if (fi->forget_req) 81 if (fi->forget_req)
77 fuse_request_free(fi->forget_req); 82 fuse_request_free(fi->forget_req);
78 kmem_cache_free(fuse_inode_cachep, inode); 83 kmem_cache_free(fuse_inode_cachep, inode);
79} 84}
80 85
81void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 86void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
82 unsigned long nodeid, u64 nlookup) 87 u64 nodeid, u64 nlookup)
83{ 88{
84 struct fuse_forget_in *inarg = &req->misc.forget_in; 89 struct fuse_forget_in *inarg = &req->misc.forget_in;
85 inarg->nlookup = nlookup; 90 inarg->nlookup = nlookup;
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
109 return 0; 114 return 0;
110} 115}
111 116
112static void fuse_truncate(struct address_space *mapping, loff_t offset) 117void fuse_truncate(struct address_space *mapping, loff_t offset)
113{ 118{
114 /* See vmtruncate() */ 119 /* See vmtruncate() */
115 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 120 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
117 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 122 unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
118} 123}
119 124
120 125void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
121void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 126 u64 attr_valid)
122 u64 attr_valid, u64 attr_version)
123{ 127{
124 struct fuse_conn *fc = get_fuse_conn(inode); 128 struct fuse_conn *fc = get_fuse_conn(inode);
125 struct fuse_inode *fi = get_fuse_inode(inode); 129 struct fuse_inode *fi = get_fuse_inode(inode);
126 loff_t oldsize;
127 130
128 spin_lock(&fc->lock);
129 if (attr_version != 0 && fi->attr_version > attr_version) {
130 spin_unlock(&fc->lock);
131 return;
132 }
133 fi->attr_version = ++fc->attr_version; 131 fi->attr_version = ++fc->attr_version;
134 fi->i_time = attr_valid; 132 fi->i_time = attr_valid;
135 133
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
159 fi->orig_i_mode = inode->i_mode; 157 fi->orig_i_mode = inode->i_mode;
160 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) 158 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
161 inode->i_mode &= ~S_ISVTX; 159 inode->i_mode &= ~S_ISVTX;
160}
161
162void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
163 u64 attr_valid, u64 attr_version)
164{
165 struct fuse_conn *fc = get_fuse_conn(inode);
166 struct fuse_inode *fi = get_fuse_inode(inode);
167 loff_t oldsize;
168
169 spin_lock(&fc->lock);
170 if (attr_version != 0 && fi->attr_version > attr_version) {
171 spin_unlock(&fc->lock);
172 return;
173 }
174
175 fuse_change_attributes_common(inode, attr, attr_valid);
162 176
163 oldsize = inode->i_size; 177 oldsize = inode->i_size;
164 i_size_write(inode, attr->size); 178 i_size_write(inode, attr->size);
@@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
193 207
194static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 208static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
195{ 209{
196 unsigned long nodeid = *(unsigned long *) _nodeidp; 210 u64 nodeid = *(u64 *) _nodeidp;
197 if (get_node_id(inode) == nodeid) 211 if (get_node_id(inode) == nodeid)
198 return 1; 212 return 1;
199 else 213 else
@@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
202 216
203static int fuse_inode_set(struct inode *inode, void *_nodeidp) 217static int fuse_inode_set(struct inode *inode, void *_nodeidp)
204{ 218{
205 unsigned long nodeid = *(unsigned long *) _nodeidp; 219 u64 nodeid = *(u64 *) _nodeidp;
206 get_fuse_inode(inode)->nodeid = nodeid; 220 get_fuse_inode(inode)->nodeid = nodeid;
207 return 0; 221 return 0;
208} 222}
209 223
210struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, 224struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
211 int generation, struct fuse_attr *attr, 225 int generation, struct fuse_attr *attr,
212 u64 attr_valid, u64 attr_version) 226 u64 attr_valid, u64 attr_version)
213{ 227{
@@ -447,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
447 return 0; 461 return 0;
448} 462}
449 463
450static struct fuse_conn *new_conn(void) 464static struct fuse_conn *new_conn(struct super_block *sb)
451{ 465{
452 struct fuse_conn *fc; 466 struct fuse_conn *fc;
453 int err; 467 int err;
@@ -468,19 +482,41 @@ static struct fuse_conn *new_conn(void)
468 atomic_set(&fc->num_waiting, 0); 482 atomic_set(&fc->num_waiting, 0);
469 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 483 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
470 fc->bdi.unplug_io_fn = default_unplug_io_fn; 484 fc->bdi.unplug_io_fn = default_unplug_io_fn;
485 /* fuse does it's own writeback accounting */
486 fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
487 fc->dev = sb->s_dev;
471 err = bdi_init(&fc->bdi); 488 err = bdi_init(&fc->bdi);
472 if (err) { 489 if (err)
473 kfree(fc); 490 goto error_kfree;
474 fc = NULL; 491 err = bdi_register_dev(&fc->bdi, fc->dev);
475 goto out; 492 if (err)
476 } 493 goto error_bdi_destroy;
494 /*
495 * For a single fuse filesystem use max 1% of dirty +
496 * writeback threshold.
497 *
498 * This gives about 1M of write buffer for memory maps on a
499 * machine with 1G and 10% dirty_ratio, which should be more
500 * than enough.
501 *
502 * Privileged users can raise it by writing to
503 *
504 * /sys/class/bdi/<bdi>/max_ratio
505 */
506 bdi_set_max_ratio(&fc->bdi, 1);
477 fc->reqctr = 0; 507 fc->reqctr = 0;
478 fc->blocked = 1; 508 fc->blocked = 1;
479 fc->attr_version = 1; 509 fc->attr_version = 1;
480 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 510 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
481 } 511 }
482out:
483 return fc; 512 return fc;
513
514error_bdi_destroy:
515 bdi_destroy(&fc->bdi);
516error_kfree:
517 mutex_destroy(&fc->inst_mutex);
518 kfree(fc);
519 return NULL;
484} 520}
485 521
486void fuse_conn_put(struct fuse_conn *fc) 522void fuse_conn_put(struct fuse_conn *fc)
@@ -548,6 +584,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
548 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); 584 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
549 fc->minor = arg->minor; 585 fc->minor = arg->minor;
550 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 586 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
587 fc->max_write = min_t(unsigned, 4096, fc->max_write);
551 fc->conn_init = 1; 588 fc->conn_init = 1;
552 } 589 }
553 fuse_put_request(fc, req); 590 fuse_put_request(fc, req);
@@ -578,12 +615,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
578 request_send_background(fc, req); 615 request_send_background(fc, req);
579} 616}
580 617
581static u64 conn_id(void)
582{
583 static u64 ctr = 1;
584 return ctr++;
585}
586
587static int fuse_fill_super(struct super_block *sb, void *data, int silent) 618static int fuse_fill_super(struct super_block *sb, void *data, int silent)
588{ 619{
589 struct fuse_conn *fc; 620 struct fuse_conn *fc;
@@ -621,14 +652,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
621 if (file->f_op != &fuse_dev_operations) 652 if (file->f_op != &fuse_dev_operations)
622 return -EINVAL; 653 return -EINVAL;
623 654
624 fc = new_conn(); 655 fc = new_conn(sb);
625 if (!fc) 656 if (!fc)
626 return -ENOMEM; 657 return -ENOMEM;
627 658
628 fc->flags = d.flags; 659 fc->flags = d.flags;
629 fc->user_id = d.user_id; 660 fc->user_id = d.user_id;
630 fc->group_id = d.group_id; 661 fc->group_id = d.group_id;
631 fc->max_read = d.max_read; 662 fc->max_read = min_t(unsigned, 4096, d.max_read);
632 663
633 /* Used by get_root_inode() */ 664 /* Used by get_root_inode() */
634 sb->s_fs_info = fc; 665 sb->s_fs_info = fc;
@@ -659,7 +690,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
659 if (file->private_data) 690 if (file->private_data)
660 goto err_unlock; 691 goto err_unlock;
661 692
662 fc->id = conn_id();
663 err = fuse_ctl_add_conn(fc); 693 err = fuse_ctl_add_conn(fc);
664 if (err) 694 if (err)
665 goto err_unlock; 695 goto err_unlock;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 8479da47049c..a4ff271df9ee 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -212,7 +212,7 @@ int gdlm_sysfs_init(void)
212{ 212{
213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj); 213 gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
214 if (!gdlm_kset) { 214 if (!gdlm_kset) {
215 printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__); 215 printk(KERN_WARNING "%s: can not create kset\n", __func__);
216 return -ENOMEM; 216 return -ENOMEM;
217 } 217 }
218 return 0; 218 return 0;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 509c5d60bd80..7f48576289c9 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
41 41
42#define gfs2_assert_withdraw(sdp, assertion) \ 42#define gfs2_assert_withdraw(sdp, assertion) \
43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ 43((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
44 __FUNCTION__, __FILE__, __LINE__)) 44 __func__, __FILE__, __LINE__))
45 45
46 46
47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 47int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
@@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
49 49
50#define gfs2_assert_warn(sdp, assertion) \ 50#define gfs2_assert_warn(sdp, assertion) \
51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ 51((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
52 __FUNCTION__, __FILE__, __LINE__)) 52 __func__, __FILE__, __LINE__))
53 53
54 54
55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, 55int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
56 const char *function, char *file, unsigned int line); 56 const char *function, char *file, unsigned int line);
57 57
58#define gfs2_consist(sdp) \ 58#define gfs2_consist(sdp) \
59gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) 59gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
60 60
61 61
62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, 62int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
63 const char *function, char *file, unsigned int line); 63 const char *function, char *file, unsigned int line);
64 64
65#define gfs2_consist_inode(ip) \ 65#define gfs2_consist_inode(ip) \
66gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) 66gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
67 67
68 68
69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, 69int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
70 const char *function, char *file, unsigned int line); 70 const char *function, char *file, unsigned int line);
71 71
72#define gfs2_consist_rgrpd(rgd) \ 72#define gfs2_consist_rgrpd(rgd) \
73gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) 73gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
74 74
75 75
76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 76int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
91} 91}
92 92
93#define gfs2_meta_check(sdp, bh) \ 93#define gfs2_meta_check(sdp, bh) \
94gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) 94gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
95 95
96 96
97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 97int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
118} 118}
119 119
120#define gfs2_metatype_check(sdp, bh, type) \ 120#define gfs2_metatype_check(sdp, bh, type) \
121gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) 121gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
122 122
123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type, 123static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
124 u16 format) 124 u16 format)
@@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
134 char *file, unsigned int line); 134 char *file, unsigned int line);
135 135
136#define gfs2_io_error(sdp) \ 136#define gfs2_io_error(sdp) \
137gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); 137gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
138 138
139 139
140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 140int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
141 const char *function, char *file, unsigned int line); 141 const char *function, char *file, unsigned int line);
142 142
143#define gfs2_io_error_bh(sdp, bh) \ 143#define gfs2_io_error_bh(sdp, bh) \
144gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); 144gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
145 145
146 146
147extern struct kmem_cache *gfs2_glock_cachep; 147extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 24cf6fc43021..f6621a785202 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
208 struct hfs_bnode *node, *next_node; 208 struct hfs_bnode *node, *next_node;
209 struct page **pagep; 209 struct page **pagep;
210 u32 nidx, idx; 210 u32 nidx, idx;
211 u16 off, len; 211 unsigned off;
212 u16 off16;
213 u16 len;
212 u8 *data, byte, m; 214 u8 *data, byte, m;
213 int i; 215 int i;
214 216
@@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
235 node = hfs_bnode_find(tree, nidx); 237 node = hfs_bnode_find(tree, nidx);
236 if (IS_ERR(node)) 238 if (IS_ERR(node))
237 return node; 239 return node;
238 len = hfs_brec_lenoff(node, 2, &off); 240 len = hfs_brec_lenoff(node, 2, &off16);
241 off = off16;
239 242
240 off += node->page_offset; 243 off += node->page_offset;
241 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 244 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
280 return next_node; 283 return next_node;
281 node = next_node; 284 node = next_node;
282 285
283 len = hfs_brec_lenoff(node, 0, &off); 286 len = hfs_brec_lenoff(node, 0, &off16);
287 off = off16;
284 off += node->page_offset; 288 off += node->page_offset;
285 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 289 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
286 data = kmap(*pagep); 290 data = kmap(*pagep);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index b4651e128d7f..36ca2e1a4fa3 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb)
215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT); 215 attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT);
216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT); 216 attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT);
217 mdb->drAtrb = attrib; 217 mdb->drAtrb = attrib;
218 mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1); 218 be32_add_cpu(&mdb->drWrCnt, 1);
219 mdb->drLsMod = hfs_mtime(); 219 mdb->drLsMod = hfs_mtime();
220 220
221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 221 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 32de44ed0021..8cf67974adf6 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
297 return 0; 297 return 0;
298 } 298 }
299 p = match_strdup(&args[0]); 299 p = match_strdup(&args[0]);
300 hsb->nls_disk = load_nls(p); 300 if (p)
301 hsb->nls_disk = load_nls(p);
301 if (!hsb->nls_disk) { 302 if (!hsb->nls_disk) {
302 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); 303 printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p);
303 kfree(p); 304 kfree(p);
@@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
311 return 0; 312 return 0;
312 } 313 }
313 p = match_strdup(&args[0]); 314 p = match_strdup(&args[0]);
314 hsb->nls_io = load_nls(p); 315 if (p)
316 hsb->nls_io = load_nls(p);
315 if (!hsb->nls_io) { 317 if (!hsb->nls_io) {
316 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); 318 printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p);
317 kfree(p); 319 kfree(p);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index bb5433608a42..e49fcee1e293 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
184 struct hfs_bnode *node, *next_node; 184 struct hfs_bnode *node, *next_node;
185 struct page **pagep; 185 struct page **pagep;
186 u32 nidx, idx; 186 u32 nidx, idx;
187 u16 off, len; 187 unsigned off;
188 u16 off16;
189 u16 len;
188 u8 *data, byte, m; 190 u8 *data, byte, m;
189 int i; 191 int i;
190 192
@@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
211 node = hfs_bnode_find(tree, nidx); 213 node = hfs_bnode_find(tree, nidx);
212 if (IS_ERR(node)) 214 if (IS_ERR(node))
213 return node; 215 return node;
214 len = hfs_brec_lenoff(node, 2, &off); 216 len = hfs_brec_lenoff(node, 2, &off16);
217 off = off16;
215 218
216 off += node->page_offset; 219 off += node->page_offset;
217 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 220 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
256 return next_node; 259 return next_node;
257 node = next_node; 260 node = next_node;
258 261
259 len = hfs_brec_lenoff(node, 0, &off); 262 len = hfs_brec_lenoff(node, 0, &off16);
263 off = off16;
260 off += node->page_offset; 264 off += node->page_offset;
261 pagep = node->page + (off >> PAGE_CACHE_SHIFT); 265 pagep = node->page + (off >> PAGE_CACHE_SHIFT);
262 data = kmap(*pagep); 266 data = kmap(*pagep);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d72d0a8b25aa..9e59537b43d5 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr *);
311int hfsplus_rename_cat(u32, struct inode *, struct qstr *, 311int hfsplus_rename_cat(u32, struct inode *, struct qstr *,
312 struct inode *, struct qstr *); 312 struct inode *, struct qstr *);
313 313
314/* dir.c */
315extern const struct inode_operations hfsplus_dir_inode_operations;
316extern const struct file_operations hfsplus_dir_operations;
317
314/* extents.c */ 318/* extents.c */
315int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 319int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
316void hfsplus_ext_write_extent(struct inode *); 320void hfsplus_ext_write_extent(struct inode *);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 37744cf3706a..d53b2af91c25 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -278,9 +278,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
278 return 0; 278 return 0;
279} 279}
280 280
281extern const struct inode_operations hfsplus_dir_inode_operations;
282extern struct file_operations hfsplus_dir_operations;
283
284static const struct inode_operations hfsplus_file_inode_operations = { 281static const struct inode_operations hfsplus_file_inode_operations = {
285 .lookup = hfsplus_file_lookup, 282 .lookup = hfsplus_file_lookup,
286 .truncate = hfsplus_file_truncate, 283 .truncate = hfsplus_file_truncate,
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index dc64fac00831..9997cbf8beb5 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
132 return 0; 132 return 0;
133 } 133 }
134 p = match_strdup(&args[0]); 134 p = match_strdup(&args[0]);
135 sbi->nls = load_nls(p); 135 if (p)
136 sbi->nls = load_nls(p);
136 if (!sbi->nls) { 137 if (!sbi->nls) {
137 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); 138 printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p);
138 kfree(p); 139 kfree(p);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index b0f9ad362d1d..ce97a54518d8 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -357,7 +357,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); 357 printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n");
358 sb->s_flags |= MS_RDONLY; 358 sb->s_flags |= MS_RDONLY;
359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { 359 } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
360 printk(KERN_WARNING "hfs: write access to a jounaled filesystem is not supported, " 360 printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, "
361 "use the force option at your own risk, mounting read-only.\n"); 361 "use the force option at your own risk, mounting read-only.\n");
362 sb->s_flags |= MS_RDONLY; 362 sb->s_flags |= MS_RDONLY;
363 } 363 }
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
423 */ 423 */
424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION); 424 vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
425 vhdr->modify_date = hfsp_now2mt(); 425 vhdr->modify_date = hfsp_now2mt();
426 vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1); 426 be32_add_cpu(&vhdr->write_count, 1);
427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); 427 vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); 428 vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); 429 mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 72cab78f0509..175d08eacc86 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
47 return 0; 47 return 0;
48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); 48 wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
49 49
50 extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); 50 extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
51 wd->embed_start = (extent >> 16) & 0xFFFF; 51 wd->embed_start = (extent >> 16) & 0xFFFF;
52 wd->embed_count = extent & 0xFFFF; 52 wd->embed_count = extent & 0xFFFF;
53 53
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9783723e8ffe..aeabf80f81a5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .ra_pages = 0, /* No readahead */ 47 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 49};
50 50
51int sysctl_hugetlb_shm_group; 51int sysctl_hugetlb_shm_group;
diff --git a/fs/inode.c b/fs/inode.c
index 27ee1af50d02..bf6478130424 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,8 +495,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea
495 struct inode * inode = NULL; 495 struct inode * inode = NULL;
496 496
497repeat: 497repeat:
498 hlist_for_each (node, head) { 498 hlist_for_each_entry(inode, node, head, i_hash) {
499 inode = hlist_entry(node, struct inode, i_hash);
500 if (inode->i_sb != sb) 499 if (inode->i_sb != sb)
501 continue; 500 continue;
502 if (!test(inode, data)) 501 if (!test(inode, data))
@@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head
520 struct inode * inode = NULL; 519 struct inode * inode = NULL;
521 520
522repeat: 521repeat:
523 hlist_for_each (node, head) { 522 hlist_for_each_entry(inode, node, head, i_hash) {
524 inode = hlist_entry(node, struct inode, i_hash);
525 if (inode->i_ino != ino) 523 if (inode->i_ino != ino)
526 continue; 524 continue;
527 if (inode->i_sb != sb) 525 if (inode->i_sb != sb)
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 7b94a1e3c015..6676c06bb7c1 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void)
598 } 598 }
599 599
600 ih = inotify_init(&inotify_user_ops); 600 ih = inotify_init(&inotify_user_ops);
601 if (unlikely(IS_ERR(ih))) { 601 if (IS_ERR(ih)) {
602 ret = PTR_ERR(ih); 602 ret = PTR_ERR(ih);
603 goto out_free_dev; 603 goto out_free_dev;
604 } 604 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f32fbde2175e..7db32b3382d3 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -28,8 +28,8 @@
28 * 28 *
29 * Returns 0 on success, -errno on error. 29 * Returns 0 on success, -errno on error.
30 */ 30 */
31long vfs_ioctl(struct file *filp, unsigned int cmd, 31static long vfs_ioctl(struct file *filp, unsigned int cmd,
32 unsigned long arg) 32 unsigned long arg)
33{ 33{
34 int error = -ENOTTY; 34 int error = -ENOTTY;
35 35
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 1ba407c64df1..2f0dc5a14633 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
145 } 145 }
146 de = tmpde; 146 de = tmpde;
147 } 147 }
148 /* Basic sanity check, whether name doesn't exceed dir entry */
149 if (de_len < de->name_len[0] +
150 sizeof(struct iso_directory_record)) {
151 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
152 " in block %lu of inode %lu\n", block,
153 inode->i_ino);
154 return -EIO;
155 }
148 156
149 if (first_de) { 157 if (first_de) {
150 isofs_normalize_block_and_offset(de, 158 isofs_normalize_block_and_offset(de,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index d1bdf8adb351..ccbf72faf27a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -78,29 +78,29 @@ static inline int isonum_712(char *p)
78} 78}
79static inline unsigned int isonum_721(char *p) 79static inline unsigned int isonum_721(char *p)
80{ 80{
81 return le16_to_cpu(get_unaligned((__le16 *)p)); 81 return get_unaligned_le16(p);
82} 82}
83static inline unsigned int isonum_722(char *p) 83static inline unsigned int isonum_722(char *p)
84{ 84{
85 return be16_to_cpu(get_unaligned((__le16 *)p)); 85 return get_unaligned_be16(p);
86} 86}
87static inline unsigned int isonum_723(char *p) 87static inline unsigned int isonum_723(char *p)
88{ 88{
89 /* Ignore bigendian datum due to broken mastering programs */ 89 /* Ignore bigendian datum due to broken mastering programs */
90 return le16_to_cpu(get_unaligned((__le16 *)p)); 90 return get_unaligned_le16(p);
91} 91}
92static inline unsigned int isonum_731(char *p) 92static inline unsigned int isonum_731(char *p)
93{ 93{
94 return le32_to_cpu(get_unaligned((__le32 *)p)); 94 return get_unaligned_le32(p);
95} 95}
96static inline unsigned int isonum_732(char *p) 96static inline unsigned int isonum_732(char *p)
97{ 97{
98 return be32_to_cpu(get_unaligned((__le32 *)p)); 98 return get_unaligned_be32(p);
99} 99}
100static inline unsigned int isonum_733(char *p) 100static inline unsigned int isonum_733(char *p)
101{ 101{
102 /* Ignore bigendian datum due to broken mastering programs */ 102 /* Ignore bigendian datum due to broken mastering programs */
103 return le32_to_cpu(get_unaligned((__le32 *)p)); 103 return get_unaligned_le32(p);
104} 104}
105extern int iso_date(char *, int); 105extern int iso_date(char *, int);
106 106
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 344b247bc29a..8299889a835e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
111 111
112 dlen = de->name_len[0]; 112 dlen = de->name_len[0];
113 dpnt = de->name; 113 dpnt = de->name;
114 /* Basic sanity check, whether name doesn't exceed dir entry */
115 if (de_len < dlen + sizeof(struct iso_directory_record)) {
116 printk(KERN_NOTICE "iso9660: Corrupted directory entry"
117 " in block %lu of inode %lu\n", block,
118 dir->i_ino);
119 return 0;
120 }
114 121
115 if (sbi->s_rock && 122 if (sbi->s_rock &&
116 ((i = get_rock_ridge_filename(de, tmpname, dir)))) { 123 ((i = get_rock_ridge_filename(de, tmpname, dir)))) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f831..e0139786f717 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
520 jbd_debug (3, "JBD: commit phase 2\n"); 520 jbd_debug (3, "JBD: commit phase 2\n");
521 521
522 /* 522 /*
523 * First, drop modified flag: all accesses to the buffers
524 * will be tracked for a new trasaction only -bzzz
525 */
526 spin_lock(&journal->j_list_lock);
527 if (commit_transaction->t_buffers) {
528 new_jh = jh = commit_transaction->t_buffers->b_tnext;
529 do {
530 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
531 new_jh->b_modified == 0);
532 new_jh->b_modified = 0;
533 new_jh = new_jh->b_tnext;
534 } while (new_jh != jh);
535 }
536 spin_unlock(&journal->j_list_lock);
537
538 /*
539 * Now start flushing things to disk, in the order they appear 523 * Now start flushing things to disk, in the order they appear
540 * on the transaction lists. Data blocks go first. 524 * on the transaction lists. Data blocks go first.
541 */ 525 */
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
584 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits; 568 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
585 stats.u.run.rs_blocks_logged = 0; 569 stats.u.run.rs_blocks_logged = 0;
586 570
571 J_ASSERT(commit_transaction->t_nr_buffers <=
572 commit_transaction->t_outstanding_credits);
573
587 descriptor = NULL; 574 descriptor = NULL;
588 bufs = 0; 575 bufs = 0;
589 while (commit_transaction->t_buffers) { 576 while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 954cff001df6..53632e3e8457 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
534 if (!tid_geq(journal->j_commit_request, tid)) { 534 if (!tid_geq(journal->j_commit_request, tid)) {
535 printk(KERN_EMERG 535 printk(KERN_EMERG
536 "%s: error: j_commit_request=%d, tid=%d\n", 536 "%s: error: j_commit_request=%d, tid=%d\n",
537 __FUNCTION__, journal->j_commit_request, tid); 537 __func__, journal->j_commit_request, tid);
538 } 538 }
539 spin_unlock(&journal->j_state_lock); 539 spin_unlock(&journal->j_state_lock);
540#endif 540#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
599 599
600 printk(KERN_ALERT "%s: journal block not found " 600 printk(KERN_ALERT "%s: journal block not found "
601 "at offset %lu on %s\n", 601 "at offset %lu on %s\n",
602 __FUNCTION__, 602 __func__,
603 blocknr, 603 blocknr,
604 bdevname(journal->j_dev, b)); 604 bdevname(journal->j_dev, b));
605 err = -EIO; 605 err = -EIO;
@@ -904,19 +904,10 @@ static void jbd2_stats_proc_init(journal_t *journal)
904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); 904 snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); 905 journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
906 if (journal->j_proc_entry) { 906 if (journal->j_proc_entry) {
907 struct proc_dir_entry *p; 907 proc_create_data("history", S_IRUGO, journal->j_proc_entry,
908 p = create_proc_entry("history", S_IRUGO, 908 &jbd2_seq_history_fops, journal);
909 journal->j_proc_entry); 909 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
910 if (p) { 910 &jbd2_seq_info_fops, journal);
911 p->proc_fops = &jbd2_seq_history_fops;
912 p->data = journal;
913 p = create_proc_entry("info", S_IRUGO,
914 journal->j_proc_entry);
915 if (p) {
916 p->proc_fops = &jbd2_seq_info_fops;
917 p->data = journal;
918 }
919 }
920 } 911 }
921} 912}
922 913
@@ -1006,13 +997,14 @@ fail:
1006 */ 997 */
1007 998
1008/** 999/**
1009 * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure 1000 * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
1010 * @bdev: Block device on which to create the journal 1001 * @bdev: Block device on which to create the journal
1011 * @fs_dev: Device which hold journalled filesystem for this journal. 1002 * @fs_dev: Device which hold journalled filesystem for this journal.
1012 * @start: Block nr Start of journal. 1003 * @start: Block nr Start of journal.
1013 * @len: Length of the journal in blocks. 1004 * @len: Length of the journal in blocks.
1014 * @blocksize: blocksize of journalling device 1005 * @blocksize: blocksize of journalling device
1015 * @returns: a newly created journal_t * 1006 *
1007 * Returns: a newly created journal_t *
1016 * 1008 *
1017 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 1009 * jbd2_journal_init_dev creates a journal which maps a fixed contiguous
1018 * range of blocks on an arbitrary block device. 1010 * range of blocks on an arbitrary block device.
@@ -1036,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
1036 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1028 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1037 if (!journal->j_wbuf) { 1029 if (!journal->j_wbuf) {
1038 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1030 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1039 __FUNCTION__); 1031 __func__);
1040 kfree(journal); 1032 kfree(journal);
1041 journal = NULL; 1033 journal = NULL;
1042 goto out; 1034 goto out;
@@ -1092,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1092 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); 1084 journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
1093 if (!journal->j_wbuf) { 1085 if (!journal->j_wbuf) {
1094 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1086 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1095 __FUNCTION__); 1087 __func__);
1096 kfree(journal); 1088 kfree(journal);
1097 return NULL; 1089 return NULL;
1098 } 1090 }
@@ -1101,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1101 /* If that failed, give up */ 1093 /* If that failed, give up */
1102 if (err) { 1094 if (err) {
1103 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1095 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1104 __FUNCTION__); 1096 __func__);
1105 kfree(journal); 1097 kfree(journal);
1106 return NULL; 1098 return NULL;
1107 } 1099 }
@@ -1187,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
1187 */ 1179 */
1188 printk(KERN_EMERG 1180 printk(KERN_EMERG
1189 "%s: creation of journal on external device!\n", 1181 "%s: creation of journal on external device!\n",
1190 __FUNCTION__); 1182 __func__);
1191 BUG(); 1183 BUG();
1192 } 1184 }
1193 1185
@@ -1985,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
1985 1977
1986static void jbd2_journal_destroy_jbd2_journal_head_cache(void) 1978static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
1987{ 1979{
1988 J_ASSERT(jbd2_journal_head_cache != NULL); 1980 if (jbd2_journal_head_cache) {
1989 kmem_cache_destroy(jbd2_journal_head_cache); 1981 kmem_cache_destroy(jbd2_journal_head_cache);
1990 jbd2_journal_head_cache = NULL; 1982 jbd2_journal_head_cache = NULL;
1983 }
1991} 1984}
1992 1985
1993/* 1986/*
@@ -2006,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
2006 jbd_debug(1, "out of memory for journal_head\n"); 1999 jbd_debug(1, "out of memory for journal_head\n");
2007 if (time_after(jiffies, last_warning + 5*HZ)) { 2000 if (time_after(jiffies, last_warning + 5*HZ)) {
2008 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", 2001 printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
2009 __FUNCTION__); 2002 __func__);
2010 last_warning = jiffies; 2003 last_warning = jiffies;
2011 } 2004 }
2012 while (!ret) { 2005 while (!ret) {
@@ -2143,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
2143 if (jh->b_frozen_data) { 2136 if (jh->b_frozen_data) {
2144 printk(KERN_WARNING "%s: freeing " 2137 printk(KERN_WARNING "%s: freeing "
2145 "b_frozen_data\n", 2138 "b_frozen_data\n",
2146 __FUNCTION__); 2139 __func__);
2147 jbd2_free(jh->b_frozen_data, bh->b_size); 2140 jbd2_free(jh->b_frozen_data, bh->b_size);
2148 } 2141 }
2149 if (jh->b_committed_data) { 2142 if (jh->b_committed_data) {
2150 printk(KERN_WARNING "%s: freeing " 2143 printk(KERN_WARNING "%s: freeing "
2151 "b_committed_data\n", 2144 "b_committed_data\n",
2152 __FUNCTION__); 2145 __func__);
2153 jbd2_free(jh->b_committed_data, bh->b_size); 2146 jbd2_free(jh->b_committed_data, bh->b_size);
2154 } 2147 }
2155 bh->b_private = NULL; 2148 bh->b_private = NULL;
@@ -2314,10 +2307,12 @@ static int __init journal_init(void)
2314 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 2307 BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
2315 2308
2316 ret = journal_init_caches(); 2309 ret = journal_init_caches();
2317 if (ret != 0) 2310 if (ret == 0) {
2311 jbd2_create_debugfs_entry();
2312 jbd2_create_jbd_stats_proc_entry();
2313 } else {
2318 jbd2_journal_destroy_caches(); 2314 jbd2_journal_destroy_caches();
2319 jbd2_create_debugfs_entry(); 2315 }
2320 jbd2_create_jbd_stats_proc_entry();
2321 return ret; 2316 return ret;
2322} 2317}
2323 2318
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e998..257ff2625765 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
139oom: 139oom:
140 if (!journal_oom_retry) 140 if (!journal_oom_retry)
141 return -ENOMEM; 141 return -ENOMEM;
142 jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); 142 jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
143 yield(); 143 yield();
144 goto repeat; 144 goto repeat;
145} 145}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
167 return NULL; 167 return NULL;
168} 168}
169 169
170void jbd2_journal_destroy_revoke_caches(void)
171{
172 if (jbd2_revoke_record_cache) {
173 kmem_cache_destroy(jbd2_revoke_record_cache);
174 jbd2_revoke_record_cache = NULL;
175 }
176 if (jbd2_revoke_table_cache) {
177 kmem_cache_destroy(jbd2_revoke_table_cache);
178 jbd2_revoke_table_cache = NULL;
179 }
180}
181
170int __init jbd2_journal_init_revoke_caches(void) 182int __init jbd2_journal_init_revoke_caches(void)
171{ 183{
184 J_ASSERT(!jbd2_revoke_record_cache);
185 J_ASSERT(!jbd2_revoke_table_cache);
186
172 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", 187 jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
173 sizeof(struct jbd2_revoke_record_s), 188 sizeof(struct jbd2_revoke_record_s),
174 0, 189 0,
175 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 190 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
176 NULL); 191 NULL);
177 if (!jbd2_revoke_record_cache) 192 if (!jbd2_revoke_record_cache)
178 return -ENOMEM; 193 goto record_cache_failure;
179 194
180 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", 195 jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
181 sizeof(struct jbd2_revoke_table_s), 196 sizeof(struct jbd2_revoke_table_s),
182 0, SLAB_TEMPORARY, NULL); 197 0, SLAB_TEMPORARY, NULL);
183 if (!jbd2_revoke_table_cache) { 198 if (!jbd2_revoke_table_cache)
184 kmem_cache_destroy(jbd2_revoke_record_cache); 199 goto table_cache_failure;
185 jbd2_revoke_record_cache = NULL;
186 return -ENOMEM;
187 }
188 return 0; 200 return 0;
201table_cache_failure:
202 jbd2_journal_destroy_revoke_caches();
203record_cache_failure:
204 return -ENOMEM;
189} 205}
190 206
191void jbd2_journal_destroy_revoke_caches(void) 207static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
192{ 208{
193 kmem_cache_destroy(jbd2_revoke_record_cache); 209 int shift = 0;
194 jbd2_revoke_record_cache = NULL; 210 int tmp = hash_size;
195 kmem_cache_destroy(jbd2_revoke_table_cache); 211 struct jbd2_revoke_table_s *table;
196 jbd2_revoke_table_cache = NULL;
197}
198
199/* Initialise the revoke table for a given journal to a given size. */
200
201int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
202{
203 int shift, tmp;
204 212
205 J_ASSERT (journal->j_revoke_table[0] == NULL); 213 table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
214 if (!table)
215 goto out;
206 216
207 shift = 0;
208 tmp = hash_size;
209 while((tmp >>= 1UL) != 0UL) 217 while((tmp >>= 1UL) != 0UL)
210 shift++; 218 shift++;
211 219
212 journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 220 table->hash_size = hash_size;
213 if (!journal->j_revoke_table[0]) 221 table->hash_shift = shift;
214 return -ENOMEM; 222 table->hash_table =
215 journal->j_revoke = journal->j_revoke_table[0];
216
217 /* Check that the hash_size is a power of two */
218 J_ASSERT(is_power_of_2(hash_size));
219
220 journal->j_revoke->hash_size = hash_size;
221
222 journal->j_revoke->hash_shift = shift;
223
224 journal->j_revoke->hash_table =
225 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 223 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
226 if (!journal->j_revoke->hash_table) { 224 if (!table->hash_table) {
227 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 225 kmem_cache_free(jbd2_revoke_table_cache, table);
228 journal->j_revoke = NULL; 226 table = NULL;
229 return -ENOMEM; 227 goto out;
230 } 228 }
231 229
232 for (tmp = 0; tmp < hash_size; tmp++) 230 for (tmp = 0; tmp < hash_size; tmp++)
233 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 231 INIT_LIST_HEAD(&table->hash_table[tmp]);
234 232
235 journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); 233out:
236 if (!journal->j_revoke_table[1]) { 234 return table;
237 kfree(journal->j_revoke_table[0]->hash_table); 235}
238 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); 236
239 return -ENOMEM; 237static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
238{
239 int i;
240 struct list_head *hash_list;
241
242 for (i = 0; i < table->hash_size; i++) {
243 hash_list = &table->hash_table[i];
244 J_ASSERT(list_empty(hash_list));
240 } 245 }
241 246
242 journal->j_revoke = journal->j_revoke_table[1]; 247 kfree(table->hash_table);
248 kmem_cache_free(jbd2_revoke_table_cache, table);
249}
243 250
244 /* Check that the hash_size is a power of two */ 251/* Initialise the revoke table for a given journal to a given size. */
252int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
253{
254 J_ASSERT(journal->j_revoke_table[0] == NULL);
245 J_ASSERT(is_power_of_2(hash_size)); 255 J_ASSERT(is_power_of_2(hash_size));
246 256
247 journal->j_revoke->hash_size = hash_size; 257 journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
248 258 if (!journal->j_revoke_table[0])
249 journal->j_revoke->hash_shift = shift; 259 goto fail0;
250 260
251 journal->j_revoke->hash_table = 261 journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
252 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 262 if (!journal->j_revoke_table[1])
253 if (!journal->j_revoke->hash_table) { 263 goto fail1;
254 kfree(journal->j_revoke_table[0]->hash_table);
255 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
256 kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
257 journal->j_revoke = NULL;
258 return -ENOMEM;
259 }
260 264
261 for (tmp = 0; tmp < hash_size; tmp++) 265 journal->j_revoke = journal->j_revoke_table[1];
262 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
263 266
264 spin_lock_init(&journal->j_revoke_lock); 267 spin_lock_init(&journal->j_revoke_lock);
265 268
266 return 0; 269 return 0;
267}
268 270
269/* Destoy a journal's revoke table. The table must already be empty! */ 271fail1:
272 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
273fail0:
274 return -ENOMEM;
275}
270 276
277/* Destroy a journal's revoke table. The table must already be empty! */
271void jbd2_journal_destroy_revoke(journal_t *journal) 278void jbd2_journal_destroy_revoke(journal_t *journal)
272{ 279{
273 struct jbd2_revoke_table_s *table;
274 struct list_head *hash_list;
275 int i;
276
277 table = journal->j_revoke_table[0];
278 if (!table)
279 return;
280
281 for (i=0; i<table->hash_size; i++) {
282 hash_list = &table->hash_table[i];
283 J_ASSERT (list_empty(hash_list));
284 }
285
286 kfree(table->hash_table);
287 kmem_cache_free(jbd2_revoke_table_cache, table);
288 journal->j_revoke = NULL;
289
290 table = journal->j_revoke_table[1];
291 if (!table)
292 return;
293
294 for (i=0; i<table->hash_size; i++) {
295 hash_list = &table->hash_table[i];
296 J_ASSERT (list_empty(hash_list));
297 }
298
299 kfree(table->hash_table);
300 kmem_cache_free(jbd2_revoke_table_cache, table);
301 journal->j_revoke = NULL; 280 journal->j_revoke = NULL;
281 if (journal->j_revoke_table[0])
282 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
283 if (journal->j_revoke_table[1])
284 jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
302} 285}
303 286
304 287
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b9..d6e006e67804 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
618 goto done; 618 goto done;
619 619
620 /* 620 /*
621 * this is the first time this transaction is touching this buffer,
622 * reset the modified flag
623 */
624 jh->b_modified = 0;
625
626 /*
621 * If there is already a copy-out version of this buffer, then we don't 627 * If there is already a copy-out version of this buffer, then we don't
622 * need to make another one 628 * need to make another one
623 */ 629 */
@@ -690,7 +696,7 @@ repeat:
690 if (!frozen_buffer) { 696 if (!frozen_buffer) {
691 printk(KERN_EMERG 697 printk(KERN_EMERG
692 "%s: OOM for frozen_buffer\n", 698 "%s: OOM for frozen_buffer\n",
693 __FUNCTION__); 699 __func__);
694 JBUFFER_TRACE(jh, "oom!"); 700 JBUFFER_TRACE(jh, "oom!");
695 error = -ENOMEM; 701 error = -ENOMEM;
696 jbd_lock_bh_state(bh); 702 jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
829 835
830 if (jh->b_transaction == NULL) { 836 if (jh->b_transaction == NULL) {
831 jh->b_transaction = transaction; 837 jh->b_transaction = transaction;
838
839 /* first access by this transaction */
840 jh->b_modified = 0;
841
832 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 842 JBUFFER_TRACE(jh, "file as BJ_Reserved");
833 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 843 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
834 } else if (jh->b_transaction == journal->j_committing_transaction) { 844 } else if (jh->b_transaction == journal->j_committing_transaction) {
845 /* first access by this transaction */
846 jh->b_modified = 0;
847
835 JBUFFER_TRACE(jh, "set next transaction"); 848 JBUFFER_TRACE(jh, "set next transaction");
836 jh->b_next_transaction = transaction; 849 jh->b_next_transaction = transaction;
837 } 850 }
@@ -901,7 +914,7 @@ repeat:
901 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); 914 committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
902 if (!committed_data) { 915 if (!committed_data) {
903 printk(KERN_EMERG "%s: No memory for committed data\n", 916 printk(KERN_EMERG "%s: No memory for committed data\n",
904 __FUNCTION__); 917 __func__);
905 err = -ENOMEM; 918 err = -ENOMEM;
906 goto out; 919 goto out;
907 } 920 }
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 struct journal_head *jh; 1243 struct journal_head *jh;
1231 int drop_reserve = 0; 1244 int drop_reserve = 0;
1232 int err = 0; 1245 int err = 0;
1246 int was_modified = 0;
1233 1247
1234 BUFFER_TRACE(bh, "entry"); 1248 BUFFER_TRACE(bh, "entry");
1235 1249
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1248 goto not_jbd; 1262 goto not_jbd;
1249 } 1263 }
1250 1264
1265 /* keep track of wether or not this transaction modified us */
1266 was_modified = jh->b_modified;
1267
1251 /* 1268 /*
1252 * The buffer's going from the transaction, we must drop 1269 * The buffer's going from the transaction, we must drop
1253 * all references -bzzz 1270 * all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1265 1282
1266 JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 1283 JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
1267 1284
1268 drop_reserve = 1; 1285 /*
1286 * we only want to drop a reference if this transaction
1287 * modified the buffer
1288 */
1289 if (was_modified)
1290 drop_reserve = 1;
1269 1291
1270 /* 1292 /*
1271 * We are no longer going to journal this buffer. 1293 * We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1305 if (jh->b_next_transaction) { 1327 if (jh->b_next_transaction) {
1306 J_ASSERT(jh->b_next_transaction == transaction); 1328 J_ASSERT(jh->b_next_transaction == transaction);
1307 jh->b_next_transaction = NULL; 1329 jh->b_next_transaction = NULL;
1308 drop_reserve = 1; 1330
1331 /*
1332 * only drop a reference if this transaction modified
1333 * the buffer
1334 */
1335 if (was_modified)
1336 drop_reserve = 1;
1309 } 1337 }
1310 } 1338 }
1311 1339
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
1434 return err; 1462 return err;
1435} 1463}
1436 1464
1437/**int jbd2_journal_force_commit() - force any uncommitted transactions 1465/**
1466 * int jbd2_journal_force_commit() - force any uncommitted transactions
1438 * @journal: journal to force 1467 * @journal: journal to force
1439 * 1468 *
1440 * For synchronous operations: force any uncommitted transactions 1469 * For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2077 jh->b_transaction = jh->b_next_transaction; 2106 jh->b_transaction = jh->b_next_transaction;
2078 jh->b_next_transaction = NULL; 2107 jh->b_next_transaction = NULL;
2079 __jbd2_journal_file_buffer(jh, jh->b_transaction, 2108 __jbd2_journal_file_buffer(jh, jh->b_transaction,
2080 was_dirty ? BJ_Metadata : BJ_Reserved); 2109 jh->b_modified ? BJ_Metadata : BJ_Reserved);
2081 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 2110 J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
2082 2111
2083 if (was_dirty) 2112 if (was_dirty)
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index d58f845ccb85..c5e1450d79f9 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
46 46
47 47
48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, 48static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
49 struct jffs2_inode_cache *ic) 49 struct jffs2_inode_cache *ic)
50{ 50{
51 struct jffs2_full_dirent *fd; 51 struct jffs2_full_dirent *fd;
52 52
@@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
68 continue; 68 continue;
69 } 69 }
70 70
71 if (child_ic->nlink++ && fd->type == DT_DIR) { 71 if (fd->type == DT_DIR) {
72 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", 72 if (child_ic->pino_nlink) {
73 fd->name, fd->ino, ic->ino); 73 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
74 /* TODO: What do we do about it? */ 74 fd->name, fd->ino, ic->ino);
75 } 75 /* TODO: What do we do about it? */
76 } else {
77 child_ic->pino_nlink = ic->ino;
78 }
79 } else
80 child_ic->pino_nlink++;
81
76 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); 82 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
77 /* Can't free scan_dents so far. We might need them in pass 2 */ 83 /* Can't free scan_dents so far. We might need them in pass 2 */
78 } 84 }
@@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
125 dbg_fsbuild("pass 2 starting\n"); 131 dbg_fsbuild("pass 2 starting\n");
126 132
127 for_each_inode(i, c, ic) { 133 for_each_inode(i, c, ic) {
128 if (ic->nlink) 134 if (ic->pino_nlink)
129 continue; 135 continue;
130 136
131 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds); 137 jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
@@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
232 /* Reduce nlink of the child. If it's now zero, stick it on the 238 /* Reduce nlink of the child. If it's now zero, stick it on the
233 dead_fds list to be cleaned up later. Else just free the fd */ 239 dead_fds list to be cleaned up later. Else just free the fd */
234 240
235 child_ic->nlink--; 241 if (fd->type == DT_DIR)
242 child_ic->pino_nlink = 0;
243 else
244 child_ic->pino_nlink--;
236 245
237 if (!child_ic->nlink) { 246 if (!child_ic->pino_nlink) {
238 dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n", 247 dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
239 fd->ino, fd->name); 248 fd->ino, fd->name);
240 fd->next = *dead_fds; 249 fd->next = *dead_fds;
241 *dead_fds = fd; 250 *dead_fds = fd;
242 } else { 251 } else {
243 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n", 252 dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
244 fd->ino, fd->name, child_ic->nlink); 253 fd->ino, fd->name, child_ic->pino_nlink);
245 jffs2_free_full_dirent(fd); 254 jffs2_free_full_dirent(fd);
246 } 255 }
247 } 256 }
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 9645275023e6..a113ecc3bafe 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -82,28 +82,28 @@
82 do { \ 82 do { \
83 printk(JFFS2_ERR_MSG_PREFIX \ 83 printk(JFFS2_ERR_MSG_PREFIX \
84 " (%d) %s: " fmt, task_pid_nr(current), \ 84 " (%d) %s: " fmt, task_pid_nr(current), \
85 __FUNCTION__ , ##__VA_ARGS__); \ 85 __func__ , ##__VA_ARGS__); \
86 } while(0) 86 } while(0)
87 87
88#define JFFS2_WARNING(fmt, ...) \ 88#define JFFS2_WARNING(fmt, ...) \
89 do { \ 89 do { \
90 printk(JFFS2_WARN_MSG_PREFIX \ 90 printk(JFFS2_WARN_MSG_PREFIX \
91 " (%d) %s: " fmt, task_pid_nr(current), \ 91 " (%d) %s: " fmt, task_pid_nr(current), \
92 __FUNCTION__ , ##__VA_ARGS__); \ 92 __func__ , ##__VA_ARGS__); \
93 } while(0) 93 } while(0)
94 94
95#define JFFS2_NOTICE(fmt, ...) \ 95#define JFFS2_NOTICE(fmt, ...) \
96 do { \ 96 do { \
97 printk(JFFS2_NOTICE_MSG_PREFIX \ 97 printk(JFFS2_NOTICE_MSG_PREFIX \
98 " (%d) %s: " fmt, task_pid_nr(current), \ 98 " (%d) %s: " fmt, task_pid_nr(current), \
99 __FUNCTION__ , ##__VA_ARGS__); \ 99 __func__ , ##__VA_ARGS__); \
100 } while(0) 100 } while(0)
101 101
102#define JFFS2_DEBUG(fmt, ...) \ 102#define JFFS2_DEBUG(fmt, ...) \
103 do { \ 103 do { \
104 printk(JFFS2_DBG_MSG_PREFIX \ 104 printk(JFFS2_DBG_MSG_PREFIX \
105 " (%d) %s: " fmt, task_pid_nr(current), \ 105 " (%d) %s: " fmt, task_pid_nr(current), \
106 __FUNCTION__ , ##__VA_ARGS__); \ 106 __func__ , ##__VA_ARGS__); \
107 } while(0) 107 } while(0)
108 108
109/* 109/*
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c63e7a96af0d..c0c141f6fde1 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
208 f = JFFS2_INODE_INFO(inode); 208 f = JFFS2_INODE_INFO(inode);
209 dir_f = JFFS2_INODE_INFO(dir_i); 209 dir_f = JFFS2_INODE_INFO(dir_i);
210 210
211 /* jffs2_do_create() will want to lock it, _after_ reserving
212 space and taking c-alloc_sem. If we keep it locked here,
213 lockdep gets unhappy (although it's a false positive;
214 nothing else will be looking at this inode yet so there's
215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem);
217
211 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri,
212 dentry->d_name.name, dentry->d_name.len); 219 dentry->d_name.name, dentry->d_name.len);
213 if (ret) 220 if (ret)
@@ -219,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
219 d_instantiate(dentry, inode); 226 d_instantiate(dentry, inode);
220 227
221 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n", 228 D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
222 inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages)); 229 inode->i_ino, inode->i_mode, inode->i_nlink,
230 f->inocache->pino_nlink, inode->i_mapping->nrpages));
223 return 0; 231 return 0;
224 232
225 fail: 233 fail:
@@ -243,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
243 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name, 251 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
244 dentry->d_name.len, dead_f, now); 252 dentry->d_name.len, dead_f, now);
245 if (dead_f->inocache) 253 if (dead_f->inocache)
246 dentry->d_inode->i_nlink = dead_f->inocache->nlink; 254 dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
247 if (!ret) 255 if (!ret)
248 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 256 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
249 return ret; 257 return ret;
@@ -276,7 +284,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
276 284
277 if (!ret) { 285 if (!ret) {
278 mutex_lock(&f->sem); 286 mutex_lock(&f->sem);
279 old_dentry->d_inode->i_nlink = ++f->inocache->nlink; 287 old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
280 mutex_unlock(&f->sem); 288 mutex_unlock(&f->sem);
281 d_instantiate(dentry, old_dentry->d_inode); 289 d_instantiate(dentry, old_dentry->d_inode);
282 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 290 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -493,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
493 501
494 inode->i_op = &jffs2_dir_inode_operations; 502 inode->i_op = &jffs2_dir_inode_operations;
495 inode->i_fop = &jffs2_dir_operations; 503 inode->i_fop = &jffs2_dir_operations;
496 /* Directories get nlink 2 at start */
497 inode->i_nlink = 2;
498 504
499 f = JFFS2_INODE_INFO(inode); 505 f = JFFS2_INODE_INFO(inode);
500 506
507 /* Directories get nlink 2 at start */
508 inode->i_nlink = 2;
509 /* but ic->pino_nlink is the parent ino# */
510 f->inocache->pino_nlink = dir_i->i_ino;
511
501 ri->data_crc = cpu_to_je32(0); 512 ri->data_crc = cpu_to_je32(0);
502 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 513 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
503 514
@@ -594,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
594 605
595static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) 606static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
596{ 607{
608 struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
609 struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
597 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode); 610 struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
598 struct jffs2_full_dirent *fd; 611 struct jffs2_full_dirent *fd;
599 int ret; 612 int ret;
613 uint32_t now = get_seconds();
600 614
601 for (fd = f->dents ; fd; fd = fd->next) { 615 for (fd = f->dents ; fd; fd = fd->next) {
602 if (fd->ino) 616 if (fd->ino)
603 return -ENOTEMPTY; 617 return -ENOTEMPTY;
604 } 618 }
605 ret = jffs2_unlink(dir_i, dentry); 619
606 if (!ret) 620 ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
621 dentry->d_name.len, f, now);
622 if (!ret) {
623 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
624 clear_nlink(dentry->d_inode);
607 drop_nlink(dir_i); 625 drop_nlink(dir_i);
626 }
608 return ret; 627 return ret;
609} 628}
610 629
@@ -817,7 +836,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
817 inode which didn't exist. */ 836 inode which didn't exist. */
818 if (victim_f->inocache) { 837 if (victim_f->inocache) {
819 mutex_lock(&victim_f->sem); 838 mutex_lock(&victim_f->sem);
820 victim_f->inocache->nlink--; 839 if (S_ISDIR(new_dentry->d_inode->i_mode))
840 victim_f->inocache->pino_nlink = 0;
841 else
842 victim_f->inocache->pino_nlink--;
821 mutex_unlock(&victim_f->sem); 843 mutex_unlock(&victim_f->sem);
822 } 844 }
823 } 845 }
@@ -838,8 +860,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
838 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode); 860 struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
839 mutex_lock(&f->sem); 861 mutex_lock(&f->sem);
840 inc_nlink(old_dentry->d_inode); 862 inc_nlink(old_dentry->d_inode);
841 if (f->inocache) 863 if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode))
842 f->inocache->nlink++; 864 f->inocache->pino_nlink++;
843 mutex_unlock(&f->sem); 865 mutex_unlock(&f->sem);
844 866
845 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); 867 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 25a640e566d3..dddb2a6c9e2c 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
294 break; 294 break;
295#endif 295#endif
296 default: 296 default:
297 if (ic->nodes == (void *)ic && ic->nlink == 0) 297 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
298 jffs2_del_ino_cache(c, ic); 298 jffs2_del_ino_cache(c, ic);
299 } 299 }
300} 300}
@@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
332 if (c->mtd->point) { 332 if (c->mtd->point) {
333 unsigned long *wordebuf; 333 unsigned long *wordebuf;
334 334
335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf); 335 ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
336 &retlen, &ebuf, NULL);
336 if (ret) { 337 if (ret) {
337 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret)); 338 D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
338 goto do_flash_read; 339 goto do_flash_read;
@@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
340 if (retlen < c->sector_size) { 341 if (retlen < c->sector_size) {
341 /* Don't muck about if it won't let us point to the whole erase sector */ 342 /* Don't muck about if it won't let us point to the whole erase sector */
342 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen)); 343 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
343 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen); 344 c->mtd->unpoint(c->mtd, jeb->offset, retlen);
344 goto do_flash_read; 345 goto do_flash_read;
345 } 346 }
346 wordebuf = ebuf-sizeof(*wordebuf); 347 wordebuf = ebuf-sizeof(*wordebuf);
@@ -349,7 +350,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
349 if (*++wordebuf != ~0) 350 if (*++wordebuf != ~0)
350 break; 351 break;
351 } while(--retlen); 352 } while(--retlen);
352 c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size); 353 c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
353 if (retlen) { 354 if (retlen) {
354 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n", 355 printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
355 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf)); 356 *wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3eb1c84b0a33..086c43830221 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -273,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime)); 273 inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime)); 274 inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
275 275
276 inode->i_nlink = f->inocache->nlink; 276 inode->i_nlink = f->inocache->pino_nlink;
277 277
278 inode->i_blocks = (inode->i_size + 511) >> 9; 278 inode->i_blocks = (inode->i_size + 511) >> 9;
279 279
@@ -286,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
286 case S_IFDIR: 286 case S_IFDIR:
287 { 287 {
288 struct jffs2_full_dirent *fd; 288 struct jffs2_full_dirent *fd;
289 inode->i_nlink = 2; /* parent and '.' */
289 290
290 for (fd=f->dents; fd; fd = fd->next) { 291 for (fd=f->dents; fd; fd = fd->next) {
291 if (fd->type == DT_DIR && fd->ino) 292 if (fd->type == DT_DIR && fd->ino)
292 inc_nlink(inode); 293 inc_nlink(inode);
293 } 294 }
294 /* and '..' */
295 inc_nlink(inode);
296 /* Root dir gets i_nlink 3 for some reason */ 295 /* Root dir gets i_nlink 3 for some reason */
297 if (inode->i_ino == 1) 296 if (inode->i_ino == 1)
298 inc_nlink(inode); 297 inc_nlink(inode);
@@ -586,11 +585,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c,
586} 585}
587 586
588struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 587struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
589 int inum, int nlink) 588 int inum, int unlinked)
590{ 589{
591 struct inode *inode; 590 struct inode *inode;
592 struct jffs2_inode_cache *ic; 591 struct jffs2_inode_cache *ic;
593 if (!nlink) { 592
593 if (unlinked) {
594 /* The inode has zero nlink but its nodes weren't yet marked 594 /* The inode has zero nlink but its nodes weren't yet marked
595 obsolete. This has to be because we're still waiting for 595 obsolete. This has to be because we're still waiting for
596 the final (close() and) iput() to happen. 596 the final (close() and) iput() to happen.
@@ -638,8 +638,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
638 return ERR_CAST(inode); 638 return ERR_CAST(inode);
639 } 639 }
640 if (is_bad_inode(inode)) { 640 if (is_bad_inode(inode)) {
641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n", 641 printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n",
642 inum, nlink); 642 inum, unlinked);
643 /* NB. This will happen again. We need to do something appropriate here. */ 643 /* NB. This will happen again. We need to do something appropriate here. */
644 iput(inode); 644 iput(inode);
645 return ERR_PTR(-EIO); 645 return ERR_PTR(-EIO);
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index bad005664e30..090c556ffed2 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
161 continue; 161 continue;
162 } 162 }
163 163
164 if (!ic->nlink) { 164 if (!ic->pino_nlink) {
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n", 165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
166 ic->ino)); 166 ic->ino));
167 spin_unlock(&c->inocache_lock); 167 spin_unlock(&c->inocache_lock);
168 jffs2_xattr_delete_inode(c, ic); 168 jffs2_xattr_delete_inode(c, ic);
@@ -398,10 +398,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
398 it's vaguely possible. */ 398 it's vaguely possible. */
399 399
400 inum = ic->ino; 400 inum = ic->ino;
401 nlink = ic->nlink; 401 nlink = ic->pino_nlink;
402 spin_unlock(&c->inocache_lock); 402 spin_unlock(&c->inocache_lock);
403 403
404 f = jffs2_gc_fetch_inode(c, inum, nlink); 404 f = jffs2_gc_fetch_inode(c, inum, !nlink);
405 if (IS_ERR(f)) { 405 if (IS_ERR(f)) {
406 ret = PTR_ERR(f); 406 ret = PTR_ERR(f);
407 goto release_sem; 407 goto release_sem;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 8219df6eb6d8..1750445556c3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -177,7 +177,10 @@ struct jffs2_inode_cache {
177#ifdef CONFIG_JFFS2_FS_XATTR 177#ifdef CONFIG_JFFS2_FS_XATTR
178 struct jffs2_xattr_ref *xref; 178 struct jffs2_xattr_ref *xref;
179#endif 179#endif
180 int nlink; 180 uint32_t pino_nlink; /* Directories store parent inode
181 here; other inodes store nlink.
182 Zero always means that it's
183 completely unlinked. */
181}; 184};
182 185
183/* Inode states for 'state' above. We need the 'GC' state to prevent 186/* Inode states for 'state' above. We need the 'GC' state to prevent
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 9df8f3ef20df..a9bf9603c1ba 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -709,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
709 break; 709 break;
710#endif 710#endif
711 default: 711 default:
712 if (ic->nodes == (void *)ic && ic->nlink == 0) 712 if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
713 jffs2_del_ino_cache(c, ic); 713 jffs2_del_ino_cache(c, ic);
714 break; 714 break;
715 } 715 }
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 1b10d2594092..2cc866cf134f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
187void jffs2_gc_release_inode(struct jffs2_sb_info *c, 187void jffs2_gc_release_inode(struct jffs2_sb_info *c,
188 struct jffs2_inode_info *f); 188 struct jffs2_inode_info *f);
189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, 189struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
190 int inum, int nlink); 190 int inum, int unlinked);
191 191
192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, 192unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
193 struct jffs2_inode_info *f, 193 struct jffs2_inode_info *f,
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 4cb4d76de07f..6ca08ad887c0 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(), 63 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
64 * adding and jffs2_flash_read_end() interface. */ 64 * adding and jffs2_flash_read_end() interface. */
65 if (c->mtd->point) { 65 if (c->mtd->point) {
66 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer); 66 err = c->mtd->point(c->mtd, ofs, len, &retlen,
67 (void **)&buffer, NULL);
67 if (!err && retlen < len) { 68 if (!err && retlen < len) {
68 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize); 69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
69 c->mtd->unpoint(c->mtd, buffer, ofs, retlen); 70 c->mtd->unpoint(c->mtd, ofs, retlen);
70 } else if (err) 71 } else if (err)
71 JFFS2_WARNING("MTD point failed: error code %d.\n", err); 72 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
72 else 73 else
@@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
100 kfree(buffer); 101 kfree(buffer);
101#ifndef __ECOS 102#ifndef __ECOS
102 else 103 else
103 c->mtd->unpoint(c->mtd, buffer, ofs, len); 104 c->mtd->unpoint(c->mtd, ofs, len);
104#endif 105#endif
105 106
106 if (crc != tn->data_crc) { 107 if (crc != tn->data_crc) {
@@ -136,7 +137,7 @@ free_out:
136 kfree(buffer); 137 kfree(buffer);
137#ifndef __ECOS 138#ifndef __ECOS
138 else 139 else
139 c->mtd->unpoint(c->mtd, buffer, ofs, len); 140 c->mtd->unpoint(c->mtd, ofs, len);
140#endif 141#endif
141 return err; 142 return err;
142} 143}
@@ -1123,7 +1124,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
1123 size_t retlen; 1124 size_t retlen;
1124 int ret; 1125 int ret;
1125 1126
1126 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); 1127 dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino,
1128 f->inocache->pino_nlink);
1127 1129
1128 memset(&rii, 0, sizeof(rii)); 1130 memset(&rii, 0, sizeof(rii));
1129 1131
@@ -1358,7 +1360,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
1358 } 1360 }
1359 dbg_readinode("creating inocache for root inode\n"); 1361 dbg_readinode("creating inocache for root inode\n");
1360 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache)); 1362 memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
1361 f->inocache->ino = f->inocache->nlink = 1; 1363 f->inocache->ino = f->inocache->pino_nlink = 1;
1362 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 1364 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
1363 f->inocache->state = INO_STATE_READING; 1365 f->inocache->state = INO_STATE_READING;
1364 jffs2_add_ino_cache(c, f->inocache); 1366 jffs2_add_ino_cache(c, f->inocache);
@@ -1401,7 +1403,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1401 jffs2_clear_acl(f); 1403 jffs2_clear_acl(f);
1402 jffs2_xattr_delete_inode(c, f->inocache); 1404 jffs2_xattr_delete_inode(c, f->inocache);
1403 mutex_lock(&f->sem); 1405 mutex_lock(&f->sem);
1404 deleted = f->inocache && !f->inocache->nlink; 1406 deleted = f->inocache && !f->inocache->pino_nlink;
1405 1407
1406 if (f->inocache && f->inocache->state != INO_STATE_CHECKING) 1408 if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
1407 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING); 1409 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 272872d27fd5..1d437de1e9a8 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
97 size_t pointlen; 97 size_t pointlen;
98 98
99 if (c->mtd->point) { 99 if (c->mtd->point) {
100 ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf); 100 ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
101 (void **)&flashbuf, NULL);
101 if (!ret && pointlen < c->mtd->size) { 102 if (!ret && pointlen < c->mtd->size) {
102 /* Don't muck about if it won't let us point to the whole flash */ 103 /* Don't muck about if it won't let us point to the whole flash */
103 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen)); 104 D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
104 c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen); 105 c->mtd->unpoint(c->mtd, 0, pointlen);
105 flashbuf = NULL; 106 flashbuf = NULL;
106 } 107 }
107 if (ret) 108 if (ret)
@@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
267 kfree(flashbuf); 268 kfree(flashbuf);
268#ifndef __ECOS 269#ifndef __ECOS
269 else 270 else
270 c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size); 271 c->mtd->unpoint(c->mtd, 0, c->mtd->size);
271#endif 272#endif
272 if (s) 273 if (s)
273 kfree(s); 274 kfree(s);
@@ -940,7 +941,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
940 ic->nodes = (void *)ic; 941 ic->nodes = (void *)ic;
941 jffs2_add_ino_cache(c, ic); 942 jffs2_add_ino_cache(c, ic);
942 if (ino == 1) 943 if (ino == 1)
943 ic->nlink = 1; 944 ic->pino_nlink = 1;
944 return ic; 945 return ic;
945} 946}
946 947
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f3353df178e7..7da69eae49e4 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep;
31 31
32static struct inode *jffs2_alloc_inode(struct super_block *sb) 32static struct inode *jffs2_alloc_inode(struct super_block *sb)
33{ 33{
34 struct jffs2_inode_info *ei; 34 struct jffs2_inode_info *f;
35 ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); 35
36 if (!ei) 36 f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
37 if (!f)
37 return NULL; 38 return NULL;
38 return &ei->vfs_inode; 39 return &f->vfs_inode;
39} 40}
40 41
41static void jffs2_destroy_inode(struct inode *inode) 42static void jffs2_destroy_inode(struct inode *inode)
@@ -45,10 +46,10 @@ static void jffs2_destroy_inode(struct inode *inode)
45 46
46static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) 47static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
47{ 48{
48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; 49 struct jffs2_inode_info *f = foo;
49 50
50 mutex_init(&ei->sem); 51 mutex_init(&f->sem);
51 inode_init_once(&ei->vfs_inode); 52 inode_init_once(&f->vfs_inode);
52} 53}
53 54
54static int jffs2_sync_fs(struct super_block *sb, int wait) 55static int jffs2_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 8de52b607678..0e78b00035e4 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
494 /* If it's an in-core inode, then we have to adjust any 494 /* If it's an in-core inode, then we have to adjust any
495 full_dirent or full_dnode structure to point to the 495 full_dirent or full_dnode structure to point to the
496 new version instead of the old */ 496 new version instead of the old */
497 f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink); 497 f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink);
498 if (IS_ERR(f)) { 498 if (IS_ERR(f)) {
499 /* Should never happen; it _must_ be present */ 499 /* Should never happen; it _must_ be present */
500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n", 500 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 665fce9797d3..ca29440e9435 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -19,7 +19,8 @@
19#include "compr.h" 19#include "compr.h"
20 20
21 21
22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri) 22int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
23 uint32_t mode, struct jffs2_raw_inode *ri)
23{ 24{
24 struct jffs2_inode_cache *ic; 25 struct jffs2_inode_cache *ic;
25 26
@@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
31 memset(ic, 0, sizeof(*ic)); 32 memset(ic, 0, sizeof(*ic));
32 33
33 f->inocache = ic; 34 f->inocache = ic;
34 f->inocache->nlink = 1; 35 f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */
35 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache; 36 f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
36 f->inocache->state = INO_STATE_PRESENT; 37 f->inocache->state = INO_STATE_PRESENT;
37 38
@@ -438,10 +439,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
438 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL, 439 ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
439 JFFS2_SUMMARY_INODE_SIZE); 440 JFFS2_SUMMARY_INODE_SIZE);
440 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen)); 441 D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen));
441 if (ret) { 442 if (ret)
442 mutex_unlock(&f->sem);
443 return ret; 443 return ret;
444 } 444
445 mutex_lock(&f->sem);
445 446
446 ri->data_crc = cpu_to_je32(0); 447 ri->data_crc = cpu_to_je32(0);
447 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8)); 448 ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
@@ -635,9 +636,9 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
635 jffs2_mark_node_obsolete(c, fd->raw); 636 jffs2_mark_node_obsolete(c, fd->raw);
636 jffs2_free_full_dirent(fd); 637 jffs2_free_full_dirent(fd);
637 } 638 }
638 } 639 dead_f->inocache->pino_nlink = 0;
639 640 } else
640 dead_f->inocache->nlink--; 641 dead_f->inocache->pino_nlink--;
641 /* NB: Caller must set inode nlink if appropriate */ 642 /* NB: Caller must set inode nlink if appropriate */
642 mutex_unlock(&dead_f->sem); 643 mutex_unlock(&dead_f->sem);
643 } 644 }
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index e48665984cb3..082e844ab2db 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_
82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) 82static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
83{ 83{
84 /* must be called under down_write(xattr_sem) */ 84 /* must be called under down_write(xattr_sem) */
85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version)); 85 D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version));
86 if (xd->xname) { 86 if (xd->xname) {
87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len); 87 c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
88 kfree(xd->xname); 88 kfree(xd->xname);
@@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache
592 When an inode with XATTR is removed, those XATTRs must be removed. */ 592 When an inode with XATTR is removed, those XATTRs must be removed. */
593 struct jffs2_xattr_ref *ref, *_ref; 593 struct jffs2_xattr_ref *ref, *_ref;
594 594
595 if (!ic || ic->nlink > 0) 595 if (!ic || ic->pino_nlink > 0)
596 return; 596 return;
597 597
598 down_write(&c->xattr_sem); 598 down_write(&c->xattr_sem);
@@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
829 ref->xd and ref->ic are not valid yet. */ 829 ref->xd and ref->ic are not valid yet. */
830 xd = jffs2_find_xattr_datum(c, ref->xid); 830 xd = jffs2_find_xattr_datum(c, ref->xid);
831 ic = jffs2_get_ino_cache(c, ref->ino); 831 ic = jffs2_get_ino_cache(c, ref->ino);
832 if (!xd || !ic || !ic->nlink) { 832 if (!xd || !ic || !ic->pino_nlink) {
833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n", 833 dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
834 ref->ino, ref->xid, ref->xseqno); 834 ref->ino, ref->xid, ref->xseqno);
835 ref->xseqno |= XREF_DELETE_MARKER; 835 ref->xseqno |= XREF_DELETE_MARKER;
@@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE); 1252 rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
1253 if (rc) { 1253 if (rc) {
1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n", 1254 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
1255 __FUNCTION__, rc, totlen); 1255 __func__, rc, totlen);
1256 rc = rc ? rc : -EBADFD; 1256 rc = rc ? rc : -EBADFD;
1257 goto out; 1257 goto out;
1258 } 1258 }
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 887f5759e536..bf6ab19b86ee 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -89,7 +89,7 @@ void jfs_proc_init(void)
89{ 89{
90 int i; 90 int i;
91 91
92 if (!(base = proc_mkdir("jfs", proc_root_fs))) 92 if (!(base = proc_mkdir("fs/jfs", NULL)))
93 return; 93 return;
94 base->owner = THIS_MODULE; 94 base->owner = THIS_MODULE;
95 95
@@ -109,7 +109,7 @@ void jfs_proc_clean(void)
109 if (base) { 109 if (base) {
110 for (i = 0; i < NPROCENT; i++) 110 for (i = 0; i < NPROCENT; i++)
111 remove_proc_entry(Entries[i].name, base); 111 remove_proc_entry(Entries[i].name, base);
112 remove_proc_entry("jfs", proc_root_fs); 112 remove_proc_entry("fs/jfs", NULL);
113 } 113 }
114} 114}
115 115
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 40b16f23e49a..5df517b81f3f 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -573,7 +573,7 @@ again:
573 /* Ensure the resulting lock will get added to granted list */ 573 /* Ensure the resulting lock will get added to granted list */
574 fl->fl_flags |= FL_SLEEP; 574 fl->fl_flags |= FL_SLEEP;
575 if (do_vfs_lock(fl) < 0) 575 if (do_vfs_lock(fl) < 0)
576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); 576 printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
577 up_read(&host->h_rwsem); 577 up_read(&host->h_rwsem);
578 fl->fl_flags = fl_flags; 578 fl->fl_flags = fl_flags;
579 status = 0; 579 status = 0;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4d81553d2948..81aca859bfde 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
752 return; 752 return;
753 default: 753 default:
754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n", 754 printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
755 -error, __FUNCTION__); 755 -error, __func__);
756 nlmsvc_insert_block(block, 10 * HZ); 756 nlmsvc_insert_block(block, 10 * HZ);
757 nlmsvc_release_block(block); 757 nlmsvc_release_block(block);
758 return; 758 return;
diff --git a/fs/locks.c b/fs/locks.c
index 44d9a6a7ec50..663c069b59b3 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
116 116
117#include <linux/capability.h> 117#include <linux/capability.h>
118#include <linux/file.h> 118#include <linux/file.h>
119#include <linux/fdtable.h>
119#include <linux/fs.h> 120#include <linux/fs.h>
120#include <linux/init.h> 121#include <linux/init.h>
121#include <linux/module.h> 122#include <linux/module.h>
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 2d4358c59f68..05ff4f1d7026 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -609,7 +609,7 @@ error_inode:
609 if (corrupt < 0) { 609 if (corrupt < 0) {
610 fat_fs_panic(new_dir->i_sb, 610 fat_fs_panic(new_dir->i_sb,
611 "%s: Filesystem corrupted (i_pos %lld)", 611 "%s: Filesystem corrupted (i_pos %lld)",
612 __FUNCTION__, sinfo.i_pos); 612 __func__, sinfo.i_pos);
613 } 613 }
614 goto out; 614 goto out;
615} 615}
diff --git a/fs/namei.c b/fs/namei.c
index e179f71bfcb0..32fd9655485b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -30,6 +30,7 @@
30#include <linux/capability.h> 30#include <linux/capability.h>
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h>
33#include <asm/namei.h> 34#include <asm/namei.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35 36
@@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
281 if (retval) 282 if (retval)
282 return retval; 283 return retval;
283 284
285 retval = devcgroup_inode_permission(inode, mask);
286 if (retval)
287 return retval;
288
284 return security_inode_permission(inode, mask, nd); 289 return security_inode_permission(inode, mask, nd);
285} 290}
286 291
@@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2028 if (!dir->i_op || !dir->i_op->mknod) 2033 if (!dir->i_op || !dir->i_op->mknod)
2029 return -EPERM; 2034 return -EPERM;
2030 2035
2036 error = devcgroup_inode_mknod(mode, dev);
2037 if (error)
2038 return error;
2039
2031 error = security_inode_mknod(dir, dentry, mode, dev); 2040 error = security_inode_mknod(dir, dentry, mode, dev);
2032 if (error) 2041 if (error)
2033 return error; 2042 return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index fe376805cf5f..4fc302c2a0e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1176,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd)
1176#endif 1176#endif
1177} 1177}
1178 1178
1179static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
1180{
1181 while (1) {
1182 if (d == dentry)
1183 return 1;
1184 if (d == NULL || d == d->d_parent)
1185 return 0;
1186 d = d->d_parent;
1187 }
1188}
1189
1190struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1179struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1191 int flag) 1180 int flag)
1192{ 1181{
@@ -1203,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1203 1192
1204 p = mnt; 1193 p = mnt;
1205 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1194 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1206 if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) 1195 if (!is_subdir(r->mnt_mountpoint, dentry))
1207 continue; 1196 continue;
1208 1197
1209 for (s = r; s; s = next_mnt(s, r)) { 1198 for (s = r; s; s = next_mnt(s, r)) {
@@ -2340,10 +2329,10 @@ void __init mnt_init(void)
2340 err = sysfs_init(); 2329 err = sysfs_init();
2341 if (err) 2330 if (err)
2342 printk(KERN_WARNING "%s: sysfs_init error: %d\n", 2331 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
2343 __FUNCTION__, err); 2332 __func__, err);
2344 fs_kobj = kobject_create_and_add("fs", NULL); 2333 fs_kobj = kobject_create_and_add("fs", NULL);
2345 if (!fs_kobj) 2334 if (!fs_kobj)
2346 printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__); 2335 printk(KERN_WARNING "%s: kobj create error\n", __func__);
2347 init_rootfs(); 2336 init_rootfs();
2348 init_mount_tree(); 2337 init_mount_tree();
2349} 2338}
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index df6d60bdfcd3..97645f112114 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction
102} 102}
103 103
104static inline char * 104static inline char *
105 ncp_reply_data(struct ncp_server *server, int offset) 105ncp_reply_data(struct ncp_server *server, int offset)
106{ 106{
107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]); 107 return &(server->packet[sizeof(struct ncp_reply_header) + offset]);
108} 108}
109 109
110static inline __u8 BVAL(void* data) 110static inline u8 BVAL(void *data)
111{ 111{
112 return get_unaligned((__u8*)data); 112 return *(u8 *)data;
113} 113}
114 114
115static __u8 115static u8 ncp_reply_byte(struct ncp_server *server, int offset)
116 ncp_reply_byte(struct ncp_server *server, int offset)
117{ 116{
118 return get_unaligned((__u8 *) ncp_reply_data(server, offset)); 117 return *(u8 *)ncp_reply_data(server, offset);
119} 118}
120 119
121static inline __u16 WVAL_LH(void* data) 120static inline u16 WVAL_LH(void *data)
122{ 121{
123 return le16_to_cpu(get_unaligned((__le16*)data)); 122 return get_unaligned_le16(data);
124} 123}
125 124
126static __u16 125static u16
127 ncp_reply_le16(struct ncp_server *server, int offset) 126ncp_reply_le16(struct ncp_server *server, int offset)
128{ 127{
129 return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); 128 return get_unaligned_le16(ncp_reply_data(server, offset));
130} 129}
131 130
132static __u16 131static u16
133 ncp_reply_be16(struct ncp_server *server, int offset) 132ncp_reply_be16(struct ncp_server *server, int offset)
134{ 133{
135 return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); 134 return get_unaligned_be16(ncp_reply_data(server, offset));
136} 135}
137 136
138static inline __u32 DVAL_LH(void* data) 137static inline u32 DVAL_LH(void *data)
139{ 138{
140 return le32_to_cpu(get_unaligned((__le32*)data)); 139 return get_unaligned_le32(data);
141} 140}
142 141
143static __le32 142static __le32
144 ncp_reply_dword(struct ncp_server *server, int offset) 143ncp_reply_dword(struct ncp_server *server, int offset)
145{ 144{
146 return get_unaligned((__le32 *) ncp_reply_data(server, offset)); 145 return get_unaligned((__le32 *)ncp_reply_data(server, offset));
147} 146}
148 147
149static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { 148static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) {
@@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
1006 result = ncp_request2(server, 72, bounce, bufsize); 1005 result = ncp_request2(server, 72, bounce, bufsize);
1007 ncp_unlock_server(server); 1006 ncp_unlock_server(server);
1008 if (!result) { 1007 if (!result) {
1009 int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + 1008 int len = get_unaligned_be16((char *)bounce +
1010 sizeof(struct ncp_reply_header)))); 1009 sizeof(struct ncp_reply_header));
1011 result = -EIO; 1010 result = -EIO;
1012 if (len <= to_read) { 1011 if (len <= to_read) {
1013 char* source; 1012 char* source;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f2f3b284e6dd..89ac5bb0401c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = {
1321 .read = seq_read, 1321 .read = seq_read,
1322 .llseek = seq_lseek, 1322 .llseek = seq_lseek,
1323 .release = seq_release, 1323 .release = seq_release,
1324 .owner = THIS_MODULE,
1324}; 1325};
1325 1326
1326static int nfs_volume_list_open(struct inode *inode, struct file *file); 1327static int nfs_volume_list_open(struct inode *inode, struct file *file);
@@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = {
1341 .read = seq_read, 1342 .read = seq_read,
1342 .llseek = seq_lseek, 1343 .llseek = seq_lseek,
1343 .release = seq_release, 1344 .release = seq_release,
1345 .owner = THIS_MODULE,
1344}; 1346};
1345 1347
1346/* 1348/*
@@ -1500,33 +1502,29 @@ int __init nfs_fs_proc_init(void)
1500{ 1502{
1501 struct proc_dir_entry *p; 1503 struct proc_dir_entry *p;
1502 1504
1503 proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); 1505 proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
1504 if (!proc_fs_nfs) 1506 if (!proc_fs_nfs)
1505 goto error_0; 1507 goto error_0;
1506 1508
1507 proc_fs_nfs->owner = THIS_MODULE; 1509 proc_fs_nfs->owner = THIS_MODULE;
1508 1510
1509 /* a file of servers with which we're dealing */ 1511 /* a file of servers with which we're dealing */
1510 p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); 1512 p = proc_create("servers", S_IFREG|S_IRUGO,
1513 proc_fs_nfs, &nfs_server_list_fops);
1511 if (!p) 1514 if (!p)
1512 goto error_1; 1515 goto error_1;
1513 1516
1514 p->proc_fops = &nfs_server_list_fops;
1515 p->owner = THIS_MODULE;
1516
1517 /* a file of volumes that we have mounted */ 1517 /* a file of volumes that we have mounted */
1518 p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); 1518 p = proc_create("volumes", S_IFREG|S_IRUGO,
1519 proc_fs_nfs, &nfs_volume_list_fops);
1519 if (!p) 1520 if (!p)
1520 goto error_2; 1521 goto error_2;
1521
1522 p->proc_fops = &nfs_volume_list_fops;
1523 p->owner = THIS_MODULE;
1524 return 0; 1522 return 0;
1525 1523
1526error_2: 1524error_2:
1527 remove_proc_entry("servers", proc_fs_nfs); 1525 remove_proc_entry("servers", proc_fs_nfs);
1528error_1: 1526error_1:
1529 remove_proc_entry("nfsfs", proc_root_fs); 1527 remove_proc_entry("fs/nfsfs", NULL);
1530error_0: 1528error_0:
1531 return -ENOMEM; 1529 return -ENOMEM;
1532} 1530}
@@ -1538,7 +1536,7 @@ void nfs_fs_proc_exit(void)
1538{ 1536{
1539 remove_proc_entry("volumes", proc_fs_nfs); 1537 remove_proc_entry("volumes", proc_fs_nfs);
1540 remove_proc_entry("servers", proc_fs_nfs); 1538 remove_proc_entry("servers", proc_fs_nfs);
1541 remove_proc_entry("nfsfs", proc_root_fs); 1539 remove_proc_entry("fs/nfsfs", NULL);
1542} 1540}
1543 1541
1544#endif /* CONFIG_PROC_FS */ 1542#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa220dc74609..7226a506f3ca 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data)
1575 return nfs_compare_mount_options(sb, server, mntflags); 1575 return nfs_compare_mount_options(sb, server, mntflags);
1576} 1576}
1577 1577
1578static int nfs_bdi_register(struct nfs_server *server)
1579{
1580 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
1581}
1582
1578static int nfs_get_sb(struct file_system_type *fs_type, 1583static int nfs_get_sb(struct file_system_type *fs_type,
1579 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1584 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1580{ 1585{
@@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1617 if (s->s_fs_info != server) { 1622 if (s->s_fs_info != server) {
1618 nfs_free_server(server); 1623 nfs_free_server(server);
1619 server = NULL; 1624 server = NULL;
1625 } else {
1626 error = nfs_bdi_register(server);
1627 if (error)
1628 goto error_splat_super;
1620 } 1629 }
1621 1630
1622 if (!s->s_root) { 1631 if (!s->s_root) {
@@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s)
1664{ 1673{
1665 struct nfs_server *server = NFS_SB(s); 1674 struct nfs_server *server = NFS_SB(s);
1666 1675
1676 bdi_unregister(&server->backing_dev_info);
1667 kill_anon_super(s); 1677 kill_anon_super(s);
1668 nfs_free_server(server); 1678 nfs_free_server(server);
1669} 1679}
@@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
1708 if (s->s_fs_info != server) { 1718 if (s->s_fs_info != server) {
1709 nfs_free_server(server); 1719 nfs_free_server(server);
1710 server = NULL; 1720 server = NULL;
1721 } else {
1722 error = nfs_bdi_register(server);
1723 if (error)
1724 goto error_splat_super;
1711 } 1725 }
1712 1726
1713 if (!s->s_root) { 1727 if (!s->s_root) {
@@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
1984 if (s->s_fs_info != server) { 1998 if (s->s_fs_info != server) {
1985 nfs_free_server(server); 1999 nfs_free_server(server);
1986 server = NULL; 2000 server = NULL;
2001 } else {
2002 error = nfs_bdi_register(server);
2003 if (error)
2004 goto error_splat_super;
1987 } 2005 }
1988 2006
1989 if (!s->s_root) { 2007 if (!s->s_root) {
@@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
2070 if (s->s_fs_info != server) { 2088 if (s->s_fs_info != server) {
2071 nfs_free_server(server); 2089 nfs_free_server(server);
2072 server = NULL; 2090 server = NULL;
2091 } else {
2092 error = nfs_bdi_register(server);
2093 if (error)
2094 goto error_splat_super;
2073 } 2095 }
2074 2096
2075 if (!s->s_root) { 2097 if (!s->s_root) {
@@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
2149 if (s->s_fs_info != server) { 2171 if (s->s_fs_info != server) {
2150 nfs_free_server(server); 2172 nfs_free_server(server);
2151 server = NULL; 2173 server = NULL;
2174 } else {
2175 error = nfs_bdi_register(server);
2176 if (error)
2177 goto error_splat_super;
2152 } 2178 }
2153 2179
2154 if (!s->s_root) { 2180 if (!s->s_root) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 562abf3380d0..0b3ffa9840c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes)
104} while (0) 104} while (0)
105#define RESERVE_SPACE(nbytes) do { \ 105#define RESERVE_SPACE(nbytes) do { \
106 p = xdr_reserve_space(xdr, nbytes); \ 106 p = xdr_reserve_space(xdr, nbytes); \
107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ 107 if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
108 BUG_ON(!p); \ 108 BUG_ON(!p); \
109} while (0) 109} while (0)
110 110
@@ -134,7 +134,7 @@ xdr_error: \
134 p = xdr_inline_decode(xdr, nbytes); \ 134 p = xdr_inline_decode(xdr, nbytes); \
135 if (!p) { \ 135 if (!p) { \
136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \ 136 dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
137 __FUNCTION__, __LINE__); \ 137 __func__, __LINE__); \
138 return -EIO; \ 138 return -EIO; \
139 } \ 139 } \
140} while (0) 140} while (0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 42f3820ee8f5..5ac00c4fee91 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -169,6 +169,7 @@ static const struct file_operations exports_operations = {
169 .read = seq_read, 169 .read = seq_read,
170 .llseek = seq_lseek, 170 .llseek = seq_lseek,
171 .release = seq_release, 171 .release = seq_release,
172 .owner = THIS_MODULE,
172}; 173};
173 174
174/*----------------------------------------------------------------------------*/ 175/*----------------------------------------------------------------------------*/
@@ -801,10 +802,9 @@ static int create_proc_exports_entry(void)
801 entry = proc_mkdir("fs/nfs", NULL); 802 entry = proc_mkdir("fs/nfs", NULL);
802 if (!entry) 803 if (!entry)
803 return -ENOMEM; 804 return -ENOMEM;
804 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 805 entry = proc_create("exports", 0, entry, &exports_operations);
805 if (!entry) 806 if (!entry)
806 return -ENOMEM; 807 return -ENOMEM;
807 entry->proc_fops = &exports_operations;
808 return 0; 808 return 0;
809} 809}
810#else /* CONFIG_PROC_FS */ 810#else /* CONFIG_PROC_FS */
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d127..5e6724c1afd1 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...);
45extern void __ntfs_debug (const char *file, int line, const char *function, 45extern void __ntfs_debug (const char *file, int line, const char *function,
46 const char *format, ...) __attribute__ ((format (printf, 4, 5))); 46 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
47#define ntfs_debug(f, a...) \ 47#define ntfs_debug(f, a...) \
48 __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) 48 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
49 49
50extern void ntfs_debug_dump_runlist(const runlist_element *rl); 50extern void ntfs_debug_dump_runlist(const runlist_element *rl);
51 51
@@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
58 58
59extern void __ntfs_warning(const char *function, const struct super_block *sb, 59extern void __ntfs_warning(const char *function, const struct super_block *sb,
60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 60 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
61#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) 61#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
62 62
63extern void __ntfs_error(const char *function, const struct super_block *sb, 63extern void __ntfs_error(const char *function, const struct super_block *sb,
64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); 64 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
65#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) 65#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
66 66
67#endif /* _LINUX_NTFS_DEBUG_H */ 67#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 2ad5c8b104b9..790defb847e7 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
1191 if (size) { 1191 if (size) {
1192 page = ntfs_map_page(mftbmp_mapping, 1192 page = ntfs_map_page(mftbmp_mapping,
1193 ofs >> PAGE_CACHE_SHIFT); 1193 ofs >> PAGE_CACHE_SHIFT);
1194 if (unlikely(IS_ERR(page))) { 1194 if (IS_ERR(page)) {
1195 ntfs_error(vol->sb, "Failed to read mft " 1195 ntfs_error(vol->sb, "Failed to read mft "
1196 "bitmap, aborting."); 1196 "bitmap, aborting.");
1197 return PTR_ERR(page); 1197 return PTR_ERR(page);
@@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
2118 } 2118 }
2119 /* Read, map, and pin the page containing the mft record. */ 2119 /* Read, map, and pin the page containing the mft record. */
2120 page = ntfs_map_page(mft_vi->i_mapping, index); 2120 page = ntfs_map_page(mft_vi->i_mapping, index);
2121 if (unlikely(IS_ERR(page))) { 2121 if (IS_ERR(page)) {
2122 ntfs_error(vol->sb, "Failed to map page containing mft record " 2122 ntfs_error(vol->sb, "Failed to map page containing mft record "
2123 "to format 0x%llx.", (long long)mft_no); 2123 "to format 0x%llx.", (long long)mft_no);
2124 return PTR_ERR(page); 2124 return PTR_ERR(page);
@@ -2519,7 +2519,7 @@ mft_rec_already_initialized:
2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; 2519 ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
2520 /* Read, map, and pin the page containing the mft record. */ 2520 /* Read, map, and pin the page containing the mft record. */
2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index); 2521 page = ntfs_map_page(vol->mft_ino->i_mapping, index);
2522 if (unlikely(IS_ERR(page))) { 2522 if (IS_ERR(page)) {
2523 ntfs_error(vol->sb, "Failed to map page containing allocated " 2523 ntfs_error(vol->sb, "Failed to map page containing allocated "
2524 "mft record 0x%llx.", (long long)bit); 2524 "mft record 0x%llx.", (long long)bit);
2525 err = PTR_ERR(page); 2525 err = PTR_ERR(page);
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 98429fd68499..bc702dab5d1f 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -65,7 +65,7 @@ int o2cb_sys_init(void)
65{ 65{
66 int ret; 66 int ret;
67 67
68 o2cb_kset = kset_create_and_add("o2cb", NULL, NULL); 68 o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj);
69 if (!o2cb_kset) 69 if (!o2cb_kset)
70 return -ENOMEM; 70 return -ENOMEM;
71 71
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5f6d858770a2..1b81dcba175d 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -44,7 +44,8 @@
44#define MLOG_MASK_PREFIX ML_DLM 44#define MLOG_MASK_PREFIX ML_DLM
45#include "cluster/masklog.h" 45#include "cluster/masklog.h"
46 46
47int stringify_lockname(const char *lockname, int locklen, char *buf, int len); 47static int stringify_lockname(const char *lockname, int locklen, char *buf,
48 int len);
48 49
49void dlm_print_one_lock_resource(struct dlm_lock_resource *res) 50void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
50{ 51{
@@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname);
251 * 252 *
252 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. 253 * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
253 */ 254 */
254int stringify_lockname(const char *lockname, int locklen, char *buf, int len) 255static int stringify_lockname(const char *lockname, int locklen, char *buf,
256 int len)
255{ 257{
256 int out = 0; 258 int out = 0;
257 __be64 inode_blkno_be; 259 __be64 inode_blkno_be;
@@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref)
368 kfree(dc); 370 kfree(dc);
369} 371}
370 372
371void dlm_debug_put(struct dlm_debug_ctxt *dc) 373static void dlm_debug_put(struct dlm_debug_ctxt *dc)
372{ 374{
373 if (dc) 375 if (dc)
374 kref_put(&dc->debug_refcnt, dlm_debug_free); 376 kref_put(&dc->debug_refcnt, dlm_debug_free);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 61a000f8524c..e48aba698b77 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -327,7 +327,7 @@ clear_fields:
327 327
328static struct backing_dev_info dlmfs_backing_dev_info = { 328static struct backing_dev_info dlmfs_backing_dev_info = {
329 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
330 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
331}; 331};
332 332
333static struct inode *dlmfs_get_root_inode(struct super_block *sb) 333static struct inode *dlmfs_get_root_inode(struct super_block *sb)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9154c82d3258..57e0d30cde98 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1048 mlog_entry("(0x%p, '%.*s')\n", dentry, 1048 mlog_entry("(0x%p, '%.*s')\n", dentry,
1049 dentry->d_name.len, dentry->d_name.name); 1049 dentry->d_name.len, dentry->d_name.name);
1050 1050
1051 /* ensuring we don't even attempt to truncate a symlink */
1052 if (S_ISLNK(inode->i_mode))
1053 attr->ia_valid &= ~ATTR_SIZE;
1054
1051 if (attr->ia_valid & ATTR_MODE) 1055 if (attr->ia_valid & ATTR_MODE)
1052 mlog(0, "mode change: %d\n", attr->ia_mode); 1056 mlog(0, "mode change: %d\n", attr->ia_mode);
1053 if (attr->ia_valid & ATTR_UID) 1057 if (attr->ia_valid & ATTR_UID)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ce0dc147602a..be774bdc8b36 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
260 bh = osb->local_alloc_bh; 260 bh = osb->local_alloc_bh;
261 alloc = (struct ocfs2_dinode *) bh->b_data; 261 alloc = (struct ocfs2_dinode *) bh->b_data;
262 262
263 alloc_copy = kmalloc(bh->b_size, GFP_KERNEL); 263 alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
264 if (!alloc_copy) { 264 if (!alloc_copy) {
265 status = -ENOMEM; 265 status = -ENOMEM;
266 goto out_commit; 266 goto out_commit;
@@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
931 * local alloc shutdown won't try to double free main bitmap 931 * local alloc shutdown won't try to double free main bitmap
932 * bits. Make a copy so the sync function knows which bits to 932 * bits. Make a copy so the sync function knows which bits to
933 * free. */ 933 * free. */
934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL); 934 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
935 if (!alloc_copy) { 935 if (!alloc_copy) {
936 status = -ENOMEM; 936 status = -ENOMEM;
937 mlog_errno(status); 937 mlog_errno(status);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index ac1d74c63bf5..bbd1667aa7d3 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node)
385 return 0; 385 return 0;
386} 386}
387 387
388struct ocfs2_stack_operations o2cb_stack_ops = { 388static struct ocfs2_stack_operations o2cb_stack_ops = {
389 .connect = o2cb_cluster_connect, 389 .connect = o2cb_cluster_connect,
390 .disconnect = o2cb_cluster_disconnect, 390 .disconnect = o2cb_cluster_disconnect,
391 .hangup = o2cb_cluster_hangup, 391 .hangup = o2cb_cluster_hangup,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 7428663f9cbb..b503772cd0ec 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = {
635 .owner = THIS_MODULE, 635 .owner = THIS_MODULE,
636}; 636};
637 637
638struct miscdevice ocfs2_control_device = { 638static struct miscdevice ocfs2_control_device = {
639 .minor = MISC_DYNAMIC_MINOR, 639 .minor = MISC_DYNAMIC_MINOR,
640 .name = "ocfs2_control", 640 .name = "ocfs2_control",
641 .fops = &ocfs2_control_fops, 641 .fops = &ocfs2_control_fops,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 7134007ba22f..ba9dbb51d25b 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
167 .readlink = page_readlink, 167 .readlink = page_readlink,
168 .follow_link = ocfs2_follow_link, 168 .follow_link = ocfs2_follow_link,
169 .getattr = ocfs2_getattr, 169 .getattr = ocfs2_getattr,
170 .setattr = ocfs2_setattr,
170}; 171};
171const struct inode_operations ocfs2_fast_symlink_inode_operations = { 172const struct inode_operations ocfs2_fast_symlink_inode_operations = {
172 .readlink = ocfs2_readlink, 173 .readlink = ocfs2_readlink,
173 .follow_link = ocfs2_follow_link, 174 .follow_link = ocfs2_follow_link,
174 .getattr = ocfs2_getattr, 175 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr,
175}; 177};
diff --git a/fs/open.c b/fs/open.c
index 7af1f05d5978..a1450086e92f 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
7#include <linux/string.h> 7#include <linux/string.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/fdtable.h>
10#include <linux/quotaops.h> 11#include <linux/quotaops.h>
11#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
12#include <linux/module.h> 13#include <linux/module.h>
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index e7dd1d4e3473..0fdda2e8a4cc 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -41,12 +41,12 @@
41#ifndef CONFIG_LDM_DEBUG 41#ifndef CONFIG_LDM_DEBUG
42#define ldm_debug(...) do {} while (0) 42#define ldm_debug(...) do {} while (0)
43#else 43#else
44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a) 44#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
45#endif 45#endif
46 46
47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a) 47#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a) 48#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a) 49#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
50 50
51__attribute__ ((format (printf, 3, 4))) 51__attribute__ ((format (printf, 3, 4)))
52static void _ldm_printk (const char *level, const char *function, 52static void _ldm_printk (const char *level, const char *function,
diff --git a/fs/pipe.c b/fs/pipe.c
index f73492b6817e..3499f9ff6316 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1076,6 +1076,23 @@ int do_pipe(int *fd)
1076} 1076}
1077 1077
1078/* 1078/*
1079 * sys_pipe() is the normal C calling standard for creating
1080 * a pipe. It's not the way Unix traditionally does this, though.
1081 */
1082asmlinkage long __weak sys_pipe(int __user *fildes)
1083{
1084 int fd[2];
1085 int error;
1086
1087 error = do_pipe(fd);
1088 if (!error) {
1089 if (copy_to_user(fildes, fd, sizeof(fd)))
1090 error = -EFAULT;
1091 }
1092 return error;
1093}
1094
1095/*
1079 * pipefs should _never_ be mounted by userland - too much of security hassle, 1096 * pipefs should _never_ be mounted by userland - too much of security hassle,
1080 * no real gain from having the whole whorehouse mounted. So we don't need 1097 * no real gain from having the whole whorehouse mounted. So we don't need
1081 * any operations on the root directory. However, we need a non-trivial 1098 * any operations on the root directory. However, we need a non-trivial
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07d6c4853fe8..dca997a93bff 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
73#include <linux/signal.h> 73#include <linux/signal.h>
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/file.h> 75#include <linux/file.h>
76#include <linux/fdtable.h>
76#include <linux/times.h> 77#include <linux/times.h>
77#include <linux/cpuset.h> 78#include <linux/cpuset.h>
78#include <linux/rcupdate.h> 79#include <linux/rcupdate.h>
@@ -425,12 +426,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
425 cutime = cstime = utime = stime = cputime_zero; 426 cutime = cstime = utime = stime = cputime_zero;
426 cgtime = gtime = cputime_zero; 427 cgtime = gtime = cputime_zero;
427 428
428 rcu_read_lock();
429 if (lock_task_sighand(task, &flags)) { 429 if (lock_task_sighand(task, &flags)) {
430 struct signal_struct *sig = task->signal; 430 struct signal_struct *sig = task->signal;
431 431
432 if (sig->tty) { 432 if (sig->tty) {
433 tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); 433 struct pid *pgrp = tty_get_pgrp(sig->tty);
434 tty_pgrp = pid_nr_ns(pgrp, ns);
435 put_pid(pgrp);
434 tty_nr = new_encode_dev(tty_devnum(sig->tty)); 436 tty_nr = new_encode_dev(tty_devnum(sig->tty));
435 } 437 }
436 438
@@ -469,7 +471,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
469 471
470 unlock_task_sighand(task, &flags); 472 unlock_task_sighand(task, &flags);
471 } 473 }
472 rcu_read_unlock();
473 474
474 if (!whole || num_threads < 2) 475 if (!whole || num_threads < 2)
475 wchan = get_wchan(task); 476 wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c5e412a00b17..808cbdc193d3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/capability.h> 57#include <linux/capability.h>
58#include <linux/file.h> 58#include <linux/file.h>
59#include <linux/fdtable.h>
59#include <linux/string.h> 60#include <linux/string.h>
60#include <linux/seq_file.h> 61#include <linux/seq_file.h>
61#include <linux/namei.h> 62#include <linux/namei.h>
@@ -195,12 +196,32 @@ static int proc_root_link(struct inode *inode, struct path *path)
195 return result; 196 return result;
196} 197}
197 198
198#define MAY_PTRACE(task) \ 199/*
199 (task == current || \ 200 * Return zero if current may access user memory in @task, -error if not.
200 (task->parent == current && \ 201 */
201 (task->ptrace & PT_PTRACED) && \ 202static int check_mem_permission(struct task_struct *task)
202 (task_is_stopped_or_traced(task)) && \ 203{
203 security_ptrace(current,task) == 0)) 204 /*
205 * A task can always look at itself, in case it chooses
206 * to use system calls instead of load instructions.
207 */
208 if (task == current)
209 return 0;
210
211 /*
212 * If current is actively ptrace'ing, and would also be
213 * permitted to freshly attach with ptrace now, permit it.
214 */
215 if (task->parent == current && (task->ptrace & PT_PTRACED) &&
216 task_is_stopped_or_traced(task) &&
217 ptrace_may_attach(task))
218 return 0;
219
220 /*
221 * Noone else is allowed.
222 */
223 return -EPERM;
224}
204 225
205struct mm_struct *mm_for_maps(struct task_struct *task) 226struct mm_struct *mm_for_maps(struct task_struct *task)
206{ 227{
@@ -722,7 +743,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
722 if (!task) 743 if (!task)
723 goto out_no_task; 744 goto out_no_task;
724 745
725 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 746 if (check_mem_permission(task))
726 goto out; 747 goto out;
727 748
728 ret = -ENOMEM; 749 ret = -ENOMEM;
@@ -748,7 +769,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
748 769
749 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 770 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
750 retval = access_process_vm(task, src, page, this_len, 0); 771 retval = access_process_vm(task, src, page, this_len, 0);
751 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 772 if (!retval || check_mem_permission(task)) {
752 if (!ret) 773 if (!ret)
753 ret = -EIO; 774 ret = -EIO;
754 break; 775 break;
@@ -792,7 +813,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
792 if (!task) 813 if (!task)
793 goto out_no_task; 814 goto out_no_task;
794 815
795 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 816 if (check_mem_permission(task))
796 goto out; 817 goto out;
797 818
798 copied = -ENOMEM; 819 copied = -ENOMEM;
@@ -1181,6 +1202,81 @@ static const struct file_operations proc_pid_sched_operations = {
1181 1202
1182#endif 1203#endif
1183 1204
1205/*
1206 * We added or removed a vma mapping the executable. The vmas are only mapped
1207 * during exec and are not mapped with the mmap system call.
1208 * Callers must hold down_write() on the mm's mmap_sem for these
1209 */
1210void added_exe_file_vma(struct mm_struct *mm)
1211{
1212 mm->num_exe_file_vmas++;
1213}
1214
1215void removed_exe_file_vma(struct mm_struct *mm)
1216{
1217 mm->num_exe_file_vmas--;
1218 if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
1219 fput(mm->exe_file);
1220 mm->exe_file = NULL;
1221 }
1222
1223}
1224
1225void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
1226{
1227 if (new_exe_file)
1228 get_file(new_exe_file);
1229 if (mm->exe_file)
1230 fput(mm->exe_file);
1231 mm->exe_file = new_exe_file;
1232 mm->num_exe_file_vmas = 0;
1233}
1234
1235struct file *get_mm_exe_file(struct mm_struct *mm)
1236{
1237 struct file *exe_file;
1238
1239 /* We need mmap_sem to protect against races with removal of
1240 * VM_EXECUTABLE vmas */
1241 down_read(&mm->mmap_sem);
1242 exe_file = mm->exe_file;
1243 if (exe_file)
1244 get_file(exe_file);
1245 up_read(&mm->mmap_sem);
1246 return exe_file;
1247}
1248
1249void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
1250{
1251 /* It's safe to write the exe_file pointer without exe_file_lock because
1252 * this is called during fork when the task is not yet in /proc */
1253 newmm->exe_file = get_mm_exe_file(oldmm);
1254}
1255
1256static int proc_exe_link(struct inode *inode, struct path *exe_path)
1257{
1258 struct task_struct *task;
1259 struct mm_struct *mm;
1260 struct file *exe_file;
1261
1262 task = get_proc_task(inode);
1263 if (!task)
1264 return -ENOENT;
1265 mm = get_task_mm(task);
1266 put_task_struct(task);
1267 if (!mm)
1268 return -ENOENT;
1269 exe_file = get_mm_exe_file(mm);
1270 mmput(mm);
1271 if (exe_file) {
1272 *exe_path = exe_file->f_path;
1273 path_get(&exe_file->f_path);
1274 fput(exe_file);
1275 return 0;
1276 } else
1277 return -ENOENT;
1278}
1279
1184static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1280static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1185{ 1281{
1186 struct inode *inode = dentry->d_inode; 1282 struct inode *inode = dentry->d_inode;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a36ad3c75cf4..43e54e86cefd 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
70 70
71 start = NULL; 71 start = NULL;
72 if (dp->get_info) { 72 if (dp->read_proc) {
73 /* Handle old net routines */
74 n = dp->get_info(page, &start, *ppos, count);
75 if (n < count)
76 eof = 1;
77 } else if (dp->read_proc) {
78 /* 73 /*
79 * How to be a proc read function 74 * How to be a proc read function
80 * ------------------------------ 75 * ------------------------------
@@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name,
277 int len; 272 int len;
278 int rtn = 0; 273 int rtn = 0;
279 274
275 de = *ret;
276 if (!de)
277 de = &proc_root;
278
280 spin_lock(&proc_subdir_lock); 279 spin_lock(&proc_subdir_lock);
281 de = &proc_root;
282 while (1) { 280 while (1) {
283 next = strchr(cp, '/'); 281 next = strchr(cp, '/');
284 if (!next) 282 if (!next)
@@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
385 383
386 lock_kernel(); 384 lock_kernel();
387 spin_lock(&proc_subdir_lock); 385 spin_lock(&proc_subdir_lock);
388 if (de) { 386 for (de = de->subdir; de ; de = de->next) {
389 for (de = de->subdir; de ; de = de->next) { 387 if (de->namelen != dentry->d_name.len)
390 if (de->namelen != dentry->d_name.len) 388 continue;
391 continue; 389 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
392 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 390 unsigned int ino;
393 unsigned int ino;
394 391
395 ino = de->low_ino; 392 ino = de->low_ino;
396 de_get(de); 393 de_get(de);
397 spin_unlock(&proc_subdir_lock); 394 spin_unlock(&proc_subdir_lock);
398 error = -EINVAL; 395 error = -EINVAL;
399 inode = proc_get_inode(dir->i_sb, ino, de); 396 inode = proc_get_inode(dir->i_sb, ino, de);
400 goto out_unlock; 397 goto out_unlock;
401 }
402 } 398 }
403 } 399 }
404 spin_unlock(&proc_subdir_lock); 400 spin_unlock(&proc_subdir_lock);
@@ -410,7 +406,8 @@ out_unlock:
410 d_add(dentry, inode); 406 d_add(dentry, inode);
411 return NULL; 407 return NULL;
412 } 408 }
413 de_put(de); 409 if (de)
410 de_put(de);
414 return ERR_PTR(error); 411 return ERR_PTR(error);
415} 412}
416 413
@@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
440 lock_kernel(); 437 lock_kernel();
441 438
442 ino = inode->i_ino; 439 ino = inode->i_ino;
443 if (!de) {
444 ret = -EINVAL;
445 goto out;
446 }
447 i = filp->f_pos; 440 i = filp->f_pos;
448 switch (i) { 441 switch (i) {
449 case 0: 442 case 0:
@@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
582 /* make sure name is valid */ 575 /* make sure name is valid */
583 if (!name || !strlen(name)) goto out; 576 if (!name || !strlen(name)) goto out;
584 577
585 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 578 if (xlate_proc_name(name, parent, &fn) != 0)
586 goto out; 579 goto out;
587 580
588 /* At this point there must not be any '/' characters beyond *fn */ 581 /* At this point there must not be any '/' characters beyond *fn */
@@ -648,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
648 return ent; 641 return ent;
649} 642}
650 643
644struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
645 struct proc_dir_entry *parent)
646{
647 struct proc_dir_entry *ent;
648
649 ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
650 if (ent) {
651 ent->data = net;
652 if (proc_register(parent, ent) < 0) {
653 kfree(ent);
654 ent = NULL;
655 }
656 }
657 return ent;
658}
659EXPORT_SYMBOL_GPL(proc_net_mkdir);
660
651struct proc_dir_entry *proc_mkdir(const char *name, 661struct proc_dir_entry *proc_mkdir(const char *name,
652 struct proc_dir_entry *parent) 662 struct proc_dir_entry *parent)
653{ 663{
@@ -682,9 +692,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
682 return ent; 692 return ent;
683} 693}
684 694
685struct proc_dir_entry *proc_create(const char *name, mode_t mode, 695struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
686 struct proc_dir_entry *parent, 696 struct proc_dir_entry *parent,
687 const struct file_operations *proc_fops) 697 const struct file_operations *proc_fops,
698 void *data)
688{ 699{
689 struct proc_dir_entry *pde; 700 struct proc_dir_entry *pde;
690 nlink_t nlink; 701 nlink_t nlink;
@@ -705,6 +716,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode,
705 if (!pde) 716 if (!pde)
706 goto out; 717 goto out;
707 pde->proc_fops = proc_fops; 718 pde->proc_fops = proc_fops;
719 pde->data = data;
708 if (proc_register(parent, pde) < 0) 720 if (proc_register(parent, pde) < 0)
709 goto out_free; 721 goto out_free;
710 return pde; 722 return pde;
@@ -734,55 +746,58 @@ void free_proc_entry(struct proc_dir_entry *de)
734void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 746void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
735{ 747{
736 struct proc_dir_entry **p; 748 struct proc_dir_entry **p;
737 struct proc_dir_entry *de; 749 struct proc_dir_entry *de = NULL;
738 const char *fn = name; 750 const char *fn = name;
739 int len; 751 int len;
740 752
741 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 753 if (xlate_proc_name(name, &parent, &fn) != 0)
742 goto out; 754 return;
743 len = strlen(fn); 755 len = strlen(fn);
744 756
745 spin_lock(&proc_subdir_lock); 757 spin_lock(&proc_subdir_lock);
746 for (p = &parent->subdir; *p; p=&(*p)->next ) { 758 for (p = &parent->subdir; *p; p=&(*p)->next ) {
747 if (!proc_match(len, fn, *p)) 759 if (proc_match(len, fn, *p)) {
748 continue; 760 de = *p;
749 de = *p; 761 *p = de->next;
750 *p = de->next; 762 de->next = NULL;
751 de->next = NULL; 763 break;
752 764 }
753 spin_lock(&de->pde_unload_lock); 765 }
754 /* 766 spin_unlock(&proc_subdir_lock);
755 * Stop accepting new callers into module. If you're 767 if (!de)
756 * dynamically allocating ->proc_fops, save a pointer somewhere. 768 return;
757 */
758 de->proc_fops = NULL;
759 /* Wait until all existing callers into module are done. */
760 if (de->pde_users > 0) {
761 DECLARE_COMPLETION_ONSTACK(c);
762
763 if (!de->pde_unload_completion)
764 de->pde_unload_completion = &c;
765
766 spin_unlock(&de->pde_unload_lock);
767 spin_unlock(&proc_subdir_lock);
768 769
769 wait_for_completion(de->pde_unload_completion); 770 spin_lock(&de->pde_unload_lock);
771 /*
772 * Stop accepting new callers into module. If you're
773 * dynamically allocating ->proc_fops, save a pointer somewhere.
774 */
775 de->proc_fops = NULL;
776 /* Wait until all existing callers into module are done. */
777 if (de->pde_users > 0) {
778 DECLARE_COMPLETION_ONSTACK(c);
779
780 if (!de->pde_unload_completion)
781 de->pde_unload_completion = &c;
770 782
771 spin_lock(&proc_subdir_lock);
772 goto continue_removing;
773 }
774 spin_unlock(&de->pde_unload_lock); 783 spin_unlock(&de->pde_unload_lock);
775 784
785 wait_for_completion(de->pde_unload_completion);
786
787 goto continue_removing;
788 }
789 spin_unlock(&de->pde_unload_lock);
790
776continue_removing: 791continue_removing:
777 if (S_ISDIR(de->mode)) 792 if (S_ISDIR(de->mode))
778 parent->nlink--; 793 parent->nlink--;
779 de->nlink = 0; 794 de->nlink = 0;
780 WARN_ON(de->subdir); 795 if (de->subdir) {
781 if (atomic_dec_and_test(&de->count)) 796 printk(KERN_WARNING "%s: removing non-empty directory "
782 free_proc_entry(de); 797 "'%s/%s', leaking at least '%s'\n", __func__,
783 break; 798 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
784 } 800 }
785 spin_unlock(&proc_subdir_lock); 801 if (atomic_dec_and_test(&de->count))
786out: 802 free_proc_entry(de);
787 return;
788} 803}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 82b3a1b5a70b..6f4e8dc97da1 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,8 +25,7 @@
25 25
26struct proc_dir_entry *de_get(struct proc_dir_entry *de) 26struct proc_dir_entry *de_get(struct proc_dir_entry *de)
27{ 27{
28 if (de) 28 atomic_inc(&de->count);
29 atomic_inc(&de->count);
30 return de; 29 return de;
31} 30}
32 31
@@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de)
35 */ 34 */
36void de_put(struct proc_dir_entry *de) 35void de_put(struct proc_dir_entry *de)
37{ 36{
38 if (de) { 37 lock_kernel();
39 lock_kernel(); 38 if (!atomic_read(&de->count)) {
40 if (!atomic_read(&de->count)) { 39 printk("de_put: entry %s already free!\n", de->name);
41 printk("de_put: entry %s already free!\n", de->name);
42 unlock_kernel();
43 return;
44 }
45
46 if (atomic_dec_and_test(&de->count))
47 free_proc_entry(de);
48 unlock_kernel(); 40 unlock_kernel();
41 return;
49 } 42 }
43
44 if (atomic_dec_and_test(&de->count))
45 free_proc_entry(de);
46 unlock_kernel();
50} 47}
51 48
52/* 49/*
@@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
392{ 389{
393 struct inode * inode; 390 struct inode * inode;
394 391
395 if (de != NULL && !try_module_get(de->owner)) 392 if (!try_module_get(de->owner))
396 goto out_mod; 393 goto out_mod;
397 394
398 inode = iget_locked(sb, ino); 395 inode = iget_locked(sb, ino);
@@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
402 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 399 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
403 PROC_I(inode)->fd = 0; 400 PROC_I(inode)->fd = 0;
404 PROC_I(inode)->pde = de; 401 PROC_I(inode)->pde = de;
405 if (de) { 402
406 if (de->mode) { 403 if (de->mode) {
407 inode->i_mode = de->mode; 404 inode->i_mode = de->mode;
408 inode->i_uid = de->uid; 405 inode->i_uid = de->uid;
409 inode->i_gid = de->gid; 406 inode->i_gid = de->gid;
410 } 407 }
411 if (de->size) 408 if (de->size)
412 inode->i_size = de->size; 409 inode->i_size = de->size;
413 if (de->nlink) 410 if (de->nlink)
414 inode->i_nlink = de->nlink; 411 inode->i_nlink = de->nlink;
415 if (de->proc_iops) 412 if (de->proc_iops)
416 inode->i_op = de->proc_iops; 413 inode->i_op = de->proc_iops;
417 if (de->proc_fops) { 414 if (de->proc_fops) {
418 if (S_ISREG(inode->i_mode)) { 415 if (S_ISREG(inode->i_mode)) {
419#ifdef CONFIG_COMPAT 416#ifdef CONFIG_COMPAT
420 if (!de->proc_fops->compat_ioctl) 417 if (!de->proc_fops->compat_ioctl)
421 inode->i_fop = 418 inode->i_fop =
422 &proc_reg_file_ops_no_compat; 419 &proc_reg_file_ops_no_compat;
423 else 420 else
424#endif 421#endif
425 inode->i_fop = &proc_reg_file_ops; 422 inode->i_fop = &proc_reg_file_ops;
426 } else { 423 } else {
427 inode->i_fop = de->proc_fops; 424 inode->i_fop = de->proc_fops;
428 }
429 } 425 }
430 } 426 }
431 unlock_new_inode(inode); 427 unlock_new_inode(inode);
@@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
433 return inode; 429 return inode;
434 430
435out_ino: 431out_ino:
436 if (de != NULL) 432 module_put(de->owner);
437 module_put(de->owner);
438out_mod: 433out_mod:
439 return NULL; 434 return NULL;
440} 435}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index bc72f5c8c47d..28cbca805905 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13 13
14extern struct proc_dir_entry proc_root;
14#ifdef CONFIG_PROC_SYSCTL 15#ifdef CONFIG_PROC_SYSCTL
15extern int proc_sys_init(void); 16extern int proc_sys_init(void);
16#else 17#else
@@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
46 47
47extern int maps_protect; 48extern int maps_protect;
48 49
49extern void create_seq_entry(char *name, mode_t mode,
50 const struct file_operations *f);
51extern int proc_exe_link(struct inode *, struct path *);
52extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 50extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
53 struct pid *pid, struct task_struct *task); 51 struct pid *pid, struct task_struct *task);
54extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 52extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 941e95114b5a..79ecd281d2cb 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
137 137
138static int __init proc_nommu_init(void) 138static int __init proc_nommu_init(void)
139{ 139{
140 create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); 140 proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
141 return 0; 141 return 0;
142} 142}
143 143
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 441a32f0e5f2..74a323d2b850 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
179 "PageTables: %8lu kB\n" 179 "PageTables: %8lu kB\n"
180 "NFS_Unstable: %8lu kB\n" 180 "NFS_Unstable: %8lu kB\n"
181 "Bounce: %8lu kB\n" 181 "Bounce: %8lu kB\n"
182 "WritebackTmp: %8lu kB\n"
182 "CommitLimit: %8lu kB\n" 183 "CommitLimit: %8lu kB\n"
183 "Committed_AS: %8lu kB\n" 184 "Committed_AS: %8lu kB\n"
184 "VmallocTotal: %8lu kB\n" 185 "VmallocTotal: %8lu kB\n"
@@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
210 K(global_page_state(NR_PAGETABLE)), 211 K(global_page_state(NR_PAGETABLE)),
211 K(global_page_state(NR_UNSTABLE_NFS)), 212 K(global_page_state(NR_UNSTABLE_NFS)),
212 K(global_page_state(NR_BOUNCE)), 213 K(global_page_state(NR_BOUNCE)),
214 K(global_page_state(NR_WRITEBACK_TEMP)),
213 K(allowed), 215 K(allowed),
214 K(committed), 216 K(committed),
215 (unsigned long)VMALLOC_TOTAL >> 10, 217 (unsigned long)VMALLOC_TOTAL >> 10,
@@ -826,14 +828,6 @@ static struct file_operations proc_kpageflags_operations = {
826 828
827struct proc_dir_entry *proc_root_kcore; 829struct proc_dir_entry *proc_root_kcore;
828 830
829void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
830{
831 struct proc_dir_entry *entry;
832 entry = create_proc_entry(name, mode, NULL);
833 if (entry)
834 entry->proc_fops = f;
835}
836
837void __init proc_misc_init(void) 831void __init proc_misc_init(void)
838{ 832{
839 static struct { 833 static struct {
@@ -862,66 +856,52 @@ void __init proc_misc_init(void)
862 856
863 /* And now for trickier ones */ 857 /* And now for trickier ones */
864#ifdef CONFIG_PRINTK 858#ifdef CONFIG_PRINTK
865 { 859 proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations);
866 struct proc_dir_entry *entry;
867 entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
868 if (entry)
869 entry->proc_fops = &proc_kmsg_operations;
870 }
871#endif 860#endif
872 create_seq_entry("locks", 0, &proc_locks_operations); 861 proc_create("locks", 0, NULL, &proc_locks_operations);
873 create_seq_entry("devices", 0, &proc_devinfo_operations); 862 proc_create("devices", 0, NULL, &proc_devinfo_operations);
874 create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); 863 proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations);
875#ifdef CONFIG_BLOCK 864#ifdef CONFIG_BLOCK
876 create_seq_entry("partitions", 0, &proc_partitions_operations); 865 proc_create("partitions", 0, NULL, &proc_partitions_operations);
877#endif 866#endif
878 create_seq_entry("stat", 0, &proc_stat_operations); 867 proc_create("stat", 0, NULL, &proc_stat_operations);
879 create_seq_entry("interrupts", 0, &proc_interrupts_operations); 868 proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
880#ifdef CONFIG_SLABINFO 869#ifdef CONFIG_SLABINFO
881 create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); 870 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
882#ifdef CONFIG_DEBUG_SLAB_LEAK 871#ifdef CONFIG_DEBUG_SLAB_LEAK
883 create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); 872 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
884#endif 873#endif
885#endif 874#endif
886#ifdef CONFIG_MMU 875#ifdef CONFIG_MMU
887 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); 876 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
888#endif 877#endif
889 create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); 878 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
890 create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); 879 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
891 create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); 880 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
892 create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); 881 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
893#ifdef CONFIG_BLOCK 882#ifdef CONFIG_BLOCK
894 create_seq_entry("diskstats", 0, &proc_diskstats_operations); 883 proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
895#endif 884#endif
896#ifdef CONFIG_MODULES 885#ifdef CONFIG_MODULES
897 create_seq_entry("modules", 0, &proc_modules_operations); 886 proc_create("modules", 0, NULL, &proc_modules_operations);
898#endif 887#endif
899#ifdef CONFIG_SCHEDSTATS 888#ifdef CONFIG_SCHEDSTATS
900 create_seq_entry("schedstat", 0, &proc_schedstat_operations); 889 proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
901#endif 890#endif
902#ifdef CONFIG_PROC_KCORE 891#ifdef CONFIG_PROC_KCORE
903 proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); 892 proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations);
904 if (proc_root_kcore) { 893 if (proc_root_kcore)
905 proc_root_kcore->proc_fops = &proc_kcore_operations;
906 proc_root_kcore->size = 894 proc_root_kcore->size =
907 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 895 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
908 }
909#endif 896#endif
910#ifdef CONFIG_PROC_PAGE_MONITOR 897#ifdef CONFIG_PROC_PAGE_MONITOR
911 create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); 898 proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
912 create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); 899 proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
913#endif 900#endif
914#ifdef CONFIG_PROC_VMCORE 901#ifdef CONFIG_PROC_VMCORE
915 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); 902 proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
916 if (proc_vmcore)
917 proc_vmcore->proc_fops = &proc_vmcore_operations;
918#endif 903#endif
919#ifdef CONFIG_MAGIC_SYSRQ 904#ifdef CONFIG_MAGIC_SYSRQ
920 { 905 proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
921 struct proc_dir_entry *entry;
922 entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
923 if (entry)
924 entry->proc_fops = &proc_sysrq_trigger_operations;
925 }
926#endif 906#endif
927} 907}
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 13cd7835d0df..83f357b30d71 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
159} 159}
160EXPORT_SYMBOL_GPL(get_proc_net); 160EXPORT_SYMBOL_GPL(get_proc_net);
161 161
162struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
163 struct proc_dir_entry *parent)
164{
165 struct proc_dir_entry *pde;
166 pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
167 if (pde != NULL)
168 pde->data = net;
169 return pde;
170}
171EXPORT_SYMBOL_GPL(proc_net_mkdir);
172
173static __net_init int proc_net_ns_init(struct net *net) 162static __net_init int proc_net_ns_init(struct net *net)
174{ 163{
175 struct proc_dir_entry *netd, *net_statd; 164 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 614c34b6d1c2..5acc001d49f6 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -165,8 +165,8 @@ out:
165 return err; 165 return err;
166} 166}
167 167
168static ssize_t proc_sys_read(struct file *filp, char __user *buf, 168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos) 169 size_t count, loff_t *ppos, int write)
170{ 170{
171 struct dentry *dentry = filp->f_dentry; 171 struct dentry *dentry = filp->f_dentry;
172 struct ctl_table_header *head; 172 struct ctl_table_header *head;
@@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf,
190 * and won't be until we finish. 190 * and won't be until we finish.
191 */ 191 */
192 error = -EPERM; 192 error = -EPERM;
193 if (sysctl_perm(table, MAY_READ)) 193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 194 goto out;
195 195
196 /* careful: calling conventions are nasty here */ 196 /* careful: calling conventions are nasty here */
197 res = count; 197 res = count;
198 error = table->proc_handler(table, 0, filp, buf, &res, ppos); 198 error = table->proc_handler(table, write, filp, buf, &res, ppos);
199 if (!error) 199 if (!error)
200 error = res; 200 error = res;
201out: 201out:
@@ -204,44 +204,16 @@ out:
204 return error; 204 return error;
205} 205}
206 206
207static ssize_t proc_sys_write(struct file *filp, const char __user *buf, 207static ssize_t proc_sys_read(struct file *filp, char __user *buf,
208 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
209{ 209{
210 struct dentry *dentry = filp->f_dentry; 210 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
211 struct ctl_table_header *head; 211}
212 struct ctl_table *table;
213 ssize_t error;
214 size_t res;
215
216 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
217 /* Has the sysctl entry disappeared on us? */
218 error = -ENOENT;
219 if (!table)
220 goto out;
221
222 /* Has the sysctl entry been replaced by a directory? */
223 error = -EISDIR;
224 if (!table->proc_handler)
225 goto out;
226
227 /*
228 * At this point we know that the sysctl was not unregistered
229 * and won't be until we finish.
230 */
231 error = -EPERM;
232 if (sysctl_perm(table, MAY_WRITE))
233 goto out;
234
235 /* careful: calling conventions are nasty here */
236 res = count;
237 error = table->proc_handler(table, 1, filp, (char __user *)buf,
238 &res, ppos);
239 if (!error)
240 error = res;
241out:
242 sysctl_head_finish(head);
243 212
244 return error; 213static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
214 size_t count, loff_t *ppos)
215{
216 return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
245} 217}
246 218
247 219
@@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
416 goto out; 388 goto out;
417 389
418 /* Use the permissions on the sysctl table entry */ 390 /* Use the permissions on the sysctl table entry */
419 error = sysctl_perm(table, mask); 391 error = sysctl_perm(head->root, table, mask);
420out: 392out:
421 sysctl_head_finish(head); 393 sysctl_head_finish(head);
422 return error; 394 return error;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 49816e00b51a..21f490f5d65c 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -5,7 +5,7 @@
5 */ 5 */
6 6
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8#include <linux/module.h>
9#include <linux/init.h> 9#include <linux/init.h>
10#include <linux/errno.h> 10#include <linux/errno.h>
11#include <linux/time.h> 11#include <linux/time.h>
@@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139/* 139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140 * This is the handler for /proc/tty/ldiscs
141 */
142static int tty_ldiscs_read_proc(char *page, char **start, off_t off,
143 int count, int *eof, void *data)
144{ 140{
145 int i; 141 return (*pos < NR_LDISCS) ? pos : NULL;
146 int len = 0; 142}
147 off_t begin = 0; 143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
148 struct tty_ldisc *ld; 157 struct tty_ldisc *ld;
149 158
150 for (i=0; i < NR_LDISCS; i++) { 159 ld = tty_ldisc_get(i);
151 ld = tty_ldisc_get(i); 160 if (ld == NULL)
152 if (ld == NULL)
153 continue;
154 len += sprintf(page+len, "%-10s %2d\n",
155 ld->name ? ld->name : "???", i);
156 tty_ldisc_put(i);
157 if (len+begin > off+count)
158 break;
159 if (len+begin < off) {
160 begin += len;
161 len = 0;
162 }
163 }
164 if (i >= NR_LDISCS)
165 *eof = 1;
166 if (off >= len+begin)
167 return 0; 161 return 0;
168 *start = page + (off-begin); 162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
169 return ((count < begin+len-off) ? count : begin+len-off); 163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
170} 177}
171 178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
172/* 187/*
173 * This function is called by tty_register_driver() to handle 188 * This function is called by tty_register_driver() to handle
174 * registering the driver's /proc handler into /proc/tty/driver/<foo> 189 * registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -177,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver)
177{ 192{
178 struct proc_dir_entry *ent; 193 struct proc_dir_entry *ent;
179 194
180 if ((!driver->read_proc && !driver->write_proc) || 195 if (!driver->ops->read_proc || !driver->driver_name ||
181 !driver->driver_name ||
182 driver->proc_entry) 196 driver->proc_entry)
183 return; 197 return;
184 198
185 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 199 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
186 if (!ent) 200 if (!ent)
187 return; 201 return;
188 ent->read_proc = driver->read_proc; 202 ent->read_proc = driver->ops->read_proc;
189 ent->write_proc = driver->write_proc;
190 ent->owner = driver->owner; 203 ent->owner = driver->owner;
191 ent->data = driver; 204 ent->data = driver;
192 205
@@ -214,7 +227,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver)
214 */ 227 */
215void __init proc_tty_init(void) 228void __init proc_tty_init(void)
216{ 229{
217 struct proc_dir_entry *entry;
218 if (!proc_mkdir("tty", NULL)) 230 if (!proc_mkdir("tty", NULL))
219 return; 231 return;
220 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); 232 proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL);
@@ -224,10 +236,7 @@ void __init proc_tty_init(void)
224 * password lengths and inter-keystroke timings during password 236 * password lengths and inter-keystroke timings during password
225 * entry. 237 * entry.
226 */ 238 */
227 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); 239 proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
228 240 proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
229 create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); 241 proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
230 entry = create_proc_entry("tty/drivers", 0, NULL);
231 if (entry)
232 entry->proc_fops = &proc_tty_drivers_operations;
233} 242}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef0fb57fc9ef..95117538a4f6 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -22,8 +22,6 @@
22 22
23#include "internal.h" 23#include "internal.h"
24 24
25struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
26
27static int proc_test_super(struct super_block *sb, void *data) 25static int proc_test_super(struct super_block *sb, void *data)
28{ 26{
29 return sb->s_fs_info == data; 27 return sb->s_fs_info == data;
@@ -126,8 +124,8 @@ void __init proc_root_init(void)
126#ifdef CONFIG_SYSVIPC 124#ifdef CONFIG_SYSVIPC
127 proc_mkdir("sysvipc", NULL); 125 proc_mkdir("sysvipc", NULL);
128#endif 126#endif
129 proc_root_fs = proc_mkdir("fs", NULL); 127 proc_mkdir("fs", NULL);
130 proc_root_driver = proc_mkdir("driver", NULL); 128 proc_mkdir("driver", NULL);
131 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ 129 proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
132#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) 130#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
133 /* just give it a mountpoint */ 131 /* just give it a mountpoint */
@@ -137,7 +135,7 @@ void __init proc_root_init(void)
137#ifdef CONFIG_PROC_DEVICETREE 135#ifdef CONFIG_PROC_DEVICETREE
138 proc_device_tree_init(); 136 proc_device_tree_init();
139#endif 137#endif
140 proc_bus = proc_mkdir("bus", NULL); 138 proc_mkdir("bus", NULL);
141 proc_sys_init(); 139 proc_sys_init();
142} 140}
143 141
@@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns)
232EXPORT_SYMBOL(proc_symlink); 230EXPORT_SYMBOL(proc_symlink);
233EXPORT_SYMBOL(proc_mkdir); 231EXPORT_SYMBOL(proc_mkdir);
234EXPORT_SYMBOL(create_proc_entry); 232EXPORT_SYMBOL(create_proc_entry);
235EXPORT_SYMBOL(proc_create); 233EXPORT_SYMBOL(proc_create_data);
236EXPORT_SYMBOL(remove_proc_entry); 234EXPORT_SYMBOL(remove_proc_entry);
237EXPORT_SYMBOL(proc_root);
238EXPORT_SYMBOL(proc_root_fs);
239EXPORT_SYMBOL(proc_bus);
240EXPORT_SYMBOL(proc_root_driver);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 7415eeb7cc3a..e2b8e769f510 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -75,40 +75,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
75 return mm->total_vm; 75 return mm->total_vm;
76} 76}
77 77
78int proc_exe_link(struct inode *inode, struct path *path)
79{
80 struct vm_area_struct * vma;
81 int result = -ENOENT;
82 struct task_struct *task = get_proc_task(inode);
83 struct mm_struct * mm = NULL;
84
85 if (task) {
86 mm = get_task_mm(task);
87 put_task_struct(task);
88 }
89 if (!mm)
90 goto out;
91 down_read(&mm->mmap_sem);
92
93 vma = mm->mmap;
94 while (vma) {
95 if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
96 break;
97 vma = vma->vm_next;
98 }
99
100 if (vma) {
101 *path = vma->vm_file->f_path;
102 path_get(&vma->vm_file->f_path);
103 result = 0;
104 }
105
106 up_read(&mm->mmap_sem);
107 mmput(mm);
108out:
109 return result;
110}
111
112static void pad_len_spaces(struct seq_file *m, int len) 78static void pad_len_spaces(struct seq_file *m, int len)
113{ 79{
114 len = 25 + sizeof(void*) * 6 - len; 80 len = 25 + sizeof(void*) * 6 - len;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 8011528518bd..4b4f9cc2f186 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -1,6 +1,7 @@
1 1
2#include <linux/mm.h> 2#include <linux/mm.h>
3#include <linux/file.h> 3#include <linux/file.h>
4#include <linux/fdtable.h>
4#include <linux/mount.h> 5#include <linux/mount.h>
5#include <linux/ptrace.h> 6#include <linux/ptrace.h>
6#include <linux/seq_file.h> 7#include <linux/seq_file.h>
@@ -103,40 +104,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
103 return size; 104 return size;
104} 105}
105 106
106int proc_exe_link(struct inode *inode, struct path *path)
107{
108 struct vm_list_struct *vml;
109 struct vm_area_struct *vma;
110 struct task_struct *task = get_proc_task(inode);
111 struct mm_struct *mm = get_task_mm(task);
112 int result = -ENOENT;
113
114 if (!mm)
115 goto out;
116 down_read(&mm->mmap_sem);
117
118 vml = mm->context.vmlist;
119 vma = NULL;
120 while (vml) {
121 if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) {
122 vma = vml->vma;
123 break;
124 }
125 vml = vml->next;
126 }
127
128 if (vma) {
129 *path = vma->vm_file->f_path;
130 path_get(&vma->vm_file->f_path);
131 result = 0;
132 }
133
134 up_read(&mm->mmap_sem);
135 mmput(mm);
136out:
137 return result;
138}
139
140/* 107/*
141 * display mapping lines for a particular process's /proc/pid/maps 108 * display mapping lines for a particular process's /proc/pid/maps
142 */ 109 */
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 23b647f25d08..234ada903633 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -306,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err)
306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); 306 printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
307 goto out_buf; 307 goto out_buf;
308 } 308 }
309 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1); 309 le16_add_cpu(&dh->dqdh_entries, 1);
310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); 310 memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
311 /* Find free structure in block */ 311 /* Find free structure in block */
312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); 312 for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
@@ -448,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk)
448 goto out_buf; 448 goto out_buf;
449 } 449 }
450 dh = (struct v2_disk_dqdbheader *)buf; 450 dh = (struct v2_disk_dqdbheader *)buf;
451 dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); 451 le16_add_cpu(&dh->dqdh_entries, -1);
452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ 452 if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || 453 if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { 454 (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index b41a514b0976..9590b9024300 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -26,6 +26,9 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/mm.h> 28#include <linux/mm.h>
29#include <linux/ramfs.h>
30
31#include "internal.h"
29 32
30const struct address_space_operations ramfs_aops = { 33const struct address_space_operations ramfs_aops = {
31 .readpage = simple_readpage, 34 .readpage = simple_readpage,
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8428d5b2711d..b13123424e49 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations;
44 44
45static struct backing_dev_info ramfs_backing_dev_info = { 45static struct backing_dev_info ramfs_backing_dev_info = {
46 .ra_pages = 0, /* No readahead */ 46 .ra_pages = 0, /* No readahead */
47 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | 47 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 48 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, 49 BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
50}; 50};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index af7cc074a476..6b330639b51d 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -11,5 +11,4 @@
11 11
12 12
13extern const struct address_space_operations ramfs_aops; 13extern const struct address_space_operations ramfs_aops;
14extern const struct file_operations ramfs_file_operations;
15extern const struct inode_operations ramfs_file_inode_operations; 14extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da86042b3e03..e396b2fa4743 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super,
2574 2574
2575 result = 0; 2575 result = 0;
2576 2576
2577 if (journal->j_dev_file != NULL) { 2577 if (journal->j_dev_bd != NULL) {
2578 result = filp_close(journal->j_dev_file, NULL); 2578 if (journal->j_dev_bd->bd_dev != super->s_dev)
2579 journal->j_dev_file = NULL; 2579 bd_release(journal->j_dev_bd);
2580 journal->j_dev_bd = NULL;
2581 } else if (journal->j_dev_bd != NULL) {
2582 result = blkdev_put(journal->j_dev_bd); 2580 result = blkdev_put(journal->j_dev_bd);
2583 journal->j_dev_bd = NULL; 2581 journal->j_dev_bd = NULL;
2584 } 2582 }
@@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super,
2603 result = 0; 2601 result = 0;
2604 2602
2605 journal->j_dev_bd = NULL; 2603 journal->j_dev_bd = NULL;
2606 journal->j_dev_file = NULL;
2607 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ? 2604 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2608 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2605 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2609 2606
@@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super,
2620 "cannot init journal device '%s': %i", 2617 "cannot init journal device '%s': %i",
2621 __bdevname(jdev, b), result); 2618 __bdevname(jdev, b), result);
2622 return result; 2619 return result;
2623 } else if (jdev != super->s_dev) 2620 } else if (jdev != super->s_dev) {
2621 result = bd_claim(journal->j_dev_bd, journal);
2622 if (result) {
2623 blkdev_put(journal->j_dev_bd);
2624 return result;
2625 }
2626
2624 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2627 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2628 }
2629
2625 return 0; 2630 return 0;
2626 } 2631 }
2627 2632
2628 journal->j_dev_file = filp_open(jdev_name, 0, 0); 2633 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal);
2629 if (!IS_ERR(journal->j_dev_file)) { 2634 if (IS_ERR(journal->j_dev_bd)) {
2630 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2635 result = PTR_ERR(journal->j_dev_bd);
2631 if (!S_ISBLK(jdev_inode->i_mode)) { 2636 journal->j_dev_bd = NULL;
2632 reiserfs_warning(super, "journal_init_dev: '%s' is "
2633 "not a block device", jdev_name);
2634 result = -ENOTBLK;
2635 release_journal_dev(super, journal);
2636 } else {
2637 /* ok */
2638 journal->j_dev_bd = I_BDEV(jdev_inode);
2639 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2640 reiserfs_info(super,
2641 "journal_init_dev: journal device: %s\n",
2642 bdevname(journal->j_dev_bd, b));
2643 }
2644 } else {
2645 result = PTR_ERR(journal->j_dev_file);
2646 journal->j_dev_file = NULL;
2647 reiserfs_warning(super, 2637 reiserfs_warning(super,
2648 "journal_init_dev: Cannot open '%s': %i", 2638 "journal_init_dev: Cannot open '%s': %i",
2649 jdev_name, result); 2639 jdev_name, result);
2640 return result;
2650 } 2641 }
2651 return result; 2642
2643 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2644 reiserfs_info(super,
2645 "journal_init_dev: journal device: %s\n",
2646 bdevname(journal->j_dev_bd, b));
2647 return 0;
2652} 2648}
2653 2649
2654/** 2650/**
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 8f86c52b30d8..b9dbeeca7049 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = {
467 .read = seq_read, 467 .read = seq_read,
468 .llseek = seq_lseek, 468 .llseek = seq_lseek,
469 .release = seq_release, 469 .release = seq_release,
470 .owner = THIS_MODULE,
470}; 471};
471 472
472static struct proc_dir_entry *proc_info_root = NULL; 473static struct proc_dir_entry *proc_info_root = NULL;
@@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs";
475static void add_file(struct super_block *sb, char *name, 476static void add_file(struct super_block *sb, char *name,
476 int (*func) (struct seq_file *, struct super_block *)) 477 int (*func) (struct seq_file *, struct super_block *))
477{ 478{
478 struct proc_dir_entry *de; 479 proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
479 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 480 &r_file_operations, func);
480 if (de) {
481 de->data = func;
482 de->proc_fops = &r_file_operations;
483 }
484} 481}
485 482
486int reiserfs_proc_info_init(struct super_block *sb) 483int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/select.c b/fs/select.c
index 00f58c5c7e05..8dda969614a9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
21#include <linux/poll.h> 21#include <linux/poll.h>
22#include <linux/personality.h> /* for STICKY_TIMEOUTS */ 22#include <linux/personality.h> /* for STICKY_TIMEOUTS */
23#include <linux/file.h> 23#include <linux/file.h>
24#include <linux/fdtable.h>
24#include <linux/fs.h> 25#include <linux/fs.h>
25#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
26 27
@@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
298#define MAX_SELECT_SECONDS \ 299#define MAX_SELECT_SECONDS \
299 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
300 301
301static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
302 fd_set __user *exp, s64 *timeout) 303 fd_set __user *exp, s64 *timeout)
303{ 304{
304 fd_set_bits fds; 305 fd_set_bits fds;
@@ -425,7 +426,7 @@ sticky:
425 return ret; 426 return ret;
426} 427}
427 428
428#ifdef TIF_RESTORE_SIGMASK 429#ifdef HAVE_SET_RESTORE_SIGMASK
429asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, 430asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
430 fd_set __user *exp, struct timespec __user *tsp, 431 fd_set __user *exp, struct timespec __user *tsp,
431 const sigset_t __user *sigmask, size_t sigsetsize) 432 const sigset_t __user *sigmask, size_t sigsetsize)
@@ -498,7 +499,7 @@ sticky:
498 if (sigmask) { 499 if (sigmask) {
499 memcpy(&current->saved_sigmask, &sigsaved, 500 memcpy(&current->saved_sigmask, &sigsaved,
500 sizeof(sigsaved)); 501 sizeof(sigsaved));
501 set_thread_flag(TIF_RESTORE_SIGMASK); 502 set_restore_sigmask();
502 } 503 }
503 } else if (sigmask) 504 } else if (sigmask)
504 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 505 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -528,7 +529,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
528 529
529 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); 530 return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
530} 531}
531#endif /* TIF_RESTORE_SIGMASK */ 532#endif /* HAVE_SET_RESTORE_SIGMASK */
532 533
533struct poll_list { 534struct poll_list {
534 struct poll_list *next; 535 struct poll_list *next;
@@ -759,7 +760,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
759 return ret; 760 return ret;
760} 761}
761 762
762#ifdef TIF_RESTORE_SIGMASK 763#ifdef HAVE_SET_RESTORE_SIGMASK
763asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, 764asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
764 struct timespec __user *tsp, const sigset_t __user *sigmask, 765 struct timespec __user *tsp, const sigset_t __user *sigmask,
765 size_t sigsetsize) 766 size_t sigsetsize)
@@ -805,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
805 if (sigmask) { 806 if (sigmask) {
806 memcpy(&current->saved_sigmask, &sigsaved, 807 memcpy(&current->saved_sigmask, &sigsaved,
807 sizeof(sigsaved)); 808 sizeof(sigsaved));
808 set_thread_flag(TIF_RESTORE_SIGMASK); 809 set_restore_sigmask();
809 } 810 }
810 ret = -ERESTARTNOHAND; 811 ret = -ERESTARTNOHAND;
811 } else if (sigmask) 812 } else if (sigmask)
@@ -839,4 +840,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
839 840
840 return ret; 841 return ret;
841} 842}
842#endif /* TIF_RESTORE_SIGMASK */ 843#endif /* HAVE_SET_RESTORE_SIGMASK */
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db35933..619725644c75 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = {
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
209{ 209{
210 int error;
211 sigset_t sigmask; 210 sigset_t sigmask;
212 struct signalfd_ctx *ctx; 211 struct signalfd_ctx *ctx;
213 struct file *file;
214 struct inode *inode;
215 212
216 if (sizemask != sizeof(sigset_t) || 213 if (sizemask != sizeof(sigset_t) ||
217 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 214 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
230 * When we call this, the initialization must be complete, since 227 * When we call this, the initialization must be complete, since
231 * anon_inode_getfd() will install the fd. 228 * anon_inode_getfd() will install the fd.
232 */ 229 */
233 error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]", 230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
234 &signalfd_fops, ctx); 231 if (ufd < 0)
235 if (error) 232 kfree(ctx);
236 goto err_fdalloc;
237 } else { 233 } else {
238 file = fget(ufd); 234 struct file *file = fget(ufd);
239 if (!file) 235 if (!file)
240 return -EBADF; 236 return -EBADF;
241 ctx = file->private_data; 237 ctx = file->private_data;
@@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
252 } 248 }
253 249
254 return ufd; 250 return ufd;
255
256err_fdalloc:
257 kfree(ctx);
258 return error;
259} 251}
260
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
index 734972b92694..fc4b1a5dd755 100644
--- a/fs/smbfs/smb_debug.h
+++ b/fs/smbfs/smb_debug.h
@@ -11,14 +11,14 @@
11 * these are normally enabled. 11 * these are normally enabled.
12 */ 12 */
13#ifdef SMBFS_PARANOIA 13#ifdef SMBFS_PARANOIA
14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a) 14# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
15#else 15#else
16# define PARANOIA(f, a...) do { ; } while(0) 16# define PARANOIA(f, a...) do { ; } while(0)
17#endif 17#endif
18 18
19/* lots of debug messages */ 19/* lots of debug messages */
20#ifdef SMBFS_DEBUG_VERBOSE 20#ifdef SMBFS_DEBUG_VERBOSE
21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 21# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
22#else 22#else
23# define VERBOSE(f, a...) do { ; } while(0) 23# define VERBOSE(f, a...) do { ; } while(0)
24#endif 24#endif
@@ -28,7 +28,7 @@
28 * too common name. 28 * too common name.
29 */ 29 */
30#ifdef SMBFS_DEBUG 30#ifdef SMBFS_DEBUG
31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a) 31#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
32#else 32#else
33#define DEBUG1(f, a...) do { ; } while(0) 33#define DEBUG1(f, a...) do { ; } while(0)
34#endif 34#endif
diff --git a/fs/splice.c b/fs/splice.c
index eeb1a86a7014..633f58ebfb72 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1075 1075
1076 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 1076 ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1077 if (ret > 0) 1077 if (ret > 0)
1078 *ppos += ret; 1078 *ppos = sd.pos;
1079 1079
1080 return ret; 1080 return ret;
1081} 1081}
diff --git a/fs/super.c b/fs/super.c
index a5a4aca7e22f..453877c5697b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s)
117 * Drop a superblock's refcount. Returns non-zero if the superblock was 117 * Drop a superblock's refcount. Returns non-zero if the superblock was
118 * destroyed. The caller must hold sb_lock. 118 * destroyed. The caller must hold sb_lock.
119 */ 119 */
120int __put_super(struct super_block *sb) 120static int __put_super(struct super_block *sb)
121{ 121{
122 int ret = 0; 122 int ret = 0;
123 123
diff --git a/fs/sync.c b/fs/sync.c
index 7cd005ea7639..228e17b5e9ee 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
64 /* sync the superblock to buffers */ 64 /* sync the superblock to buffers */
65 sb = inode->i_sb; 65 sb = inode->i_sb;
66 lock_super(sb); 66 lock_super(sb);
67 if (sb->s_op->write_super) 67 if (sb->s_dirt && sb->s_op->write_super)
68 sb->s_op->write_super(sb); 68 sb->s_op->write_super(sb);
69 unlock_super(sb); 69 unlock_super(sb);
70 70
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index dbdfabbfd609..e7735f643cd1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
135 goto out; 135 goto out;
136 } 136 }
137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", 137 pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
138 __FUNCTION__, count, *ppos, buffer->page); 138 __func__, count, *ppos, buffer->page);
139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page, 139 retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
140 buffer->count); 140 buffer->count);
141out: 141out:
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d9262f74f94e..eb53c632f856 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = {
30 30
31static struct backing_dev_info sysfs_backing_dev_info = { 31static struct backing_dev_info sysfs_backing_dev_info = {
32 .ra_pages = 0, /* No readahead */ 32 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 34};
35 35
36static const struct inode_operations sysfs_inode_operations ={ 36static const struct inode_operations sysfs_inode_operations ={
@@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
59 if (error) 59 if (error)
60 return error; 60 return error;
61 61
62 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
63
62 error = inode_setattr(inode, iattr); 64 error = inode_setattr(inode, iattr);
63 if (error) 65 if (error)
64 return error; 66 return error;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd59..14f0023984d7 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
61 /* instantiate and link root dentry */ 61 /* instantiate and link root dentry */
62 root = d_alloc_root(inode); 62 root = d_alloc_root(inode);
63 if (!root) { 63 if (!root) {
64 pr_debug("%s: could not get root dentry!\n",__FUNCTION__); 64 pr_debug("%s: could not get root dentry!\n",__func__);
65 iput(inode); 65 iput(inode);
66 return -ENOMEM; 66 return -ENOMEM;
67 } 67 }
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 42d51d1c05cd..38ebe3f85b3d 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d)
217 if (sbi->s_bytesex == BYTESEX_PDP) 217 if (sbi->s_bytesex == BYTESEX_PDP)
218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d); 218 *(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d);
219 else if (sbi->s_bytesex == BYTESEX_LE) 219 else if (sbi->s_bytesex == BYTESEX_LE)
220 *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d); 220 le32_add_cpu((__le32 *)n, d);
221 else 221 else
222 *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d); 222 be32_add_cpu((__be32 *)n, d);
223 return *n; 223 return *n;
224} 224}
225 225
@@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n)
242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d) 242static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d)
243{ 243{
244 if (sbi->s_bytesex != BYTESEX_BE) 244 if (sbi->s_bytesex != BYTESEX_BE)
245 *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d); 245 le16_add_cpu((__le16 *)n, d);
246 else 246 else
247 *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d); 247 be16_add_cpu((__be16 *)n, d);
248 return *n; 248 return *n;
249} 249}
250 250
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 10c80b59ec4b..d87d354ec424 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -20,6 +20,7 @@
20#include <linux/hrtimer.h> 20#include <linux/hrtimer.h>
21#include <linux/anon_inodes.h> 21#include <linux/anon_inodes.h>
22#include <linux/timerfd.h> 22#include <linux/timerfd.h>
23#include <linux/syscalls.h>
23 24
24struct timerfd_ctx { 25struct timerfd_ctx {
25 struct hrtimer tmr; 26 struct hrtimer tmr;
@@ -180,10 +181,8 @@ static struct file *timerfd_fget(int fd)
180 181
181asmlinkage long sys_timerfd_create(int clockid, int flags) 182asmlinkage long sys_timerfd_create(int clockid, int flags)
182{ 183{
183 int error, ufd; 184 int ufd;
184 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
185 struct file *file;
186 struct inode *inode;
187 186
188 if (flags) 187 if (flags)
189 return -EINVAL; 188 return -EINVAL;
@@ -199,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
199 ctx->clockid = clockid; 198 ctx->clockid = clockid;
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201 200
202 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", 201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
203 &timerfd_fops, ctx); 202 if (ufd < 0)
204 if (error) {
205 kfree(ctx); 203 kfree(ctx);
206 return error;
207 }
208 204
209 return ufd; 205 return ufd;
210} 206}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe4..9fb18a340fc1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map), 240 sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
241 GFP_KERNEL); 241 GFP_KERNEL);
242 if (!sbi->s_partmaps) { 242 if (!sbi->s_partmaps) {
243 udf_error(sb, __FUNCTION__, 243 udf_error(sb, __func__,
244 "Unable to allocate space for %d partition maps", 244 "Unable to allocate space for %d partition maps",
245 count); 245 count);
246 sbi->s_partitions = 0; 246 sbi->s_partitions = 0;
@@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
1086 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */ 1086 bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
1087 1087
1088 if (bitmap == NULL) { 1088 if (bitmap == NULL) {
1089 udf_error(sb, __FUNCTION__, 1089 udf_error(sb, __func__,
1090 "Unable to allocate space for bitmap " 1090 "Unable to allocate space for bitmap "
1091 "and %d buffer_head pointers", nr_groups); 1091 "and %d buffer_head pointers", nr_groups);
1092 return NULL; 1092 return NULL;
diff --git a/fs/utimes.c b/fs/utimes.c
index a2bef77dc9c9..af059d5cb485 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
40 40
41#endif 41#endif
42 42
43static bool nsec_special(long nsec)
44{
45 return nsec == UTIME_OMIT || nsec == UTIME_NOW;
46}
47
43static bool nsec_valid(long nsec) 48static bool nsec_valid(long nsec)
44{ 49{
45 if (nsec == UTIME_OMIT || nsec == UTIME_NOW) 50 if (nsec_special(nsec))
46 return true; 51 return true;
47 52
48 return nsec >= 0 && nsec <= 999999999; 53 return nsec >= 0 && nsec <= 999999999;
@@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
119 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; 124 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
120 newattrs.ia_valid |= ATTR_MTIME_SET; 125 newattrs.ia_valid |= ATTR_MTIME_SET;
121 } 126 }
122 } else { 127 }
128
129 /*
130 * If times is NULL or both times are either UTIME_OMIT or
131 * UTIME_NOW, then need to check permissions, because
132 * inode_change_ok() won't do it.
133 */
134 if (!times || (nsec_special(times[0].tv_nsec) &&
135 nsec_special(times[1].tv_nsec))) {
123 error = -EACCES; 136 error = -EACCES;
124 if (IS_IMMUTABLE(inode)) 137 if (IS_IMMUTABLE(inode))
125 goto mnt_drop_write_and_out; 138 goto mnt_drop_write_and_out;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 5b66162d0747..a3522727ea5b 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -986,7 +986,7 @@ error_inode:
986 if (corrupt < 0) { 986 if (corrupt < 0) {
987 fat_fs_panic(new_dir->i_sb, 987 fat_fs_panic(new_dir->i_sb,
988 "%s: Filesystem corrupted (i_pos %lld)", 988 "%s: Filesystem corrupted (i_pos %lld)",
989 __FUNCTION__, sinfo.i_pos); 989 __func__, sinfo.i_pos);
990 } 990 }
991 goto out; 991 goto out;
992} 992}
diff --git a/fs/xattr.c b/fs/xattr.c
index 89a942f07e1b..4706a8b1f495 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
67} 67}
68 68
69int 69int
70vfs_setxattr(struct dentry *dentry, char *name, void *value, 70vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
71 size_t size, int flags) 71 size_t size, int flags)
72{ 72{
73 struct inode *inode = dentry->d_inode; 73 struct inode *inode = dentry->d_inode;
@@ -131,7 +131,7 @@ out_noalloc:
131EXPORT_SYMBOL_GPL(xattr_getsecurity); 131EXPORT_SYMBOL_GPL(xattr_getsecurity);
132 132
133ssize_t 133ssize_t
134vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) 134vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
135{ 135{
136 struct inode *inode = dentry->d_inode; 136 struct inode *inode = dentry->d_inode;
137 int error; 137 int error;
@@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size)
187EXPORT_SYMBOL_GPL(vfs_listxattr); 187EXPORT_SYMBOL_GPL(vfs_listxattr);
188 188
189int 189int
190vfs_removexattr(struct dentry *dentry, char *name) 190vfs_removexattr(struct dentry *dentry, const char *name)
191{ 191{
192 struct inode *inode = dentry->d_inode; 192 struct inode *inode = dentry->d_inode;
193 int error; 193 int error;
@@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
218 * Extended attribute SET operations 218 * Extended attribute SET operations
219 */ 219 */
220static long 220static long
221setxattr(struct dentry *d, char __user *name, void __user *value, 221setxattr(struct dentry *d, const char __user *name, const void __user *value,
222 size_t size, int flags) 222 size_t size, int flags)
223{ 223{
224 int error; 224 int error;
@@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value,
252} 252}
253 253
254asmlinkage long 254asmlinkage long
255sys_setxattr(char __user *path, char __user *name, void __user *value, 255sys_setxattr(const char __user *path, const char __user *name,
256 size_t size, int flags) 256 const void __user *value, size_t size, int flags)
257{ 257{
258 struct nameidata nd; 258 struct nameidata nd;
259 int error; 259 int error;
@@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
271} 271}
272 272
273asmlinkage long 273asmlinkage long
274sys_lsetxattr(char __user *path, char __user *name, void __user *value, 274sys_lsetxattr(const char __user *path, const char __user *name,
275 size_t size, int flags) 275 const void __user *value, size_t size, int flags)
276{ 276{
277 struct nameidata nd; 277 struct nameidata nd;
278 int error; 278 int error;
@@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
290} 290}
291 291
292asmlinkage long 292asmlinkage long
293sys_fsetxattr(int fd, char __user *name, void __user *value, 293sys_fsetxattr(int fd, const char __user *name, const void __user *value,
294 size_t size, int flags) 294 size_t size, int flags)
295{ 295{
296 struct file *f; 296 struct file *f;
@@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
315 * Extended attribute GET operations 315 * Extended attribute GET operations
316 */ 316 */
317static ssize_t 317static ssize_t
318getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) 318getxattr(struct dentry *d, const char __user *name, void __user *value,
319 size_t size)
319{ 320{
320 ssize_t error; 321 ssize_t error;
321 void *kvalue = NULL; 322 void *kvalue = NULL;
@@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size)
349} 350}
350 351
351asmlinkage ssize_t 352asmlinkage ssize_t
352sys_getxattr(char __user *path, char __user *name, void __user *value, 353sys_getxattr(const char __user *path, const char __user *name,
353 size_t size) 354 void __user *value, size_t size)
354{ 355{
355 struct nameidata nd; 356 struct nameidata nd;
356 ssize_t error; 357 ssize_t error;
@@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value,
364} 365}
365 366
366asmlinkage ssize_t 367asmlinkage ssize_t
367sys_lgetxattr(char __user *path, char __user *name, void __user *value, 368sys_lgetxattr(const char __user *path, const char __user *name, void __user *value,
368 size_t size) 369 size_t size)
369{ 370{
370 struct nameidata nd; 371 struct nameidata nd;
@@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value,
379} 380}
380 381
381asmlinkage ssize_t 382asmlinkage ssize_t
382sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) 383sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size)
383{ 384{
384 struct file *f; 385 struct file *f;
385 ssize_t error = -EBADF; 386 ssize_t error = -EBADF;
@@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size)
424} 425}
425 426
426asmlinkage ssize_t 427asmlinkage ssize_t
427sys_listxattr(char __user *path, char __user *list, size_t size) 428sys_listxattr(const char __user *path, char __user *list, size_t size)
428{ 429{
429 struct nameidata nd; 430 struct nameidata nd;
430 ssize_t error; 431 ssize_t error;
@@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size)
438} 439}
439 440
440asmlinkage ssize_t 441asmlinkage ssize_t
441sys_llistxattr(char __user *path, char __user *list, size_t size) 442sys_llistxattr(const char __user *path, char __user *list, size_t size)
442{ 443{
443 struct nameidata nd; 444 struct nameidata nd;
444 ssize_t error; 445 ssize_t error;
@@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size)
470 * Extended attribute REMOVE operations 471 * Extended attribute REMOVE operations
471 */ 472 */
472static long 473static long
473removexattr(struct dentry *d, char __user *name) 474removexattr(struct dentry *d, const char __user *name)
474{ 475{
475 int error; 476 int error;
476 char kname[XATTR_NAME_MAX + 1]; 477 char kname[XATTR_NAME_MAX + 1];
@@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name)
485} 486}
486 487
487asmlinkage long 488asmlinkage long
488sys_removexattr(char __user *path, char __user *name) 489sys_removexattr(const char __user *path, const char __user *name)
489{ 490{
490 struct nameidata nd; 491 struct nameidata nd;
491 int error; 492 int error;
@@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name)
503} 504}
504 505
505asmlinkage long 506asmlinkage long
506sys_lremovexattr(char __user *path, char __user *name) 507sys_lremovexattr(const char __user *path, const char __user *name)
507{ 508{
508 struct nameidata nd; 509 struct nameidata nd;
509 int error; 510 int error;
@@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name)
521} 522}
522 523
523asmlinkage long 524asmlinkage long
524sys_fremovexattr(int fd, char __user *name) 525sys_fremovexattr(int fd, const char __user *name)
525{ 526{
526 struct file *f; 527 struct file *f;
527 struct dentry *dentry; 528 struct dentry *dentry;
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 524021ff5436..3f53dd101f99 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -64,3 +64,16 @@ config XFS_RT
64 See the xfs man page in section 5 for additional information. 64 See the xfs man page in section 5 for additional information.
65 65
66 If unsure, say N. 66 If unsure, say N.
67
68config XFS_DEBUG
69 bool "XFS Debugging support (EXPERIMENTAL)"
70 depends on XFS_FS && EXPERIMENTAL
71 help
72 Say Y here to get an XFS build with many debugging features,
73 including ASSERT checks, function wrappers around macros,
74 and extra sanity-checking functions in various code paths.
75
76 Note that the resulting code will be HUGE and SLOW, and probably
77 not useful unless you are debugging a particular problem.
78
79 Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index c110bb002665..ff6a19873e5c 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -20,29 +20,24 @@
20 20
21#include <linux/rwsem.h> 21#include <linux/rwsem.h>
22 22
23enum { MR_NONE, MR_ACCESS, MR_UPDATE };
24
25typedef struct { 23typedef struct {
26 struct rw_semaphore mr_lock; 24 struct rw_semaphore mr_lock;
25#ifdef DEBUG
27 int mr_writer; 26 int mr_writer;
27#endif
28} mrlock_t; 28} mrlock_t;
29 29
30#ifdef DEBUG
30#define mrinit(mrp, name) \ 31#define mrinit(mrp, name) \
31 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) 32 do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
33#else
34#define mrinit(mrp, name) \
35 do { init_rwsem(&(mrp)->mr_lock); } while (0)
36#endif
37
32#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) 38#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
33#define mrfree(mrp) do { } while (0) 39#define mrfree(mrp) do { } while (0)
34 40
35static inline void mraccess(mrlock_t *mrp)
36{
37 down_read(&mrp->mr_lock);
38}
39
40static inline void mrupdate(mrlock_t *mrp)
41{
42 down_write(&mrp->mr_lock);
43 mrp->mr_writer = 1;
44}
45
46static inline void mraccess_nested(mrlock_t *mrp, int subclass) 41static inline void mraccess_nested(mrlock_t *mrp, int subclass)
47{ 42{
48 down_read_nested(&mrp->mr_lock, subclass); 43 down_read_nested(&mrp->mr_lock, subclass);
@@ -51,10 +46,11 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
51static inline void mrupdate_nested(mrlock_t *mrp, int subclass) 46static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
52{ 47{
53 down_write_nested(&mrp->mr_lock, subclass); 48 down_write_nested(&mrp->mr_lock, subclass);
49#ifdef DEBUG
54 mrp->mr_writer = 1; 50 mrp->mr_writer = 1;
51#endif
55} 52}
56 53
57
58static inline int mrtryaccess(mrlock_t *mrp) 54static inline int mrtryaccess(mrlock_t *mrp)
59{ 55{
60 return down_read_trylock(&mrp->mr_lock); 56 return down_read_trylock(&mrp->mr_lock);
@@ -64,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
64{ 60{
65 if (!down_write_trylock(&mrp->mr_lock)) 61 if (!down_write_trylock(&mrp->mr_lock))
66 return 0; 62 return 0;
63#ifdef DEBUG
67 mrp->mr_writer = 1; 64 mrp->mr_writer = 1;
65#endif
68 return 1; 66 return 1;
69} 67}
70 68
71static inline void mrunlock(mrlock_t *mrp) 69static inline void mrunlock_excl(mrlock_t *mrp)
72{ 70{
73 if (mrp->mr_writer) { 71#ifdef DEBUG
74 mrp->mr_writer = 0; 72 mrp->mr_writer = 0;
75 up_write(&mrp->mr_lock); 73#endif
76 } else { 74 up_write(&mrp->mr_lock);
77 up_read(&mrp->mr_lock);
78 }
79} 75}
80 76
81static inline void mrdemote(mrlock_t *mrp) 77static inline void mrunlock_shared(mrlock_t *mrp)
82{ 78{
83 mrp->mr_writer = 0; 79 up_read(&mrp->mr_lock);
84 downgrade_write(&mrp->mr_lock);
85} 80}
86 81
87#ifdef DEBUG 82static inline void mrdemote(mrlock_t *mrp)
88/*
89 * Debug-only routine, without some platform-specific asm code, we can
90 * now only answer requests regarding whether we hold the lock for write
91 * (reader state is outside our visibility, we only track writer state).
92 * Note: means !ismrlocked would give false positives, so don't do that.
93 */
94static inline int ismrlocked(mrlock_t *mrp, int type)
95{ 83{
96 if (mrp && type == MR_UPDATE) 84#ifdef DEBUG
97 return mrp->mr_writer; 85 mrp->mr_writer = 0;
98 return 1;
99}
100#endif 86#endif
87 downgrade_write(&mrp->mr_lock);
88}
101 89
102#endif /* __XFS_SUPPORT_MRLOCK_H__ */ 90#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52f6846101d5..5105015a75ad 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -886,7 +886,7 @@ int
886xfs_buf_lock_value( 886xfs_buf_lock_value(
887 xfs_buf_t *bp) 887 xfs_buf_t *bp)
888{ 888{
889 return atomic_read(&bp->b_sema.count); 889 return bp->b_sema.count;
890} 890}
891#endif 891#endif
892 892
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 265f0168ab76..c672b3238b14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -133,7 +133,7 @@ xfs_nfs_get_inode(
133 if (!ip) 133 if (!ip)
134 return ERR_PTR(-EIO); 134 return ERR_PTR(-EIO);
135 135
136 if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) { 136 if (ip->i_d.di_gen != generation) {
137 xfs_iput_new(ip, XFS_ILOCK_SHARED); 137 xfs_iput_new(ip, XFS_ILOCK_SHARED);
138 return ERR_PTR(-ENOENT); 138 return ERR_PTR(-ENOENT);
139 } 139 }
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 05905246434d..65e78c13d4ae 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,9 +43,6 @@
43#include <linux/smp_lock.h> 43#include <linux/smp_lock.h>
44 44
45static struct vm_operations_struct xfs_file_vm_ops; 45static struct vm_operations_struct xfs_file_vm_ops;
46#ifdef CONFIG_XFS_DMAPI
47static struct vm_operations_struct xfs_dmapi_file_vm_ops;
48#endif
49 46
50STATIC_INLINE ssize_t 47STATIC_INLINE ssize_t
51__xfs_file_read( 48__xfs_file_read(
@@ -202,22 +199,6 @@ xfs_file_fsync(
202 (xfs_off_t)0, (xfs_off_t)-1); 199 (xfs_off_t)0, (xfs_off_t)-1);
203} 200}
204 201
205#ifdef CONFIG_XFS_DMAPI
206STATIC int
207xfs_vm_fault(
208 struct vm_area_struct *vma,
209 struct vm_fault *vmf)
210{
211 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
212 bhv_vnode_t *vp = vn_from_inode(inode);
213
214 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
215 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
216 return VM_FAULT_SIGBUS;
217 return filemap_fault(vma, vmf);
218}
219#endif /* CONFIG_XFS_DMAPI */
220
221/* 202/*
222 * Unfortunately we can't just use the clean and simple readdir implementation 203 * Unfortunately we can't just use the clean and simple readdir implementation
223 * below, because nfs might call back into ->lookup from the filldir callback 204 * below, because nfs might call back into ->lookup from the filldir callback
@@ -386,11 +367,6 @@ xfs_file_mmap(
386 vma->vm_ops = &xfs_file_vm_ops; 367 vma->vm_ops = &xfs_file_vm_ops;
387 vma->vm_flags |= VM_CAN_NONLINEAR; 368 vma->vm_flags |= VM_CAN_NONLINEAR;
388 369
389#ifdef CONFIG_XFS_DMAPI
390 if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
391 vma->vm_ops = &xfs_dmapi_file_vm_ops;
392#endif /* CONFIG_XFS_DMAPI */
393
394 file_accessed(filp); 370 file_accessed(filp);
395 return 0; 371 return 0;
396} 372}
@@ -437,47 +413,6 @@ xfs_file_ioctl_invis(
437 return error; 413 return error;
438} 414}
439 415
440#ifdef CONFIG_XFS_DMAPI
441#ifdef HAVE_VMOP_MPROTECT
442STATIC int
443xfs_vm_mprotect(
444 struct vm_area_struct *vma,
445 unsigned int newflags)
446{
447 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
448 struct xfs_mount *mp = XFS_M(inode->i_sb);
449 int error = 0;
450
451 if (mp->m_flags & XFS_MOUNT_DMAPI) {
452 if ((vma->vm_flags & VM_MAYSHARE) &&
453 (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
454 error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
455 }
456 return error;
457}
458#endif /* HAVE_VMOP_MPROTECT */
459#endif /* CONFIG_XFS_DMAPI */
460
461#ifdef HAVE_FOP_OPEN_EXEC
462/* If the user is attempting to execute a file that is offline then
463 * we have to trigger a DMAPI READ event before the file is marked as busy
464 * otherwise the invisible I/O will not be able to write to the file to bring
465 * it back online.
466 */
467STATIC int
468xfs_file_open_exec(
469 struct inode *inode)
470{
471 struct xfs_mount *mp = XFS_M(inode->i_sb);
472 struct xfs_inode *ip = XFS_I(inode);
473
474 if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
475 DM_EVENT_ENABLED(ip, DM_EVENT_READ))
476 return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
477 return 0;
478}
479#endif /* HAVE_FOP_OPEN_EXEC */
480
481/* 416/*
482 * mmap()d file has taken write protection fault and is being made 417 * mmap()d file has taken write protection fault and is being made
483 * writable. We can set the page state up correctly for a writable 418 * writable. We can set the page state up correctly for a writable
@@ -546,13 +481,3 @@ static struct vm_operations_struct xfs_file_vm_ops = {
546 .fault = filemap_fault, 481 .fault = filemap_fault,
547 .page_mkwrite = xfs_vm_page_mkwrite, 482 .page_mkwrite = xfs_vm_page_mkwrite,
548}; 483};
549
550#ifdef CONFIG_XFS_DMAPI
551static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
552 .fault = xfs_vm_fault,
553 .page_mkwrite = xfs_vm_page_mkwrite,
554#ifdef HAVE_VMOP_MPROTECT
555 .mprotect = xfs_vm_mprotect,
556#endif
557};
558#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ddb86b73c6b..a42ba9d71156 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq(
238 return error; 238 return error;
239 if (ip == NULL) 239 if (ip == NULL)
240 return XFS_ERROR(EIO); 240 return XFS_ERROR(EIO);
241 if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) { 241 if (ip->i_d.di_gen != igen) {
242 xfs_iput_new(ip, XFS_ILOCK_SHARED); 242 xfs_iput_new(ip, XFS_ILOCK_SHARED);
243 return XFS_ERROR(ENOENT); 243 return XFS_ERROR(ENOENT);
244 } 244 }
@@ -505,14 +505,14 @@ xfs_attrmulti_attr_get(
505{ 505{
506 char *kbuf; 506 char *kbuf;
507 int error = EFAULT; 507 int error = EFAULT;
508 508
509 if (*len > XATTR_SIZE_MAX) 509 if (*len > XATTR_SIZE_MAX)
510 return EINVAL; 510 return EINVAL;
511 kbuf = kmalloc(*len, GFP_KERNEL); 511 kbuf = kmalloc(*len, GFP_KERNEL);
512 if (!kbuf) 512 if (!kbuf)
513 return ENOMEM; 513 return ENOMEM;
514 514
515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL); 515 error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
516 if (error) 516 if (error)
517 goto out_kfree; 517 goto out_kfree;
518 518
@@ -546,7 +546,7 @@ xfs_attrmulti_attr_set(
546 546
547 if (copy_from_user(kbuf, ubuf, len)) 547 if (copy_from_user(kbuf, ubuf, len))
548 goto out_kfree; 548 goto out_kfree;
549 549
550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 550 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
551 551
552 out_kfree: 552 out_kfree:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index a1237dad6430..2bf287ef5489 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -511,7 +511,8 @@ xfs_vn_rename(
511 xfs_dentry_to_name(&nname, ndentry); 511 xfs_dentry_to_name(&nname, ndentry);
512 512
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname); 514 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL);
515 if (likely(!error)) { 516 if (likely(!error)) {
516 if (new_inode) 517 if (new_inode)
517 xfs_validate_fields(new_inode); 518 xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e5143323e71f..4edc46915b57 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -75,6 +75,7 @@
75#include <linux/delay.h> 75#include <linux/delay.h>
76#include <linux/log2.h> 76#include <linux/log2.h>
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h>
78 79
79#include <asm/page.h> 80#include <asm/page.h>
80#include <asm/div64.h> 81#include <asm/div64.h>
@@ -99,7 +100,6 @@
99/* 100/*
100 * Feature macros (disable/enable) 101 * Feature macros (disable/enable)
101 */ 102 */
102#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
103#ifdef CONFIG_SMP 103#ifdef CONFIG_SMP
104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ 104#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
105#else 105#else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1ebd8004469c..5e3b57516ec7 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -394,7 +394,7 @@ xfs_zero_last_block(
394 int error = 0; 394 int error = 0;
395 xfs_bmbt_irec_t imap; 395 xfs_bmbt_irec_t imap;
396 396
397 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 397 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
398 398
399 zero_offset = XFS_B_FSB_OFFSET(mp, isize); 399 zero_offset = XFS_B_FSB_OFFSET(mp, isize);
400 if (zero_offset == 0) { 400 if (zero_offset == 0) {
@@ -425,14 +425,14 @@ xfs_zero_last_block(
425 * out sync. We need to drop the ilock while we do this so we 425 * out sync. We need to drop the ilock while we do this so we
426 * don't deadlock when the buffer cache calls back to us. 426 * don't deadlock when the buffer cache calls back to us.
427 */ 427 */
428 xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); 428 xfs_iunlock(ip, XFS_ILOCK_EXCL);
429 429
430 zero_len = mp->m_sb.sb_blocksize - zero_offset; 430 zero_len = mp->m_sb.sb_blocksize - zero_offset;
431 if (isize + zero_len > offset) 431 if (isize + zero_len > offset)
432 zero_len = offset - isize; 432 zero_len = offset - isize;
433 error = xfs_iozero(ip, isize, zero_len); 433 error = xfs_iozero(ip, isize, zero_len);
434 434
435 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 435 xfs_ilock(ip, XFS_ILOCK_EXCL);
436 ASSERT(error >= 0); 436 ASSERT(error >= 0);
437 return error; 437 return error;
438} 438}
@@ -465,8 +465,7 @@ xfs_zero_eof(
465 int error = 0; 465 int error = 0;
466 xfs_bmbt_irec_t imap; 466 xfs_bmbt_irec_t imap;
467 467
468 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 468 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
469 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
470 ASSERT(offset > isize); 469 ASSERT(offset > isize);
471 470
472 /* 471 /*
@@ -475,8 +474,7 @@ xfs_zero_eof(
475 */ 474 */
476 error = xfs_zero_last_block(ip, offset, isize); 475 error = xfs_zero_last_block(ip, offset, isize);
477 if (error) { 476 if (error) {
478 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 477 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
479 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
480 return error; 478 return error;
481 } 479 }
482 480
@@ -507,8 +505,7 @@ xfs_zero_eof(
507 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 505 error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
508 0, NULL, 0, &imap, &nimaps, NULL, NULL); 506 0, NULL, 0, &imap, &nimaps, NULL, NULL);
509 if (error) { 507 if (error) {
510 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 508 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
511 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
512 return error; 509 return error;
513 } 510 }
514 ASSERT(nimaps > 0); 511 ASSERT(nimaps > 0);
@@ -532,7 +529,7 @@ xfs_zero_eof(
532 * Drop the inode lock while we're doing the I/O. 529 * Drop the inode lock while we're doing the I/O.
533 * We'll still have the iolock to protect us. 530 * We'll still have the iolock to protect us.
534 */ 531 */
535 xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 532 xfs_iunlock(ip, XFS_ILOCK_EXCL);
536 533
537 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); 534 zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
538 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); 535 zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -548,13 +545,13 @@ xfs_zero_eof(
548 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 545 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
549 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 546 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
550 547
551 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 548 xfs_ilock(ip, XFS_ILOCK_EXCL);
552 } 549 }
553 550
554 return 0; 551 return 0;
555 552
556out_lock: 553out_lock:
557 xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); 554 xfs_ilock(ip, XFS_ILOCK_EXCL);
558 ASSERT(error >= 0); 555 ASSERT(error >= 0);
559 return error; 556 return error;
560} 557}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e1d498b4ba7a..e6be37dbd0e9 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -50,7 +50,6 @@ struct xfs_iomap;
50#define XFS_INVAL_CACHED 18 50#define XFS_INVAL_CACHED 18
51#define XFS_DIORD_ENTER 19 51#define XFS_DIORD_ENTER 19
52#define XFS_DIOWR_ENTER 20 52#define XFS_DIOWR_ENTER 20
53#define XFS_SENDFILE_ENTER 21
54#define XFS_WRITEPAGE_ENTER 22 53#define XFS_WRITEPAGE_ENTER 22
55#define XFS_RELEASEPAGE_ENTER 23 54#define XFS_RELEASEPAGE_ENTER 23
56#define XFS_INVALIDPAGE_ENTER 24 55#define XFS_INVALIDPAGE_ENTER 24
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 865eb708aa95..742b2c7852c1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1181,7 +1181,7 @@ xfs_fs_statfs(
1181 statp->f_fsid.val[0] = (u32)id; 1181 statp->f_fsid.val[0] = (u32)id;
1182 statp->f_fsid.val[1] = (u32)(id >> 32); 1182 statp->f_fsid.val[1] = (u32)(id >> 32);
1183 1183
1184 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 1184 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
1185 1185
1186 spin_lock(&mp->m_sb_lock); 1186 spin_lock(&mp->m_sb_lock);
1187 statp->f_bsize = sbp->sb_blocksize; 1187 statp->f_bsize = sbp->sb_blocksize;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 8b4d63ce8694..9d73cb5c0fc7 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -25,12 +25,6 @@ struct attrlist_cursor_kern;
25 25
26typedef struct inode bhv_vnode_t; 26typedef struct inode bhv_vnode_t;
27 27
28#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode)
29#define VN_ISREG(vp) S_ISREG((vp)->i_mode)
30#define VN_ISDIR(vp) S_ISDIR((vp)->i_mode)
31#define VN_ISCHR(vp) S_ISCHR((vp)->i_mode)
32#define VN_ISBLK(vp) S_ISBLK((vp)->i_mode)
33
34/* 28/*
35 * Vnode to Linux inode mapping. 29 * Vnode to Linux inode mapping.
36 */ 30 */
@@ -151,24 +145,6 @@ typedef struct bhv_vattr {
151 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ 145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
152 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) 146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
153 147
154/*
155 * Modes.
156 */
157#define VSUID S_ISUID /* set user id on execution */
158#define VSGID S_ISGID /* set group id on execution */
159#define VSVTX S_ISVTX /* save swapped text even after use */
160#define VREAD S_IRUSR /* read, write, execute permissions */
161#define VWRITE S_IWUSR
162#define VEXEC S_IXUSR
163
164#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
165
166/*
167 * Check whether mandatory file locking is enabled.
168 */
169#define MANDLOCK(vp, mode) \
170 (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
171
172extern void vn_init(void); 148extern void vn_init(void);
173extern int vn_revalidate(bhv_vnode_t *); 149extern int vn_revalidate(bhv_vnode_t *);
174 150
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 631ebb31b295..85df3288efd5 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -933,7 +933,7 @@ xfs_qm_dqget(
933 type == XFS_DQ_PROJ || 933 type == XFS_DQ_PROJ ||
934 type == XFS_DQ_GROUP); 934 type == XFS_DQ_GROUP);
935 if (ip) { 935 if (ip) {
936 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 936 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
937 if (type == XFS_DQ_USER) 937 if (type == XFS_DQ_USER)
938 ASSERT(ip->i_udquot == NULL); 938 ASSERT(ip->i_udquot == NULL);
939 else 939 else
@@ -1088,7 +1088,7 @@ xfs_qm_dqget(
1088 xfs_qm_mplist_unlock(mp); 1088 xfs_qm_mplist_unlock(mp);
1089 XFS_DQ_HASH_UNLOCK(h); 1089 XFS_DQ_HASH_UNLOCK(h);
1090 dqret: 1090 dqret:
1091 ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip)); 1091 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1092 xfs_dqtrace_entry(dqp, "DQGET DONE"); 1092 xfs_dqtrace_entry(dqp, "DQGET DONE");
1093 *O_dqpp = dqp; 1093 *O_dqpp = dqp;
1094 return (0); 1094 return (0);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 40ea56409561..d31cce1165c5 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -670,7 +670,7 @@ xfs_qm_dqattach_one(
670 xfs_dquot_t *dqp; 670 xfs_dquot_t *dqp;
671 int error; 671 int error;
672 672
673 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 673 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
674 error = 0; 674 error = 0;
675 /* 675 /*
676 * See if we already have it in the inode itself. IO_idqpp is 676 * See if we already have it in the inode itself. IO_idqpp is
@@ -874,7 +874,7 @@ xfs_qm_dqattach(
874 return 0; 874 return 0;
875 875
876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 || 876 ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
877 XFS_ISLOCKED_INODE_EXCL(ip)); 877 xfs_isilocked(ip, XFS_ILOCK_EXCL));
878 878
879 if (! (flags & XFS_QMOPT_ILOCKED)) 879 if (! (flags & XFS_QMOPT_ILOCKED))
880 xfs_ilock(ip, XFS_ILOCK_EXCL); 880 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -888,7 +888,8 @@ xfs_qm_dqattach(
888 goto done; 888 goto done;
889 nquotas++; 889 nquotas++;
890 } 890 }
891 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 891
892 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
892 if (XFS_IS_OQUOTA_ON(mp)) { 893 if (XFS_IS_OQUOTA_ON(mp)) {
893 error = XFS_IS_GQUOTA_ON(mp) ? 894 error = XFS_IS_GQUOTA_ON(mp) ?
894 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, 895 xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
@@ -913,7 +914,7 @@ xfs_qm_dqattach(
913 * This WON'T, in general, result in a thrash. 914 * This WON'T, in general, result in a thrash.
914 */ 915 */
915 if (nquotas == 2) { 916 if (nquotas == 2) {
916 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 917 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
917 ASSERT(ip->i_udquot); 918 ASSERT(ip->i_udquot);
918 ASSERT(ip->i_gdquot); 919 ASSERT(ip->i_gdquot);
919 920
@@ -956,7 +957,7 @@ xfs_qm_dqattach(
956 957
957#ifdef QUOTADEBUG 958#ifdef QUOTADEBUG
958 else 959 else
959 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 960 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
960#endif 961#endif
961 return error; 962 return error;
962} 963}
@@ -1291,7 +1292,7 @@ xfs_qm_dqget_noattach(
1291 xfs_mount_t *mp; 1292 xfs_mount_t *mp;
1292 xfs_dquot_t *udqp, *gdqp; 1293 xfs_dquot_t *udqp, *gdqp;
1293 1294
1294 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 1295 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1295 mp = ip->i_mount; 1296 mp = ip->i_mount;
1296 udqp = NULL; 1297 udqp = NULL;
1297 gdqp = NULL; 1298 gdqp = NULL;
@@ -1392,7 +1393,7 @@ xfs_qm_qino_alloc(
1392 * Keep an extra reference to this quota inode. This inode is 1393 * Keep an extra reference to this quota inode. This inode is
1393 * locked exclusively and joined to the transaction already. 1394 * locked exclusively and joined to the transaction already.
1394 */ 1395 */
1395 ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip)); 1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1396 VN_HOLD(XFS_ITOV((*ip))); 1397 VN_HOLD(XFS_ITOV((*ip)));
1397 1398
1398 /* 1399 /*
@@ -1737,12 +1738,6 @@ xfs_qm_dqusage_adjust(
1737 return error; 1738 return error;
1738 } 1739 }
1739 1740
1740 if (ip->i_d.di_mode == 0) {
1741 xfs_iput_new(ip, XFS_ILOCK_EXCL);
1742 *res = BULKSTAT_RV_NOTHING;
1743 return XFS_ERROR(ENOENT);
1744 }
1745
1746 /* 1741 /*
1747 * Obtain the locked dquots. In case of an error (eg. allocation 1742 * Obtain the locked dquots. In case of an error (eg. allocation
1748 * fails for ENOSPC), we return the negative of the error number 1743 * fails for ENOSPC), we return the negative of the error number
@@ -2563,7 +2558,7 @@ xfs_qm_vop_chown(
2563 uint bfield = XFS_IS_REALTIME_INODE(ip) ? 2558 uint bfield = XFS_IS_REALTIME_INODE(ip) ?
2564 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; 2559 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2565 2560
2566 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2561 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2567 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 2562 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2568 2563
2569 /* old dquot */ 2564 /* old dquot */
@@ -2607,7 +2602,7 @@ xfs_qm_vop_chown_reserve(
2607 uint delblks, blkflags, prjflags = 0; 2602 uint delblks, blkflags, prjflags = 0;
2608 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; 2603 xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2609 2604
2610 ASSERT(XFS_ISLOCKED_INODE(ip)); 2605 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2611 mp = ip->i_mount; 2606 mp = ip->i_mount;
2612 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2607 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2613 2608
@@ -2717,7 +2712,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
2717 if (!XFS_IS_QUOTA_ON(tp->t_mountp)) 2712 if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2718 return; 2713 return;
2719 2714
2720 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 2715 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2721 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); 2716 ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2722 2717
2723 if (udqp) { 2718 if (udqp) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 8342823dbdc3..768a3b27d2b6 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1366,12 +1366,6 @@ xfs_qm_internalqcheck_adjust(
1366 return (error); 1366 return (error);
1367 } 1367 }
1368 1368
1369 if (ip->i_d.di_mode == 0) {
1370 xfs_iput_new(ip, lock_flags);
1371 *res = BULKSTAT_RV_NOTHING;
1372 return XFS_ERROR(ENOENT);
1373 }
1374
1375 /* 1369 /*
1376 * This inode can have blocks after eof which can get released 1370 * This inode can have blocks after eof which can get released
1377 * when we send it to inactive. Since we don't check the dquot 1371 * when we send it to inactive. Since we don't check the dquot
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index a8b85e2be9d5..5e4a40b1c565 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -27,11 +27,6 @@
27/* Number of dquots that fit in to a dquot block */ 27/* Number of dquots that fit in to a dquot block */
28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk) 28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
29 29
30#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
31 MR_UPDATE | MR_ACCESS) != 0)
32#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
33 MR_UPDATE) != 0)
34
35#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t)) 30#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
36 31
37#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims) 32#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index f441f836ca8b..99611381e740 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks(
834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); 834 ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); 835 ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
836 836
837 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip)); 837 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); 838 ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == 839 ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
840 XFS_TRANS_DQ_RES_RTBLKS || 840 XFS_TRANS_DQ_RES_RTBLKS ||
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 855da0408647..75845f950814 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -49,8 +49,6 @@ extern void assfail(char *expr, char *f, int l);
49 49
50#else /* DEBUG */ 50#else /* DEBUG */
51 51
52#include <linux/random.h>
53
54#define ASSERT(expr) \ 52#define ASSERT(expr) \
55 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 53 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
56 54
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 765aaf65e2d3..540e4c989825 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25#define QUOTADEBUG 1 25/* #define QUOTADEBUG 1 */
26#endif 26#endif
27 27
28#ifdef CONFIG_XFS_TRACE 28#ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8e130b9720ae..ebee3a4f703a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -72,7 +72,7 @@ xfs_acl_vhasacl_default(
72{ 72{
73 int error; 73 int error;
74 74
75 if (!VN_ISDIR(vp)) 75 if (!S_ISDIR(vp->i_mode))
76 return 0; 76 return 0;
77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error); 77 xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
78 return (error == 0); 78 return (error == 0);
@@ -238,15 +238,8 @@ xfs_acl_vget(
238 error = EINVAL; 238 error = EINVAL;
239 goto out; 239 goto out;
240 } 240 }
241 if (kind == _ACL_TYPE_ACCESS) { 241 if (kind == _ACL_TYPE_ACCESS)
242 bhv_vattr_t va; 242 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
243
244 va.va_mask = XFS_AT_MODE;
245 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
246 if (error)
247 goto out;
248 xfs_acl_sync_mode(va.va_mode, xfs_acl);
249 }
250 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); 243 error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
251 } 244 }
252out: 245out:
@@ -341,14 +334,15 @@ xfs_acl_iaccess(
341{ 334{
342 xfs_acl_t *acl; 335 xfs_acl_t *acl;
343 int rval; 336 int rval;
337 struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
344 338
345 if (!(_ACL_ALLOC(acl))) 339 if (!(_ACL_ALLOC(acl)))
346 return -1; 340 return -1;
347 341
348 /* If the file has no ACL return -1. */ 342 /* If the file has no ACL return -1. */
349 rval = sizeof(xfs_acl_t); 343 rval = sizeof(xfs_acl_t);
350 if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE, 344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
351 (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) { 345 ATTR_ROOT | ATTR_KERNACCESS)) {
352 _ACL_FREE(acl); 346 _ACL_FREE(acl);
353 return -1; 347 return -1;
354 } 348 }
@@ -373,23 +367,15 @@ xfs_acl_allow_set(
373 bhv_vnode_t *vp, 367 bhv_vnode_t *vp,
374 int kind) 368 int kind)
375{ 369{
376 xfs_inode_t *ip = xfs_vtoi(vp);
377 bhv_vattr_t va;
378 int error;
379
380 if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) 370 if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
381 return EPERM; 371 return EPERM;
382 if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp)) 372 if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
383 return ENOTDIR; 373 return ENOTDIR;
384 if (vp->i_sb->s_flags & MS_RDONLY) 374 if (vp->i_sb->s_flags & MS_RDONLY)
385 return EROFS; 375 return EROFS;
386 va.va_mask = XFS_AT_UID; 376 if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
387 error = xfs_getattr(ip, &va, 0);
388 if (error)
389 return error;
390 if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
391 return EPERM; 377 return EPERM;
392 return error; 378 return 0;
393} 379}
394 380
395/* 381/*
@@ -594,7 +580,7 @@ xfs_acl_get_attr(
594 *error = xfs_attr_get(xfs_vtoi(vp), 580 *error = xfs_attr_get(xfs_vtoi(vp),
595 kind == _ACL_TYPE_ACCESS ? 581 kind == _ACL_TYPE_ACCESS ?
596 SGI_ACL_FILE : SGI_ACL_DEFAULT, 582 SGI_ACL_FILE : SGI_ACL_DEFAULT,
597 (char *)aclp, &len, flags, sys_cred); 583 (char *)aclp, &len, flags);
598 if (*error || (flags & ATTR_KERNOVAL)) 584 if (*error || (flags & ATTR_KERNOVAL))
599 return; 585 return;
600 xfs_acl_get_endian(aclp); 586 xfs_acl_get_endian(aclp);
@@ -643,7 +629,6 @@ xfs_acl_vtoacl(
643 xfs_acl_t *access_acl, 629 xfs_acl_t *access_acl,
644 xfs_acl_t *default_acl) 630 xfs_acl_t *default_acl)
645{ 631{
646 bhv_vattr_t va;
647 int error = 0; 632 int error = 0;
648 633
649 if (access_acl) { 634 if (access_acl) {
@@ -652,16 +637,10 @@ xfs_acl_vtoacl(
652 * be obtained for some reason, invalidate the access ACL. 637 * be obtained for some reason, invalidate the access ACL.
653 */ 638 */
654 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error); 639 xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
655 if (!error) {
656 /* Got the ACL, need the mode... */
657 va.va_mask = XFS_AT_MODE;
658 error = xfs_getattr(xfs_vtoi(vp), &va, 0);
659 }
660
661 if (error) 640 if (error)
662 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; 641 access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
663 else /* We have a good ACL and the file mode, synchronize. */ 642 else /* We have a good ACL and the file mode, synchronize. */
664 xfs_acl_sync_mode(va.va_mode, access_acl); 643 xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
665 } 644 }
666 645
667 if (default_acl) { 646 if (default_acl) {
@@ -719,7 +698,7 @@ xfs_acl_inherit(
719 * If the new file is a directory, its default ACL is a copy of 698 * If the new file is a directory, its default ACL is a copy of
720 * the containing directory's default ACL. 699 * the containing directory's default ACL.
721 */ 700 */
722 if (VN_ISDIR(vp)) 701 if (S_ISDIR(vp->i_mode))
723 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); 702 xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
724 if (!error && !basicperms) 703 if (!error && !basicperms)
725 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); 704 xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
@@ -744,7 +723,7 @@ xfs_acl_setmode(
744 bhv_vattr_t va; 723 bhv_vattr_t va;
745 xfs_acl_entry_t *ap; 724 xfs_acl_entry_t *ap;
746 xfs_acl_entry_t *gap = NULL; 725 xfs_acl_entry_t *gap = NULL;
747 int i, error, nomask = 1; 726 int i, nomask = 1;
748 727
749 *basicperms = 1; 728 *basicperms = 1;
750 729
@@ -756,11 +735,7 @@ xfs_acl_setmode(
756 * mode. The m:: bits take precedence over the g:: bits. 735 * mode. The m:: bits take precedence over the g:: bits.
757 */ 736 */
758 va.va_mask = XFS_AT_MODE; 737 va.va_mask = XFS_AT_MODE;
759 error = xfs_getattr(xfs_vtoi(vp), &va, 0); 738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
760 if (error)
761 return error;
762
763 va.va_mask = XFS_AT_MODE;
764 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
765 ap = acl->acl_entry; 740 ap = acl->acl_entry;
766 for (i = 0; i < acl->acl_cnt; ++i) { 741 for (i = 0; i < acl->acl_cnt; ++i) {
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36d781ee5fcc..df151a859186 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -101,14 +101,28 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
101ktrace_t *xfs_attr_trace_buf; 101ktrace_t *xfs_attr_trace_buf;
102#endif 102#endif
103 103
104STATIC int
105xfs_attr_name_to_xname(
106 struct xfs_name *xname,
107 const char *aname)
108{
109 if (!aname)
110 return EINVAL;
111 xname->name = aname;
112 xname->len = strlen(aname);
113 if (xname->len >= MAXNAMELEN)
114 return EFAULT; /* match IRIX behaviour */
115
116 return 0;
117}
104 118
105/*======================================================================== 119/*========================================================================
106 * Overall external interface routines. 120 * Overall external interface routines.
107 *========================================================================*/ 121 *========================================================================*/
108 122
109int 123int
110xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen, 124xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
111 char *value, int *valuelenp, int flags, struct cred *cred) 125 char *value, int *valuelenp, int flags)
112{ 126{
113 xfs_da_args_t args; 127 xfs_da_args_t args;
114 int error; 128 int error;
@@ -122,8 +136,8 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
122 * Fill in the arg structure for this request. 136 * Fill in the arg structure for this request.
123 */ 137 */
124 memset((char *)&args, 0, sizeof(args)); 138 memset((char *)&args, 0, sizeof(args));
125 args.name = name; 139 args.name = name->name;
126 args.namelen = namelen; 140 args.namelen = name->len;
127 args.value = value; 141 args.value = value;
128 args.valuelen = *valuelenp; 142 args.valuelen = *valuelenp;
129 args.flags = flags; 143 args.flags = flags;
@@ -162,31 +176,29 @@ xfs_attr_get(
162 const char *name, 176 const char *name,
163 char *value, 177 char *value,
164 int *valuelenp, 178 int *valuelenp,
165 int flags, 179 int flags)
166 cred_t *cred)
167{ 180{
168 int error, namelen; 181 int error;
182 struct xfs_name xname;
169 183
170 XFS_STATS_INC(xs_attr_get); 184 XFS_STATS_INC(xs_attr_get);
171 185
172 if (!name)
173 return(EINVAL);
174 namelen = strlen(name);
175 if (namelen >= MAXNAMELEN)
176 return(EFAULT); /* match IRIX behaviour */
177
178 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
179 return(EIO); 187 return(EIO);
180 188
189 error = xfs_attr_name_to_xname(&xname, name);
190 if (error)
191 return error;
192
181 xfs_ilock(ip, XFS_ILOCK_SHARED); 193 xfs_ilock(ip, XFS_ILOCK_SHARED);
182 error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred); 194 error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
183 xfs_iunlock(ip, XFS_ILOCK_SHARED); 195 xfs_iunlock(ip, XFS_ILOCK_SHARED);
184 return(error); 196 return(error);
185} 197}
186 198
187int 199STATIC int
188xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen, 200xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
189 char *value, int valuelen, int flags) 201 char *value, int valuelen, int flags)
190{ 202{
191 xfs_da_args_t args; 203 xfs_da_args_t args;
192 xfs_fsblock_t firstblock; 204 xfs_fsblock_t firstblock;
@@ -209,7 +221,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
209 */ 221 */
210 if (XFS_IFORK_Q(dp) == 0) { 222 if (XFS_IFORK_Q(dp) == 0) {
211 int sf_size = sizeof(xfs_attr_sf_hdr_t) + 223 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
212 XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen); 224 XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
213 225
214 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) 226 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
215 return(error); 227 return(error);
@@ -219,8 +231,8 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
219 * Fill in the arg structure for this request. 231 * Fill in the arg structure for this request.
220 */ 232 */
221 memset((char *)&args, 0, sizeof(args)); 233 memset((char *)&args, 0, sizeof(args));
222 args.name = name; 234 args.name = name->name;
223 args.namelen = namelen; 235 args.namelen = name->len;
224 args.value = value; 236 args.value = value;
225 args.valuelen = valuelen; 237 args.valuelen = valuelen;
226 args.flags = flags; 238 args.flags = flags;
@@ -236,7 +248,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
236 * Determine space new attribute will use, and if it would be 248 * Determine space new attribute will use, and if it would be
237 * "local" or "remote" (note: local != inline). 249 * "local" or "remote" (note: local != inline).
238 */ 250 */
239 size = xfs_attr_leaf_newentsize(namelen, valuelen, 251 size = xfs_attr_leaf_newentsize(name->len, valuelen,
240 mp->m_sb.sb_blocksize, &local); 252 mp->m_sb.sb_blocksize, &local);
241 253
242 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); 254 nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
@@ -429,26 +441,27 @@ xfs_attr_set(
429 int valuelen, 441 int valuelen,
430 int flags) 442 int flags)
431{ 443{
432 int namelen; 444 int error;
433 445 struct xfs_name xname;
434 namelen = strlen(name);
435 if (namelen >= MAXNAMELEN)
436 return EFAULT; /* match IRIX behaviour */
437 446
438 XFS_STATS_INC(xs_attr_set); 447 XFS_STATS_INC(xs_attr_set);
439 448
440 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 449 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
441 return (EIO); 450 return (EIO);
442 451
443 return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags); 452 error = xfs_attr_name_to_xname(&xname, name);
453 if (error)
454 return error;
455
456 return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
444} 457}
445 458
446/* 459/*
447 * Generic handler routine to remove a name from an attribute list. 460 * Generic handler routine to remove a name from an attribute list.
448 * Transitions attribute list from Btree to shortform as necessary. 461 * Transitions attribute list from Btree to shortform as necessary.
449 */ 462 */
450int 463STATIC int
451xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags) 464xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
452{ 465{
453 xfs_da_args_t args; 466 xfs_da_args_t args;
454 xfs_fsblock_t firstblock; 467 xfs_fsblock_t firstblock;
@@ -460,8 +473,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
460 * Fill in the arg structure for this request. 473 * Fill in the arg structure for this request.
461 */ 474 */
462 memset((char *)&args, 0, sizeof(args)); 475 memset((char *)&args, 0, sizeof(args));
463 args.name = name; 476 args.name = name->name;
464 args.namelen = namelen; 477 args.namelen = name->len;
465 args.flags = flags; 478 args.flags = flags;
466 args.hashval = xfs_da_hashname(args.name, args.namelen); 479 args.hashval = xfs_da_hashname(args.name, args.namelen);
467 args.dp = dp; 480 args.dp = dp;
@@ -575,17 +588,18 @@ xfs_attr_remove(
575 const char *name, 588 const char *name,
576 int flags) 589 int flags)
577{ 590{
578 int namelen; 591 int error;
579 592 struct xfs_name xname;
580 namelen = strlen(name);
581 if (namelen >= MAXNAMELEN)
582 return EFAULT; /* match IRIX behaviour */
583 593
584 XFS_STATS_INC(xs_attr_remove); 594 XFS_STATS_INC(xs_attr_remove);
585 595
586 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 596 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
587 return (EIO); 597 return (EIO);
588 598
599 error = xfs_attr_name_to_xname(&xname, name);
600 if (error)
601 return error;
602
589 xfs_ilock(dp, XFS_ILOCK_SHARED); 603 xfs_ilock(dp, XFS_ILOCK_SHARED);
590 if (XFS_IFORK_Q(dp) == 0 || 604 if (XFS_IFORK_Q(dp) == 0 ||
591 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
@@ -595,10 +609,10 @@ xfs_attr_remove(
595 } 609 }
596 xfs_iunlock(dp, XFS_ILOCK_SHARED); 610 xfs_iunlock(dp, XFS_ILOCK_SHARED);
597 611
598 return xfs_attr_remove_int(dp, name, namelen, flags); 612 return xfs_attr_remove_int(dp, &xname, flags);
599} 613}
600 614
601int /* error */ 615STATIC int
602xfs_attr_list_int(xfs_attr_list_context_t *context) 616xfs_attr_list_int(xfs_attr_list_context_t *context)
603{ 617{
604 int error; 618 int error;
@@ -2522,8 +2536,7 @@ attr_generic_get(
2522{ 2536{
2523 int error, asize = size; 2537 int error, asize = size;
2524 2538
2525 error = xfs_attr_get(xfs_vtoi(vp), name, data, 2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2526 &asize, xflags, NULL);
2527 if (!error) 2540 if (!error)
2528 return asize; 2541 return asize;
2529 return -error; 2542 return -error;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 786eba3121c4..6cfc9384fe35 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -158,14 +158,10 @@ struct xfs_da_args;
158/* 158/*
159 * Overall external interface routines. 159 * Overall external interface routines.
160 */ 160 */
161int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
162int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
163int xfs_attr_list_int(struct xfs_attr_list_context *);
164int xfs_attr_inactive(struct xfs_inode *dp); 161int xfs_attr_inactive(struct xfs_inode *dp);
165 162
166int xfs_attr_shortform_getvalue(struct xfs_da_args *); 163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
167int xfs_attr_fetch(struct xfs_inode *, const char *, int, 164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
168 char *, int *, int, struct cred *);
169int xfs_attr_rmtval_get(struct xfs_da_args *args); 165int xfs_attr_rmtval_get(struct xfs_da_args *args);
170 166
171#endif /* __XFS_ATTR_H__ */ 167#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index eb198c01c35d..53c259f5a5af 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4074,7 +4074,6 @@ xfs_bmap_add_attrfork(
4074error2: 4074error2:
4075 xfs_bmap_cancel(&flist); 4075 xfs_bmap_cancel(&flist);
4076error1: 4076error1:
4077 ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
4078 xfs_iunlock(ip, XFS_ILOCK_EXCL); 4077 xfs_iunlock(ip, XFS_ILOCK_EXCL);
4079error0: 4078error0:
4080 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 4079 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3f53fad356a3..5f3647cb9885 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -162,7 +162,7 @@ xfs_swap_extents(
162 ips[1] = ip; 162 ips[1] = ip;
163 } 163 }
164 164
165 xfs_lock_inodes(ips, 2, 0, lock_flags); 165 xfs_lock_inodes(ips, 2, lock_flags);
166 locked = 1; 166 locked = 1;
167 167
168 /* Verify that both files have the same format */ 168 /* Verify that both files have the same format */
@@ -265,7 +265,7 @@ xfs_swap_extents(
265 locked = 0; 265 locked = 0;
266 goto error0; 266 goto error0;
267 } 267 }
268 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 268 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
269 269
270 /* 270 /*
271 * Count the number of extended attribute blocks 271 * Count the number of extended attribute blocks
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d3a0f538d6a6..381ebda4f7bc 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
462 xfs_mount_t *mp, 462 xfs_mount_t *mp,
463 xfs_fsop_counts_t *cnt) 463 xfs_fsop_counts_t *cnt)
464{ 464{
465 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT); 465 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
466 spin_lock(&mp->m_sb_lock); 466 spin_lock(&mp->m_sb_lock);
467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 467 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
468 cnt->freertx = mp->m_sb.sb_frextents; 468 cnt->freertx = mp->m_sb.sb_frextents;
@@ -524,7 +524,7 @@ xfs_reserve_blocks(
524 */ 524 */
525retry: 525retry:
526 spin_lock(&mp->m_sb_lock); 526 spin_lock(&mp->m_sb_lock);
527 xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED); 527 xfs_icsb_sync_counters_locked(mp, 0);
528 528
529 /* 529 /*
530 * If our previous reservation was larger than the current value, 530 * If our previous reservation was larger than the current value,
@@ -552,11 +552,8 @@ retry:
552 mp->m_resblks += free; 552 mp->m_resblks += free;
553 mp->m_resblks_avail += free; 553 mp->m_resblks_avail += free;
554 fdblks_delta = -free; 554 fdblks_delta = -free;
555 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
556 } else { 555 } else {
557 fdblks_delta = -delta; 556 fdblks_delta = -delta;
558 mp->m_sb.sb_fdblocks =
559 lcounter + XFS_ALLOC_SET_ASIDE(mp);
560 mp->m_resblks = request; 557 mp->m_resblks = request;
561 mp->m_resblks_avail += delta; 558 mp->m_resblks_avail += delta;
562 } 559 }
@@ -587,7 +584,6 @@ out:
587 if (error == ENOSPC) 584 if (error == ENOSPC)
588 goto retry; 585 goto retry;
589 } 586 }
590
591 return 0; 587 return 0;
592} 588}
593 589
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a64dfbd565a5..aad8c5da38af 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -147,6 +147,7 @@ xfs_ialloc_ag_alloc(
147 int version; /* inode version number to use */ 147 int version; /* inode version number to use */
148 int isaligned = 0; /* inode allocation at stripe unit */ 148 int isaligned = 0; /* inode allocation at stripe unit */
149 /* boundary */ 149 /* boundary */
150 unsigned int gen;
150 151
151 args.tp = tp; 152 args.tp = tp;
152 args.mp = tp->t_mountp; 153 args.mp = tp->t_mountp;
@@ -290,6 +291,14 @@ xfs_ialloc_ag_alloc(
290 else 291 else
291 version = XFS_DINODE_VERSION_1; 292 version = XFS_DINODE_VERSION_1;
292 293
294 /*
295 * Seed the new inode cluster with a random generation number. This
296 * prevents short-term reuse of generation numbers if a chunk is
297 * freed and then immediately reallocated. We use random numbers
298 * rather than a linear progression to prevent the next generation
299 * number from being easily guessable.
300 */
301 gen = random32();
293 for (j = 0; j < nbufs; j++) { 302 for (j = 0; j < nbufs; j++) {
294 /* 303 /*
295 * Get the block. 304 * Get the block.
@@ -309,6 +318,7 @@ xfs_ialloc_ag_alloc(
309 free = XFS_MAKE_IPTR(args.mp, fbuf, i); 318 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
310 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 319 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
311 free->di_core.di_version = version; 320 free->di_core.di_version = version;
321 free->di_core.di_gen = cpu_to_be32(gen);
312 free->di_next_unlinked = cpu_to_be32(NULLAGINO); 322 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
313 xfs_ialloc_log_di(tp, fbuf, i, 323 xfs_ialloc_log_di(tp, fbuf, i,
314 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); 324 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e657c5128460..b07604b94d9f 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -593,8 +593,9 @@ xfs_iunlock_map_shared(
593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 593 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
594 */ 594 */
595void 595void
596xfs_ilock(xfs_inode_t *ip, 596xfs_ilock(
597 uint lock_flags) 597 xfs_inode_t *ip,
598 uint lock_flags)
598{ 599{
599 /* 600 /*
600 * You can't set both SHARED and EXCL for the same lock, 601 * You can't set both SHARED and EXCL for the same lock,
@@ -607,16 +608,16 @@ xfs_ilock(xfs_inode_t *ip,
607 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 608 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
608 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 609 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
609 610
610 if (lock_flags & XFS_IOLOCK_EXCL) { 611 if (lock_flags & XFS_IOLOCK_EXCL)
611 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 612 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
612 } else if (lock_flags & XFS_IOLOCK_SHARED) { 613 else if (lock_flags & XFS_IOLOCK_SHARED)
613 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 614 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
614 } 615
615 if (lock_flags & XFS_ILOCK_EXCL) { 616 if (lock_flags & XFS_ILOCK_EXCL)
616 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 617 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
617 } else if (lock_flags & XFS_ILOCK_SHARED) { 618 else if (lock_flags & XFS_ILOCK_SHARED)
618 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 619 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
619 } 620
620 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); 621 xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
621} 622}
622 623
@@ -631,15 +632,12 @@ xfs_ilock(xfs_inode_t *ip,
631 * lock_flags -- this parameter indicates the inode's locks to be 632 * lock_flags -- this parameter indicates the inode's locks to be
632 * to be locked. See the comment for xfs_ilock() for a list 633 * to be locked. See the comment for xfs_ilock() for a list
633 * of valid values. 634 * of valid values.
634 *
635 */ 635 */
636int 636int
637xfs_ilock_nowait(xfs_inode_t *ip, 637xfs_ilock_nowait(
638 uint lock_flags) 638 xfs_inode_t *ip,
639 uint lock_flags)
639{ 640{
640 int iolocked;
641 int ilocked;
642
643 /* 641 /*
644 * You can't set both SHARED and EXCL for the same lock, 642 * You can't set both SHARED and EXCL for the same lock,
645 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED, 643 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
@@ -651,37 +649,30 @@ xfs_ilock_nowait(xfs_inode_t *ip,
651 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 649 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
652 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 650 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
653 651
654 iolocked = 0;
655 if (lock_flags & XFS_IOLOCK_EXCL) { 652 if (lock_flags & XFS_IOLOCK_EXCL) {
656 iolocked = mrtryupdate(&ip->i_iolock); 653 if (!mrtryupdate(&ip->i_iolock))
657 if (!iolocked) { 654 goto out;
658 return 0;
659 }
660 } else if (lock_flags & XFS_IOLOCK_SHARED) { 655 } else if (lock_flags & XFS_IOLOCK_SHARED) {
661 iolocked = mrtryaccess(&ip->i_iolock); 656 if (!mrtryaccess(&ip->i_iolock))
662 if (!iolocked) { 657 goto out;
663 return 0;
664 }
665 } 658 }
666 if (lock_flags & XFS_ILOCK_EXCL) { 659 if (lock_flags & XFS_ILOCK_EXCL) {
667 ilocked = mrtryupdate(&ip->i_lock); 660 if (!mrtryupdate(&ip->i_lock))
668 if (!ilocked) { 661 goto out_undo_iolock;
669 if (iolocked) {
670 mrunlock(&ip->i_iolock);
671 }
672 return 0;
673 }
674 } else if (lock_flags & XFS_ILOCK_SHARED) { 662 } else if (lock_flags & XFS_ILOCK_SHARED) {
675 ilocked = mrtryaccess(&ip->i_lock); 663 if (!mrtryaccess(&ip->i_lock))
676 if (!ilocked) { 664 goto out_undo_iolock;
677 if (iolocked) {
678 mrunlock(&ip->i_iolock);
679 }
680 return 0;
681 }
682 } 665 }
683 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); 666 xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
684 return 1; 667 return 1;
668
669 out_undo_iolock:
670 if (lock_flags & XFS_IOLOCK_EXCL)
671 mrunlock_excl(&ip->i_iolock);
672 else if (lock_flags & XFS_IOLOCK_SHARED)
673 mrunlock_shared(&ip->i_iolock);
674 out:
675 return 0;
685} 676}
686 677
687/* 678/*
@@ -697,8 +688,9 @@ xfs_ilock_nowait(xfs_inode_t *ip,
697 * 688 *
698 */ 689 */
699void 690void
700xfs_iunlock(xfs_inode_t *ip, 691xfs_iunlock(
701 uint lock_flags) 692 xfs_inode_t *ip,
693 uint lock_flags)
702{ 694{
703 /* 695 /*
704 * You can't set both SHARED and EXCL for the same lock, 696 * You can't set both SHARED and EXCL for the same lock,
@@ -713,31 +705,25 @@ xfs_iunlock(xfs_inode_t *ip,
713 XFS_LOCK_DEP_MASK)) == 0); 705 XFS_LOCK_DEP_MASK)) == 0);
714 ASSERT(lock_flags != 0); 706 ASSERT(lock_flags != 0);
715 707
716 if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { 708 if (lock_flags & XFS_IOLOCK_EXCL)
717 ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || 709 mrunlock_excl(&ip->i_iolock);
718 (ismrlocked(&ip->i_iolock, MR_ACCESS))); 710 else if (lock_flags & XFS_IOLOCK_SHARED)
719 ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || 711 mrunlock_shared(&ip->i_iolock);
720 (ismrlocked(&ip->i_iolock, MR_UPDATE)));
721 mrunlock(&ip->i_iolock);
722 }
723 712
724 if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { 713 if (lock_flags & XFS_ILOCK_EXCL)
725 ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || 714 mrunlock_excl(&ip->i_lock);
726 (ismrlocked(&ip->i_lock, MR_ACCESS))); 715 else if (lock_flags & XFS_ILOCK_SHARED)
727 ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || 716 mrunlock_shared(&ip->i_lock);
728 (ismrlocked(&ip->i_lock, MR_UPDATE)));
729 mrunlock(&ip->i_lock);
730 717
718 if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
719 !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
731 /* 720 /*
732 * Let the AIL know that this item has been unlocked in case 721 * Let the AIL know that this item has been unlocked in case
733 * it is in the AIL and anyone is waiting on it. Don't do 722 * it is in the AIL and anyone is waiting on it. Don't do
734 * this if the caller has asked us not to. 723 * this if the caller has asked us not to.
735 */ 724 */
736 if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && 725 xfs_trans_unlocked_item(ip->i_mount,
737 ip->i_itemp != NULL) { 726 (xfs_log_item_t*)(ip->i_itemp));
738 xfs_trans_unlocked_item(ip->i_mount,
739 (xfs_log_item_t*)(ip->i_itemp));
740 }
741 } 727 }
742 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); 728 xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
743} 729}
@@ -747,21 +733,47 @@ xfs_iunlock(xfs_inode_t *ip,
747 * if it is being demoted. 733 * if it is being demoted.
748 */ 734 */
749void 735void
750xfs_ilock_demote(xfs_inode_t *ip, 736xfs_ilock_demote(
751 uint lock_flags) 737 xfs_inode_t *ip,
738 uint lock_flags)
752{ 739{
753 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 740 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
754 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 741 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
755 742
756 if (lock_flags & XFS_ILOCK_EXCL) { 743 if (lock_flags & XFS_ILOCK_EXCL)
757 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
758 mrdemote(&ip->i_lock); 744 mrdemote(&ip->i_lock);
759 } 745 if (lock_flags & XFS_IOLOCK_EXCL)
760 if (lock_flags & XFS_IOLOCK_EXCL) {
761 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
762 mrdemote(&ip->i_iolock); 746 mrdemote(&ip->i_iolock);
747}
748
749#ifdef DEBUG
750/*
751 * Debug-only routine, without additional rw_semaphore APIs, we can
752 * now only answer requests regarding whether we hold the lock for write
753 * (reader state is outside our visibility, we only track writer state).
754 *
755 * Note: this means !xfs_isilocked would give false positives, so don't do that.
756 */
757int
758xfs_isilocked(
759 xfs_inode_t *ip,
760 uint lock_flags)
761{
762 if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
763 XFS_ILOCK_EXCL) {
764 if (!ip->i_lock.mr_writer)
765 return 0;
763 } 766 }
767
768 if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
769 XFS_IOLOCK_EXCL) {
770 if (!ip->i_iolock.mr_writer)
771 return 0;
772 }
773
774 return 1;
764} 775}
776#endif
765 777
766/* 778/*
767 * The following three routines simply manage the i_flock 779 * The following three routines simply manage the i_flock
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ca12acb90394..cf0bb9c1d621 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1291,7 +1291,7 @@ xfs_file_last_byte(
1291 xfs_fileoff_t size_last_block; 1291 xfs_fileoff_t size_last_block;
1292 int error; 1292 int error;
1293 1293
1294 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS)); 1294 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1295 1295
1296 mp = ip->i_mount; 1296 mp = ip->i_mount;
1297 /* 1297 /*
@@ -1402,7 +1402,7 @@ xfs_itruncate_start(
1402 bhv_vnode_t *vp; 1402 bhv_vnode_t *vp;
1403 int error = 0; 1403 int error = 0;
1404 1404
1405 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1405 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1406 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1406 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) || 1407 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1408 (flags == XFS_ITRUNC_MAYBE)); 1408 (flags == XFS_ITRUNC_MAYBE));
@@ -1528,8 +1528,7 @@ xfs_itruncate_finish(
1528 xfs_bmap_free_t free_list; 1528 xfs_bmap_free_t free_list;
1529 int error; 1529 int error;
1530 1530
1531 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0); 1531 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1532 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
1533 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1532 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1534 ASSERT(*tp != NULL); 1533 ASSERT(*tp != NULL);
1535 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 1534 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1780,8 +1779,7 @@ xfs_igrow_start(
1780 xfs_fsize_t new_size, 1779 xfs_fsize_t new_size,
1781 cred_t *credp) 1780 cred_t *credp)
1782{ 1781{
1783 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1784 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1785 ASSERT(new_size > ip->i_size); 1783 ASSERT(new_size > ip->i_size);
1786 1784
1787 /* 1785 /*
@@ -1809,8 +1807,7 @@ xfs_igrow_finish(
1809 xfs_fsize_t new_size, 1807 xfs_fsize_t new_size,
1810 int change_flag) 1808 int change_flag)
1811{ 1809{
1812 ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0); 1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1813 ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
1814 ASSERT(ip->i_transp == tp); 1811 ASSERT(ip->i_transp == tp);
1815 ASSERT(new_size > ip->i_size); 1812 ASSERT(new_size > ip->i_size);
1816 1813
@@ -2287,7 +2284,7 @@ xfs_ifree(
2287 xfs_dinode_t *dip; 2284 xfs_dinode_t *dip;
2288 xfs_buf_t *ibp; 2285 xfs_buf_t *ibp;
2289 2286
2290 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2287 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2291 ASSERT(ip->i_transp == tp); 2288 ASSERT(ip->i_transp == tp);
2292 ASSERT(ip->i_d.di_nlink == 0); 2289 ASSERT(ip->i_d.di_nlink == 0);
2293 ASSERT(ip->i_d.di_nextents == 0); 2290 ASSERT(ip->i_d.di_nextents == 0);
@@ -2746,7 +2743,7 @@ void
2746xfs_ipin( 2743xfs_ipin(
2747 xfs_inode_t *ip) 2744 xfs_inode_t *ip)
2748{ 2745{
2749 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 2746 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2750 2747
2751 atomic_inc(&ip->i_pincount); 2748 atomic_inc(&ip->i_pincount);
2752} 2749}
@@ -2779,7 +2776,7 @@ __xfs_iunpin_wait(
2779{ 2776{
2780 xfs_inode_log_item_t *iip = ip->i_itemp; 2777 xfs_inode_log_item_t *iip = ip->i_itemp;
2781 2778
2782 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); 2779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2783 if (atomic_read(&ip->i_pincount) == 0) 2780 if (atomic_read(&ip->i_pincount) == 0)
2784 return; 2781 return;
2785 2782
@@ -2829,7 +2826,7 @@ xfs_iextents_copy(
2829 xfs_fsblock_t start_block; 2826 xfs_fsblock_t start_block;
2830 2827
2831 ifp = XFS_IFORK_PTR(ip, whichfork); 2828 ifp = XFS_IFORK_PTR(ip, whichfork);
2832 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 2829 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2833 ASSERT(ifp->if_bytes > 0); 2830 ASSERT(ifp->if_bytes > 0);
2834 2831
2835 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); 2832 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3132,7 +3129,7 @@ xfs_iflush(
3132 3129
3133 XFS_STATS_INC(xs_iflush_count); 3130 XFS_STATS_INC(xs_iflush_count);
3134 3131
3135 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3132 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3136 ASSERT(issemalocked(&(ip->i_flock))); 3133 ASSERT(issemalocked(&(ip->i_flock)));
3137 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3134 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3138 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3135 ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3297,7 +3294,7 @@ xfs_iflush_int(
3297 int first; 3294 int first;
3298#endif 3295#endif
3299 3296
3300 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS)); 3297 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3301 ASSERT(issemalocked(&(ip->i_flock))); 3298 ASSERT(issemalocked(&(ip->i_flock)));
3302 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 3299 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
3303 ip->i_d.di_nextents > ip->i_df.if_ext_max); 3300 ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 93c37697a72c..0a999fee4f03 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -386,20 +386,9 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
386#define XFS_ILOCK_EXCL (1<<2) 386#define XFS_ILOCK_EXCL (1<<2)
387#define XFS_ILOCK_SHARED (1<<3) 387#define XFS_ILOCK_SHARED (1<<3)
388#define XFS_IUNLOCK_NONOTIFY (1<<4) 388#define XFS_IUNLOCK_NONOTIFY (1<<4)
389/* #define XFS_IOLOCK_NESTED (1<<5) */
390#define XFS_EXTENT_TOKEN_RD (1<<6)
391#define XFS_SIZE_TOKEN_RD (1<<7)
392#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
393#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */
394#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
395#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
396#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
397/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */
398 389
399#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 390#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
400 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ 391 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
401 | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
402 | XFS_WILLLEND)
403 392
404/* 393/*
405 * Flags for lockdep annotations. 394 * Flags for lockdep annotations.
@@ -483,6 +472,7 @@ void xfs_ilock(xfs_inode_t *, uint);
483int xfs_ilock_nowait(xfs_inode_t *, uint); 472int xfs_ilock_nowait(xfs_inode_t *, uint);
484void xfs_iunlock(xfs_inode_t *, uint); 473void xfs_iunlock(xfs_inode_t *, uint);
485void xfs_ilock_demote(xfs_inode_t *, uint); 474void xfs_ilock_demote(xfs_inode_t *, uint);
475int xfs_isilocked(xfs_inode_t *, uint);
486void xfs_iflock(xfs_inode_t *); 476void xfs_iflock(xfs_inode_t *);
487int xfs_iflock_nowait(xfs_inode_t *); 477int xfs_iflock_nowait(xfs_inode_t *);
488uint xfs_ilock_map_shared(xfs_inode_t *); 478uint xfs_ilock_map_shared(xfs_inode_t *);
@@ -534,7 +524,7 @@ int xfs_iflush(xfs_inode_t *, uint);
534void xfs_iflush_all(struct xfs_mount *); 524void xfs_iflush_all(struct xfs_mount *);
535void xfs_ichgtime(xfs_inode_t *, int); 525void xfs_ichgtime(xfs_inode_t *, int);
536xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); 526xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
537void xfs_lock_inodes(xfs_inode_t **, int, int, uint); 527void xfs_lock_inodes(xfs_inode_t **, int, uint);
538 528
539void xfs_synchronize_atime(xfs_inode_t *); 529void xfs_synchronize_atime(xfs_inode_t *);
540void xfs_mark_inode_dirty_sync(xfs_inode_t *); 530void xfs_mark_inode_dirty_sync(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 93b5db453ea2..167b33f15772 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -547,7 +547,7 @@ STATIC void
547xfs_inode_item_pin( 547xfs_inode_item_pin(
548 xfs_inode_log_item_t *iip) 548 xfs_inode_log_item_t *iip)
549{ 549{
550 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 550 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
551 xfs_ipin(iip->ili_inode); 551 xfs_ipin(iip->ili_inode);
552} 552}
553 553
@@ -664,13 +664,13 @@ xfs_inode_item_unlock(
664 664
665 ASSERT(iip != NULL); 665 ASSERT(iip != NULL);
666 ASSERT(iip->ili_inode->i_itemp != NULL); 666 ASSERT(iip->ili_inode->i_itemp != NULL);
667 ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE)); 667 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 668 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
669 XFS_ILI_IOLOCKED_EXCL)) || 669 XFS_ILI_IOLOCKED_EXCL)) ||
670 ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE)); 670 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags & 671 ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
672 XFS_ILI_IOLOCKED_SHARED)) || 672 XFS_ILI_IOLOCKED_SHARED)) ||
673 ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS)); 673 xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
674 /* 674 /*
675 * Clear the transaction pointer in the inode. 675 * Clear the transaction pointer in the inode.
676 */ 676 */
@@ -769,7 +769,7 @@ xfs_inode_item_pushbuf(
769 769
770 ip = iip->ili_inode; 770 ip = iip->ili_inode;
771 771
772 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 772 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
773 773
774 /* 774 /*
775 * The ili_pushbuf_flag keeps others from 775 * The ili_pushbuf_flag keeps others from
@@ -857,7 +857,7 @@ xfs_inode_item_push(
857 857
858 ip = iip->ili_inode; 858 ip = iip->ili_inode;
859 859
860 ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS)); 860 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
861 ASSERT(issemalocked(&(ip->i_flock))); 861 ASSERT(issemalocked(&(ip->i_flock)));
862 /* 862 /*
863 * Since we were able to lock the inode's flush lock and 863 * Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb3cf1191419..7edcde691d1a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -196,14 +196,14 @@ xfs_iomap(
196 break; 196 break;
197 case BMAPI_WRITE: 197 case BMAPI_WRITE:
198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count); 198 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
199 lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; 199 lockmode = XFS_ILOCK_EXCL;
200 if (flags & BMAPI_IGNSTATE) 200 if (flags & BMAPI_IGNSTATE)
201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE; 201 bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
202 xfs_ilock(ip, lockmode); 202 xfs_ilock(ip, lockmode);
203 break; 203 break;
204 case BMAPI_ALLOCATE: 204 case BMAPI_ALLOCATE:
205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count); 205 xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
206 lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; 206 lockmode = XFS_ILOCK_SHARED;
207 bmapi_flags = XFS_BMAPI_ENTIRE; 207 bmapi_flags = XFS_BMAPI_ENTIRE;
208 208
209 /* Attempt non-blocking lock */ 209 /* Attempt non-blocking lock */
@@ -523,8 +523,7 @@ xfs_iomap_write_direct(
523 goto error_out; 523 goto error_out;
524 } 524 }
525 525
526 if (unlikely(!imap.br_startblock && 526 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
527 !(XFS_IS_REALTIME_INODE(ip)))) {
528 error = xfs_cmn_err_fsblock_zero(ip, &imap); 527 error = xfs_cmn_err_fsblock_zero(ip, &imap);
529 goto error_out; 528 goto error_out;
530 } 529 }
@@ -624,7 +623,7 @@ xfs_iomap_write_delay(
624 int prealloc, fsynced = 0; 623 int prealloc, fsynced = 0;
625 int error; 624 int error;
626 625
627 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); 626 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
628 627
629 /* 628 /*
630 * Make sure that the dquots are there. This doesn't hold 629 * Make sure that the dquots are there. This doesn't hold
@@ -686,8 +685,7 @@ retry:
686 goto retry; 685 goto retry;
687 } 686 }
688 687
689 if (unlikely(!imap[0].br_startblock && 688 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
690 !(XFS_IS_REALTIME_INODE(ip))))
691 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 689 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
692 690
693 *ret_imap = imap[0]; 691 *ret_imap = imap[0];
@@ -838,9 +836,9 @@ xfs_iomap_write_allocate(
838 * See if we were able to allocate an extent that 836 * See if we were able to allocate an extent that
839 * covers at least part of the callers request 837 * covers at least part of the callers request
840 */ 838 */
841 if (unlikely(!imap.br_startblock && 839 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
842 XFS_IS_REALTIME_INODE(ip)))
843 return xfs_cmn_err_fsblock_zero(ip, &imap); 840 return xfs_cmn_err_fsblock_zero(ip, &imap);
841
844 if ((offset_fsb >= imap.br_startoff) && 842 if ((offset_fsb >= imap.br_startoff) &&
845 (offset_fsb < (imap.br_startoff + 843 (offset_fsb < (imap.br_startoff +
846 imap.br_blockcount))) { 844 imap.br_blockcount))) {
@@ -934,8 +932,7 @@ xfs_iomap_write_unwritten(
934 if (error) 932 if (error)
935 return XFS_ERROR(error); 933 return XFS_ERROR(error);
936 934
937 if (unlikely(!imap.br_startblock && 935 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
938 !(XFS_IS_REALTIME_INODE(ip))))
939 return xfs_cmn_err_fsblock_zero(ip, &imap); 936 return xfs_cmn_err_fsblock_zero(ip, &imap);
940 937
941 if ((numblks_fsb = imap.br_blockcount) == 0) { 938 if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eb85bdedad0c..419de15aeb43 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -71,11 +71,6 @@ xfs_bulkstat_one_iget(
71 71
72 ASSERT(ip != NULL); 72 ASSERT(ip != NULL);
73 ASSERT(ip->i_blkno != (xfs_daddr_t)0); 73 ASSERT(ip->i_blkno != (xfs_daddr_t)0);
74 if (ip->i_d.di_mode == 0) {
75 *stat = BULKSTAT_RV_NOTHING;
76 error = XFS_ERROR(ENOENT);
77 goto out_iput;
78 }
79 74
80 vp = XFS_ITOV(ip); 75 vp = XFS_ITOV(ip);
81 dic = &ip->i_d; 76 dic = &ip->i_d;
@@ -124,7 +119,6 @@ xfs_bulkstat_one_iget(
124 break; 119 break;
125 } 120 }
126 121
127 out_iput:
128 xfs_iput(ip, XFS_ILOCK_SHARED); 122 xfs_iput(ip, XFS_ILOCK_SHARED);
129 return error; 123 return error;
130} 124}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2fec452afbcc..da3988453b71 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -54,8 +54,9 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
54#ifdef HAVE_PERCPU_SB 54#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int, int); 57 int);
58STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
59 int);
59STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 60STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
60 int64_t, int); 61 int64_t, int);
61STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 62STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
@@ -63,8 +64,8 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63#else 64#else
64 65
65#define xfs_icsb_destroy_counters(mp) do { } while (0) 66#define xfs_icsb_destroy_counters(mp) do { } while (0)
66#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0) 67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
67#define xfs_icsb_sync_counters(mp) do { } while (0) 68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
68#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
69 70
70#endif 71#endif
@@ -1400,7 +1401,7 @@ xfs_log_sbcount(
1400 if (!xfs_fs_writable(mp)) 1401 if (!xfs_fs_writable(mp))
1401 return 0; 1402 return 0;
1402 1403
1403 xfs_icsb_sync_counters(mp); 1404 xfs_icsb_sync_counters(mp, 0);
1404 1405
1405 /* 1406 /*
1406 * we don't need to do this if we are updating the superblock 1407 * we don't need to do this if we are updating the superblock
@@ -2026,9 +2027,9 @@ xfs_icsb_cpu_notify(
2026 case CPU_ONLINE: 2027 case CPU_ONLINE:
2027 case CPU_ONLINE_FROZEN: 2028 case CPU_ONLINE_FROZEN:
2028 xfs_icsb_lock(mp); 2029 xfs_icsb_lock(mp);
2029 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2030 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2030 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2031 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2031 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2032 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2032 xfs_icsb_unlock(mp); 2033 xfs_icsb_unlock(mp);
2033 break; 2034 break;
2034 case CPU_DEAD: 2035 case CPU_DEAD:
@@ -2048,12 +2049,9 @@ xfs_icsb_cpu_notify(
2048 2049
2049 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 2050 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
2050 2051
2051 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 2052 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
2052 XFS_ICSB_SB_LOCKED, 0); 2053 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
2053 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 2054 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
2054 XFS_ICSB_SB_LOCKED, 0);
2055 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
2056 XFS_ICSB_SB_LOCKED, 0);
2057 spin_unlock(&mp->m_sb_lock); 2055 spin_unlock(&mp->m_sb_lock);
2058 xfs_icsb_unlock(mp); 2056 xfs_icsb_unlock(mp);
2059 break; 2057 break;
@@ -2105,9 +2103,9 @@ xfs_icsb_reinit_counters(
2105 * initial balance kicks us off correctly 2103 * initial balance kicks us off correctly
2106 */ 2104 */
2107 mp->m_icsb_counters = -1; 2105 mp->m_icsb_counters = -1;
2108 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2106 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
2109 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2107 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
2110 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2108 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
2111 xfs_icsb_unlock(mp); 2109 xfs_icsb_unlock(mp);
2112} 2110}
2113 2111
@@ -2223,7 +2221,7 @@ xfs_icsb_disable_counter(
2223 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 2221 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
2224 /* drain back to superblock */ 2222 /* drain back to superblock */
2225 2223
2226 xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 2224 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
2227 switch(field) { 2225 switch(field) {
2228 case XFS_SBS_ICOUNT: 2226 case XFS_SBS_ICOUNT:
2229 mp->m_sb.sb_icount = cnt.icsb_icount; 2227 mp->m_sb.sb_icount = cnt.icsb_icount;
@@ -2278,38 +2276,33 @@ xfs_icsb_enable_counter(
2278} 2276}
2279 2277
2280void 2278void
2281xfs_icsb_sync_counters_flags( 2279xfs_icsb_sync_counters_locked(
2282 xfs_mount_t *mp, 2280 xfs_mount_t *mp,
2283 int flags) 2281 int flags)
2284{ 2282{
2285 xfs_icsb_cnts_t cnt; 2283 xfs_icsb_cnts_t cnt;
2286 2284
2287 /* Pass 1: lock all counters */
2288 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2289 spin_lock(&mp->m_sb_lock);
2290
2291 xfs_icsb_count(mp, &cnt, flags); 2285 xfs_icsb_count(mp, &cnt, flags);
2292 2286
2293 /* Step 3: update mp->m_sb fields */
2294 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 2287 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
2295 mp->m_sb.sb_icount = cnt.icsb_icount; 2288 mp->m_sb.sb_icount = cnt.icsb_icount;
2296 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2289 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
2297 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2290 mp->m_sb.sb_ifree = cnt.icsb_ifree;
2298 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2291 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
2299 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2292 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
2300
2301 if ((flags & XFS_ICSB_SB_LOCKED) == 0)
2302 spin_unlock(&mp->m_sb_lock);
2303} 2293}
2304 2294
2305/* 2295/*
2306 * Accurate update of per-cpu counters to incore superblock 2296 * Accurate update of per-cpu counters to incore superblock
2307 */ 2297 */
2308STATIC void 2298void
2309xfs_icsb_sync_counters( 2299xfs_icsb_sync_counters(
2310 xfs_mount_t *mp) 2300 xfs_mount_t *mp,
2301 int flags)
2311{ 2302{
2312 xfs_icsb_sync_counters_flags(mp, 0); 2303 spin_lock(&mp->m_sb_lock);
2304 xfs_icsb_sync_counters_locked(mp, flags);
2305 spin_unlock(&mp->m_sb_lock);
2313} 2306}
2314 2307
2315/* 2308/*
@@ -2332,19 +2325,15 @@ xfs_icsb_sync_counters(
2332#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ 2325#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2333 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) 2326 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
2334STATIC void 2327STATIC void
2335xfs_icsb_balance_counter( 2328xfs_icsb_balance_counter_locked(
2336 xfs_mount_t *mp, 2329 xfs_mount_t *mp,
2337 xfs_sb_field_t field, 2330 xfs_sb_field_t field,
2338 int flags,
2339 int min_per_cpu) 2331 int min_per_cpu)
2340{ 2332{
2341 uint64_t count, resid; 2333 uint64_t count, resid;
2342 int weight = num_online_cpus(); 2334 int weight = num_online_cpus();
2343 uint64_t min = (uint64_t)min_per_cpu; 2335 uint64_t min = (uint64_t)min_per_cpu;
2344 2336
2345 if (!(flags & XFS_ICSB_SB_LOCKED))
2346 spin_lock(&mp->m_sb_lock);
2347
2348 /* disable counter and sync counter */ 2337 /* disable counter and sync counter */
2349 xfs_icsb_disable_counter(mp, field); 2338 xfs_icsb_disable_counter(mp, field);
2350 2339
@@ -2354,19 +2343,19 @@ xfs_icsb_balance_counter(
2354 count = mp->m_sb.sb_icount; 2343 count = mp->m_sb.sb_icount;
2355 resid = do_div(count, weight); 2344 resid = do_div(count, weight);
2356 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2345 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2357 goto out; 2346 return;
2358 break; 2347 break;
2359 case XFS_SBS_IFREE: 2348 case XFS_SBS_IFREE:
2360 count = mp->m_sb.sb_ifree; 2349 count = mp->m_sb.sb_ifree;
2361 resid = do_div(count, weight); 2350 resid = do_div(count, weight);
2362 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2351 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
2363 goto out; 2352 return;
2364 break; 2353 break;
2365 case XFS_SBS_FDBLOCKS: 2354 case XFS_SBS_FDBLOCKS:
2366 count = mp->m_sb.sb_fdblocks; 2355 count = mp->m_sb.sb_fdblocks;
2367 resid = do_div(count, weight); 2356 resid = do_div(count, weight);
2368 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) 2357 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
2369 goto out; 2358 return;
2370 break; 2359 break;
2371 default: 2360 default:
2372 BUG(); 2361 BUG();
@@ -2375,9 +2364,17 @@ xfs_icsb_balance_counter(
2375 } 2364 }
2376 2365
2377 xfs_icsb_enable_counter(mp, field, count, resid); 2366 xfs_icsb_enable_counter(mp, field, count, resid);
2378out: 2367}
2379 if (!(flags & XFS_ICSB_SB_LOCKED)) 2368
2380 spin_unlock(&mp->m_sb_lock); 2369STATIC void
2370xfs_icsb_balance_counter(
2371 xfs_mount_t *mp,
2372 xfs_sb_field_t fields,
2373 int min_per_cpu)
2374{
2375 spin_lock(&mp->m_sb_lock);
2376 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
2377 spin_unlock(&mp->m_sb_lock);
2381} 2378}
2382 2379
2383STATIC int 2380STATIC int
@@ -2484,7 +2481,7 @@ slow_path:
2484 * we are done. 2481 * we are done.
2485 */ 2482 */
2486 if (ret != ENOSPC) 2483 if (ret != ENOSPC)
2487 xfs_icsb_balance_counter(mp, field, 0, 0); 2484 xfs_icsb_balance_counter(mp, field, 0);
2488 xfs_icsb_unlock(mp); 2485 xfs_icsb_unlock(mp);
2489 return ret; 2486 return ret;
2490 2487
@@ -2508,7 +2505,7 @@ balance_counter:
2508 * will either succeed through the fast path or slow path without 2505 * will either succeed through the fast path or slow path without
2509 * another balance operation being required. 2506 * another balance operation being required.
2510 */ 2507 */
2511 xfs_icsb_balance_counter(mp, field, 0, delta); 2508 xfs_icsb_balance_counter(mp, field, delta);
2512 xfs_icsb_unlock(mp); 2509 xfs_icsb_unlock(mp);
2513 goto again; 2510 goto again;
2514} 2511}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ed575110ff0..63e0693a358a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -206,17 +206,18 @@ typedef struct xfs_icsb_cnts {
206 206
207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ 207#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
208 208
209#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */
210#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ 209#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
211 210
212extern int xfs_icsb_init_counters(struct xfs_mount *); 211extern int xfs_icsb_init_counters(struct xfs_mount *);
213extern void xfs_icsb_reinit_counters(struct xfs_mount *); 212extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); 213extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 215
216#else 216#else
217#define xfs_icsb_init_counters(mp) (0) 217#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 218#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0) 219#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
220#endif 221#endif
221 222
222typedef struct xfs_ail { 223typedef struct xfs_ail {
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index ee371890d85d..d8063e1ad298 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -55,85 +55,32 @@ xfs_rename_unlock4(
55 55
56 xfs_iunlock(i_tab[0], lock_mode); 56 xfs_iunlock(i_tab[0], lock_mode);
57 for (i = 1; i < 4; i++) { 57 for (i = 1; i < 4; i++) {
58 if (i_tab[i] == NULL) { 58 if (i_tab[i] == NULL)
59 break; 59 break;
60 } 60
61 /* 61 /*
62 * Watch out for duplicate entries in the table. 62 * Watch out for duplicate entries in the table.
63 */ 63 */
64 if (i_tab[i] != i_tab[i-1]) { 64 if (i_tab[i] != i_tab[i-1])
65 xfs_iunlock(i_tab[i], lock_mode); 65 xfs_iunlock(i_tab[i], lock_mode);
66 }
67 } 66 }
68} 67}
69 68
70#ifdef DEBUG
71int xfs_rename_skip, xfs_rename_nskip;
72#endif
73
74/* 69/*
75 * The following routine will acquire the locks required for a rename 70 * Enter all inodes for a rename transaction into a sorted array.
76 * operation. The code understands the semantics of renames and will
77 * validate that name1 exists under dp1 & that name2 may or may not
78 * exist under dp2.
79 *
80 * We are renaming dp1/name1 to dp2/name2.
81 *
82 * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success.
83 */ 71 */
84STATIC int 72STATIC void
85xfs_lock_for_rename( 73xfs_sort_for_rename(
86 xfs_inode_t *dp1, /* in: old (source) directory inode */ 74 xfs_inode_t *dp1, /* in: old (source) directory inode */
87 xfs_inode_t *dp2, /* in: new (target) directory inode */ 75 xfs_inode_t *dp2, /* in: new (target) directory inode */
88 xfs_inode_t *ip1, /* in: inode of old entry */ 76 xfs_inode_t *ip1, /* in: inode of old entry */
89 struct xfs_name *name2, /* in: new entry name */ 77 xfs_inode_t *ip2, /* in: inode of new entry, if it
90 xfs_inode_t **ipp2, /* out: inode of new entry, if it
91 already exists, NULL otherwise. */ 78 already exists, NULL otherwise. */
92 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 79 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
93 int *num_inodes) /* out: number of inodes in array */ 80 int *num_inodes) /* out: number of inodes in array */
94{ 81{
95 xfs_inode_t *ip2 = NULL;
96 xfs_inode_t *temp; 82 xfs_inode_t *temp;
97 xfs_ino_t inum1, inum2;
98 int error;
99 int i, j; 83 int i, j;
100 uint lock_mode;
101 int diff_dirs = (dp1 != dp2);
102
103 /*
104 * First, find out the current inums of the entries so that we
105 * can determine the initial locking order. We'll have to
106 * sanity check stuff after all the locks have been acquired
107 * to see if we still have the right inodes, directories, etc.
108 */
109 lock_mode = xfs_ilock_map_shared(dp1);
110 IHOLD(ip1);
111 xfs_itrace_ref(ip1);
112
113 inum1 = ip1->i_ino;
114
115 /*
116 * Unlock dp1 and lock dp2 if they are different.
117 */
118 if (diff_dirs) {
119 xfs_iunlock_map_shared(dp1, lock_mode);
120 lock_mode = xfs_ilock_map_shared(dp2);
121 }
122
123 error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
124 if (error == ENOENT) { /* target does not need to exist. */
125 inum2 = 0;
126 } else if (error) {
127 /*
128 * If dp2 and dp1 are the same, the next line unlocks dp1.
129 * Got it?
130 */
131 xfs_iunlock_map_shared(dp2, lock_mode);
132 IRELE (ip1);
133 return error;
134 } else {
135 xfs_itrace_ref(ip2);
136 }
137 84
138 /* 85 /*
139 * i_tab contains a list of pointers to inodes. We initialize 86 * i_tab contains a list of pointers to inodes. We initialize
@@ -145,21 +92,20 @@ xfs_lock_for_rename(
145 i_tab[0] = dp1; 92 i_tab[0] = dp1;
146 i_tab[1] = dp2; 93 i_tab[1] = dp2;
147 i_tab[2] = ip1; 94 i_tab[2] = ip1;
148 if (inum2 == 0) { 95 if (ip2) {
149 *num_inodes = 3;
150 i_tab[3] = NULL;
151 } else {
152 *num_inodes = 4; 96 *num_inodes = 4;
153 i_tab[3] = ip2; 97 i_tab[3] = ip2;
98 } else {
99 *num_inodes = 3;
100 i_tab[3] = NULL;
154 } 101 }
155 *ipp2 = i_tab[3];
156 102
157 /* 103 /*
158 * Sort the elements via bubble sort. (Remember, there are at 104 * Sort the elements via bubble sort. (Remember, there are at
159 * most 4 elements to sort, so this is adequate.) 105 * most 4 elements to sort, so this is adequate.)
160 */ 106 */
161 for (i=0; i < *num_inodes; i++) { 107 for (i = 0; i < *num_inodes; i++) {
162 for (j=1; j < *num_inodes; j++) { 108 for (j = 1; j < *num_inodes; j++) {
163 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 109 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
164 temp = i_tab[j]; 110 temp = i_tab[j];
165 i_tab[j] = i_tab[j-1]; 111 i_tab[j] = i_tab[j-1];
@@ -167,30 +113,6 @@ xfs_lock_for_rename(
167 } 113 }
168 } 114 }
169 } 115 }
170
171 /*
172 * We have dp2 locked. If it isn't first, unlock it.
173 * If it is first, tell xfs_lock_inodes so it can skip it
174 * when locking. if dp1 == dp2, xfs_lock_inodes will skip both
175 * since they are equal. xfs_lock_inodes needs all these inodes
176 * so that it can unlock and retry if there might be a dead-lock
177 * potential with the log.
178 */
179
180 if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) {
181#ifdef DEBUG
182 xfs_rename_skip++;
183#endif
184 xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED);
185 } else {
186#ifdef DEBUG
187 xfs_rename_nskip++;
188#endif
189 xfs_iunlock_map_shared(dp2, lock_mode);
190 xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
191 }
192
193 return 0;
194} 116}
195 117
196/* 118/*
@@ -202,10 +124,10 @@ xfs_rename(
202 struct xfs_name *src_name, 124 struct xfs_name *src_name,
203 xfs_inode_t *src_ip, 125 xfs_inode_t *src_ip,
204 xfs_inode_t *target_dp, 126 xfs_inode_t *target_dp,
205 struct xfs_name *target_name) 127 struct xfs_name *target_name,
128 xfs_inode_t *target_ip)
206{ 129{
207 xfs_trans_t *tp; 130 xfs_trans_t *tp = NULL;
208 xfs_inode_t *target_ip;
209 xfs_mount_t *mp = src_dp->i_mount; 131 xfs_mount_t *mp = src_dp->i_mount;
210 int new_parent; /* moving to a new dir */ 132 int new_parent; /* moving to a new dir */
211 int src_is_directory; /* src_name is a directory */ 133 int src_is_directory; /* src_name is a directory */
@@ -215,9 +137,7 @@ xfs_rename(
215 int cancel_flags; 137 int cancel_flags;
216 int committed; 138 int committed;
217 xfs_inode_t *inodes[4]; 139 xfs_inode_t *inodes[4];
218 int target_ip_dropped = 0; /* dropped target_ip link? */
219 int spaceres; 140 int spaceres;
220 int target_link_zero = 0;
221 int num_inodes; 141 int num_inodes;
222 142
223 xfs_itrace_entry(src_dp); 143 xfs_itrace_entry(src_dp);
@@ -230,64 +150,27 @@ xfs_rename(
230 target_dp, DM_RIGHT_NULL, 150 target_dp, DM_RIGHT_NULL,
231 src_name->name, target_name->name, 151 src_name->name, target_name->name,
232 0, 0, 0); 152 0, 0, 0);
233 if (error) { 153 if (error)
234 return error; 154 return error;
235 }
236 } 155 }
237 /* Return through std_return after this point. */ 156 /* Return through std_return after this point. */
238 157
239 /* 158 new_parent = (src_dp != target_dp);
240 * Lock all the participating inodes. Depending upon whether 159 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
241 * the target_name exists in the target directory, and
242 * whether the target directory is the same as the source
243 * directory, we can lock from 2 to 4 inodes.
244 * xfs_lock_for_rename() will return ENOENT if src_name
245 * does not exist in the source directory.
246 */
247 tp = NULL;
248 error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
249 &target_ip, inodes, &num_inodes);
250 if (error) {
251 /*
252 * We have nothing locked, no inode references, and
253 * no transaction, so just get out.
254 */
255 goto std_return;
256 }
257
258 ASSERT(src_ip != NULL);
259 160
260 if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 161 if (src_is_directory) {
261 /* 162 /*
262 * Check for link count overflow on target_dp 163 * Check for link count overflow on target_dp
263 */ 164 */
264 if (target_ip == NULL && (src_dp != target_dp) && 165 if (target_ip == NULL && new_parent &&
265 target_dp->i_d.di_nlink >= XFS_MAXLINK) { 166 target_dp->i_d.di_nlink >= XFS_MAXLINK) {
266 error = XFS_ERROR(EMLINK); 167 error = XFS_ERROR(EMLINK);
267 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); 168 goto std_return;
268 goto rele_return;
269 } 169 }
270 } 170 }
271 171
272 /* 172 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
273 * If we are using project inheritance, we only allow renames 173 inodes, &num_inodes);
274 * into our tree when the project IDs are the same; else the
275 * tree quota mechanism would be circumvented.
276 */
277 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
278 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
279 error = XFS_ERROR(EXDEV);
280 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
281 goto rele_return;
282 }
283
284 new_parent = (src_dp != target_dp);
285 src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
286
287 /*
288 * Drop the locks on our inodes so that we can start the transaction.
289 */
290 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
291 174
292 XFS_BMAP_INIT(&free_list, &first_block); 175 XFS_BMAP_INIT(&free_list, &first_block);
293 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 176 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
@@ -302,7 +185,7 @@ xfs_rename(
302 } 185 }
303 if (error) { 186 if (error) {
304 xfs_trans_cancel(tp, 0); 187 xfs_trans_cancel(tp, 0);
305 goto rele_return; 188 goto std_return;
306 } 189 }
307 190
308 /* 191 /*
@@ -310,13 +193,29 @@ xfs_rename(
310 */ 193 */
311 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { 194 if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
312 xfs_trans_cancel(tp, cancel_flags); 195 xfs_trans_cancel(tp, cancel_flags);
313 goto rele_return; 196 goto std_return;
314 } 197 }
315 198
316 /* 199 /*
317 * Reacquire the inode locks we dropped above. 200 * Lock all the participating inodes. Depending upon whether
201 * the target_name exists in the target directory, and
202 * whether the target directory is the same as the source
203 * directory, we can lock from 2 to 4 inodes.
204 */
205 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
206
207 /*
208 * If we are using project inheritance, we only allow renames
209 * into our tree when the project IDs are the same; else the
210 * tree quota mechanism would be circumvented.
318 */ 211 */
319 xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); 212 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
213 (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
214 error = XFS_ERROR(EXDEV);
215 xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
216 xfs_trans_cancel(tp, cancel_flags);
217 goto std_return;
218 }
320 219
321 /* 220 /*
322 * Join all the inodes to the transaction. From this point on, 221 * Join all the inodes to the transaction. From this point on,
@@ -328,17 +227,17 @@ xfs_rename(
328 */ 227 */
329 IHOLD(src_dp); 228 IHOLD(src_dp);
330 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 229 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
230
331 if (new_parent) { 231 if (new_parent) {
332 IHOLD(target_dp); 232 IHOLD(target_dp);
333 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 233 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
334 } 234 }
335 if ((src_ip != src_dp) && (src_ip != target_dp)) { 235
336 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 236 IHOLD(src_ip);
337 } 237 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
338 if ((target_ip != NULL) && 238
339 (target_ip != src_ip) && 239 if (target_ip) {
340 (target_ip != src_dp) && 240 IHOLD(target_ip);
341 (target_ip != target_dp)) {
342 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 241 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
343 } 242 }
344 243
@@ -412,7 +311,6 @@ xfs_rename(
412 error = xfs_droplink(tp, target_ip); 311 error = xfs_droplink(tp, target_ip);
413 if (error) 312 if (error)
414 goto abort_return; 313 goto abort_return;
415 target_ip_dropped = 1;
416 314
417 if (src_is_directory) { 315 if (src_is_directory) {
418 /* 316 /*
@@ -422,10 +320,6 @@ xfs_rename(
422 if (error) 320 if (error)
423 goto abort_return; 321 goto abort_return;
424 } 322 }
425
426 /* Do this test while we still hold the locks */
427 target_link_zero = (target_ip)->i_d.di_nlink==0;
428
429 } /* target_ip != NULL */ 323 } /* target_ip != NULL */
430 324
431 /* 325 /*
@@ -492,15 +386,6 @@ xfs_rename(
492 } 386 }
493 387
494 /* 388 /*
495 * If there was a target inode, take an extra reference on
496 * it here so that it doesn't go to xfs_inactive() from
497 * within the commit.
498 */
499 if (target_ip != NULL) {
500 IHOLD(target_ip);
501 }
502
503 /*
504 * If this is a synchronous mount, make sure that the 389 * If this is a synchronous mount, make sure that the
505 * rename transaction goes to disk before returning to 390 * rename transaction goes to disk before returning to
506 * the user. 391 * the user.
@@ -509,30 +394,11 @@ xfs_rename(
509 xfs_trans_set_sync(tp); 394 xfs_trans_set_sync(tp);
510 } 395 }
511 396
512 /*
513 * Take refs. for vop_link_removed calls below. No need to worry
514 * about directory refs. because the caller holds them.
515 *
516 * Do holds before the xfs_bmap_finish since it might rele them down
517 * to zero.
518 */
519
520 if (target_ip_dropped)
521 IHOLD(target_ip);
522 IHOLD(src_ip);
523
524 error = xfs_bmap_finish(&tp, &free_list, &committed); 397 error = xfs_bmap_finish(&tp, &free_list, &committed);
525 if (error) { 398 if (error) {
526 xfs_bmap_cancel(&free_list); 399 xfs_bmap_cancel(&free_list);
527 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 400 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
528 XFS_TRANS_ABORT)); 401 XFS_TRANS_ABORT));
529 if (target_ip != NULL) {
530 IRELE(target_ip);
531 }
532 if (target_ip_dropped) {
533 IRELE(target_ip);
534 }
535 IRELE(src_ip);
536 goto std_return; 402 goto std_return;
537 } 403 }
538 404
@@ -541,15 +407,6 @@ xfs_rename(
541 * the vnode references. 407 * the vnode references.
542 */ 408 */
543 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 409 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
544 if (target_ip != NULL)
545 IRELE(target_ip);
546 /*
547 * Let interposed file systems know about removed links.
548 */
549 if (target_ip_dropped)
550 IRELE(target_ip);
551
552 IRELE(src_ip);
553 410
554 /* Fall through to std_return with error = 0 or errno from 411 /* Fall through to std_return with error = 0 or errno from
555 * xfs_trans_commit */ 412 * xfs_trans_commit */
@@ -571,11 +428,4 @@ std_return:
571 xfs_bmap_cancel(&free_list); 428 xfs_bmap_cancel(&free_list);
572 xfs_trans_cancel(tp, cancel_flags); 429 xfs_trans_cancel(tp, cancel_flags);
573 goto std_return; 430 goto std_return;
574
575 rele_return:
576 IRELE(src_ip);
577 if (target_ip != NULL) {
578 IRELE(target_ip);
579 }
580 goto std_return;
581} 431}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b8db1d5cde5a..4c70bf5e9985 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,13 +111,13 @@ xfs_trans_iget(
111 */ 111 */
112 ASSERT(ip->i_itemp != NULL); 112 ASSERT(ip->i_itemp != NULL);
113 ASSERT(lock_flags & XFS_ILOCK_EXCL); 113 ASSERT(lock_flags & XFS_ILOCK_EXCL);
114 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 114 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 115 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
116 ismrlocked(&ip->i_iolock, MR_UPDATE)); 116 xfs_isilocked(ip, XFS_IOLOCK_EXCL));
117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) || 117 ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL)); 118 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 119 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
120 ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS))); 120 xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) || 121 ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY)); 122 (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
123 123
@@ -185,7 +185,7 @@ xfs_trans_ijoin(
185 xfs_inode_log_item_t *iip; 185 xfs_inode_log_item_t *iip;
186 186
187 ASSERT(ip->i_transp == NULL); 187 ASSERT(ip->i_transp == NULL);
188 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 188 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
189 ASSERT(lock_flags & XFS_ILOCK_EXCL); 189 ASSERT(lock_flags & XFS_ILOCK_EXCL);
190 if (ip->i_itemp == NULL) 190 if (ip->i_itemp == NULL)
191 xfs_inode_item_init(ip, ip->i_mount); 191 xfs_inode_item_init(ip, ip->i_mount);
@@ -232,7 +232,7 @@ xfs_trans_ihold(
232{ 232{
233 ASSERT(ip->i_transp == tp); 233 ASSERT(ip->i_transp == tp);
234 ASSERT(ip->i_itemp != NULL); 234 ASSERT(ip->i_itemp != NULL);
235 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 235 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
236 236
237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD; 237 ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
238} 238}
@@ -257,7 +257,7 @@ xfs_trans_log_inode(
257 257
258 ASSERT(ip->i_transp == tp); 258 ASSERT(ip->i_transp == tp);
259 ASSERT(ip->i_itemp != NULL); 259 ASSERT(ip->i_itemp != NULL);
260 ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); 260 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
261 261
262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp)); 262 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
263 ASSERT(lidp != NULL); 263 ASSERT(lidp != NULL);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 2b8dc7e40772..98e5f110ba5f 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -41,49 +41,6 @@
41#include "xfs_utils.h" 41#include "xfs_utils.h"
42 42
43 43
44int
45xfs_dir_lookup_int(
46 xfs_inode_t *dp,
47 uint lock_mode,
48 struct xfs_name *name,
49 xfs_ino_t *inum,
50 xfs_inode_t **ipp)
51{
52 int error;
53
54 xfs_itrace_entry(dp);
55
56 error = xfs_dir_lookup(NULL, dp, name, inum);
57 if (!error) {
58 /*
59 * Unlock the directory. We do this because we can't
60 * hold the directory lock while doing the vn_get()
61 * in xfs_iget(). Doing so could cause us to hold
62 * a lock while waiting for the inode to finish
63 * being inactive while it's waiting for a log
64 * reservation in the inactive routine.
65 */
66 xfs_iunlock(dp, lock_mode);
67 error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
68 xfs_ilock(dp, lock_mode);
69
70 if (error) {
71 *ipp = NULL;
72 } else if ((*ipp)->i_d.di_mode == 0) {
73 /*
74 * The inode has been freed. Something is
75 * wrong so just get out of here.
76 */
77 xfs_iunlock(dp, lock_mode);
78 xfs_iput_new(*ipp, 0);
79 *ipp = NULL;
80 xfs_ilock(dp, lock_mode);
81 error = XFS_ERROR(ENOENT);
82 }
83 }
84 return error;
85}
86
87/* 44/*
88 * Allocates a new inode from disk and return a pointer to the 45 * Allocates a new inode from disk and return a pointer to the
89 * incore copy. This routine will internally commit the current 46 * incore copy. This routine will internally commit the current
@@ -310,7 +267,7 @@ xfs_bump_ino_vers2(
310{ 267{
311 xfs_mount_t *mp; 268 xfs_mount_t *mp;
312 269
313 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 270 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
314 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); 271 ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
315 272
316 ip->i_d.di_version = XFS_DINODE_VERSION_2; 273 ip->i_d.di_version = XFS_DINODE_VERSION_2;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 175b126d2cab..f316cb85d8e2 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,8 +21,6 @@
21#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) 21#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) 22#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
23 23
24extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
25 xfs_ino_t *, xfs_inode_t **);
26extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); 24extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
27extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 25extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
28 xfs_dev_t, cred_t *, prid_t, int, 26 xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fc48158fe479..30bacd8bb0e5 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -186,6 +186,7 @@ xfs_cleanup(void)
186 kmem_zone_destroy(xfs_efi_zone); 186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone); 187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone); 188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
189} 190}
190 191
191/* 192/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6650601c64f7..70702a60b4bb 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -76,132 +76,6 @@ xfs_open(
76} 76}
77 77
78/* 78/*
79 * xfs_getattr
80 */
81int
82xfs_getattr(
83 xfs_inode_t *ip,
84 bhv_vattr_t *vap,
85 int flags)
86{
87 bhv_vnode_t *vp = XFS_ITOV(ip);
88 xfs_mount_t *mp = ip->i_mount;
89
90 xfs_itrace_entry(ip);
91
92 if (XFS_FORCED_SHUTDOWN(mp))
93 return XFS_ERROR(EIO);
94
95 if (!(flags & ATTR_LAZY))
96 xfs_ilock(ip, XFS_ILOCK_SHARED);
97
98 vap->va_size = XFS_ISIZE(ip);
99 if (vap->va_mask == XFS_AT_SIZE)
100 goto all_done;
101
102 vap->va_nblocks =
103 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
104 vap->va_nodeid = ip->i_ino;
105#if XFS_BIG_INUMS
106 vap->va_nodeid += mp->m_inoadd;
107#endif
108 vap->va_nlink = ip->i_d.di_nlink;
109
110 /*
111 * Quick exit for non-stat callers
112 */
113 if ((vap->va_mask &
114 ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
115 XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
116 goto all_done;
117
118 /*
119 * Copy from in-core inode.
120 */
121 vap->va_mode = ip->i_d.di_mode;
122 vap->va_uid = ip->i_d.di_uid;
123 vap->va_gid = ip->i_d.di_gid;
124 vap->va_projid = ip->i_d.di_projid;
125
126 /*
127 * Check vnode type block/char vs. everything else.
128 */
129 switch (ip->i_d.di_mode & S_IFMT) {
130 case S_IFBLK:
131 case S_IFCHR:
132 vap->va_rdev = ip->i_df.if_u2.if_rdev;
133 vap->va_blocksize = BLKDEV_IOSIZE;
134 break;
135 default:
136 vap->va_rdev = 0;
137
138 if (!(XFS_IS_REALTIME_INODE(ip))) {
139 vap->va_blocksize = xfs_preferred_iosize(mp);
140 } else {
141
142 /*
143 * If the file blocks are being allocated from a
144 * realtime partition, then return the inode's
145 * realtime extent size or the realtime volume's
146 * extent size.
147 */
148 vap->va_blocksize =
149 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
150 }
151 break;
152 }
153
154 vn_atime_to_timespec(vp, &vap->va_atime);
155 vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
156 vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
157 vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
158 vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
159
160 /*
161 * Exit for stat callers. See if any of the rest of the fields
162 * to be filled in are needed.
163 */
164 if ((vap->va_mask &
165 (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
166 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
167 goto all_done;
168
169 /*
170 * Convert di_flags to xflags.
171 */
172 vap->va_xflags = xfs_ip2xflags(ip);
173
174 /*
175 * Exit for inode revalidate. See if any of the rest of
176 * the fields to be filled in are needed.
177 */
178 if ((vap->va_mask &
179 (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
180 XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
181 goto all_done;
182
183 vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
184 vap->va_nextents =
185 (ip->i_df.if_flags & XFS_IFEXTENTS) ?
186 ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
187 ip->i_d.di_nextents;
188 if (ip->i_afp)
189 vap->va_anextents =
190 (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
191 ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
192 ip->i_d.di_anextents;
193 else
194 vap->va_anextents = 0;
195 vap->va_gen = ip->i_d.di_gen;
196
197 all_done:
198 if (!(flags & ATTR_LAZY))
199 xfs_iunlock(ip, XFS_ILOCK_SHARED);
200 return 0;
201}
202
203
204/*
205 * xfs_setattr 79 * xfs_setattr
206 */ 80 */
207int 81int
@@ -211,7 +85,6 @@ xfs_setattr(
211 int flags, 85 int flags,
212 cred_t *credp) 86 cred_t *credp)
213{ 87{
214 bhv_vnode_t *vp = XFS_ITOV(ip);
215 xfs_mount_t *mp = ip->i_mount; 88 xfs_mount_t *mp = ip->i_mount;
216 xfs_trans_t *tp; 89 xfs_trans_t *tp;
217 int mask; 90 int mask;
@@ -222,7 +95,6 @@ xfs_setattr(
222 gid_t gid=0, igid=0; 95 gid_t gid=0, igid=0;
223 int timeflags = 0; 96 int timeflags = 0;
224 xfs_prid_t projid=0, iprojid=0; 97 xfs_prid_t projid=0, iprojid=0;
225 int mandlock_before, mandlock_after;
226 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
227 int file_owner; 99 int file_owner;
228 int need_iolock = 1; 100 int need_iolock = 1;
@@ -383,7 +255,7 @@ xfs_setattr(
383 m |= S_ISGID; 255 m |= S_ISGID;
384#if 0 256#if 0
385 /* Linux allows this, Irix doesn't. */ 257 /* Linux allows this, Irix doesn't. */
386 if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp)) 258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
387 m |= S_ISVTX; 259 m |= S_ISVTX;
388#endif 260#endif
389 if (m && !capable(CAP_FSETID)) 261 if (m && !capable(CAP_FSETID))
@@ -461,10 +333,10 @@ xfs_setattr(
461 goto error_return; 333 goto error_return;
462 } 334 }
463 335
464 if (VN_ISDIR(vp)) { 336 if (S_ISDIR(ip->i_d.di_mode)) {
465 code = XFS_ERROR(EISDIR); 337 code = XFS_ERROR(EISDIR);
466 goto error_return; 338 goto error_return;
467 } else if (!VN_ISREG(vp)) { 339 } else if (!S_ISREG(ip->i_d.di_mode)) {
468 code = XFS_ERROR(EINVAL); 340 code = XFS_ERROR(EINVAL);
469 goto error_return; 341 goto error_return;
470 } 342 }
@@ -626,9 +498,6 @@ xfs_setattr(
626 xfs_trans_ihold(tp, ip); 498 xfs_trans_ihold(tp, ip);
627 } 499 }
628 500
629 /* determine whether mandatory locking mode changes */
630 mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
631
632 /* 501 /*
633 * Truncate file. Must have write permission and not be a directory. 502 * Truncate file. Must have write permission and not be a directory.
634 */ 503 */
@@ -858,13 +727,6 @@ xfs_setattr(
858 code = xfs_trans_commit(tp, commit_flags); 727 code = xfs_trans_commit(tp, commit_flags);
859 } 728 }
860 729
861 /*
862 * If the (regular) file's mandatory locking mode changed, then
863 * notify the vnode. We do this under the inode lock to prevent
864 * racing calls to vop_vnode_change.
865 */
866 mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
867
868 xfs_iunlock(ip, lock_flags); 730 xfs_iunlock(ip, lock_flags);
869 731
870 /* 732 /*
@@ -1443,7 +1305,7 @@ xfs_inactive_attrs(
1443 int error; 1305 int error;
1444 xfs_mount_t *mp; 1306 xfs_mount_t *mp;
1445 1307
1446 ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); 1308 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1447 tp = *tpp; 1309 tp = *tpp;
1448 mp = ip->i_mount; 1310 mp = ip->i_mount;
1449 ASSERT(ip->i_d.di_forkoff != 0); 1311 ASSERT(ip->i_d.di_forkoff != 0);
@@ -1491,7 +1353,7 @@ xfs_release(
1491 xfs_mount_t *mp = ip->i_mount; 1353 xfs_mount_t *mp = ip->i_mount;
1492 int error; 1354 int error;
1493 1355
1494 if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) 1356 if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
1495 return 0; 1357 return 0;
1496 1358
1497 /* If this is a read-only mount, don't do this (would generate I/O) */ 1359 /* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1774,8 +1636,7 @@ xfs_lookup(
1774 struct xfs_name *name, 1636 struct xfs_name *name,
1775 xfs_inode_t **ipp) 1637 xfs_inode_t **ipp)
1776{ 1638{
1777 xfs_inode_t *ip; 1639 xfs_ino_t inum;
1778 xfs_ino_t e_inum;
1779 int error; 1640 int error;
1780 uint lock_mode; 1641 uint lock_mode;
1781 1642
@@ -1785,12 +1646,21 @@ xfs_lookup(
1785 return XFS_ERROR(EIO); 1646 return XFS_ERROR(EIO);
1786 1647
1787 lock_mode = xfs_ilock_map_shared(dp); 1648 lock_mode = xfs_ilock_map_shared(dp);
1788 error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); 1649 error = xfs_dir_lookup(NULL, dp, name, &inum);
1789 if (!error) {
1790 *ipp = ip;
1791 xfs_itrace_ref(ip);
1792 }
1793 xfs_iunlock_map_shared(dp, lock_mode); 1650 xfs_iunlock_map_shared(dp, lock_mode);
1651
1652 if (error)
1653 goto out;
1654
1655 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1656 if (error)
1657 goto out;
1658
1659 xfs_itrace_ref(*ipp);
1660 return 0;
1661
1662 out:
1663 *ipp = NULL;
1794 return error; 1664 return error;
1795} 1665}
1796 1666
@@ -1906,7 +1776,7 @@ xfs_create(
1906 * It is locked (and joined to the transaction). 1776 * It is locked (and joined to the transaction).
1907 */ 1777 */
1908 1778
1909 ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE)); 1779 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1910 1780
1911 /* 1781 /*
1912 * Now we join the directory inode to the transaction. We do not do it 1782 * Now we join the directory inode to the transaction. We do not do it
@@ -2112,7 +1982,7 @@ again:
2112 1982
2113 ips[0] = ip; 1983 ips[0] = ip;
2114 ips[1] = dp; 1984 ips[1] = dp;
2115 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 1985 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2116 } 1986 }
2117 /* else e_inum == dp->i_ino */ 1987 /* else e_inum == dp->i_ino */
2118 /* This can happen if we're asked to lock /x/.. 1988 /* This can happen if we're asked to lock /x/..
@@ -2160,7 +2030,6 @@ void
2160xfs_lock_inodes( 2030xfs_lock_inodes(
2161 xfs_inode_t **ips, 2031 xfs_inode_t **ips,
2162 int inodes, 2032 int inodes,
2163 int first_locked,
2164 uint lock_mode) 2033 uint lock_mode)
2165{ 2034{
2166 int attempts = 0, i, j, try_lock; 2035 int attempts = 0, i, j, try_lock;
@@ -2168,13 +2037,8 @@ xfs_lock_inodes(
2168 2037
2169 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 2038 ASSERT(ips && (inodes >= 2)); /* we need at least two */
2170 2039
2171 if (first_locked) { 2040 try_lock = 0;
2172 try_lock = 1; 2041 i = 0;
2173 i = 1;
2174 } else {
2175 try_lock = 0;
2176 i = 0;
2177 }
2178 2042
2179again: 2043again:
2180 for (; i < inodes; i++) { 2044 for (; i < inodes; i++) {
@@ -2298,29 +2162,14 @@ xfs_remove(
2298 return error; 2162 return error;
2299 } 2163 }
2300 2164
2301 /*
2302 * We need to get a reference to ip before we get our log
2303 * reservation. The reason for this is that we cannot call
2304 * xfs_iget for an inode for which we do not have a reference
2305 * once we've acquired a log reservation. This is because the
2306 * inode we are trying to get might be in xfs_inactive going
2307 * for a log reservation. Since we'll have to wait for the
2308 * inactive code to complete before returning from xfs_iget,
2309 * we need to make sure that we don't have log space reserved
2310 * when we call xfs_iget. Instead we get an unlocked reference
2311 * to the inode before getting our log reservation.
2312 */
2313 IHOLD(ip);
2314
2315 xfs_itrace_entry(ip); 2165 xfs_itrace_entry(ip);
2316 xfs_itrace_ref(ip); 2166 xfs_itrace_ref(ip);
2317 2167
2318 error = XFS_QM_DQATTACH(mp, dp, 0); 2168 error = XFS_QM_DQATTACH(mp, dp, 0);
2319 if (!error && dp != ip) 2169 if (!error)
2320 error = XFS_QM_DQATTACH(mp, ip, 0); 2170 error = XFS_QM_DQATTACH(mp, ip, 0);
2321 if (error) { 2171 if (error) {
2322 REMOVE_DEBUG_TRACE(__LINE__); 2172 REMOVE_DEBUG_TRACE(__LINE__);
2323 IRELE(ip);
2324 goto std_return; 2173 goto std_return;
2325 } 2174 }
2326 2175
@@ -2347,7 +2196,6 @@ xfs_remove(
2347 ASSERT(error != ENOSPC); 2196 ASSERT(error != ENOSPC);
2348 REMOVE_DEBUG_TRACE(__LINE__); 2197 REMOVE_DEBUG_TRACE(__LINE__);
2349 xfs_trans_cancel(tp, 0); 2198 xfs_trans_cancel(tp, 0);
2350 IRELE(ip);
2351 return error; 2199 return error;
2352 } 2200 }
2353 2201
@@ -2355,7 +2203,6 @@ xfs_remove(
2355 if (error) { 2203 if (error) {
2356 REMOVE_DEBUG_TRACE(__LINE__); 2204 REMOVE_DEBUG_TRACE(__LINE__);
2357 xfs_trans_cancel(tp, cancel_flags); 2205 xfs_trans_cancel(tp, cancel_flags);
2358 IRELE(ip);
2359 goto std_return; 2206 goto std_return;
2360 } 2207 }
2361 2208
@@ -2363,23 +2210,18 @@ xfs_remove(
2363 * At this point, we've gotten both the directory and the entry 2210 * At this point, we've gotten both the directory and the entry
2364 * inodes locked. 2211 * inodes locked.
2365 */ 2212 */
2213 IHOLD(ip);
2366 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2214 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2367 if (dp != ip) { 2215
2368 /* 2216 IHOLD(dp);
2369 * Increment vnode ref count only in this case since 2217 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2370 * there's an extra vnode reference in the case where
2371 * dp == ip.
2372 */
2373 IHOLD(dp);
2374 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2375 }
2376 2218
2377 /* 2219 /*
2378 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2220 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2379 */ 2221 */
2380 XFS_BMAP_INIT(&free_list, &first_block); 2222 XFS_BMAP_INIT(&free_list, &first_block);
2381 error = xfs_dir_removename(tp, dp, name, ip->i_ino, 2223 error = xfs_dir_removename(tp, dp, name, ip->i_ino,
2382 &first_block, &free_list, 0); 2224 &first_block, &free_list, resblks);
2383 if (error) { 2225 if (error) {
2384 ASSERT(error != ENOENT); 2226 ASSERT(error != ENOENT);
2385 REMOVE_DEBUG_TRACE(__LINE__); 2227 REMOVE_DEBUG_TRACE(__LINE__);
@@ -2402,12 +2244,6 @@ xfs_remove(
2402 link_zero = (ip)->i_d.di_nlink==0; 2244 link_zero = (ip)->i_d.di_nlink==0;
2403 2245
2404 /* 2246 /*
2405 * Take an extra ref on the inode so that it doesn't
2406 * go to xfs_inactive() from within the commit.
2407 */
2408 IHOLD(ip);
2409
2410 /*
2411 * If this is a synchronous mount, make sure that the 2247 * If this is a synchronous mount, make sure that the
2412 * remove transaction goes to disk before returning to 2248 * remove transaction goes to disk before returning to
2413 * the user. 2249 * the user.
@@ -2423,10 +2259,8 @@ xfs_remove(
2423 } 2259 }
2424 2260
2425 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2261 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2426 if (error) { 2262 if (error)
2427 IRELE(ip);
2428 goto std_return; 2263 goto std_return;
2429 }
2430 2264
2431 /* 2265 /*
2432 * If we are using filestreams, kill the stream association. 2266 * If we are using filestreams, kill the stream association.
@@ -2438,7 +2272,6 @@ xfs_remove(
2438 xfs_filestream_deassociate(ip); 2272 xfs_filestream_deassociate(ip);
2439 2273
2440 xfs_itrace_exit(ip); 2274 xfs_itrace_exit(ip);
2441 IRELE(ip);
2442 2275
2443/* Fall through to std_return with error = 0 */ 2276/* Fall through to std_return with error = 0 */
2444 std_return: 2277 std_return:
@@ -2467,8 +2300,6 @@ xfs_remove(
2467 cancel_flags |= XFS_TRANS_ABORT; 2300 cancel_flags |= XFS_TRANS_ABORT;
2468 xfs_trans_cancel(tp, cancel_flags); 2301 xfs_trans_cancel(tp, cancel_flags);
2469 2302
2470 IRELE(ip);
2471
2472 goto std_return; 2303 goto std_return;
2473} 2304}
2474 2305
@@ -2536,7 +2367,7 @@ xfs_link(
2536 ips[1] = sip; 2367 ips[1] = sip;
2537 } 2368 }
2538 2369
2539 xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); 2370 xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
2540 2371
2541 /* 2372 /*
2542 * Increment vnode ref counts since xfs_trans_commit & 2373 * Increment vnode ref counts since xfs_trans_commit &
@@ -2840,7 +2671,6 @@ xfs_rmdir(
2840 struct xfs_name *name, 2671 struct xfs_name *name,
2841 xfs_inode_t *cdp) 2672 xfs_inode_t *cdp)
2842{ 2673{
2843 bhv_vnode_t *dir_vp = XFS_ITOV(dp);
2844 xfs_mount_t *mp = dp->i_mount; 2674 xfs_mount_t *mp = dp->i_mount;
2845 xfs_trans_t *tp; 2675 xfs_trans_t *tp;
2846 int error; 2676 int error;
@@ -2866,27 +2696,12 @@ xfs_rmdir(
2866 } 2696 }
2867 2697
2868 /* 2698 /*
2869 * We need to get a reference to cdp before we get our log
2870 * reservation. The reason for this is that we cannot call
2871 * xfs_iget for an inode for which we do not have a reference
2872 * once we've acquired a log reservation. This is because the
2873 * inode we are trying to get might be in xfs_inactive going
2874 * for a log reservation. Since we'll have to wait for the
2875 * inactive code to complete before returning from xfs_iget,
2876 * we need to make sure that we don't have log space reserved
2877 * when we call xfs_iget. Instead we get an unlocked reference
2878 * to the inode before getting our log reservation.
2879 */
2880 IHOLD(cdp);
2881
2882 /*
2883 * Get the dquots for the inodes. 2699 * Get the dquots for the inodes.
2884 */ 2700 */
2885 error = XFS_QM_DQATTACH(mp, dp, 0); 2701 error = XFS_QM_DQATTACH(mp, dp, 0);
2886 if (!error && dp != cdp) 2702 if (!error)
2887 error = XFS_QM_DQATTACH(mp, cdp, 0); 2703 error = XFS_QM_DQATTACH(mp, cdp, 0);
2888 if (error) { 2704 if (error) {
2889 IRELE(cdp);
2890 REMOVE_DEBUG_TRACE(__LINE__); 2705 REMOVE_DEBUG_TRACE(__LINE__);
2891 goto std_return; 2706 goto std_return;
2892 } 2707 }
@@ -2913,7 +2728,6 @@ xfs_rmdir(
2913 if (error) { 2728 if (error) {
2914 ASSERT(error != ENOSPC); 2729 ASSERT(error != ENOSPC);
2915 cancel_flags = 0; 2730 cancel_flags = 0;
2916 IRELE(cdp);
2917 goto error_return; 2731 goto error_return;
2918 } 2732 }
2919 XFS_BMAP_INIT(&free_list, &first_block); 2733 XFS_BMAP_INIT(&free_list, &first_block);
@@ -2927,21 +2741,13 @@ xfs_rmdir(
2927 error = xfs_lock_dir_and_entry(dp, cdp); 2741 error = xfs_lock_dir_and_entry(dp, cdp);
2928 if (error) { 2742 if (error) {
2929 xfs_trans_cancel(tp, cancel_flags); 2743 xfs_trans_cancel(tp, cancel_flags);
2930 IRELE(cdp);
2931 goto std_return; 2744 goto std_return;
2932 } 2745 }
2933 2746
2747 IHOLD(dp);
2934 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2748 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2935 if (dp != cdp) {
2936 /*
2937 * Only increment the parent directory vnode count if
2938 * we didn't bump it in looking up cdp. The only time
2939 * we don't bump it is when we're looking up ".".
2940 */
2941 VN_HOLD(dir_vp);
2942 }
2943 2749
2944 xfs_itrace_ref(cdp); 2750 IHOLD(cdp);
2945 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); 2751 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2946 2752
2947 ASSERT(cdp->i_d.di_nlink >= 2); 2753 ASSERT(cdp->i_d.di_nlink >= 2);
@@ -2995,12 +2801,6 @@ xfs_rmdir(
2995 last_cdp_link = (cdp)->i_d.di_nlink==0; 2801 last_cdp_link = (cdp)->i_d.di_nlink==0;
2996 2802
2997 /* 2803 /*
2998 * Take an extra ref on the child vnode so that it
2999 * does not go to xfs_inactive() from within the commit.
3000 */
3001 IHOLD(cdp);
3002
3003 /*
3004 * If this is a synchronous mount, make sure that the 2804 * If this is a synchronous mount, make sure that the
3005 * rmdir transaction goes to disk before returning to 2805 * rmdir transaction goes to disk before returning to
3006 * the user. 2806 * the user.
@@ -3014,19 +2814,15 @@ xfs_rmdir(
3014 xfs_bmap_cancel(&free_list); 2814 xfs_bmap_cancel(&free_list);
3015 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 2815 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
3016 XFS_TRANS_ABORT)); 2816 XFS_TRANS_ABORT));
3017 IRELE(cdp);
3018 goto std_return; 2817 goto std_return;
3019 } 2818 }
3020 2819
3021 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2820 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3022 if (error) { 2821 if (error) {
3023 IRELE(cdp);
3024 goto std_return; 2822 goto std_return;
3025 } 2823 }
3026 2824
3027 2825
3028 IRELE(cdp);
3029
3030 /* Fall through to std_return with error = 0 or the errno 2826 /* Fall through to std_return with error = 0 or the errno
3031 * from xfs_trans_commit. */ 2827 * from xfs_trans_commit. */
3032 std_return: 2828 std_return:
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 24c53923dc2c..8abe8f186e20 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,7 +15,6 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
19int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
20 struct cred *credp); 19 struct cred *credp);
21int xfs_readlink(struct xfs_inode *ip, char *link); 20int xfs_readlink(struct xfs_inode *ip, char *link);
@@ -48,9 +47,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
48 struct cred *credp, int attr_flags); 47 struct cred *credp, int attr_flags);
49int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 48int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
50 struct xfs_inode *src_ip, struct xfs_inode *target_dp, 49 struct xfs_inode *src_ip, struct xfs_inode *target_dp,
51 struct xfs_name *target_name); 50 struct xfs_name *target_name, struct xfs_inode *target_ip);
52int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, 51int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
53 int *valuelenp, int flags, cred_t *cred); 52 int *valuelenp, int flags);
54int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, 53int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
55 int valuelen, int flags); 54 int valuelen, int flags);
56int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); 55int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
@@ -61,9 +60,6 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
61ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, 60ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
62 const struct iovec *iovp, unsigned int segs, 61 const struct iovec *iovp, unsigned int segs,
63 loff_t *offset, int ioflags); 62 loff_t *offset, int ioflags);
64ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp,
65 loff_t *offset, int ioflags, size_t count,
66 read_actor_t actor, void *target);
67ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp, 63ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
68 loff_t *ppos, struct pipe_inode_info *pipe, size_t count, 64 loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
69 int flags, int ioflags); 65 int flags, int ioflags);