aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c8
-rw-r--r--fs/aio.c62
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/bio.c14
-rw-r--r--fs/block_dev.c12
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/cifs/README2
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifspdu.h2
-rw-r--r--fs/cifs/dir.c3
-rw-r--r--fs/cifs/file.c6
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/smbdes.c2
-rw-r--r--fs/coda/sysctl.c10
-rw-r--r--fs/compat_ioctl.c1514
-rw-r--r--fs/debugfs/inode.c61
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/direct-io.c10
-rw-r--r--fs/dlm/config.c24
-rw-r--r--fs/dlm/debug_fs.c2
-rw-r--r--fs/dlm/dir.c7
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lock.c6
-rw-r--r--fs/dlm/lockspace.c15
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/dlm/member.c8
-rw-r--r--fs/dlm/memory.c6
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/dlm/plock.c8
-rw-r--r--fs/dlm/rcom.c2
-rw-r--r--fs/dlm/requestqueue.c2
-rw-r--r--fs/dlm/user.c12
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/Kbuild2
-rw-r--r--fs/exofs/common.h81
-rw-r--r--fs/exofs/exofs.h97
-rw-r--r--fs/exofs/inode.c409
-rw-r--r--fs/exofs/ios.c421
-rw-r--r--fs/exofs/osd.c125
-rw-r--r--fs/exofs/pnfs.h51
-rw-r--r--fs/exofs/super.c353
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/inode.c6
-rw-r--r--fs/ext2/super.c184
-rw-r--r--fs/ext2/xip.c5
-rw-r--r--fs/ext3/inode.c20
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c468
-rw-r--r--fs/ext3/xattr.c7
-rw-r--r--fs/ext4/Kconfig10
-rw-r--r--fs/ext4/balloc.c46
-rw-r--r--fs/ext4/block_validity.c3
-rw-r--r--fs/ext4/ext4.h23
-rw-r--r--fs/ext4/ext4_jbd2.c82
-rw-r--r--fs/ext4/ext4_jbd2.h44
-rw-r--r--fs/ext4/extents.c44
-rw-r--r--fs/ext4/fsync.c54
-rw-r--r--fs/ext4/inode.c201
-rw-r--r--fs/ext4/ioctl.c29
-rw-r--r--fs/ext4/mballoc.c103
-rw-r--r--fs/ext4/migrate.c27
-rw-r--r--fs/ext4/move_extent.c282
-rw-r--r--fs/ext4/namei.c38
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c148
-rw-r--r--fs/ext4/xattr.c15
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fs-writeback.c28
-rw-r--r--fs/gfs2/Kconfig2
-rw-r--r--fs/gfs2/acl.c357
-rw-r--r--fs/gfs2/acl.h24
-rw-r--r--fs/gfs2/aops.c20
-rw-r--r--fs/gfs2/dir.c34
-rw-r--r--fs/gfs2/glock.c31
-rw-r--r--fs/gfs2/glock.h9
-rw-r--r--fs/gfs2/glops.c5
-rw-r--r--fs/gfs2/incore.h5
-rw-r--r--fs/gfs2/inode.c4
-rw-r--r--fs/gfs2/log.c2
-rw-r--r--fs/gfs2/lops.c4
-rw-r--r--fs/gfs2/ops_fstype.c154
-rw-r--r--fs/gfs2/quota.c393
-rw-r--r--fs/gfs2/quota.h5
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/gfs2/rgrp.c14
-rw-r--r--fs/gfs2/super.c110
-rw-r--r--fs/gfs2/super.h4
-rw-r--r--fs/gfs2/sys.c14
-rw-r--r--fs/gfs2/xattr.c74
-rw-r--r--fs/gfs2/xattr.h8
-rw-r--r--fs/inode.c10
-rw-r--r--fs/isofs/compress.c533
-rw-r--r--fs/isofs/rock.c3
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c12
-rw-r--r--fs/jffs2/compr.c2
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/jfs_dmap.c4
-rw-r--r--fs/lockd/svc.c26
-rw-r--r--fs/namei.c31
-rw-r--r--fs/namespace.c20
-rw-r--r--fs/ncpfs/ioctl.c2
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/sysctl.c22
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nilfs2/alloc.c108
-rw-r--r--fs/nilfs2/alloc.h21
-rw-r--r--fs/nilfs2/bmap.c8
-rw-r--r--fs/nilfs2/btnode.c76
-rw-r--r--fs/nilfs2/btnode.h6
-rw-r--r--fs/nilfs2/btree.c106
-rw-r--r--fs/nilfs2/btree.h22
-rw-r--r--fs/nilfs2/cpfile.c26
-rw-r--r--fs/nilfs2/cpfile.h3
-rw-r--r--fs/nilfs2/dat.c47
-rw-r--r--fs/nilfs2/dat.h3
-rw-r--r--fs/nilfs2/dir.c24
-rw-r--r--fs/nilfs2/gcdat.c3
-rw-r--r--fs/nilfs2/gcinode.c6
-rw-r--r--fs/nilfs2/ifile.c35
-rw-r--r--fs/nilfs2/ifile.h2
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/mdt.c56
-rw-r--r--fs/nilfs2/mdt.h25
-rw-r--r--fs/nilfs2/namei.c83
-rw-r--r--fs/nilfs2/recovery.c34
-rw-r--r--fs/nilfs2/segbuf.c185
-rw-r--r--fs/nilfs2/segbuf.h54
-rw-r--r--fs/nilfs2/segment.c369
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/sufile.c203
-rw-r--r--fs/nilfs2/sufile.h14
-rw-r--r--fs/nilfs2/super.c88
-rw-r--r--fs/nilfs2/the_nilfs.c155
-rw-r--r--fs/nilfs2/the_nilfs.h10
-rw-r--r--fs/notify/inotify/inotify_user.c18
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/logfile.c2
-rw-r--r--fs/ntfs/sysctl.c4
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/blockcheck.c2
-rw-r--r--fs/ocfs2/cluster/netdebug.c8
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/quota.h4
-rw-r--r--fs/ocfs2/quota_local.c2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/stackglue.c15
-rw-r--r--fs/omfs/bitmap.c2
-rw-r--r--fs/open.c27
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/partitions/efi.c30
-rw-r--r--fs/partitions/efi.h8
-rw-r--r--fs/proc/array.c23
-rw-r--r--fs/proc/proc_devtree.c41
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/proc/stat.c19
-rw-r--r--fs/qnx4/bitmap.c2
-rw-r--r--fs/qnx4/dir.c6
-rw-r--r--fs/qnx4/inode.c26
-rw-r--r--fs/qnx4/namei.c6
-rw-r--r--fs/quota/Kconfig10
-rw-r--r--fs/quota/dquot.c130
-rw-r--r--fs/quota/quota.c93
-rw-r--r--fs/quota/quota_v1.c2
-rw-r--r--fs/quota/quota_v2.c167
-rw-r--r--fs/quota/quotaio_v2.h19
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/Makefile2
-rw-r--r--fs/reiserfs/bitmap.c4
-rw-r--r--fs/reiserfs/dir.c10
-rw-r--r--fs/reiserfs/do_balan.c17
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/fix_node.c21
-rw-r--r--fs/reiserfs/inode.c97
-rw-r--r--fs/reiserfs/ioctl.c77
-rw-r--r--fs/reiserfs/journal.c130
-rw-r--r--fs/reiserfs/lock.c88
-rw-r--r--fs/reiserfs/namei.c20
-rw-r--r--fs/reiserfs/prints.c4
-rw-r--r--fs/reiserfs/resize.c2
-rw-r--r--fs/reiserfs/stree.c53
-rw-r--r--fs/reiserfs/super.c52
-rw-r--r--fs/reiserfs/xattr.c6
-rw-r--r--fs/splice.c24
-rw-r--r--fs/sync.c13
-rw-r--r--fs/sysfs/dir.c388
-rw-r--r--fs/sysfs/file.c41
-rw-r--r--fs/sysfs/inode.c176
-rw-r--r--fs/sysfs/symlink.c11
-rw-r--r--fs/sysfs/sysfs.h9
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/file.c13
-rw-r--r--fs/ubifs/recovery.c2
-rw-r--r--fs/ubifs/super.c20
-rw-r--r--fs/xattr_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c123
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h9
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c71
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c62
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h1
-rw-r--r--fs/xfs/quota/xfs_dquot.h2
-rw-r--r--fs/xfs/support/debug.h18
-rw-r--r--fs/xfs/xfs_attr.c16
-rw-r--r--fs/xfs/xfs_attr.h1
-rw-r--r--fs/xfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/xfs_bmap_btree.c3
-rw-r--r--fs/xfs/xfs_filestream.h8
-rw-r--r--fs/xfs/xfs_fsops.c25
-rw-r--r--fs/xfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/xfs_iget.c5
-rw-r--r--fs/xfs/xfs_iomap.c9
-rw-r--r--fs/xfs/xfs_log_recover.c40
-rw-r--r--fs/xfs/xfs_mount.c18
-rw-r--r--fs/xfs/xfs_mount.h27
-rw-r--r--fs/xfs/xfs_rw.c30
-rw-r--r--fs/xfs/xfs_rw.h29
-rw-r--r--fs/xfs/xfs_trans.c7
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c13
-rw-r--r--fs/xfs/xfs_vnodeops.c79
-rw-r--r--fs/xfs/xfs_vnodeops.h1
235 files changed, 6153 insertions, 5487 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c63a3c8beb73..5e15a21dbf9f 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -671,7 +671,6 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
671 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); 671 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
672 ssize_t result; 672 ssize_t result;
673 size_t count = iov_length(iov, nr_segs); 673 size_t count = iov_length(iov, nr_segs);
674 int ret;
675 674
676 _enter("{%x.%u},{%zu},%lu,", 675 _enter("{%x.%u},{%zu},%lu,",
677 vnode->fid.vid, vnode->fid.vnode, count, nr_segs); 676 vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
@@ -691,13 +690,6 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
691 return result; 690 return result;
692 } 691 }
693 692
694 /* return error values for O_SYNC and IS_SYNC() */
695 if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
696 ret = afs_fsync(iocb->ki_filp, dentry, 1);
697 if (ret < 0)
698 result = ret;
699 }
700
701 _leave(" = %zd", result); 693 _leave(" = %zd", result);
702 return result; 694 return result;
703} 695}
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c9340573..c30dfc006108 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
15#include <linux/aio_abi.h> 15#include <linux/aio_abi.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/backing-dev.h>
18#include <linux/uio.h> 19#include <linux/uio.h>
19 20
20#define DEBUG 0 21#define DEBUG 0
@@ -32,6 +33,9 @@
32#include <linux/workqueue.h> 33#include <linux/workqueue.h>
33#include <linux/security.h> 34#include <linux/security.h>
34#include <linux/eventfd.h> 35#include <linux/eventfd.h>
36#include <linux/blkdev.h>
37#include <linux/mempool.h>
38#include <linux/hash.h>
35 39
36#include <asm/kmap_types.h> 40#include <asm/kmap_types.h>
37#include <asm/uaccess.h> 41#include <asm/uaccess.h>
@@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
60static DEFINE_SPINLOCK(fput_lock); 64static DEFINE_SPINLOCK(fput_lock);
61static LIST_HEAD(fput_head); 65static LIST_HEAD(fput_head);
62 66
67#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
68#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
69struct aio_batch_entry {
70 struct hlist_node list;
71 struct address_space *mapping;
72};
73mempool_t *abe_pool;
74
63static void aio_kick_handler(struct work_struct *); 75static void aio_kick_handler(struct work_struct *);
64static void aio_queue_work(struct kioctx *); 76static void aio_queue_work(struct kioctx *);
65 77
@@ -73,6 +85,8 @@ static int __init aio_setup(void)
73 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 85 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
74 86
75 aio_wq = create_workqueue("aio"); 87 aio_wq = create_workqueue("aio");
88 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
89 BUG_ON(!abe_pool);
76 90
77 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); 91 pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
78 92
@@ -1531,8 +1545,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
1531 return 1; 1545 return 1;
1532} 1546}
1533 1547
1548static void aio_batch_add(struct address_space *mapping,
1549 struct hlist_head *batch_hash)
1550{
1551 struct aio_batch_entry *abe;
1552 struct hlist_node *pos;
1553 unsigned bucket;
1554
1555 bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
1556 hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
1557 if (abe->mapping == mapping)
1558 return;
1559 }
1560
1561 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1562 BUG_ON(!igrab(mapping->host));
1563 abe->mapping = mapping;
1564 hlist_add_head(&abe->list, &batch_hash[bucket]);
1565 return;
1566}
1567
1568static void aio_batch_free(struct hlist_head *batch_hash)
1569{
1570 struct aio_batch_entry *abe;
1571 struct hlist_node *pos, *n;
1572 int i;
1573
1574 for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
1575 hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
1576 blk_run_address_space(abe->mapping);
1577 iput(abe->mapping->host);
1578 hlist_del(&abe->list);
1579 mempool_free(abe, abe_pool);
1580 }
1581 }
1582}
1583
1534static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, 1584static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1535 struct iocb *iocb) 1585 struct iocb *iocb, struct hlist_head *batch_hash)
1536{ 1586{
1537 struct kiocb *req; 1587 struct kiocb *req;
1538 struct file *file; 1588 struct file *file;
@@ -1608,6 +1658,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1608 ; 1658 ;
1609 } 1659 }
1610 spin_unlock_irq(&ctx->ctx_lock); 1660 spin_unlock_irq(&ctx->ctx_lock);
1661 if (req->ki_opcode == IOCB_CMD_PREAD ||
1662 req->ki_opcode == IOCB_CMD_PREADV ||
1663 req->ki_opcode == IOCB_CMD_PWRITE ||
1664 req->ki_opcode == IOCB_CMD_PWRITEV)
1665 aio_batch_add(file->f_mapping, batch_hash);
1666
1611 aio_put_req(req); /* drop extra ref to req */ 1667 aio_put_req(req); /* drop extra ref to req */
1612 return 0; 1668 return 0;
1613 1669
@@ -1635,6 +1691,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1635 struct kioctx *ctx; 1691 struct kioctx *ctx;
1636 long ret = 0; 1692 long ret = 0;
1637 int i; 1693 int i;
1694 struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
1638 1695
1639 if (unlikely(nr < 0)) 1696 if (unlikely(nr < 0))
1640 return -EINVAL; 1697 return -EINVAL;
@@ -1666,10 +1723,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
1666 break; 1723 break;
1667 } 1724 }
1668 1725
1669 ret = io_submit_one(ctx, user_iocb, &tmp); 1726 ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
1670 if (ret) 1727 if (ret)
1671 break; 1728 break;
1672 } 1729 }
1730 aio_batch_free(batch_hash);
1673 1731
1674 put_ioctx(ctx); 1732 put_ioctx(ctx);
1675 return i ? i : ret; 1733 return i ? i : ret;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b9b3bb51b1e4..d15ea1790bfb 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -767,7 +767,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
767 767
768 current->mm->start_stack = bprm->p; 768 current->mm->start_stack = bprm->p;
769 769
770 /* Now we do a little grungy work by mmaping the ELF image into 770 /* Now we do a little grungy work by mmapping the ELF image into
771 the correct location in memory. */ 771 the correct location in memory. */
772 for(i = 0, elf_ppnt = elf_phdata; 772 for(i = 0, elf_ppnt = elf_phdata;
773 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) { 773 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
diff --git a/fs/bio.c b/fs/bio.c
index 12da5db8682c..76e6713abf94 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -272,7 +272,7 @@ EXPORT_SYMBOL(bio_init);
272 * for a &struct bio to become free. If a %NULL @bs is passed in, we will 272 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
273 * fall back to just using @kmalloc to allocate the required memory. 273 * fall back to just using @kmalloc to allocate the required memory.
274 * 274 *
275 * Note that the caller must set ->bi_destructor on succesful return 275 * Note that the caller must set ->bi_destructor on successful return
276 * of a bio, to do the appropriate freeing of the bio once the reference 276 * of a bio, to do the appropriate freeing of the bio once the reference
277 * count drops to zero. 277 * count drops to zero.
278 **/ 278 **/
@@ -1393,6 +1393,18 @@ void bio_check_pages_dirty(struct bio *bio)
1393 } 1393 }
1394} 1394}
1395 1395
1396#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1397void bio_flush_dcache_pages(struct bio *bi)
1398{
1399 int i;
1400 struct bio_vec *bvec;
1401
1402 bio_for_each_segment(bvec, bi, i)
1403 flush_dcache_page(bvec->bv_page);
1404}
1405EXPORT_SYMBOL(bio_flush_dcache_pages);
1406#endif
1407
1396/** 1408/**
1397 * bio_endio - end I/O on a bio 1409 * bio_endio - end I/O on a bio
1398 * @bio: bio 1410 * @bio: bio
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8bed0557d88c..73d6a735b8f3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -405,7 +405,17 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
405 405
406static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) 406static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
407{ 407{
408 return sync_blockdev(I_BDEV(filp->f_mapping->host)); 408 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
409 int error;
410
411 error = sync_blockdev(bdev);
412 if (error)
413 return error;
414
415 error = blkdev_issue_flush(bdev, NULL);
416 if (error == -EOPNOTSUPP)
417 error = 0;
418 return error;
409} 419}
410 420
411/* 421/*
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index ccbdcb54ec5d..46bea0f4dc7b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -256,7 +256,7 @@ out:
256 * Insert @em into @tree or perform a simple forward/backward merge with 256 * Insert @em into @tree or perform a simple forward/backward merge with
257 * existing mappings. The extent_map struct passed in will be inserted 257 * existing mappings. The extent_map struct passed in will be inserted
258 * into the tree directly, with an additional reference taken, or a 258 * into the tree directly, with an additional reference taken, or a
259 * reference dropped if the merge attempt was sucessfull. 259 * reference dropped if the merge attempt was successfull.
260 */ 260 */
261int add_extent_mapping(struct extent_map_tree *tree, 261int add_extent_mapping(struct extent_map_tree *tree,
262 struct extent_map *em) 262 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 06550affbd27..77f759302e12 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -909,7 +909,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
909 unsigned long last_index; 909 unsigned long last_index;
910 int will_write; 910 int will_write;
911 911
912 will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || 912 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
913 (file->f_flags & O_DIRECT)); 913 (file->f_flags & O_DIRECT));
914 914
915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, 915 nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
@@ -1076,7 +1076,7 @@ out_nolock:
1076 if (err) 1076 if (err)
1077 num_written = err; 1077 num_written = err;
1078 1078
1079 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 1079 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1080 trans = btrfs_start_transaction(root, 1); 1080 trans = btrfs_start_transaction(root, 1);
1081 ret = btrfs_log_dentry_safe(trans, root, 1081 ret = btrfs_log_dentry_safe(trans, root,
1082 file->f_dentry); 1082 file->f_dentry);
diff --git a/fs/cifs/README b/fs/cifs/README
index 79c1a93400be..a727b7cb075f 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -423,7 +423,7 @@ A partial list of the supported mount options follows:
423 source name to use to represent the client netbios machine 423 source name to use to represent the client netbios machine
424 name when doing the RFC1001 netbios session initialize. 424 name when doing the RFC1001 netbios session initialize.
425 direct Do not do inode data caching on files opened on this mount. 425 direct Do not do inode data caching on files opened on this mount.
426 This precludes mmaping files on this mount. In some cases 426 This precludes mmapping files on this mount. In some cases
427 with fast networks and little or no caching benefits on the 427 with fast networks and little or no caching benefits on the
428 client (e.g. when the application is doing large sequential 428 client (e.g. when the application is doing large sequential
429 reads bigger than page size without rereading the same data) 429 reads bigger than page size without rereading the same data)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 5d0fde18039c..4b35f7ec0583 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -39,7 +39,7 @@
39 39
40/* 40/*
41 * MAX_REQ is the maximum number of requests that WE will send 41 * MAX_REQ is the maximum number of requests that WE will send
42 * on one socket concurently. It also matches the most common 42 * on one socket concurrently. It also matches the most common
43 * value of max multiplex returned by servers. We may 43 * value of max multiplex returned by servers. We may
44 * eventually want to use the negotiated value (in case 44 * eventually want to use the negotiated value (in case
45 * future servers can handle more) when we are more confident that 45 * future servers can handle more) when we are more confident that
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 2d07f890a842..3877737f96a6 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1227,7 +1227,7 @@ typedef struct smb_com_setattr_rsp {
1227/* empty wct response to setattr */ 1227/* empty wct response to setattr */
1228 1228
1229/*******************************************************/ 1229/*******************************************************/
1230/* NT Transact structure defintions follow */ 1230/* NT Transact structure definitions follow */
1231/* Currently only ioctl, acl (get security descriptor) */ 1231/* Currently only ioctl, acl (get security descriptor) */
1232/* and notify are implemented */ 1232/* and notify are implemented */
1233/*******************************************************/ 1233/*******************************************************/
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 1f42f772865a..6ccf7262d1b7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -214,7 +214,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
214 posix_flags |= SMB_O_EXCL; 214 posix_flags |= SMB_O_EXCL;
215 if (oflags & O_TRUNC) 215 if (oflags & O_TRUNC)
216 posix_flags |= SMB_O_TRUNC; 216 posix_flags |= SMB_O_TRUNC;
217 if (oflags & O_SYNC) 217 /* be safe and imply O_SYNC for O_DSYNC */
218 if (oflags & O_DSYNC)
218 posix_flags |= SMB_O_SYNC; 219 posix_flags |= SMB_O_SYNC;
219 if (oflags & O_DIRECTORY) 220 if (oflags & O_DIRECTORY)
220 posix_flags |= SMB_O_DIRECTORY; 221 posix_flags |= SMB_O_DIRECTORY;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 429337eb7afe..057e1dae12ab 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -76,8 +76,10 @@ static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
76 reopening a file. They had their effect on the original open */ 76 reopening a file. They had their effect on the original open */
77 if (flags & O_APPEND) 77 if (flags & O_APPEND)
78 posix_flags |= (fmode_t)O_APPEND; 78 posix_flags |= (fmode_t)O_APPEND;
79 if (flags & O_SYNC) 79 if (flags & O_DSYNC)
80 posix_flags |= (fmode_t)O_SYNC; 80 posix_flags |= (fmode_t)O_DSYNC;
81 if (flags & __O_SYNC)
82 posix_flags |= (fmode_t)__O_SYNC;
81 if (flags & O_DIRECTORY) 83 if (flags & O_DIRECTORY)
82 posix_flags |= (fmode_t)O_DIRECTORY; 84 posix_flags |= (fmode_t)O_DIRECTORY;
83 if (flags & O_NOFOLLOW) 85 if (flags & O_NOFOLLOW)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cababd8a52df..cf18ee765590 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -914,8 +914,8 @@ undo_setattr:
914/* 914/*
915 * If dentry->d_inode is null (usually meaning the cached dentry 915 * If dentry->d_inode is null (usually meaning the cached dentry
916 * is a negative dentry) then we would attempt a standard SMB delete, but 916 * is a negative dentry) then we would attempt a standard SMB delete, but
917 * if that fails we can not attempt the fall back mechanisms on EACESS 917 * if that fails we can not attempt the fall back mechanisms on EACCESS
918 * but will return the EACESS to the caller. Note that the VFS does not call 918 * but will return the EACCESS to the caller. Note that the VFS does not call
919 * unlink on negative dentries currently. 919 * unlink on negative dentries currently.
920 */ 920 */
921int cifs_unlink(struct inode *dir, struct dentry *dentry) 921int cifs_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 224a1f478966..b6b6dcb500bf 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -371,7 +371,7 @@ E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
371 smbhash(p24 + 16, c8, p21 + 14, 1); 371 smbhash(p24 + 16, c8, p21 + 14, 1);
372} 372}
373 373
374#if 0 /* currently unsued */ 374#if 0 /* currently unused */
375static void 375static void
376D_P16(unsigned char *p14, unsigned char *in, unsigned char *out) 376D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
377{ 377{
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 43c96ce29614..c6405ce3c50e 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -17,28 +17,25 @@ static struct ctl_table_header *fs_table_header;
17 17
18static ctl_table coda_table[] = { 18static ctl_table coda_table[] = {
19 { 19 {
20 .ctl_name = CTL_UNNUMBERED,
21 .procname = "timeout", 20 .procname = "timeout",
22 .data = &coda_timeout, 21 .data = &coda_timeout,
23 .maxlen = sizeof(int), 22 .maxlen = sizeof(int),
24 .mode = 0644, 23 .mode = 0644,
25 .proc_handler = &proc_dointvec 24 .proc_handler = proc_dointvec
26 }, 25 },
27 { 26 {
28 .ctl_name = CTL_UNNUMBERED,
29 .procname = "hard", 27 .procname = "hard",
30 .data = &coda_hard, 28 .data = &coda_hard,
31 .maxlen = sizeof(int), 29 .maxlen = sizeof(int),
32 .mode = 0644, 30 .mode = 0644,
33 .proc_handler = &proc_dointvec 31 .proc_handler = proc_dointvec
34 }, 32 },
35 { 33 {
36 .ctl_name = CTL_UNNUMBERED,
37 .procname = "fake_statfs", 34 .procname = "fake_statfs",
38 .data = &coda_fake_statfs, 35 .data = &coda_fake_statfs,
39 .maxlen = sizeof(int), 36 .maxlen = sizeof(int),
40 .mode = 0600, 37 .mode = 0600,
41 .proc_handler = &proc_dointvec 38 .proc_handler = proc_dointvec
42 }, 39 },
43 {} 40 {}
44}; 41};
@@ -46,7 +43,6 @@ static ctl_table coda_table[] = {
46#ifdef CONFIG_SYSCTL 43#ifdef CONFIG_SYSCTL
47static ctl_table fs_table[] = { 44static ctl_table fs_table[] = {
48 { 45 {
49 .ctl_name = CTL_UNNUMBERED,
50 .procname = "coda", 46 .procname = "coda",
51 .mode = 0555, 47 .mode = 0555,
52 .child = coda_table 48 .child = coda_table
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d84e7058c298..278020d2449c 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -111,43 +111,40 @@
111#include <linux/dvb/frontend.h> 111#include <linux/dvb/frontend.h>
112#include <linux/dvb/video.h> 112#include <linux/dvb/video.h>
113 113
114#include <linux/sort.h>
115
114#ifdef CONFIG_SPARC 116#ifdef CONFIG_SPARC
115#include <asm/fbio.h> 117#include <asm/fbio.h>
116#endif 118#endif
117 119
118static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd, 120static int w_long(unsigned int fd, unsigned int cmd,
119 unsigned long arg, struct file *f) 121 compat_ulong_t __user *argp)
120{
121 return sys_ioctl(fd, cmd, (unsigned long)compat_ptr(arg));
122}
123
124static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
125{ 122{
126 mm_segment_t old_fs = get_fs(); 123 mm_segment_t old_fs = get_fs();
127 int err; 124 int err;
128 unsigned long val; 125 unsigned long val;
129 126
130 set_fs (KERNEL_DS); 127 set_fs (KERNEL_DS);
131 err = sys_ioctl(fd, cmd, (unsigned long)&val); 128 err = sys_ioctl(fd, cmd, (unsigned long)&val);
132 set_fs (old_fs); 129 set_fs (old_fs);
133 if (!err && put_user(val, (u32 __user *)compat_ptr(arg))) 130 if (!err && put_user(val, argp))
134 return -EFAULT; 131 return -EFAULT;
135 return err; 132 return err;
136} 133}
137 134
138static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg) 135static int rw_long(unsigned int fd, unsigned int cmd,
136 compat_ulong_t __user *argp)
139{ 137{
140 mm_segment_t old_fs = get_fs(); 138 mm_segment_t old_fs = get_fs();
141 u32 __user *argptr = compat_ptr(arg);
142 int err; 139 int err;
143 unsigned long val; 140 unsigned long val;
144 141
145 if(get_user(val, argptr)) 142 if(get_user(val, argp))
146 return -EFAULT; 143 return -EFAULT;
147 set_fs (KERNEL_DS); 144 set_fs (KERNEL_DS);
148 err = sys_ioctl(fd, cmd, (unsigned long)&val); 145 err = sys_ioctl(fd, cmd, (unsigned long)&val);
149 set_fs (old_fs); 146 set_fs (old_fs);
150 if (!err && put_user(val, argptr)) 147 if (!err && put_user(val, argp))
151 return -EFAULT; 148 return -EFAULT;
152 return err; 149 return err;
153} 150}
@@ -161,7 +158,8 @@ struct compat_video_event {
161 } u; 158 } u;
162}; 159};
163 160
164static int do_video_get_event(unsigned int fd, unsigned int cmd, unsigned long arg) 161static int do_video_get_event(unsigned int fd, unsigned int cmd,
162 struct compat_video_event __user *up)
165{ 163{
166 struct video_event kevent; 164 struct video_event kevent;
167 mm_segment_t old_fs = get_fs(); 165 mm_segment_t old_fs = get_fs();
@@ -172,8 +170,6 @@ static int do_video_get_event(unsigned int fd, unsigned int cmd, unsigned long a
172 set_fs(old_fs); 170 set_fs(old_fs);
173 171
174 if (!err) { 172 if (!err) {
175 struct compat_video_event __user *up = compat_ptr(arg);
176
177 err = put_user(kevent.type, &up->type); 173 err = put_user(kevent.type, &up->type);
178 err |= put_user(kevent.timestamp, &up->timestamp); 174 err |= put_user(kevent.timestamp, &up->timestamp);
179 err |= put_user(kevent.u.size.w, &up->u.size.w); 175 err |= put_user(kevent.u.size.w, &up->u.size.w);
@@ -192,15 +188,14 @@ struct compat_video_still_picture {
192 int32_t size; 188 int32_t size;
193}; 189};
194 190
195static int do_video_stillpicture(unsigned int fd, unsigned int cmd, unsigned long arg) 191static int do_video_stillpicture(unsigned int fd, unsigned int cmd,
192 struct compat_video_still_picture __user *up)
196{ 193{
197 struct compat_video_still_picture __user *up;
198 struct video_still_picture __user *up_native; 194 struct video_still_picture __user *up_native;
199 compat_uptr_t fp; 195 compat_uptr_t fp;
200 int32_t size; 196 int32_t size;
201 int err; 197 int err;
202 198
203 up = (struct compat_video_still_picture __user *) arg;
204 err = get_user(fp, &up->iFrame); 199 err = get_user(fp, &up->iFrame);
205 err |= get_user(size, &up->size); 200 err |= get_user(size, &up->size);
206 if (err) 201 if (err)
@@ -224,14 +219,13 @@ struct compat_video_spu_palette {
224 compat_uptr_t palette; 219 compat_uptr_t palette;
225}; 220};
226 221
227static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned long arg) 222static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
223 struct compat_video_spu_palette __user *up)
228{ 224{
229 struct compat_video_spu_palette __user *up;
230 struct video_spu_palette __user *up_native; 225 struct video_spu_palette __user *up_native;
231 compat_uptr_t palp; 226 compat_uptr_t palp;
232 int length, err; 227 int length, err;
233 228
234 up = (struct compat_video_spu_palette __user *) arg;
235 err = get_user(palp, &up->palette); 229 err = get_user(palp, &up->palette);
236 err |= get_user(length, &up->length); 230 err |= get_user(length, &up->length);
237 231
@@ -246,428 +240,6 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, unsigned
246 return err; 240 return err;
247} 241}
248 242
249#ifdef CONFIG_NET
250static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
251{
252 struct compat_timeval __user *up = compat_ptr(arg);
253 struct timeval ktv;
254 mm_segment_t old_fs = get_fs();
255 int err;
256
257 set_fs(KERNEL_DS);
258 err = sys_ioctl(fd, cmd, (unsigned long)&ktv);
259 set_fs(old_fs);
260 if(!err) {
261 err = put_user(ktv.tv_sec, &up->tv_sec);
262 err |= __put_user(ktv.tv_usec, &up->tv_usec);
263 }
264 return err;
265}
266
267static int do_siocgstampns(unsigned int fd, unsigned int cmd, unsigned long arg)
268{
269 struct compat_timespec __user *up = compat_ptr(arg);
270 struct timespec kts;
271 mm_segment_t old_fs = get_fs();
272 int err;
273
274 set_fs(KERNEL_DS);
275 err = sys_ioctl(fd, cmd, (unsigned long)&kts);
276 set_fs(old_fs);
277 if (!err) {
278 err = put_user(kts.tv_sec, &up->tv_sec);
279 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
280 }
281 return err;
282}
283
284struct ifmap32 {
285 compat_ulong_t mem_start;
286 compat_ulong_t mem_end;
287 unsigned short base_addr;
288 unsigned char irq;
289 unsigned char dma;
290 unsigned char port;
291};
292
293struct ifreq32 {
294#define IFHWADDRLEN 6
295#define IFNAMSIZ 16
296 union {
297 char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
298 } ifr_ifrn;
299 union {
300 struct sockaddr ifru_addr;
301 struct sockaddr ifru_dstaddr;
302 struct sockaddr ifru_broadaddr;
303 struct sockaddr ifru_netmask;
304 struct sockaddr ifru_hwaddr;
305 short ifru_flags;
306 compat_int_t ifru_ivalue;
307 compat_int_t ifru_mtu;
308 struct ifmap32 ifru_map;
309 char ifru_slave[IFNAMSIZ]; /* Just fits the size */
310 char ifru_newname[IFNAMSIZ];
311 compat_caddr_t ifru_data;
312 /* XXXX? ifru_settings should be here */
313 } ifr_ifru;
314};
315
316struct ifconf32 {
317 compat_int_t ifc_len; /* size of buffer */
318 compat_caddr_t ifcbuf;
319};
320
321static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
322{
323 struct ifreq __user *uifr;
324 int err;
325
326 uifr = compat_alloc_user_space(sizeof(struct ifreq));
327 if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
328 return -EFAULT;
329
330 err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
331 if (err)
332 return err;
333
334 if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
335 return -EFAULT;
336
337 return 0;
338}
339
340static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
341{
342 struct ifconf32 ifc32;
343 struct ifconf ifc;
344 struct ifconf __user *uifc;
345 struct ifreq32 __user *ifr32;
346 struct ifreq __user *ifr;
347 unsigned int i, j;
348 int err;
349
350 if (copy_from_user(&ifc32, compat_ptr(arg), sizeof(struct ifconf32)))
351 return -EFAULT;
352
353 if (ifc32.ifcbuf == 0) {
354 ifc32.ifc_len = 0;
355 ifc.ifc_len = 0;
356 ifc.ifc_req = NULL;
357 uifc = compat_alloc_user_space(sizeof(struct ifconf));
358 } else {
359 size_t len =((ifc32.ifc_len / sizeof (struct ifreq32)) + 1) *
360 sizeof (struct ifreq);
361 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
362 ifc.ifc_len = len;
363 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
364 ifr32 = compat_ptr(ifc32.ifcbuf);
365 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct ifreq32)) {
366 if (copy_in_user(ifr, ifr32, sizeof(struct ifreq32)))
367 return -EFAULT;
368 ifr++;
369 ifr32++;
370 }
371 }
372 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
373 return -EFAULT;
374
375 err = sys_ioctl (fd, SIOCGIFCONF, (unsigned long)uifc);
376 if (err)
377 return err;
378
379 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
380 return -EFAULT;
381
382 ifr = ifc.ifc_req;
383 ifr32 = compat_ptr(ifc32.ifcbuf);
384 for (i = 0, j = 0;
385 i + sizeof (struct ifreq32) <= ifc32.ifc_len && j < ifc.ifc_len;
386 i += sizeof (struct ifreq32), j += sizeof (struct ifreq)) {
387 if (copy_in_user(ifr32, ifr, sizeof (struct ifreq32)))
388 return -EFAULT;
389 ifr32++;
390 ifr++;
391 }
392
393 if (ifc32.ifcbuf == 0) {
394 /* Translate from 64-bit structure multiple to
395 * a 32-bit one.
396 */
397 i = ifc.ifc_len;
398 i = ((i / sizeof(struct ifreq)) * sizeof(struct ifreq32));
399 ifc32.ifc_len = i;
400 } else {
401 ifc32.ifc_len = i;
402 }
403 if (copy_to_user(compat_ptr(arg), &ifc32, sizeof(struct ifconf32)))
404 return -EFAULT;
405
406 return 0;
407}
408
409static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
410{
411 struct ifreq __user *ifr;
412 struct ifreq32 __user *ifr32;
413 u32 data;
414 void __user *datap;
415
416 ifr = compat_alloc_user_space(sizeof(*ifr));
417 ifr32 = compat_ptr(arg);
418
419 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
420 return -EFAULT;
421
422 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
423 return -EFAULT;
424
425 datap = compat_ptr(data);
426 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
427 return -EFAULT;
428
429 return sys_ioctl(fd, cmd, (unsigned long) ifr);
430}
431
432static int bond_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
433{
434 struct ifreq kifr;
435 struct ifreq __user *uifr;
436 struct ifreq32 __user *ifr32 = compat_ptr(arg);
437 mm_segment_t old_fs;
438 int err;
439 u32 data;
440 void __user *datap;
441
442 switch (cmd) {
443 case SIOCBONDENSLAVE:
444 case SIOCBONDRELEASE:
445 case SIOCBONDSETHWADDR:
446 case SIOCBONDCHANGEACTIVE:
447 if (copy_from_user(&kifr, ifr32, sizeof(struct ifreq32)))
448 return -EFAULT;
449
450 old_fs = get_fs();
451 set_fs (KERNEL_DS);
452 err = sys_ioctl (fd, cmd, (unsigned long)&kifr);
453 set_fs (old_fs);
454
455 return err;
456 case SIOCBONDSLAVEINFOQUERY:
457 case SIOCBONDINFOQUERY:
458 uifr = compat_alloc_user_space(sizeof(*uifr));
459 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
460 return -EFAULT;
461
462 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
463 return -EFAULT;
464
465 datap = compat_ptr(data);
466 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
467 return -EFAULT;
468
469 return sys_ioctl (fd, cmd, (unsigned long)uifr);
470 default:
471 return -EINVAL;
472 };
473}
474
475static int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
476{
477 struct ifreq __user *u_ifreq64;
478 struct ifreq32 __user *u_ifreq32 = compat_ptr(arg);
479 char tmp_buf[IFNAMSIZ];
480 void __user *data64;
481 u32 data32;
482
483 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
484 IFNAMSIZ))
485 return -EFAULT;
486 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
487 return -EFAULT;
488 data64 = compat_ptr(data32);
489
490 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
491
492 /* Don't check these user accesses, just let that get trapped
493 * in the ioctl handler instead.
494 */
495 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
496 IFNAMSIZ))
497 return -EFAULT;
498 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
499 return -EFAULT;
500
501 return sys_ioctl(fd, cmd, (unsigned long) u_ifreq64);
502}
503
504static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
505{
506 struct ifreq ifr;
507 struct ifreq32 __user *uifr32;
508 struct ifmap32 __user *uifmap32;
509 mm_segment_t old_fs;
510 int err;
511
512 uifr32 = compat_ptr(arg);
513 uifmap32 = &uifr32->ifr_ifru.ifru_map;
514 switch (cmd) {
515 case SIOCSIFMAP:
516 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
517 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
518 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
519 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
520 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
521 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
522 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
523 if (err)
524 return -EFAULT;
525 break;
526 case SIOCSHWTSTAMP:
527 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
528 return -EFAULT;
529 ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data);
530 break;
531 default:
532 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
533 return -EFAULT;
534 break;
535 }
536 old_fs = get_fs();
537 set_fs (KERNEL_DS);
538 err = sys_ioctl (fd, cmd, (unsigned long)&ifr);
539 set_fs (old_fs);
540 if (!err) {
541 switch (cmd) {
542 /* TUNSETIFF is defined as _IOW, it should be _IORW
543 * as the data is copied back to user space, but that
544 * cannot be fixed without breaking all existing apps.
545 */
546 case TUNSETIFF:
547 case TUNGETIFF:
548 case SIOCGIFFLAGS:
549 case SIOCGIFMETRIC:
550 case SIOCGIFMTU:
551 case SIOCGIFMEM:
552 case SIOCGIFHWADDR:
553 case SIOCGIFINDEX:
554 case SIOCGIFADDR:
555 case SIOCGIFBRDADDR:
556 case SIOCGIFDSTADDR:
557 case SIOCGIFNETMASK:
558 case SIOCGIFTXQLEN:
559 if (copy_to_user(uifr32, &ifr, sizeof(*uifr32)))
560 return -EFAULT;
561 break;
562 case SIOCGIFMAP:
563 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
564 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
565 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
566 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
567 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
568 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
569 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
570 if (err)
571 err = -EFAULT;
572 break;
573 }
574 }
575 return err;
576}
577
578struct rtentry32 {
579 u32 rt_pad1;
580 struct sockaddr rt_dst; /* target address */
581 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
582 struct sockaddr rt_genmask; /* target network mask (IP) */
583 unsigned short rt_flags;
584 short rt_pad2;
585 u32 rt_pad3;
586 unsigned char rt_tos;
587 unsigned char rt_class;
588 short rt_pad4;
589 short rt_metric; /* +1 for binary compatibility! */
590 /* char * */ u32 rt_dev; /* forcing the device at add */
591 u32 rt_mtu; /* per route MTU/Window */
592 u32 rt_window; /* Window clamping */
593 unsigned short rt_irtt; /* Initial RTT */
594
595};
596
597struct in6_rtmsg32 {
598 struct in6_addr rtmsg_dst;
599 struct in6_addr rtmsg_src;
600 struct in6_addr rtmsg_gateway;
601 u32 rtmsg_type;
602 u16 rtmsg_dst_len;
603 u16 rtmsg_src_len;
604 u32 rtmsg_metric;
605 u32 rtmsg_info;
606 u32 rtmsg_flags;
607 s32 rtmsg_ifindex;
608};
609
610static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
611{
612 int ret;
613 void *r = NULL;
614 struct in6_rtmsg r6;
615 struct rtentry r4;
616 char devname[16];
617 u32 rtdev;
618 mm_segment_t old_fs = get_fs();
619
620 struct socket *mysock = sockfd_lookup(fd, &ret);
621
622 if (mysock && mysock->sk && mysock->sk->sk_family == AF_INET6) { /* ipv6 */
623 struct in6_rtmsg32 __user *ur6 = compat_ptr(arg);
624 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
625 3 * sizeof(struct in6_addr));
626 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
627 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
628 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
629 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
630 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
631 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
632 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
633
634 r = (void *) &r6;
635 } else { /* ipv4 */
636 struct rtentry32 __user *ur4 = compat_ptr(arg);
637 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
638 3 * sizeof(struct sockaddr));
639 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
640 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
641 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
642 ret |= __get_user (r4.rt_window, &(ur4->rt_window));
643 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
644 ret |= __get_user (rtdev, &(ur4->rt_dev));
645 if (rtdev) {
646 ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
647 r4.rt_dev = devname; devname[15] = 0;
648 } else
649 r4.rt_dev = NULL;
650
651 r = (void *) &r4;
652 }
653
654 if (ret) {
655 ret = -EFAULT;
656 goto out;
657 }
658
659 set_fs (KERNEL_DS);
660 ret = sys_ioctl (fd, cmd, (unsigned long) r);
661 set_fs (old_fs);
662
663out:
664 if (mysock)
665 sockfd_put(mysock);
666
667 return ret;
668}
669#endif
670
671#ifdef CONFIG_BLOCK 243#ifdef CONFIG_BLOCK
672typedef struct sg_io_hdr32 { 244typedef struct sg_io_hdr32 {
673 compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ 245 compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */
@@ -721,16 +293,15 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov
721 return 0; 293 return 0;
722} 294}
723 295
724static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 296static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
297 sg_io_hdr32_t __user *sgio32)
725{ 298{
726 sg_io_hdr_t __user *sgio; 299 sg_io_hdr_t __user *sgio;
727 sg_io_hdr32_t __user *sgio32;
728 u16 iovec_count; 300 u16 iovec_count;
729 u32 data; 301 u32 data;
730 void __user *dxferp; 302 void __user *dxferp;
731 int err; 303 int err;
732 304
733 sgio32 = compat_ptr(arg);
734 if (get_user(iovec_count, &sgio32->iovec_count)) 305 if (get_user(iovec_count, &sgio32->iovec_count))
735 return -EFAULT; 306 return -EFAULT;
736 307
@@ -820,11 +391,11 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */
820 int unused; 391 int unused;
821}; 392};
822 393
823static int sg_grt_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 394static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct
395 compat_sg_req_info __user *o)
824{ 396{
825 int err, i; 397 int err, i;
826 sg_req_info_t __user *r; 398 sg_req_info_t __user *r;
827 struct compat_sg_req_info __user *o = (void __user *)arg;
828 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE); 399 r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE);
829 err = sys_ioctl(fd,cmd,(unsigned long)r); 400 err = sys_ioctl(fd,cmd,(unsigned long)r);
830 if (err < 0) 401 if (err < 0)
@@ -852,9 +423,9 @@ struct sock_fprog32 {
852#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32) 423#define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32)
853#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32) 424#define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32)
854 425
855static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 426static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd,
427 struct sock_fprog32 __user *u_fprog32)
856{ 428{
857 struct sock_fprog32 __user *u_fprog32 = compat_ptr(arg);
858 struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog)); 429 struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog));
859 void __user *fptr64; 430 void __user *fptr64;
860 u32 fptr32; 431 u32 fptr32;
@@ -891,15 +462,14 @@ struct ppp_idle32 {
891}; 462};
892#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32) 463#define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32)
893 464
894static int ppp_gidle(unsigned int fd, unsigned int cmd, unsigned long arg) 465static int ppp_gidle(unsigned int fd, unsigned int cmd,
466 struct ppp_idle32 __user *idle32)
895{ 467{
896 struct ppp_idle __user *idle; 468 struct ppp_idle __user *idle;
897 struct ppp_idle32 __user *idle32;
898 __kernel_time_t xmit, recv; 469 __kernel_time_t xmit, recv;
899 int err; 470 int err;
900 471
901 idle = compat_alloc_user_space(sizeof(*idle)); 472 idle = compat_alloc_user_space(sizeof(*idle));
902 idle32 = compat_ptr(arg);
903 473
904 err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle); 474 err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle);
905 475
@@ -913,15 +483,14 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd, unsigned long arg)
913 return err; 483 return err;
914} 484}
915 485
916static int ppp_scompress(unsigned int fd, unsigned int cmd, unsigned long arg) 486static int ppp_scompress(unsigned int fd, unsigned int cmd,
487 struct ppp_option_data32 __user *odata32)
917{ 488{
918 struct ppp_option_data __user *odata; 489 struct ppp_option_data __user *odata;
919 struct ppp_option_data32 __user *odata32;
920 __u32 data; 490 __u32 data;
921 void __user *datap; 491 void __user *datap;
922 492
923 odata = compat_alloc_user_space(sizeof(*odata)); 493 odata = compat_alloc_user_space(sizeof(*odata));
924 odata32 = compat_ptr(arg);
925 494
926 if (get_user(data, &odata32->ptr)) 495 if (get_user(data, &odata32->ptr))
927 return -EFAULT; 496 return -EFAULT;
@@ -937,35 +506,6 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd, unsigned long arg)
937 return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata); 506 return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata);
938} 507}
939 508
940static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
941{
942 int err;
943
944 switch (cmd) {
945 case PPPIOCGIDLE32:
946 err = ppp_gidle(fd, cmd, arg);
947 break;
948
949 case PPPIOCSCOMPRESS32:
950 err = ppp_scompress(fd, cmd, arg);
951 break;
952
953 default:
954 do {
955 static int count;
956 if (++count <= 20)
957 printk("ppp_ioctl: Unknown cmd fd(%d) "
958 "cmd(%08x) arg(%08x)\n",
959 (int)fd, (unsigned int)cmd, (unsigned int)arg);
960 } while(0);
961 err = -EINVAL;
962 break;
963 };
964
965 return err;
966}
967
968
969#ifdef CONFIG_BLOCK 509#ifdef CONFIG_BLOCK
970struct mtget32 { 510struct mtget32 {
971 compat_long_t mt_type; 511 compat_long_t mt_type;
@@ -983,7 +523,7 @@ struct mtpos32 {
983}; 523};
984#define MTIOCPOS32 _IOR('m', 3, struct mtpos32) 524#define MTIOCPOS32 _IOR('m', 3, struct mtpos32)
985 525
986static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) 526static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
987{ 527{
988 mm_segment_t old_fs = get_fs(); 528 mm_segment_t old_fs = get_fs();
989 struct mtget get; 529 struct mtget get;
@@ -1003,15 +543,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1003 kcmd = MTIOCGET; 543 kcmd = MTIOCGET;
1004 karg = &get; 544 karg = &get;
1005 break; 545 break;
1006 default:
1007 do {
1008 static int count;
1009 if (++count <= 20)
1010 printk("mt_ioctl: Unknown cmd fd(%d) "
1011 "cmd(%08x) arg(%08x)\n",
1012 (int)fd, (unsigned int)cmd, (unsigned int)arg);
1013 } while(0);
1014 return -EINVAL;
1015 } 546 }
1016 set_fs (KERNEL_DS); 547 set_fs (KERNEL_DS);
1017 err = sys_ioctl (fd, kcmd, (unsigned long)karg); 548 err = sys_ioctl (fd, kcmd, (unsigned long)karg);
@@ -1020,11 +551,11 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1020 return err; 551 return err;
1021 switch (cmd) { 552 switch (cmd) {
1022 case MTIOCPOS32: 553 case MTIOCPOS32:
1023 upos32 = compat_ptr(arg); 554 upos32 = argp;
1024 err = __put_user(pos.mt_blkno, &upos32->mt_blkno); 555 err = __put_user(pos.mt_blkno, &upos32->mt_blkno);
1025 break; 556 break;
1026 case MTIOCGET32: 557 case MTIOCGET32:
1027 umget32 = compat_ptr(arg); 558 umget32 = argp;
1028 err = __put_user(get.mt_type, &umget32->mt_type); 559 err = __put_user(get.mt_type, &umget32->mt_type);
1029 err |= __put_user(get.mt_resid, &umget32->mt_resid); 560 err |= __put_user(get.mt_resid, &umget32->mt_resid);
1030 err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg); 561 err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg);
@@ -1039,162 +570,8 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1039 570
1040#endif /* CONFIG_BLOCK */ 571#endif /* CONFIG_BLOCK */
1041 572
1042#ifdef CONFIG_VT 573static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
1043 574 compat_uid_t __user *argp)
1044static int vt_check(struct file *file)
1045{
1046 struct tty_struct *tty;
1047 struct inode *inode = file->f_path.dentry->d_inode;
1048 struct vc_data *vc;
1049
1050 if (file->f_op->unlocked_ioctl != tty_ioctl)
1051 return -EINVAL;
1052
1053 tty = (struct tty_struct *)file->private_data;
1054 if (tty_paranoia_check(tty, inode, "tty_ioctl"))
1055 return -EINVAL;
1056
1057 if (tty->ops->ioctl != vt_ioctl)
1058 return -EINVAL;
1059
1060 vc = (struct vc_data *)tty->driver_data;
1061 if (!vc_cons_allocated(vc->vc_num)) /* impossible? */
1062 return -ENOIOCTLCMD;
1063
1064 /*
1065 * To have permissions to do most of the vt ioctls, we either have
1066 * to be the owner of the tty, or have CAP_SYS_TTY_CONFIG.
1067 */
1068 if (current->signal->tty == tty || capable(CAP_SYS_TTY_CONFIG))
1069 return 1;
1070 return 0;
1071}
1072
1073struct consolefontdesc32 {
1074 unsigned short charcount; /* characters in font (256 or 512) */
1075 unsigned short charheight; /* scan lines per character (1-32) */
1076 compat_caddr_t chardata; /* font data in expanded form */
1077};
1078
1079static int do_fontx_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
1080{
1081 struct consolefontdesc32 __user *user_cfd = compat_ptr(arg);
1082 struct console_font_op op;
1083 compat_caddr_t data;
1084 int i, perm;
1085
1086 perm = vt_check(file);
1087 if (perm < 0) return perm;
1088
1089 switch (cmd) {
1090 case PIO_FONTX:
1091 if (!perm)
1092 return -EPERM;
1093 op.op = KD_FONT_OP_SET;
1094 op.flags = 0;
1095 op.width = 8;
1096 if (get_user(op.height, &user_cfd->charheight) ||
1097 get_user(op.charcount, &user_cfd->charcount) ||
1098 get_user(data, &user_cfd->chardata))
1099 return -EFAULT;
1100 op.data = compat_ptr(data);
1101 return con_font_op(vc_cons[fg_console].d, &op);
1102 case GIO_FONTX:
1103 op.op = KD_FONT_OP_GET;
1104 op.flags = 0;
1105 op.width = 8;
1106 if (get_user(op.height, &user_cfd->charheight) ||
1107 get_user(op.charcount, &user_cfd->charcount) ||
1108 get_user(data, &user_cfd->chardata))
1109 return -EFAULT;
1110 if (!data)
1111 return 0;
1112 op.data = compat_ptr(data);
1113 i = con_font_op(vc_cons[fg_console].d, &op);
1114 if (i)
1115 return i;
1116 if (put_user(op.height, &user_cfd->charheight) ||
1117 put_user(op.charcount, &user_cfd->charcount) ||
1118 put_user((compat_caddr_t)(unsigned long)op.data,
1119 &user_cfd->chardata))
1120 return -EFAULT;
1121 return 0;
1122 }
1123 return -EINVAL;
1124}
1125
1126struct console_font_op32 {
1127 compat_uint_t op; /* operation code KD_FONT_OP_* */
1128 compat_uint_t flags; /* KD_FONT_FLAG_* */
1129 compat_uint_t width, height; /* font size */
1130 compat_uint_t charcount;
1131 compat_caddr_t data; /* font data with height fixed to 32 */
1132};
1133
1134static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
1135{
1136 struct console_font_op op;
1137 struct console_font_op32 __user *fontop = compat_ptr(arg);
1138 int perm = vt_check(file), i;
1139 struct vc_data *vc;
1140
1141 if (perm < 0) return perm;
1142
1143 if (copy_from_user(&op, fontop, sizeof(struct console_font_op32)))
1144 return -EFAULT;
1145 if (!perm && op.op != KD_FONT_OP_GET)
1146 return -EPERM;
1147 op.data = compat_ptr(((struct console_font_op32 *)&op)->data);
1148 op.flags |= KD_FONT_FLAG_OLD;
1149 vc = ((struct tty_struct *)file->private_data)->driver_data;
1150 i = con_font_op(vc, &op);
1151 if (i)
1152 return i;
1153 ((struct console_font_op32 *)&op)->data = (unsigned long)op.data;
1154 if (copy_to_user(fontop, &op, sizeof(struct console_font_op32)))
1155 return -EFAULT;
1156 return 0;
1157}
1158
1159struct unimapdesc32 {
1160 unsigned short entry_ct;
1161 compat_caddr_t entries;
1162};
1163
1164static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *file)
1165{
1166 struct unimapdesc32 tmp;
1167 struct unimapdesc32 __user *user_ud = compat_ptr(arg);
1168 int perm = vt_check(file);
1169 struct vc_data *vc;
1170
1171 if (perm < 0)
1172 return perm;
1173 if (copy_from_user(&tmp, user_ud, sizeof tmp))
1174 return -EFAULT;
1175 if (tmp.entries)
1176 if (!access_ok(VERIFY_WRITE, compat_ptr(tmp.entries),
1177 tmp.entry_ct*sizeof(struct unipair)))
1178 return -EFAULT;
1179 vc = ((struct tty_struct *)file->private_data)->driver_data;
1180 switch (cmd) {
1181 case PIO_UNIMAP:
1182 if (!perm)
1183 return -EPERM;
1184 return con_set_unimap(vc, tmp.entry_ct,
1185 compat_ptr(tmp.entries));
1186 case GIO_UNIMAP:
1187 if (!perm && fg_console != vc->vc_num)
1188 return -EPERM;
1189 return con_get_unimap(vc, tmp.entry_ct, &(user_ud->entry_ct),
1190 compat_ptr(tmp.entries));
1191 }
1192 return 0;
1193}
1194
1195#endif /* CONFIG_VT */
1196
1197static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long arg)
1198{ 575{
1199 mm_segment_t old_fs = get_fs(); 576 mm_segment_t old_fs = get_fs();
1200 __kernel_uid_t kuid; 577 __kernel_uid_t kuid;
@@ -1207,184 +584,15 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long a
1207 set_fs(old_fs); 584 set_fs(old_fs);
1208 585
1209 if (err >= 0) 586 if (err >= 0)
1210 err = put_user(kuid, (compat_uid_t __user *)compat_ptr(arg)); 587 err = put_user(kuid, argp);
1211 588
1212 return err; 589 return err;
1213} 590}
1214 591
1215struct atmif_sioc32 { 592static int ioc_settimeout(unsigned int fd, unsigned int cmd,
1216 compat_int_t number; 593 compat_ulong_t __user *argp)
1217 compat_int_t length;
1218 compat_caddr_t arg;
1219};
1220
1221struct atm_iobuf32 {
1222 compat_int_t length;
1223 compat_caddr_t buffer;
1224};
1225
1226#define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct atmif_sioc32)
1227#define ATM_GETNAMES32 _IOW('a', ATMIOC_ITF+3, struct atm_iobuf32)
1228#define ATM_GETTYPE32 _IOW('a', ATMIOC_ITF+4, struct atmif_sioc32)
1229#define ATM_GETESI32 _IOW('a', ATMIOC_ITF+5, struct atmif_sioc32)
1230#define ATM_GETADDR32 _IOW('a', ATMIOC_ITF+6, struct atmif_sioc32)
1231#define ATM_RSTADDR32 _IOW('a', ATMIOC_ITF+7, struct atmif_sioc32)
1232#define ATM_ADDADDR32 _IOW('a', ATMIOC_ITF+8, struct atmif_sioc32)
1233#define ATM_DELADDR32 _IOW('a', ATMIOC_ITF+9, struct atmif_sioc32)
1234#define ATM_GETCIRANGE32 _IOW('a', ATMIOC_ITF+10, struct atmif_sioc32)
1235#define ATM_SETCIRANGE32 _IOW('a', ATMIOC_ITF+11, struct atmif_sioc32)
1236#define ATM_SETESI32 _IOW('a', ATMIOC_ITF+12, struct atmif_sioc32)
1237#define ATM_SETESIF32 _IOW('a', ATMIOC_ITF+13, struct atmif_sioc32)
1238#define ATM_GETSTAT32 _IOW('a', ATMIOC_SARCOM+0, struct atmif_sioc32)
1239#define ATM_GETSTATZ32 _IOW('a', ATMIOC_SARCOM+1, struct atmif_sioc32)
1240#define ATM_GETLOOP32 _IOW('a', ATMIOC_SARCOM+2, struct atmif_sioc32)
1241#define ATM_SETLOOP32 _IOW('a', ATMIOC_SARCOM+3, struct atmif_sioc32)
1242#define ATM_QUERYLOOP32 _IOW('a', ATMIOC_SARCOM+4, struct atmif_sioc32)
1243
1244static struct {
1245 unsigned int cmd32;
1246 unsigned int cmd;
1247} atm_ioctl_map[] = {
1248 { ATM_GETLINKRATE32, ATM_GETLINKRATE },
1249 { ATM_GETNAMES32, ATM_GETNAMES },
1250 { ATM_GETTYPE32, ATM_GETTYPE },
1251 { ATM_GETESI32, ATM_GETESI },
1252 { ATM_GETADDR32, ATM_GETADDR },
1253 { ATM_RSTADDR32, ATM_RSTADDR },
1254 { ATM_ADDADDR32, ATM_ADDADDR },
1255 { ATM_DELADDR32, ATM_DELADDR },
1256 { ATM_GETCIRANGE32, ATM_GETCIRANGE },
1257 { ATM_SETCIRANGE32, ATM_SETCIRANGE },
1258 { ATM_SETESI32, ATM_SETESI },
1259 { ATM_SETESIF32, ATM_SETESIF },
1260 { ATM_GETSTAT32, ATM_GETSTAT },
1261 { ATM_GETSTATZ32, ATM_GETSTATZ },
1262 { ATM_GETLOOP32, ATM_GETLOOP },
1263 { ATM_SETLOOP32, ATM_SETLOOP },
1264 { ATM_QUERYLOOP32, ATM_QUERYLOOP }
1265};
1266
1267#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map)
1268
1269static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg)
1270{ 594{
1271 struct atm_iobuf __user *iobuf; 595 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
1272 struct atm_iobuf32 __user *iobuf32;
1273 u32 data;
1274 void __user *datap;
1275 int len, err;
1276
1277 iobuf = compat_alloc_user_space(sizeof(*iobuf));
1278 iobuf32 = compat_ptr(arg);
1279
1280 if (get_user(len, &iobuf32->length) ||
1281 get_user(data, &iobuf32->buffer))
1282 return -EFAULT;
1283 datap = compat_ptr(data);
1284 if (put_user(len, &iobuf->length) ||
1285 put_user(datap, &iobuf->buffer))
1286 return -EFAULT;
1287
1288 err = sys_ioctl(fd, cmd, (unsigned long)iobuf);
1289
1290 if (!err) {
1291 if (copy_in_user(&iobuf32->length, &iobuf->length,
1292 sizeof(int)))
1293 err = -EFAULT;
1294 }
1295
1296 return err;
1297}
1298
1299static int do_atmif_sioc(unsigned int fd, unsigned int cmd, unsigned long arg)
1300{
1301 struct atmif_sioc __user *sioc;
1302 struct atmif_sioc32 __user *sioc32;
1303 u32 data;
1304 void __user *datap;
1305 int err;
1306
1307 sioc = compat_alloc_user_space(sizeof(*sioc));
1308 sioc32 = compat_ptr(arg);
1309
1310 if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
1311 get_user(data, &sioc32->arg))
1312 return -EFAULT;
1313 datap = compat_ptr(data);
1314 if (put_user(datap, &sioc->arg))
1315 return -EFAULT;
1316
1317 err = sys_ioctl(fd, cmd, (unsigned long) sioc);
1318
1319 if (!err) {
1320 if (copy_in_user(&sioc32->length, &sioc->length,
1321 sizeof(int)))
1322 err = -EFAULT;
1323 }
1324 return err;
1325}
1326
1327static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg)
1328{
1329 int i;
1330 unsigned int cmd = 0;
1331
1332 switch (cmd32) {
1333 case SONET_GETSTAT:
1334 case SONET_GETSTATZ:
1335 case SONET_GETDIAG:
1336 case SONET_SETDIAG:
1337 case SONET_CLRDIAG:
1338 case SONET_SETFRAMING:
1339 case SONET_GETFRAMING:
1340 case SONET_GETFRSENSE:
1341 return do_atmif_sioc(fd, cmd32, arg);
1342 }
1343
1344 for (i = 0; i < NR_ATM_IOCTL; i++) {
1345 if (cmd32 == atm_ioctl_map[i].cmd32) {
1346 cmd = atm_ioctl_map[i].cmd;
1347 break;
1348 }
1349 }
1350 if (i == NR_ATM_IOCTL)
1351 return -EINVAL;
1352
1353 switch (cmd) {
1354 case ATM_GETNAMES:
1355 return do_atm_iobuf(fd, cmd, arg);
1356
1357 case ATM_GETLINKRATE:
1358 case ATM_GETTYPE:
1359 case ATM_GETESI:
1360 case ATM_GETADDR:
1361 case ATM_RSTADDR:
1362 case ATM_ADDADDR:
1363 case ATM_DELADDR:
1364 case ATM_GETCIRANGE:
1365 case ATM_SETCIRANGE:
1366 case ATM_SETESI:
1367 case ATM_SETESIF:
1368 case ATM_GETSTAT:
1369 case ATM_GETSTATZ:
1370 case ATM_GETLOOP:
1371 case ATM_SETLOOP:
1372 case ATM_QUERYLOOP:
1373 return do_atmif_sioc(fd, cmd, arg);
1374 }
1375
1376 return -EINVAL;
1377}
1378
1379static __used int
1380ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
1381{
1382 return -EINVAL;
1383}
1384
1385static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
1386{
1387 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
1388} 596}
1389 597
1390/* Bluetooth ioctls */ 598/* Bluetooth ioctls */
@@ -1442,7 +650,8 @@ static int set_raw32_request(struct raw_config_request *req, struct raw32_config
1442 return ret ? -EFAULT : 0; 650 return ret ? -EFAULT : 0;
1443} 651}
1444 652
1445static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 653static int raw_ioctl(unsigned fd, unsigned cmd,
654 struct raw32_config_request __user *user_req)
1446{ 655{
1447 int ret; 656 int ret;
1448 657
@@ -1450,7 +659,6 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1450 case RAW_SETBIND: 659 case RAW_SETBIND:
1451 case RAW_GETBIND: { 660 case RAW_GETBIND: {
1452 struct raw_config_request req; 661 struct raw_config_request req;
1453 struct raw32_config_request __user *user_req = compat_ptr(arg);
1454 mm_segment_t oldfs = get_fs(); 662 mm_segment_t oldfs = get_fs();
1455 663
1456 if ((ret = get_raw32_request(&req, user_req))) 664 if ((ret = get_raw32_request(&req, user_req)))
@@ -1465,9 +673,6 @@ static int raw_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1465 } 673 }
1466 break; 674 break;
1467 } 675 }
1468 default:
1469 ret = sys_ioctl(fd, cmd, arg);
1470 break;
1471 } 676 }
1472 return ret; 677 return ret;
1473} 678}
@@ -1495,11 +700,11 @@ struct serial_struct32 {
1495 compat_int_t reserved[1]; 700 compat_int_t reserved[1];
1496}; 701};
1497 702
1498static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 703static int serial_struct_ioctl(unsigned fd, unsigned cmd,
704 struct serial_struct32 __user *ss32)
1499{ 705{
1500 typedef struct serial_struct SS; 706 typedef struct serial_struct SS;
1501 typedef struct serial_struct32 SS32; 707 typedef struct serial_struct32 SS32;
1502 struct serial_struct32 __user *ss32 = compat_ptr(arg);
1503 int err; 708 int err;
1504 struct serial_struct ss; 709 struct serial_struct ss;
1505 mm_segment_t oldseg = get_fs(); 710 mm_segment_t oldseg = get_fs();
@@ -1537,96 +742,6 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1537 return err; 742 return err;
1538} 743}
1539 744
1540struct usbdevfs_ctrltransfer32 {
1541 u8 bRequestType;
1542 u8 bRequest;
1543 u16 wValue;
1544 u16 wIndex;
1545 u16 wLength;
1546 u32 timeout; /* in milliseconds */
1547 compat_caddr_t data;
1548};
1549
1550#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
1551
1552static int do_usbdevfs_control(unsigned int fd, unsigned int cmd, unsigned long arg)
1553{
1554 struct usbdevfs_ctrltransfer32 __user *p32 = compat_ptr(arg);
1555 struct usbdevfs_ctrltransfer __user *p;
1556 __u32 udata;
1557 p = compat_alloc_user_space(sizeof(*p));
1558 if (copy_in_user(p, p32, (sizeof(*p32) - sizeof(compat_caddr_t))) ||
1559 get_user(udata, &p32->data) ||
1560 put_user(compat_ptr(udata), &p->data))
1561 return -EFAULT;
1562 return sys_ioctl(fd, USBDEVFS_CONTROL, (unsigned long)p);
1563}
1564
1565
1566struct usbdevfs_bulktransfer32 {
1567 compat_uint_t ep;
1568 compat_uint_t len;
1569 compat_uint_t timeout; /* in milliseconds */
1570 compat_caddr_t data;
1571};
1572
1573#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
1574
1575static int do_usbdevfs_bulk(unsigned int fd, unsigned int cmd, unsigned long arg)
1576{
1577 struct usbdevfs_bulktransfer32 __user *p32 = compat_ptr(arg);
1578 struct usbdevfs_bulktransfer __user *p;
1579 compat_uint_t n;
1580 compat_caddr_t addr;
1581
1582 p = compat_alloc_user_space(sizeof(*p));
1583
1584 if (get_user(n, &p32->ep) || put_user(n, &p->ep) ||
1585 get_user(n, &p32->len) || put_user(n, &p->len) ||
1586 get_user(n, &p32->timeout) || put_user(n, &p->timeout) ||
1587 get_user(addr, &p32->data) || put_user(compat_ptr(addr), &p->data))
1588 return -EFAULT;
1589
1590 return sys_ioctl(fd, USBDEVFS_BULK, (unsigned long)p);
1591}
1592
1593
1594/*
1595 * USBDEVFS_SUBMITURB, USBDEVFS_REAPURB and USBDEVFS_REAPURBNDELAY
1596 * are handled in usbdevfs core. -Christopher Li
1597 */
1598
1599struct usbdevfs_disconnectsignal32 {
1600 compat_int_t signr;
1601 compat_caddr_t context;
1602};
1603
1604#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
1605
1606static int do_usbdevfs_discsignal(unsigned int fd, unsigned int cmd, unsigned long arg)
1607{
1608 struct usbdevfs_disconnectsignal kdis;
1609 struct usbdevfs_disconnectsignal32 __user *udis;
1610 mm_segment_t old_fs;
1611 u32 uctx;
1612 int err;
1613
1614 udis = compat_ptr(arg);
1615
1616 if (get_user(kdis.signr, &udis->signr) ||
1617 __get_user(uctx, &udis->context))
1618 return -EFAULT;
1619
1620 kdis.context = compat_ptr(uctx);
1621
1622 old_fs = get_fs();
1623 set_fs(KERNEL_DS);
1624 err = sys_ioctl(fd, USBDEVFS_DISCSIGNAL, (unsigned long) &kdis);
1625 set_fs(old_fs);
1626
1627 return err;
1628}
1629
1630/* 745/*
1631 * I2C layer ioctls 746 * I2C layer ioctls
1632 */ 747 */
@@ -1655,9 +770,9 @@ struct i2c_rdwr_aligned {
1655 struct i2c_msg msgs[0]; 770 struct i2c_msg msgs[0];
1656}; 771};
1657 772
1658static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 773static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
774 struct i2c_rdwr_ioctl_data32 __user *udata)
1659{ 775{
1660 struct i2c_rdwr_ioctl_data32 __user *udata = compat_ptr(arg);
1661 struct i2c_rdwr_aligned __user *tdata; 776 struct i2c_rdwr_aligned __user *tdata;
1662 struct i2c_msg __user *tmsgs; 777 struct i2c_msg __user *tmsgs;
1663 struct i2c_msg32 __user *umsgs; 778 struct i2c_msg32 __user *umsgs;
@@ -1691,10 +806,10 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
1691 return sys_ioctl(fd, cmd, (unsigned long)tdata); 806 return sys_ioctl(fd, cmd, (unsigned long)tdata);
1692} 807}
1693 808
1694static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) 809static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd,
810 struct i2c_smbus_ioctl_data32 __user *udata)
1695{ 811{
1696 struct i2c_smbus_ioctl_data __user *tdata; 812 struct i2c_smbus_ioctl_data __user *tdata;
1697 struct i2c_smbus_ioctl_data32 __user *udata;
1698 compat_caddr_t datap; 813 compat_caddr_t datap;
1699 814
1700 tdata = compat_alloc_user_space(sizeof(*tdata)); 815 tdata = compat_alloc_user_space(sizeof(*tdata));
@@ -1703,7 +818,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1703 if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata))) 818 if (!access_ok(VERIFY_WRITE, tdata, sizeof(*tdata)))
1704 return -EFAULT; 819 return -EFAULT;
1705 820
1706 udata = compat_ptr(arg);
1707 if (!access_ok(VERIFY_READ, udata, sizeof(*udata))) 821 if (!access_ok(VERIFY_READ, udata, sizeof(*udata)))
1708 return -EFAULT; 822 return -EFAULT;
1709 823
@@ -1718,27 +832,12 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1718 return sys_ioctl(fd, cmd, (unsigned long)tdata); 832 return sys_ioctl(fd, cmd, (unsigned long)tdata);
1719} 833}
1720 834
1721/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
1722 * for some operations; this forces use of the newer bridge-utils that
1723 * use compatible ioctls
1724 */
1725static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
1726{
1727 u32 tmp;
1728
1729 if (get_user(tmp, (u32 __user *) arg))
1730 return -EFAULT;
1731 if (tmp == BRCTL_GET_VERSION)
1732 return BRCTL_VERSION + 1;
1733 return -EINVAL;
1734}
1735
1736#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) 835#define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t)
1737#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t) 836#define RTC_IRQP_SET32 _IOW('p', 0x0c, compat_ulong_t)
1738#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) 837#define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t)
1739#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t) 838#define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t)
1740 839
1741static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg) 840static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp)
1742{ 841{
1743 mm_segment_t oldfs = get_fs(); 842 mm_segment_t oldfs = get_fs();
1744 compat_ulong_t val32; 843 compat_ulong_t val32;
@@ -1756,29 +855,14 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
1756 if (ret) 855 if (ret)
1757 return ret; 856 return ret;
1758 val32 = kval; 857 val32 = kval;
1759 return put_user(val32, (unsigned int __user *)arg); 858 return put_user(val32, (unsigned int __user *)argp);
1760 case RTC_IRQP_SET32: 859 case RTC_IRQP_SET32:
1761 return sys_ioctl(fd, RTC_IRQP_SET, arg); 860 return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp);
1762 case RTC_EPOCH_SET32: 861 case RTC_EPOCH_SET32:
1763 return sys_ioctl(fd, RTC_EPOCH_SET, arg); 862 return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp);
1764 default:
1765 /* unreached */
1766 return -ENOIOCTLCMD;
1767 } 863 }
1768}
1769 864
1770static int 865 return -ENOIOCTLCMD;
1771lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1772{
1773 struct compat_timeval __user *tc = (struct compat_timeval __user *)arg;
1774 struct timeval __user *tn = compat_alloc_user_space(sizeof(struct timeval));
1775 struct timeval ts;
1776 if (get_user(ts.tv_sec, &tc->tv_sec) ||
1777 get_user(ts.tv_usec, &tc->tv_usec) ||
1778 put_user(ts.tv_sec, &tn->tv_sec) ||
1779 put_user(ts.tv_usec, &tn->tv_usec))
1780 return -EFAULT;
1781 return sys_ioctl(fd, cmd, (unsigned long)tn);
1782} 866}
1783 867
1784/* on ia32 l_start is on a 32-bit boundary */ 868/* on ia32 l_start is on a 32-bit boundary */
@@ -1798,9 +882,9 @@ struct space_resv_32 {
1798#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) 882#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32)
1799 883
1800/* just account for different alignment */ 884/* just account for different alignment */
1801static int compat_ioctl_preallocate(struct file *file, unsigned long arg) 885static int compat_ioctl_preallocate(struct file *file,
886 struct space_resv_32 __user *p32)
1802{ 887{
1803 struct space_resv_32 __user *p32 = compat_ptr(arg);
1804 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p)); 888 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p));
1805 889
1806 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || 890 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
@@ -1816,27 +900,13 @@ static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
1816} 900}
1817#endif 901#endif
1818 902
903/*
904 * simple reversible transform to make our table more evenly
905 * distributed after sorting.
906 */
907#define XFORM(i) (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff)
1819 908
1820typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, 909#define COMPATIBLE_IOCTL(cmd) XFORM(cmd),
1821 unsigned long, struct file *);
1822
1823struct ioctl_trans {
1824 unsigned long cmd;
1825 ioctl_trans_handler_t handler;
1826 struct ioctl_trans *next;
1827};
1828
1829#define HANDLE_IOCTL(cmd,handler) \
1830 { (cmd), (ioctl_trans_handler_t)(handler) },
1831
1832/* pointer to compatible structure or no argument */
1833#define COMPATIBLE_IOCTL(cmd) \
1834 { (cmd), do_ioctl32_pointer },
1835
1836/* argument is an unsigned long integer, not a pointer */
1837#define ULONG_IOCTL(cmd) \
1838 { (cmd), (ioctl_trans_handler_t)sys_ioctl },
1839
1840/* ioctl should not be warned about even if it's not implemented. 910/* ioctl should not be warned about even if it's not implemented.
1841 Valid reasons to use this: 911 Valid reasons to use this:
1842 - It is implemented with ->compat_ioctl on some device, but programs 912 - It is implemented with ->compat_ioctl on some device, but programs
@@ -1846,7 +916,7 @@ struct ioctl_trans {
1846 Most other reasons are not valid. */ 916 Most other reasons are not valid. */
1847#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd) 917#define IGNORE_IOCTL(cmd) COMPATIBLE_IOCTL(cmd)
1848 918
1849static struct ioctl_trans ioctl_start[] = { 919static unsigned int ioctl_pointer[] = {
1850/* compatible ioctls first */ 920/* compatible ioctls first */
1851COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */ 921COMPATIBLE_IOCTL(0x4B50) /* KDGHWCLK - not in the kernel, but don't complain */
1852COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */ 922COMPATIBLE_IOCTL(0x4B51) /* KDSHWCLK - not in the kernel, but don't complain */
@@ -1857,7 +927,6 @@ COMPATIBLE_IOCTL(TCSETA)
1857COMPATIBLE_IOCTL(TCSETAW) 927COMPATIBLE_IOCTL(TCSETAW)
1858COMPATIBLE_IOCTL(TCSETAF) 928COMPATIBLE_IOCTL(TCSETAF)
1859COMPATIBLE_IOCTL(TCSBRK) 929COMPATIBLE_IOCTL(TCSBRK)
1860ULONG_IOCTL(TCSBRKP)
1861COMPATIBLE_IOCTL(TCXONC) 930COMPATIBLE_IOCTL(TCXONC)
1862COMPATIBLE_IOCTL(TCFLSH) 931COMPATIBLE_IOCTL(TCFLSH)
1863COMPATIBLE_IOCTL(TCGETS) 932COMPATIBLE_IOCTL(TCGETS)
@@ -1867,7 +936,6 @@ COMPATIBLE_IOCTL(TCSETSF)
1867COMPATIBLE_IOCTL(TIOCLINUX) 936COMPATIBLE_IOCTL(TIOCLINUX)
1868COMPATIBLE_IOCTL(TIOCSBRK) 937COMPATIBLE_IOCTL(TIOCSBRK)
1869COMPATIBLE_IOCTL(TIOCCBRK) 938COMPATIBLE_IOCTL(TIOCCBRK)
1870ULONG_IOCTL(TIOCMIWAIT)
1871COMPATIBLE_IOCTL(TIOCGICOUNT) 939COMPATIBLE_IOCTL(TIOCGICOUNT)
1872/* Little t */ 940/* Little t */
1873COMPATIBLE_IOCTL(TIOCGETD) 941COMPATIBLE_IOCTL(TIOCGETD)
@@ -1889,7 +957,6 @@ COMPATIBLE_IOCTL(TIOCSTI)
1889COMPATIBLE_IOCTL(TIOCOUTQ) 957COMPATIBLE_IOCTL(TIOCOUTQ)
1890COMPATIBLE_IOCTL(TIOCSPGRP) 958COMPATIBLE_IOCTL(TIOCSPGRP)
1891COMPATIBLE_IOCTL(TIOCGPGRP) 959COMPATIBLE_IOCTL(TIOCGPGRP)
1892ULONG_IOCTL(TIOCSCTTY)
1893COMPATIBLE_IOCTL(TIOCGPTN) 960COMPATIBLE_IOCTL(TIOCGPTN)
1894COMPATIBLE_IOCTL(TIOCSPTLCK) 961COMPATIBLE_IOCTL(TIOCSPTLCK)
1895COMPATIBLE_IOCTL(TIOCSERGETLSR) 962COMPATIBLE_IOCTL(TIOCSERGETLSR)
@@ -1920,36 +987,21 @@ COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
1920COMPATIBLE_IOCTL(RAID_AUTORUN) 987COMPATIBLE_IOCTL(RAID_AUTORUN)
1921COMPATIBLE_IOCTL(CLEAR_ARRAY) 988COMPATIBLE_IOCTL(CLEAR_ARRAY)
1922COMPATIBLE_IOCTL(ADD_NEW_DISK) 989COMPATIBLE_IOCTL(ADD_NEW_DISK)
1923ULONG_IOCTL(HOT_REMOVE_DISK)
1924COMPATIBLE_IOCTL(SET_ARRAY_INFO) 990COMPATIBLE_IOCTL(SET_ARRAY_INFO)
1925COMPATIBLE_IOCTL(SET_DISK_INFO) 991COMPATIBLE_IOCTL(SET_DISK_INFO)
1926COMPATIBLE_IOCTL(WRITE_RAID_INFO) 992COMPATIBLE_IOCTL(WRITE_RAID_INFO)
1927COMPATIBLE_IOCTL(UNPROTECT_ARRAY) 993COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
1928COMPATIBLE_IOCTL(PROTECT_ARRAY) 994COMPATIBLE_IOCTL(PROTECT_ARRAY)
1929ULONG_IOCTL(HOT_ADD_DISK)
1930ULONG_IOCTL(SET_DISK_FAULTY)
1931COMPATIBLE_IOCTL(RUN_ARRAY) 995COMPATIBLE_IOCTL(RUN_ARRAY)
1932COMPATIBLE_IOCTL(STOP_ARRAY) 996COMPATIBLE_IOCTL(STOP_ARRAY)
1933COMPATIBLE_IOCTL(STOP_ARRAY_RO) 997COMPATIBLE_IOCTL(STOP_ARRAY_RO)
1934COMPATIBLE_IOCTL(RESTART_ARRAY_RW) 998COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
1935COMPATIBLE_IOCTL(GET_BITMAP_FILE) 999COMPATIBLE_IOCTL(GET_BITMAP_FILE)
1936ULONG_IOCTL(SET_BITMAP_FILE)
1937/* Big K */
1938COMPATIBLE_IOCTL(PIO_FONT)
1939COMPATIBLE_IOCTL(GIO_FONT)
1940COMPATIBLE_IOCTL(PIO_CMAP)
1941COMPATIBLE_IOCTL(GIO_CMAP)
1942ULONG_IOCTL(KDSIGACCEPT)
1943COMPATIBLE_IOCTL(KDGETKEYCODE) 1000COMPATIBLE_IOCTL(KDGETKEYCODE)
1944COMPATIBLE_IOCTL(KDSETKEYCODE) 1001COMPATIBLE_IOCTL(KDSETKEYCODE)
1945ULONG_IOCTL(KIOCSOUND)
1946ULONG_IOCTL(KDMKTONE)
1947COMPATIBLE_IOCTL(KDGKBTYPE) 1002COMPATIBLE_IOCTL(KDGKBTYPE)
1948ULONG_IOCTL(KDSETMODE)
1949COMPATIBLE_IOCTL(KDGETMODE) 1003COMPATIBLE_IOCTL(KDGETMODE)
1950ULONG_IOCTL(KDSKBMODE)
1951COMPATIBLE_IOCTL(KDGKBMODE) 1004COMPATIBLE_IOCTL(KDGKBMODE)
1952ULONG_IOCTL(KDSKBMETA)
1953COMPATIBLE_IOCTL(KDGKBMETA) 1005COMPATIBLE_IOCTL(KDGKBMETA)
1954COMPATIBLE_IOCTL(KDGKBENT) 1006COMPATIBLE_IOCTL(KDGKBENT)
1955COMPATIBLE_IOCTL(KDSKBENT) 1007COMPATIBLE_IOCTL(KDSKBENT)
@@ -1959,15 +1011,7 @@ COMPATIBLE_IOCTL(KDGKBDIACR)
1959COMPATIBLE_IOCTL(KDSKBDIACR) 1011COMPATIBLE_IOCTL(KDSKBDIACR)
1960COMPATIBLE_IOCTL(KDKBDREP) 1012COMPATIBLE_IOCTL(KDKBDREP)
1961COMPATIBLE_IOCTL(KDGKBLED) 1013COMPATIBLE_IOCTL(KDGKBLED)
1962ULONG_IOCTL(KDSKBLED)
1963COMPATIBLE_IOCTL(KDGETLED) 1014COMPATIBLE_IOCTL(KDGETLED)
1964ULONG_IOCTL(KDSETLED)
1965COMPATIBLE_IOCTL(GIO_SCRNMAP)
1966COMPATIBLE_IOCTL(PIO_SCRNMAP)
1967COMPATIBLE_IOCTL(GIO_UNISCRNMAP)
1968COMPATIBLE_IOCTL(PIO_UNISCRNMAP)
1969COMPATIBLE_IOCTL(PIO_FONTRESET)
1970COMPATIBLE_IOCTL(PIO_UNIMAPCLR)
1971#ifdef CONFIG_BLOCK 1015#ifdef CONFIG_BLOCK
1972/* Big S */ 1016/* Big S */
1973COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN) 1017COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN)
@@ -1979,32 +1023,6 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1979COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1023COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1980COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1024COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1981#endif 1025#endif
1982/* Big T */
1983COMPATIBLE_IOCTL(TUNSETNOCSUM)
1984COMPATIBLE_IOCTL(TUNSETDEBUG)
1985COMPATIBLE_IOCTL(TUNSETPERSIST)
1986COMPATIBLE_IOCTL(TUNSETOWNER)
1987COMPATIBLE_IOCTL(TUNSETLINK)
1988COMPATIBLE_IOCTL(TUNSETGROUP)
1989COMPATIBLE_IOCTL(TUNGETFEATURES)
1990COMPATIBLE_IOCTL(TUNSETOFFLOAD)
1991COMPATIBLE_IOCTL(TUNSETTXFILTER)
1992COMPATIBLE_IOCTL(TUNGETSNDBUF)
1993COMPATIBLE_IOCTL(TUNSETSNDBUF)
1994/* Big V */
1995COMPATIBLE_IOCTL(VT_SETMODE)
1996COMPATIBLE_IOCTL(VT_GETMODE)
1997COMPATIBLE_IOCTL(VT_GETSTATE)
1998COMPATIBLE_IOCTL(VT_OPENQRY)
1999ULONG_IOCTL(VT_ACTIVATE)
2000ULONG_IOCTL(VT_WAITACTIVE)
2001ULONG_IOCTL(VT_RELDISP)
2002ULONG_IOCTL(VT_DISALLOCATE)
2003COMPATIBLE_IOCTL(VT_RESIZE)
2004COMPATIBLE_IOCTL(VT_RESIZEX)
2005COMPATIBLE_IOCTL(VT_LOCKSWITCH)
2006COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
2007COMPATIBLE_IOCTL(VT_GETHIFONTMASK)
2008/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1026/* Little p (/dev/rtc, /dev/envctrl, etc.) */
2009COMPATIBLE_IOCTL(RTC_AIE_ON) 1027COMPATIBLE_IOCTL(RTC_AIE_ON)
2010COMPATIBLE_IOCTL(RTC_AIE_OFF) 1028COMPATIBLE_IOCTL(RTC_AIE_OFF)
@@ -2032,36 +1050,13 @@ COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
2032COMPATIBLE_IOCTL(MTIOCTOP) 1050COMPATIBLE_IOCTL(MTIOCTOP)
2033/* Socket level stuff */ 1051/* Socket level stuff */
2034COMPATIBLE_IOCTL(FIOQSIZE) 1052COMPATIBLE_IOCTL(FIOQSIZE)
2035COMPATIBLE_IOCTL(FIOSETOWN)
2036COMPATIBLE_IOCTL(SIOCSPGRP)
2037COMPATIBLE_IOCTL(FIOGETOWN)
2038COMPATIBLE_IOCTL(SIOCGPGRP)
2039COMPATIBLE_IOCTL(SIOCATMARK)
2040COMPATIBLE_IOCTL(SIOCSIFLINK)
2041COMPATIBLE_IOCTL(SIOCSIFENCAP)
2042COMPATIBLE_IOCTL(SIOCGIFENCAP)
2043COMPATIBLE_IOCTL(SIOCSIFNAME)
2044COMPATIBLE_IOCTL(SIOCSARP)
2045COMPATIBLE_IOCTL(SIOCGARP)
2046COMPATIBLE_IOCTL(SIOCDARP)
2047COMPATIBLE_IOCTL(SIOCSRARP)
2048COMPATIBLE_IOCTL(SIOCGRARP)
2049COMPATIBLE_IOCTL(SIOCDRARP)
2050COMPATIBLE_IOCTL(SIOCADDDLCI)
2051COMPATIBLE_IOCTL(SIOCDELDLCI)
2052COMPATIBLE_IOCTL(SIOCGMIIPHY)
2053COMPATIBLE_IOCTL(SIOCGMIIREG)
2054COMPATIBLE_IOCTL(SIOCSMIIREG)
2055COMPATIBLE_IOCTL(SIOCGIFVLAN)
2056COMPATIBLE_IOCTL(SIOCSIFVLAN)
2057COMPATIBLE_IOCTL(SIOCBRADDBR)
2058COMPATIBLE_IOCTL(SIOCBRDELBR)
2059#ifdef CONFIG_BLOCK 1053#ifdef CONFIG_BLOCK
1054/* loop */
1055IGNORE_IOCTL(LOOP_CLR_FD)
2060/* SG stuff */ 1056/* SG stuff */
2061COMPATIBLE_IOCTL(SG_SET_TIMEOUT) 1057COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
2062COMPATIBLE_IOCTL(SG_GET_TIMEOUT) 1058COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
2063COMPATIBLE_IOCTL(SG_EMULATED_HOST) 1059COMPATIBLE_IOCTL(SG_EMULATED_HOST)
2064ULONG_IOCTL(SG_SET_TRANSFORM)
2065COMPATIBLE_IOCTL(SG_GET_TRANSFORM) 1060COMPATIBLE_IOCTL(SG_GET_TRANSFORM)
2066COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE) 1061COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE)
2067COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE) 1062COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE)
@@ -2115,8 +1110,6 @@ COMPATIBLE_IOCTL(PPPIOCGCHAN)
2115/* PPPOX */ 1110/* PPPOX */
2116COMPATIBLE_IOCTL(PPPOEIOCSFWD) 1111COMPATIBLE_IOCTL(PPPOEIOCSFWD)
2117COMPATIBLE_IOCTL(PPPOEIOCDFWD) 1112COMPATIBLE_IOCTL(PPPOEIOCDFWD)
2118/* LP */
2119COMPATIBLE_IOCTL(LPGETSTATUS)
2120/* ppdev */ 1113/* ppdev */
2121COMPATIBLE_IOCTL(PPSETMODE) 1114COMPATIBLE_IOCTL(PPSETMODE)
2122COMPATIBLE_IOCTL(PPRSTATUS) 1115COMPATIBLE_IOCTL(PPRSTATUS)
@@ -2298,8 +1291,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
2298COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) 1291COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
2299COMPATIBLE_IOCTL(OSS_GETVERSION) 1292COMPATIBLE_IOCTL(OSS_GETVERSION)
2300/* AUTOFS */ 1293/* AUTOFS */
2301ULONG_IOCTL(AUTOFS_IOC_READY)
2302ULONG_IOCTL(AUTOFS_IOC_FAIL)
2303COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC) 1294COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
2304COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER) 1295COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2305COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 1296COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
@@ -2311,22 +1302,6 @@ COMPATIBLE_IOCTL(RAW_SETBIND)
2311COMPATIBLE_IOCTL(RAW_GETBIND) 1302COMPATIBLE_IOCTL(RAW_GETBIND)
2312/* SMB ioctls which do not need any translations */ 1303/* SMB ioctls which do not need any translations */
2313COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) 1304COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
2314/* Little a */
2315COMPATIBLE_IOCTL(ATMSIGD_CTRL)
2316COMPATIBLE_IOCTL(ATMARPD_CTRL)
2317COMPATIBLE_IOCTL(ATMLEC_CTRL)
2318COMPATIBLE_IOCTL(ATMLEC_MCAST)
2319COMPATIBLE_IOCTL(ATMLEC_DATA)
2320COMPATIBLE_IOCTL(ATM_SETSC)
2321COMPATIBLE_IOCTL(SIOCSIFATMTCP)
2322COMPATIBLE_IOCTL(SIOCMKCLIP)
2323COMPATIBLE_IOCTL(ATMARP_MKIP)
2324COMPATIBLE_IOCTL(ATMARP_SETENTRY)
2325COMPATIBLE_IOCTL(ATMARP_ENCAP)
2326COMPATIBLE_IOCTL(ATMTCP_CREATE)
2327COMPATIBLE_IOCTL(ATMTCP_REMOVE)
2328COMPATIBLE_IOCTL(ATMMPC_CTRL)
2329COMPATIBLE_IOCTL(ATMMPC_DATA)
2330/* Watchdog */ 1305/* Watchdog */
2331COMPATIBLE_IOCTL(WDIOC_GETSUPPORT) 1306COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
2332COMPATIBLE_IOCTL(WDIOC_GETSTATUS) 1307COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -2408,30 +1383,11 @@ COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
2408COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO) 1383COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
2409COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM) 1384COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
2410COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE) 1385COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
2411/* USB */
2412COMPATIBLE_IOCTL(USBDEVFS_RESETEP)
2413COMPATIBLE_IOCTL(USBDEVFS_SETINTERFACE)
2414COMPATIBLE_IOCTL(USBDEVFS_SETCONFIGURATION)
2415COMPATIBLE_IOCTL(USBDEVFS_GETDRIVER)
2416COMPATIBLE_IOCTL(USBDEVFS_DISCARDURB)
2417COMPATIBLE_IOCTL(USBDEVFS_CLAIMINTERFACE)
2418COMPATIBLE_IOCTL(USBDEVFS_RELEASEINTERFACE)
2419COMPATIBLE_IOCTL(USBDEVFS_CONNECTINFO)
2420COMPATIBLE_IOCTL(USBDEVFS_HUB_PORTINFO)
2421COMPATIBLE_IOCTL(USBDEVFS_RESET)
2422COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32)
2423COMPATIBLE_IOCTL(USBDEVFS_REAPURB32)
2424COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32)
2425COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT)
2426/* NBD */ 1386/* NBD */
2427ULONG_IOCTL(NBD_SET_SOCK)
2428ULONG_IOCTL(NBD_SET_BLKSIZE)
2429ULONG_IOCTL(NBD_SET_SIZE)
2430COMPATIBLE_IOCTL(NBD_DO_IT) 1387COMPATIBLE_IOCTL(NBD_DO_IT)
2431COMPATIBLE_IOCTL(NBD_CLEAR_SOCK) 1388COMPATIBLE_IOCTL(NBD_CLEAR_SOCK)
2432COMPATIBLE_IOCTL(NBD_CLEAR_QUE) 1389COMPATIBLE_IOCTL(NBD_CLEAR_QUE)
2433COMPATIBLE_IOCTL(NBD_PRINT_DEBUG) 1390COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
2434ULONG_IOCTL(NBD_SET_SIZE_BLOCKS)
2435COMPATIBLE_IOCTL(NBD_DISCONNECT) 1391COMPATIBLE_IOCTL(NBD_DISCONNECT)
2436/* i2c */ 1392/* i2c */
2437COMPATIBLE_IOCTL(I2C_SLAVE) 1393COMPATIBLE_IOCTL(I2C_SLAVE)
@@ -2531,131 +1487,13 @@ COMPATIBLE_IOCTL(JSIOCGAXES)
2531COMPATIBLE_IOCTL(JSIOCGBUTTONS) 1487COMPATIBLE_IOCTL(JSIOCGBUTTONS)
2532COMPATIBLE_IOCTL(JSIOCGNAME(0)) 1488COMPATIBLE_IOCTL(JSIOCGNAME(0))
2533 1489
2534/* now things that need handlers */
2535#ifdef CONFIG_NET
2536HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
2537HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
2538HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc)
2539HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc)
2540HANDLE_IOCTL(SIOCGIFMETRIC, dev_ifsioc)
2541HANDLE_IOCTL(SIOCSIFMETRIC, dev_ifsioc)
2542HANDLE_IOCTL(SIOCGIFMTU, dev_ifsioc)
2543HANDLE_IOCTL(SIOCSIFMTU, dev_ifsioc)
2544HANDLE_IOCTL(SIOCGIFMEM, dev_ifsioc)
2545HANDLE_IOCTL(SIOCSIFMEM, dev_ifsioc)
2546HANDLE_IOCTL(SIOCGIFHWADDR, dev_ifsioc)
2547HANDLE_IOCTL(SIOCSIFHWADDR, dev_ifsioc)
2548HANDLE_IOCTL(SIOCADDMULTI, dev_ifsioc)
2549HANDLE_IOCTL(SIOCDELMULTI, dev_ifsioc)
2550HANDLE_IOCTL(SIOCGIFINDEX, dev_ifsioc)
2551HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc)
2552HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
2553HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
2554HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
2555HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
2556HANDLE_IOCTL(SIOCSHWTSTAMP, dev_ifsioc)
2557
2558/* ioctls used by appletalk ddp.c */
2559HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
2560HANDLE_IOCTL(SIOCDIFADDR, dev_ifsioc)
2561HANDLE_IOCTL(SIOCSARP, dev_ifsioc)
2562HANDLE_IOCTL(SIOCDARP, dev_ifsioc)
2563
2564HANDLE_IOCTL(SIOCGIFBRDADDR, dev_ifsioc)
2565HANDLE_IOCTL(SIOCSIFBRDADDR, dev_ifsioc)
2566HANDLE_IOCTL(SIOCGIFDSTADDR, dev_ifsioc)
2567HANDLE_IOCTL(SIOCSIFDSTADDR, dev_ifsioc)
2568HANDLE_IOCTL(SIOCGIFNETMASK, dev_ifsioc)
2569HANDLE_IOCTL(SIOCSIFNETMASK, dev_ifsioc)
2570HANDLE_IOCTL(SIOCSIFPFLAGS, dev_ifsioc)
2571HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc)
2572HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc)
2573HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc)
2574HANDLE_IOCTL(TUNSETIFF, dev_ifsioc)
2575HANDLE_IOCTL(TUNGETIFF, dev_ifsioc)
2576HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl)
2577HANDLE_IOCTL(SIOCBONDENSLAVE, bond_ioctl)
2578HANDLE_IOCTL(SIOCBONDRELEASE, bond_ioctl)
2579HANDLE_IOCTL(SIOCBONDSETHWADDR, bond_ioctl)
2580HANDLE_IOCTL(SIOCBONDSLAVEINFOQUERY, bond_ioctl)
2581HANDLE_IOCTL(SIOCBONDINFOQUERY, bond_ioctl)
2582HANDLE_IOCTL(SIOCBONDCHANGEACTIVE, bond_ioctl)
2583HANDLE_IOCTL(SIOCADDRT, routing_ioctl)
2584HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
2585HANDLE_IOCTL(SIOCBRADDIF, dev_ifsioc)
2586HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
2587/* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
2588HANDLE_IOCTL(SIOCRTMSG, ret_einval)
2589HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
2590HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
2591#endif
2592#ifdef CONFIG_BLOCK
2593HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
2594HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
2595#endif
2596HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
2597HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
2598HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
2599HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
2600#ifdef CONFIG_BLOCK
2601HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
2602HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
2603#endif
2604#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
2605HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
2606#ifdef CONFIG_VT
2607HANDLE_IOCTL(PIO_FONTX, do_fontx_ioctl)
2608HANDLE_IOCTL(GIO_FONTX, do_fontx_ioctl)
2609HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
2610HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
2611HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
2612#endif
2613/* One SMB ioctl needs translations. */
2614#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
2615HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
2616HANDLE_IOCTL(ATM_GETLINKRATE32, do_atm_ioctl)
2617HANDLE_IOCTL(ATM_GETNAMES32, do_atm_ioctl)
2618HANDLE_IOCTL(ATM_GETTYPE32, do_atm_ioctl)
2619HANDLE_IOCTL(ATM_GETESI32, do_atm_ioctl)
2620HANDLE_IOCTL(ATM_GETADDR32, do_atm_ioctl)
2621HANDLE_IOCTL(ATM_RSTADDR32, do_atm_ioctl)
2622HANDLE_IOCTL(ATM_ADDADDR32, do_atm_ioctl)
2623HANDLE_IOCTL(ATM_DELADDR32, do_atm_ioctl)
2624HANDLE_IOCTL(ATM_GETCIRANGE32, do_atm_ioctl)
2625HANDLE_IOCTL(ATM_SETCIRANGE32, do_atm_ioctl)
2626HANDLE_IOCTL(ATM_SETESI32, do_atm_ioctl)
2627HANDLE_IOCTL(ATM_SETESIF32, do_atm_ioctl)
2628HANDLE_IOCTL(ATM_GETSTAT32, do_atm_ioctl)
2629HANDLE_IOCTL(ATM_GETSTATZ32, do_atm_ioctl)
2630HANDLE_IOCTL(ATM_GETLOOP32, do_atm_ioctl)
2631HANDLE_IOCTL(ATM_SETLOOP32, do_atm_ioctl)
2632HANDLE_IOCTL(ATM_QUERYLOOP32, do_atm_ioctl)
2633HANDLE_IOCTL(SONET_GETSTAT, do_atm_ioctl)
2634HANDLE_IOCTL(SONET_GETSTATZ, do_atm_ioctl)
2635HANDLE_IOCTL(SONET_GETDIAG, do_atm_ioctl)
2636HANDLE_IOCTL(SONET_SETDIAG, do_atm_ioctl)
2637HANDLE_IOCTL(SONET_CLRDIAG, do_atm_ioctl)
2638HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
2639HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
2640HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
2641/* block stuff */
2642#ifdef CONFIG_BLOCK
2643/* loop */
2644IGNORE_IOCTL(LOOP_CLR_FD)
2645/* Raw devices */
2646HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
2647HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
2648#endif
2649/* Serial */
2650HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
2651HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
2652#ifdef TIOCGLTC 1490#ifdef TIOCGLTC
2653COMPATIBLE_IOCTL(TIOCGLTC) 1491COMPATIBLE_IOCTL(TIOCGLTC)
2654COMPATIBLE_IOCTL(TIOCSLTC) 1492COMPATIBLE_IOCTL(TIOCSLTC)
2655#endif 1493#endif
2656#ifdef TIOCSTART 1494#ifdef TIOCSTART
2657/* 1495/*
2658 * For these two we have defintions in ioctls.h and/or termios.h on 1496 * For these two we have definitions in ioctls.h and/or termios.h on
2659 * some architectures but no actual implemention. Some applications 1497 * some architectures but no actual implemention. Some applications
2660 * like bash call them if they are defined in the headers, so we provide 1498 * like bash call them if they are defined in the headers, so we provide
2661 * entries here to avoid syslog message spew. 1499 * entries here to avoid syslog message spew.
@@ -2663,43 +1501,6 @@ COMPATIBLE_IOCTL(TIOCSLTC)
2663COMPATIBLE_IOCTL(TIOCSTART) 1501COMPATIBLE_IOCTL(TIOCSTART)
2664COMPATIBLE_IOCTL(TIOCSTOP) 1502COMPATIBLE_IOCTL(TIOCSTOP)
2665#endif 1503#endif
2666/* Usbdevfs */
2667HANDLE_IOCTL(USBDEVFS_CONTROL32, do_usbdevfs_control)
2668HANDLE_IOCTL(USBDEVFS_BULK32, do_usbdevfs_bulk)
2669HANDLE_IOCTL(USBDEVFS_DISCSIGNAL32, do_usbdevfs_discsignal)
2670COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
2671/* i2c */
2672HANDLE_IOCTL(I2C_FUNCS, w_long)
2673HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
2674HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
2675/* bridge */
2676HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
2677HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
2678/* Not implemented in the native kernel */
2679IGNORE_IOCTL(SIOCGIFCOUNT)
2680HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
2681HANDLE_IOCTL(RTC_IRQP_SET32, rtc_ioctl)
2682HANDLE_IOCTL(RTC_EPOCH_READ32, rtc_ioctl)
2683HANDLE_IOCTL(RTC_EPOCH_SET32, rtc_ioctl)
2684
2685/* dvb */
2686HANDLE_IOCTL(VIDEO_GET_EVENT, do_video_get_event)
2687HANDLE_IOCTL(VIDEO_STILLPICTURE, do_video_stillpicture)
2688HANDLE_IOCTL(VIDEO_SET_SPU_PALETTE, do_video_set_spu_palette)
2689
2690/* parport */
2691COMPATIBLE_IOCTL(LPTIME)
2692COMPATIBLE_IOCTL(LPCHAR)
2693COMPATIBLE_IOCTL(LPABORTOPEN)
2694COMPATIBLE_IOCTL(LPCAREFUL)
2695COMPATIBLE_IOCTL(LPWAIT)
2696COMPATIBLE_IOCTL(LPSETIRQ)
2697COMPATIBLE_IOCTL(LPGETSTATUS)
2698COMPATIBLE_IOCTL(LPGETSTATUS)
2699COMPATIBLE_IOCTL(LPRESET)
2700/*LPGETSTATS not implemented, but no kernels seem to compile it in anyways*/
2701COMPATIBLE_IOCTL(LPGETFLAGS)
2702HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2703 1504
2704/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl, 1505/* fat 'r' ioctls. These are handled by fat with ->compat_ioctl,
2705 but we don't want warnings on other file systems. So declare 1506 but we don't want warnings on other file systems. So declare
@@ -2727,12 +1528,110 @@ IGNORE_IOCTL(FBIOGCURSOR32)
2727#endif 1528#endif
2728}; 1529};
2729 1530
2730#define IOCTL_HASHSIZE 256 1531/*
2731static struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE]; 1532 * Convert common ioctl arguments based on their command number
2732 1533 *
2733static inline unsigned long ioctl32_hash(unsigned long cmd) 1534 * Please do not add any code in here. Instead, implement
1535 * a compat_ioctl operation in the place that handleѕ the
1536 * ioctl for the native case.
1537 */
1538static long do_ioctl_trans(int fd, unsigned int cmd,
1539 unsigned long arg, struct file *file)
2734{ 1540{
2735 return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE; 1541 void __user *argp = compat_ptr(arg);
1542
1543 switch (cmd) {
1544 case PPPIOCGIDLE32:
1545 return ppp_gidle(fd, cmd, argp);
1546 case PPPIOCSCOMPRESS32:
1547 return ppp_scompress(fd, cmd, argp);
1548 case PPPIOCSPASS32:
1549 case PPPIOCSACTIVE32:
1550 return ppp_sock_fprog_ioctl_trans(fd, cmd, argp);
1551#ifdef CONFIG_BLOCK
1552 case SG_IO:
1553 return sg_ioctl_trans(fd, cmd, argp);
1554 case SG_GET_REQUEST_TABLE:
1555 return sg_grt_trans(fd, cmd, argp);
1556 case MTIOCGET32:
1557 case MTIOCPOS32:
1558 return mt_ioctl_trans(fd, cmd, argp);
1559 /* Raw devices */
1560 case RAW_SETBIND:
1561 case RAW_GETBIND:
1562 return raw_ioctl(fd, cmd, argp);
1563#endif
1564#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
1565 case AUTOFS_IOC_SETTIMEOUT32:
1566 return ioc_settimeout(fd, cmd, argp);
1567 /* One SMB ioctl needs translations. */
1568#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1569 case SMB_IOC_GETMOUNTUID_32:
1570 return do_smb_getmountuid(fd, cmd, argp);
1571 /* Serial */
1572 case TIOCGSERIAL:
1573 case TIOCSSERIAL:
1574 return serial_struct_ioctl(fd, cmd, argp);
1575 /* i2c */
1576 case I2C_FUNCS:
1577 return w_long(fd, cmd, argp);
1578 case I2C_RDWR:
1579 return do_i2c_rdwr_ioctl(fd, cmd, argp);
1580 case I2C_SMBUS:
1581 return do_i2c_smbus_ioctl(fd, cmd, argp);
1582 /* Not implemented in the native kernel */
1583 case RTC_IRQP_READ32:
1584 case RTC_IRQP_SET32:
1585 case RTC_EPOCH_READ32:
1586 case RTC_EPOCH_SET32:
1587 return rtc_ioctl(fd, cmd, argp);
1588
1589 /* dvb */
1590 case VIDEO_GET_EVENT:
1591 return do_video_get_event(fd, cmd, argp);
1592 case VIDEO_STILLPICTURE:
1593 return do_video_stillpicture(fd, cmd, argp);
1594 case VIDEO_SET_SPU_PALETTE:
1595 return do_video_set_spu_palette(fd, cmd, argp);
1596 }
1597
1598 /*
1599 * These take an integer instead of a pointer as 'arg',
1600 * so we must not do a compat_ptr() translation.
1601 */
1602 switch (cmd) {
1603 /* Big T */
1604 case TCSBRKP:
1605 case TIOCMIWAIT:
1606 case TIOCSCTTY:
1607 /* RAID */
1608 case HOT_REMOVE_DISK:
1609 case HOT_ADD_DISK:
1610 case SET_DISK_FAULTY:
1611 case SET_BITMAP_FILE:
1612 /* Big K */
1613 case KDSIGACCEPT:
1614 case KIOCSOUND:
1615 case KDMKTONE:
1616 case KDSETMODE:
1617 case KDSKBMODE:
1618 case KDSKBMETA:
1619 case KDSKBLED:
1620 case KDSETLED:
1621 /* SG stuff */
1622 case SG_SET_TRANSFORM:
1623 /* AUTOFS */
1624 case AUTOFS_IOC_READY:
1625 case AUTOFS_IOC_FAIL:
1626 /* NBD */
1627 case NBD_SET_SOCK:
1628 case NBD_SET_BLKSIZE:
1629 case NBD_SET_SIZE:
1630 case NBD_SET_SIZE_BLOCKS:
1631 return do_vfs_ioctl(file, fd, cmd, arg);
1632 }
1633
1634 return -ENOIOCTLCMD;
2736} 1635}
2737 1636
2738static void compat_ioctl_error(struct file *filp, unsigned int fd, 1637static void compat_ioctl_error(struct file *filp, unsigned int fd,
@@ -2764,12 +1663,33 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
2764 free_page((unsigned long)path); 1663 free_page((unsigned long)path);
2765} 1664}
2766 1665
1666static int compat_ioctl_check_table(unsigned int xcmd)
1667{
1668 int i;
1669 const int max = ARRAY_SIZE(ioctl_pointer) - 1;
1670
1671 BUILD_BUG_ON(max >= (1 << 16));
1672
1673 /* guess initial offset into table, assuming a
1674 normalized distribution */
1675 i = ((xcmd >> 16) * max) >> 16;
1676
1677 /* do linear search up first, until greater or equal */
1678 while (ioctl_pointer[i] < xcmd && i < max)
1679 i++;
1680
1681 /* then do linear search down */
1682 while (ioctl_pointer[i] > xcmd && i > 0)
1683 i--;
1684
1685 return ioctl_pointer[i] == xcmd;
1686}
1687
2767asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, 1688asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2768 unsigned long arg) 1689 unsigned long arg)
2769{ 1690{
2770 struct file *filp; 1691 struct file *filp;
2771 int error = -EBADF; 1692 int error = -EBADF;
2772 struct ioctl_trans *t;
2773 int fput_needed; 1693 int fput_needed;
2774 1694
2775 filp = fget_light(fd, &fput_needed); 1695 filp = fget_light(fd, &fput_needed);
@@ -2797,7 +1717,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2797#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) 1717#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
2798 case FS_IOC_RESVSP_32: 1718 case FS_IOC_RESVSP_32:
2799 case FS_IOC_RESVSP64_32: 1719 case FS_IOC_RESVSP64_32:
2800 error = compat_ioctl_preallocate(filp, arg); 1720 error = compat_ioctl_preallocate(filp, compat_ptr(arg));
2801 goto out_fput; 1721 goto out_fput;
2802#else 1722#else
2803 case FS_IOC_RESVSP: 1723 case FS_IOC_RESVSP:
@@ -2826,18 +1746,11 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2826 break; 1746 break;
2827 } 1747 }
2828 1748
2829 for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) { 1749 if (compat_ioctl_check_table(XFORM(cmd)))
2830 if (t->cmd == cmd) 1750 goto found_handler;
2831 goto found_handler;
2832 }
2833 1751
2834#ifdef CONFIG_NET 1752 error = do_ioctl_trans(fd, cmd, arg, filp);
2835 if (S_ISSOCK(filp->f_path.dentry->d_inode->i_mode) && 1753 if (error == -ENOIOCTLCMD) {
2836 cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
2837 error = siocdevprivate_ioctl(fd, cmd, arg);
2838 } else
2839#endif
2840 {
2841 static int count; 1754 static int count;
2842 1755
2843 if (++count <= 50) 1756 if (++count <= 50)
@@ -2848,13 +1761,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2848 goto out_fput; 1761 goto out_fput;
2849 1762
2850 found_handler: 1763 found_handler:
2851 if (t->handler) { 1764 arg = (unsigned long)compat_ptr(arg);
2852 lock_kernel();
2853 error = t->handler(fd, cmd, arg, filp);
2854 unlock_kernel();
2855 goto out_fput;
2856 }
2857
2858 do_ioctl: 1765 do_ioctl:
2859 error = do_vfs_ioctl(filp, fd, cmd, arg); 1766 error = do_vfs_ioctl(filp, fd, cmd, arg);
2860 out_fput: 1767 out_fput:
@@ -2863,35 +1770,22 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2863 return error; 1770 return error;
2864} 1771}
2865 1772
2866static void ioctl32_insert_translation(struct ioctl_trans *trans) 1773static int __init init_sys32_ioctl_cmp(const void *p, const void *q)
2867{ 1774{
2868 unsigned long hash; 1775 unsigned int a, b;
2869 struct ioctl_trans *t; 1776 a = *(unsigned int *)p;
2870 1777 b = *(unsigned int *)q;
2871 hash = ioctl32_hash (trans->cmd); 1778 if (a > b)
2872 if (!ioctl32_hash_table[hash]) 1779 return 1;
2873 ioctl32_hash_table[hash] = trans; 1780 if (a < b)
2874 else { 1781 return -1;
2875 t = ioctl32_hash_table[hash]; 1782 return 0;
2876 while (t->next)
2877 t = t->next;
2878 trans->next = NULL;
2879 t->next = trans;
2880 }
2881} 1783}
2882 1784
2883static int __init init_sys32_ioctl(void) 1785static int __init init_sys32_ioctl(void)
2884{ 1786{
2885 int i; 1787 sort(ioctl_pointer, ARRAY_SIZE(ioctl_pointer), sizeof(*ioctl_pointer),
2886 1788 init_sys32_ioctl_cmp, NULL);
2887 for (i = 0; i < ARRAY_SIZE(ioctl_start); i++) {
2888 if (ioctl_start[i].next) {
2889 printk("ioctl translation %d bad\n",i);
2890 return -1;
2891 }
2892
2893 ioctl32_insert_translation(&ioctl_start[i]);
2894 }
2895 return 0; 1789 return 0;
2896} 1790}
2897__initcall(init_sys32_ioctl); 1791__initcall(init_sys32_ioctl);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d22438ef7674..b486169f42bf 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -32,7 +32,9 @@ static struct vfsmount *debugfs_mount;
32static int debugfs_mount_count; 32static int debugfs_mount_count;
33static bool debugfs_registered; 33static bool debugfs_registered;
34 34
35static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) 35static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev,
36 void *data, const struct file_operations *fops)
37
36{ 38{
37 struct inode *inode = new_inode(sb); 39 struct inode *inode = new_inode(sb);
38 40
@@ -44,14 +46,18 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
44 init_special_inode(inode, mode, dev); 46 init_special_inode(inode, mode, dev);
45 break; 47 break;
46 case S_IFREG: 48 case S_IFREG:
47 inode->i_fop = &debugfs_file_operations; 49 inode->i_fop = fops ? fops : &debugfs_file_operations;
50 inode->i_private = data;
48 break; 51 break;
49 case S_IFLNK: 52 case S_IFLNK:
50 inode->i_op = &debugfs_link_operations; 53 inode->i_op = &debugfs_link_operations;
54 inode->i_fop = fops;
55 inode->i_private = data;
51 break; 56 break;
52 case S_IFDIR: 57 case S_IFDIR:
53 inode->i_op = &simple_dir_inode_operations; 58 inode->i_op = &simple_dir_inode_operations;
54 inode->i_fop = &simple_dir_operations; 59 inode->i_fop = fops ? fops : &simple_dir_operations;
60 inode->i_private = data;
55 61
56 /* directory inodes start off with i_nlink == 2 62 /* directory inodes start off with i_nlink == 2
57 * (for "." entry) */ 63 * (for "." entry) */
@@ -64,7 +70,8 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
64 70
65/* SMP-safe */ 71/* SMP-safe */
66static int debugfs_mknod(struct inode *dir, struct dentry *dentry, 72static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
67 int mode, dev_t dev) 73 int mode, dev_t dev, void *data,
74 const struct file_operations *fops)
68{ 75{
69 struct inode *inode; 76 struct inode *inode;
70 int error = -EPERM; 77 int error = -EPERM;
@@ -72,7 +79,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
72 if (dentry->d_inode) 79 if (dentry->d_inode)
73 return -EEXIST; 80 return -EEXIST;
74 81
75 inode = debugfs_get_inode(dir->i_sb, mode, dev); 82 inode = debugfs_get_inode(dir->i_sb, mode, dev, data, fops);
76 if (inode) { 83 if (inode) {
77 d_instantiate(dentry, inode); 84 d_instantiate(dentry, inode);
78 dget(dentry); 85 dget(dentry);
@@ -81,12 +88,13 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
81 return error; 88 return error;
82} 89}
83 90
84static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 91static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
92 void *data, const struct file_operations *fops)
85{ 93{
86 int res; 94 int res;
87 95
88 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; 96 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
89 res = debugfs_mknod(dir, dentry, mode, 0); 97 res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
90 if (!res) { 98 if (!res) {
91 inc_nlink(dir); 99 inc_nlink(dir);
92 fsnotify_mkdir(dir, dentry); 100 fsnotify_mkdir(dir, dentry);
@@ -94,18 +102,20 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
94 return res; 102 return res;
95} 103}
96 104
97static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode) 105static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode,
106 void *data, const struct file_operations *fops)
98{ 107{
99 mode = (mode & S_IALLUGO) | S_IFLNK; 108 mode = (mode & S_IALLUGO) | S_IFLNK;
100 return debugfs_mknod(dir, dentry, mode, 0); 109 return debugfs_mknod(dir, dentry, mode, 0, data, fops);
101} 110}
102 111
103static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode) 112static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode,
113 void *data, const struct file_operations *fops)
104{ 114{
105 int res; 115 int res;
106 116
107 mode = (mode & S_IALLUGO) | S_IFREG; 117 mode = (mode & S_IALLUGO) | S_IFREG;
108 res = debugfs_mknod(dir, dentry, mode, 0); 118 res = debugfs_mknod(dir, dentry, mode, 0, data, fops);
109 if (!res) 119 if (!res)
110 fsnotify_create(dir, dentry); 120 fsnotify_create(dir, dentry);
111 return res; 121 return res;
@@ -139,7 +149,9 @@ static struct file_system_type debug_fs_type = {
139 149
140static int debugfs_create_by_name(const char *name, mode_t mode, 150static int debugfs_create_by_name(const char *name, mode_t mode,
141 struct dentry *parent, 151 struct dentry *parent,
142 struct dentry **dentry) 152 struct dentry **dentry,
153 void *data,
154 const struct file_operations *fops)
143{ 155{
144 int error = 0; 156 int error = 0;
145 157
@@ -164,13 +176,16 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
164 if (!IS_ERR(*dentry)) { 176 if (!IS_ERR(*dentry)) {
165 switch (mode & S_IFMT) { 177 switch (mode & S_IFMT) {
166 case S_IFDIR: 178 case S_IFDIR:
167 error = debugfs_mkdir(parent->d_inode, *dentry, mode); 179 error = debugfs_mkdir(parent->d_inode, *dentry, mode,
180 data, fops);
168 break; 181 break;
169 case S_IFLNK: 182 case S_IFLNK:
170 error = debugfs_link(parent->d_inode, *dentry, mode); 183 error = debugfs_link(parent->d_inode, *dentry, mode,
184 data, fops);
171 break; 185 break;
172 default: 186 default:
173 error = debugfs_create(parent->d_inode, *dentry, mode); 187 error = debugfs_create(parent->d_inode, *dentry, mode,
188 data, fops);
174 break; 189 break;
175 } 190 }
176 dput(*dentry); 191 dput(*dentry);
@@ -184,7 +199,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
184/** 199/**
185 * debugfs_create_file - create a file in the debugfs filesystem 200 * debugfs_create_file - create a file in the debugfs filesystem
186 * @name: a pointer to a string containing the name of the file to create. 201 * @name: a pointer to a string containing the name of the file to create.
187 * @mode: the permission that the file should have 202 * @mode: the permission that the file should have.
188 * @parent: a pointer to the parent dentry for this file. This should be a 203 * @parent: a pointer to the parent dentry for this file. This should be a
189 * directory dentry if set. If this paramater is NULL, then the 204 * directory dentry if set. If this paramater is NULL, then the
190 * file will be created in the root of the debugfs filesystem. 205 * file will be created in the root of the debugfs filesystem.
@@ -195,8 +210,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
195 * this file. 210 * this file.
196 * 211 *
197 * This is the basic "create a file" function for debugfs. It allows for a 212 * This is the basic "create a file" function for debugfs. It allows for a
198 * wide range of flexibility in createing a file, or a directory (if you 213 * wide range of flexibility in creating a file, or a directory (if you want
199 * want to create a directory, the debugfs_create_dir() function is 214 * to create a directory, the debugfs_create_dir() function is
200 * recommended to be used instead.) 215 * recommended to be used instead.)
201 * 216 *
202 * This function will return a pointer to a dentry if it succeeds. This 217 * This function will return a pointer to a dentry if it succeeds. This
@@ -221,19 +236,13 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode,
221 if (error) 236 if (error)
222 goto exit; 237 goto exit;
223 238
224 error = debugfs_create_by_name(name, mode, parent, &dentry); 239 error = debugfs_create_by_name(name, mode, parent, &dentry,
240 data, fops);
225 if (error) { 241 if (error) {
226 dentry = NULL; 242 dentry = NULL;
227 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 243 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
228 goto exit; 244 goto exit;
229 } 245 }
230
231 if (dentry->d_inode) {
232 if (data)
233 dentry->d_inode->i_private = data;
234 if (fops)
235 dentry->d_inode->i_fop = fops;
236 }
237exit: 246exit:
238 return dentry; 247 return dentry;
239} 248}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d5f8c96964be..8882ecc0f1bf 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -517,11 +517,23 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
517 517
518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) 518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
519{ 519{
520 struct dentry *dentry;
521 struct tty_struct *tty;
522
520 BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); 523 BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
521 524
525 /* Ensure dentry has not been deleted by devpts_pty_kill() */
526 dentry = d_find_alias(pts_inode);
527 if (!dentry)
528 return NULL;
529
530 tty = NULL;
522 if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) 531 if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
523 return (struct tty_struct *)pts_inode->i_private; 532 tty = (struct tty_struct *)pts_inode->i_private;
524 return NULL; 533
534 dput(dentry);
535
536 return tty;
525} 537}
526 538
527void devpts_pty_kill(struct tty_struct *tty) 539void devpts_pty_kill(struct tty_struct *tty)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8b10b87dc01a..b912270942fa 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1028,9 +1028,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1028 if (dio->bio) 1028 if (dio->bio)
1029 dio_bio_submit(dio); 1029 dio_bio_submit(dio);
1030 1030
1031 /* All IO is now issued, send it on its way */
1032 blk_run_address_space(inode->i_mapping);
1033
1034 /* 1031 /*
1035 * It is possible that, we return short IO due to end of file. 1032 * It is possible that, we return short IO due to end of file.
1036 * In that case, we need to release all the pages we got hold on. 1033 * In that case, we need to release all the pages we got hold on.
@@ -1057,8 +1054,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1057 ((rw & READ) || (dio->result == dio->size))) 1054 ((rw & READ) || (dio->result == dio->size)))
1058 ret = -EIOCBQUEUED; 1055 ret = -EIOCBQUEUED;
1059 1056
1060 if (ret != -EIOCBQUEUED) 1057 if (ret != -EIOCBQUEUED) {
1058 /* All IO is now issued, send it on its way */
1059 blk_run_address_space(inode->i_mapping);
1061 dio_await_completion(dio); 1060 dio_await_completion(dio);
1061 }
1062 1062
1063 /* 1063 /*
1064 * Sync will always be dropping the final ref and completing the 1064 * Sync will always be dropping the final ref and completing the
@@ -1124,7 +1124,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1124 int acquire_i_mutex = 0; 1124 int acquire_i_mutex = 0;
1125 1125
1126 if (rw & WRITE) 1126 if (rw & WRITE)
1127 rw = WRITE_ODIRECT; 1127 rw = WRITE_ODIRECT_PLUG;
1128 1128
1129 if (bdev) 1129 if (bdev)
1130 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev)); 1130 bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index fd9859f92fad..0df243850818 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -410,10 +410,10 @@ static struct config_group *make_cluster(struct config_group *g,
410 struct dlm_comms *cms = NULL; 410 struct dlm_comms *cms = NULL;
411 void *gps = NULL; 411 void *gps = NULL;
412 412
413 cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL); 413 cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS);
414 gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); 414 gps = kcalloc(3, sizeof(struct config_group *), GFP_NOFS);
415 sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL); 415 sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS);
416 cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL); 416 cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS);
417 417
418 if (!cl || !gps || !sps || !cms) 418 if (!cl || !gps || !sps || !cms)
419 goto fail; 419 goto fail;
@@ -482,9 +482,9 @@ static struct config_group *make_space(struct config_group *g, const char *name)
482 struct dlm_nodes *nds = NULL; 482 struct dlm_nodes *nds = NULL;
483 void *gps = NULL; 483 void *gps = NULL;
484 484
485 sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL); 485 sp = kzalloc(sizeof(struct dlm_space), GFP_NOFS);
486 gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); 486 gps = kcalloc(2, sizeof(struct config_group *), GFP_NOFS);
487 nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL); 487 nds = kzalloc(sizeof(struct dlm_nodes), GFP_NOFS);
488 488
489 if (!sp || !gps || !nds) 489 if (!sp || !gps || !nds)
490 goto fail; 490 goto fail;
@@ -536,7 +536,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
536{ 536{
537 struct dlm_comm *cm; 537 struct dlm_comm *cm;
538 538
539 cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL); 539 cm = kzalloc(sizeof(struct dlm_comm), GFP_NOFS);
540 if (!cm) 540 if (!cm)
541 return ERR_PTR(-ENOMEM); 541 return ERR_PTR(-ENOMEM);
542 542
@@ -569,7 +569,7 @@ static struct config_item *make_node(struct config_group *g, const char *name)
569 struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); 569 struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
570 struct dlm_node *nd; 570 struct dlm_node *nd;
571 571
572 nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); 572 nd = kzalloc(sizeof(struct dlm_node), GFP_NOFS);
573 if (!nd) 573 if (!nd)
574 return ERR_PTR(-ENOMEM); 574 return ERR_PTR(-ENOMEM);
575 575
@@ -705,7 +705,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
705 if (cm->addr_count >= DLM_MAX_ADDR_COUNT) 705 if (cm->addr_count >= DLM_MAX_ADDR_COUNT)
706 return -ENOSPC; 706 return -ENOSPC;
707 707
708 addr = kzalloc(sizeof(*addr), GFP_KERNEL); 708 addr = kzalloc(sizeof(*addr), GFP_NOFS);
709 if (!addr) 709 if (!addr)
710 return -ENOMEM; 710 return -ENOMEM;
711 711
@@ -868,7 +868,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
868 868
869 ids_count = sp->members_count; 869 ids_count = sp->members_count;
870 870
871 ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); 871 ids = kcalloc(ids_count, sizeof(int), GFP_NOFS);
872 if (!ids) { 872 if (!ids) {
873 rv = -ENOMEM; 873 rv = -ENOMEM;
874 goto out; 874 goto out;
@@ -886,7 +886,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
886 if (!new_count) 886 if (!new_count)
887 goto out_ids; 887 goto out_ids;
888 888
889 new = kcalloc(new_count, sizeof(int), GFP_KERNEL); 889 new = kcalloc(new_count, sizeof(int), GFP_NOFS);
890 if (!new) { 890 if (!new) {
891 kfree(ids); 891 kfree(ids);
892 rv = -ENOMEM; 892 rv = -ENOMEM;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1c8bb8c3a82e..375a2359b3bf 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -404,7 +404,7 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
404 if (bucket >= ls->ls_rsbtbl_size) 404 if (bucket >= ls->ls_rsbtbl_size)
405 return NULL; 405 return NULL;
406 406
407 ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_KERNEL); 407 ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_NOFS);
408 if (!ri) 408 if (!ri)
409 return NULL; 409 return NULL;
410 if (n == 0) 410 if (n == 0)
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index c4dfa1dcc86f..7b84c1dbc82e 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,8 +49,7 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
49 spin_unlock(&ls->ls_recover_list_lock); 49 spin_unlock(&ls->ls_recover_list_lock);
50 50
51 if (!found) 51 if (!found)
52 de = kzalloc(sizeof(struct dlm_direntry) + len, 52 de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS);
53 ls->ls_allocation);
54 return de; 53 return de;
55} 54}
56 55
@@ -212,7 +211,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
212 211
213 dlm_dir_clear(ls); 212 dlm_dir_clear(ls);
214 213
215 last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation); 214 last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
216 if (!last_name) 215 if (!last_name)
217 goto out; 216 goto out;
218 217
@@ -323,7 +322,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
323 if (namelen > DLM_RESNAME_MAXLEN) 322 if (namelen > DLM_RESNAME_MAXLEN)
324 return -EINVAL; 323 return -EINVAL;
325 324
326 de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation); 325 de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS);
327 if (!de) 326 if (!de)
328 return -ENOMEM; 327 return -ENOMEM;
329 328
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index d01ca0a711db..826d3dc6e0ab 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -473,7 +473,6 @@ struct dlm_ls {
473 int ls_low_nodeid; 473 int ls_low_nodeid;
474 int ls_total_weight; 474 int ls_total_weight;
475 int *ls_node_array; 475 int *ls_node_array;
476 gfp_t ls_allocation;
477 476
478 struct dlm_rsb ls_stub_rsb; /* for returning errors */ 477 struct dlm_rsb ls_stub_rsb; /* for returning errors */
479 struct dlm_lkb ls_stub_lkb; /* for returning errors */ 478 struct dlm_lkb ls_stub_lkb; /* for returning errors */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index eb507c453c5f..9c0c1db1e105 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -2689,7 +2689,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
2689 pass into lowcomms_commit and a message buffer (mb) that we 2689 pass into lowcomms_commit and a message buffer (mb) that we
2690 write our data into */ 2690 write our data into */
2691 2691
2692 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb); 2692 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
2693 if (!mh) 2693 if (!mh)
2694 return -ENOBUFS; 2694 return -ENOBUFS;
2695 2695
@@ -4512,7 +4512,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4512 } 4512 }
4513 4513
4514 if (flags & DLM_LKF_VALBLK) { 4514 if (flags & DLM_LKF_VALBLK) {
4515 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 4515 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
4516 if (!ua->lksb.sb_lvbptr) { 4516 if (!ua->lksb.sb_lvbptr) {
4517 kfree(ua); 4517 kfree(ua);
4518 __put_lkb(ls, lkb); 4518 __put_lkb(ls, lkb);
@@ -4582,7 +4582,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4582 ua = lkb->lkb_ua; 4582 ua = lkb->lkb_ua;
4583 4583
4584 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) { 4584 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
4585 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL); 4585 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
4586 if (!ua->lksb.sb_lvbptr) { 4586 if (!ua->lksb.sb_lvbptr) {
4587 error = -ENOMEM; 4587 error = -ENOMEM;
4588 goto out_put; 4588 goto out_put;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d489fcc86713..c010ecfc0d29 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -430,7 +430,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
430 430
431 error = -ENOMEM; 431 error = -ENOMEM;
432 432
433 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); 433 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
434 if (!ls) 434 if (!ls)
435 goto out; 435 goto out;
436 memcpy(ls->ls_name, name, namelen); 436 memcpy(ls->ls_name, name, namelen);
@@ -443,11 +443,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
443 if (flags & DLM_LSFL_TIMEWARN) 443 if (flags & DLM_LSFL_TIMEWARN)
444 set_bit(LSFL_TIMEWARN, &ls->ls_flags); 444 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
445 445
446 if (flags & DLM_LSFL_FS)
447 ls->ls_allocation = GFP_NOFS;
448 else
449 ls->ls_allocation = GFP_KERNEL;
450
451 /* ls_exflags are forced to match among nodes, and we don't 446 /* ls_exflags are forced to match among nodes, and we don't
452 need to require all nodes to have some flags set */ 447 need to require all nodes to have some flags set */
453 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | 448 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
@@ -456,7 +451,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
456 size = dlm_config.ci_rsbtbl_size; 451 size = dlm_config.ci_rsbtbl_size;
457 ls->ls_rsbtbl_size = size; 452 ls->ls_rsbtbl_size = size;
458 453
459 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); 454 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
460 if (!ls->ls_rsbtbl) 455 if (!ls->ls_rsbtbl)
461 goto out_lsfree; 456 goto out_lsfree;
462 for (i = 0; i < size; i++) { 457 for (i = 0; i < size; i++) {
@@ -468,7 +463,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
468 size = dlm_config.ci_lkbtbl_size; 463 size = dlm_config.ci_lkbtbl_size;
469 ls->ls_lkbtbl_size = size; 464 ls->ls_lkbtbl_size = size;
470 465
471 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); 466 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
472 if (!ls->ls_lkbtbl) 467 if (!ls->ls_lkbtbl)
473 goto out_rsbfree; 468 goto out_rsbfree;
474 for (i = 0; i < size; i++) { 469 for (i = 0; i < size; i++) {
@@ -480,7 +475,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
480 size = dlm_config.ci_dirtbl_size; 475 size = dlm_config.ci_dirtbl_size;
481 ls->ls_dirtbl_size = size; 476 ls->ls_dirtbl_size = size;
482 477
483 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); 478 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
484 if (!ls->ls_dirtbl) 479 if (!ls->ls_dirtbl)
485 goto out_lkbfree; 480 goto out_lkbfree;
486 for (i = 0; i < size; i++) { 481 for (i = 0; i < size; i++) {
@@ -527,7 +522,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
527 mutex_init(&ls->ls_requestqueue_mutex); 522 mutex_init(&ls->ls_requestqueue_mutex);
528 mutex_init(&ls->ls_clear_proc_locks); 523 mutex_init(&ls->ls_clear_proc_locks);
529 524
530 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL); 525 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
531 if (!ls->ls_recover_buf) 526 if (!ls->ls_recover_buf)
532 goto out_dirfree; 527 goto out_dirfree;
533 528
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 70736eb4b516..52cab160893c 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1060,7 +1060,7 @@ static void init_local(void)
1060 if (dlm_our_addr(&sas, i)) 1060 if (dlm_our_addr(&sas, i))
1061 break; 1061 break;
1062 1062
1063 addr = kmalloc(sizeof(*addr), GFP_KERNEL); 1063 addr = kmalloc(sizeof(*addr), GFP_NOFS);
1064 if (!addr) 1064 if (!addr)
1065 break; 1065 break;
1066 memcpy(addr, &sas, sizeof(*addr)); 1066 memcpy(addr, &sas, sizeof(*addr));
@@ -1099,7 +1099,7 @@ static int sctp_listen_for_all(void)
1099 struct sockaddr_storage localaddr; 1099 struct sockaddr_storage localaddr;
1100 struct sctp_event_subscribe subscribe; 1100 struct sctp_event_subscribe subscribe;
1101 int result = -EINVAL, num = 1, i, addr_len; 1101 int result = -EINVAL, num = 1, i, addr_len;
1102 struct connection *con = nodeid2con(0, GFP_KERNEL); 1102 struct connection *con = nodeid2con(0, GFP_NOFS);
1103 int bufsize = NEEDED_RMEM; 1103 int bufsize = NEEDED_RMEM;
1104 1104
1105 if (!con) 1105 if (!con)
@@ -1171,7 +1171,7 @@ out:
1171static int tcp_listen_for_all(void) 1171static int tcp_listen_for_all(void)
1172{ 1172{
1173 struct socket *sock = NULL; 1173 struct socket *sock = NULL;
1174 struct connection *con = nodeid2con(0, GFP_KERNEL); 1174 struct connection *con = nodeid2con(0, GFP_NOFS);
1175 int result = -EINVAL; 1175 int result = -EINVAL;
1176 1176
1177 if (!con) 1177 if (!con)
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index b128775913b2..84f70bfb0baf 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -48,7 +48,7 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
48 struct dlm_member *memb; 48 struct dlm_member *memb;
49 int w, error; 49 int w, error;
50 50
51 memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); 51 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
52 if (!memb) 52 if (!memb)
53 return -ENOMEM; 53 return -ENOMEM;
54 54
@@ -143,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
143 143
144 ls->ls_total_weight = total; 144 ls->ls_total_weight = total;
145 145
146 array = kmalloc(sizeof(int) * total, ls->ls_allocation); 146 array = kmalloc(sizeof(int) * total, GFP_NOFS);
147 if (!array) 147 if (!array)
148 return; 148 return;
149 149
@@ -226,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
226 continue; 226 continue;
227 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); 227 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
228 228
229 memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation); 229 memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
230 if (!memb) 230 if (!memb)
231 return -ENOMEM; 231 return -ENOMEM;
232 memb->nodeid = rv->new[i]; 232 memb->nodeid = rv->new[i];
@@ -341,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
341 int *ids = NULL, *new = NULL; 341 int *ids = NULL, *new = NULL;
342 int error, ids_count = 0, new_count = 0; 342 int error, ids_count = 0, new_count = 0;
343 343
344 rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation); 344 rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
345 if (!rv) 345 if (!rv)
346 return -ENOMEM; 346 return -ENOMEM;
347 347
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index c1775b84ebab..8e0d00db004f 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -39,7 +39,7 @@ char *dlm_allocate_lvb(struct dlm_ls *ls)
39{ 39{
40 char *p; 40 char *p;
41 41
42 p = kzalloc(ls->ls_lvblen, ls->ls_allocation); 42 p = kzalloc(ls->ls_lvblen, GFP_NOFS);
43 return p; 43 return p;
44} 44}
45 45
@@ -57,7 +57,7 @@ struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
57 57
58 DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); 58 DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
59 59
60 r = kzalloc(sizeof(*r) + namelen, ls->ls_allocation); 60 r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
61 return r; 61 return r;
62} 62}
63 63
@@ -72,7 +72,7 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
72{ 72{
73 struct dlm_lkb *lkb; 73 struct dlm_lkb *lkb;
74 74
75 lkb = kmem_cache_zalloc(lkb_cache, ls->ls_allocation); 75 lkb = kmem_cache_zalloc(lkb_cache, GFP_NOFS);
76 return lkb; 76 return lkb;
77} 77}
78 78
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 55ea369f43a9..052095cd592f 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -26,7 +26,7 @@ static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
26 struct sk_buff *skb; 26 struct sk_buff *skb;
27 void *data; 27 void *data;
28 28
29 skb = genlmsg_new(size, GFP_KERNEL); 29 skb = genlmsg_new(size, GFP_NOFS);
30 if (!skb) 30 if (!skb)
31 return -ENOMEM; 31 return -ENOMEM;
32 32
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 16f682e26c07..b5f89aef3b29 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -82,7 +82,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
82 if (!ls) 82 if (!ls)
83 return -EINVAL; 83 return -EINVAL;
84 84
85 xop = kzalloc(sizeof(*xop), GFP_KERNEL); 85 xop = kzalloc(sizeof(*xop), GFP_NOFS);
86 if (!xop) { 86 if (!xop) {
87 rv = -ENOMEM; 87 rv = -ENOMEM;
88 goto out; 88 goto out;
@@ -143,7 +143,7 @@ out:
143} 143}
144EXPORT_SYMBOL_GPL(dlm_posix_lock); 144EXPORT_SYMBOL_GPL(dlm_posix_lock);
145 145
146/* Returns failure iff a succesful lock operation should be canceled */ 146/* Returns failure iff a successful lock operation should be canceled */
147static int dlm_plock_callback(struct plock_op *op) 147static int dlm_plock_callback(struct plock_op *op)
148{ 148{
149 struct file *file; 149 struct file *file;
@@ -211,7 +211,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
211 if (!ls) 211 if (!ls)
212 return -EINVAL; 212 return -EINVAL;
213 213
214 op = kzalloc(sizeof(*op), GFP_KERNEL); 214 op = kzalloc(sizeof(*op), GFP_NOFS);
215 if (!op) { 215 if (!op) {
216 rv = -ENOMEM; 216 rv = -ENOMEM;
217 goto out; 217 goto out;
@@ -266,7 +266,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
266 if (!ls) 266 if (!ls)
267 return -EINVAL; 267 return -EINVAL;
268 268
269 op = kzalloc(sizeof(*op), GFP_KERNEL); 269 op = kzalloc(sizeof(*op), GFP_NOFS);
270 if (!op) { 270 if (!op) {
271 rv = -ENOMEM; 271 rv = -ENOMEM;
272 goto out; 272 goto out;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 67522c268c14..3c83a49a48a3 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
38 char *mb; 38 char *mb;
39 int mb_len = sizeof(struct dlm_rcom) + len; 39 int mb_len = sizeof(struct dlm_rcom) + len;
40 40
41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb); 41 mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
42 if (!mh) { 42 if (!mh) {
43 log_print("create_rcom to %d type %d len %d ENOBUFS", 43 log_print("create_rcom to %d type %d len %d ENOBUFS",
44 to_nodeid, type, len); 44 to_nodeid, type, len);
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7a2307c08911..a44fa22890e1 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
35 struct rq_entry *e; 35 struct rq_entry *e;
36 int length = ms->m_header.h_length - sizeof(struct dlm_message); 36 int length = ms->m_header.h_length - sizeof(struct dlm_message);
37 37
38 e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation); 38 e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS);
39 if (!e) { 39 if (!e) {
40 log_print("dlm_add_requestqueue: out of memory len %d", length); 40 log_print("dlm_add_requestqueue: out of memory len %d", length);
41 return; 41 return;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index ebce994ab0b7..e73a4bb572aa 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -267,7 +267,7 @@ static int device_user_lock(struct dlm_user_proc *proc,
267 goto out; 267 goto out;
268 } 268 }
269 269
270 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); 270 ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
271 if (!ua) 271 if (!ua)
272 goto out; 272 goto out;
273 ua->proc = proc; 273 ua->proc = proc;
@@ -307,7 +307,7 @@ static int device_user_unlock(struct dlm_user_proc *proc,
307 if (!ls) 307 if (!ls)
308 return -ENOENT; 308 return -ENOENT;
309 309
310 ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL); 310 ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
311 if (!ua) 311 if (!ua)
312 goto out; 312 goto out;
313 ua->proc = proc; 313 ua->proc = proc;
@@ -352,7 +352,7 @@ static int dlm_device_register(struct dlm_ls *ls, char *name)
352 352
353 error = -ENOMEM; 353 error = -ENOMEM;
354 len = strlen(name) + strlen(name_prefix) + 2; 354 len = strlen(name) + strlen(name_prefix) + 2;
355 ls->ls_device.name = kzalloc(len, GFP_KERNEL); 355 ls->ls_device.name = kzalloc(len, GFP_NOFS);
356 if (!ls->ls_device.name) 356 if (!ls->ls_device.name)
357 goto fail; 357 goto fail;
358 358
@@ -520,7 +520,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
520#endif 520#endif
521 return -EINVAL; 521 return -EINVAL;
522 522
523 kbuf = kzalloc(count + 1, GFP_KERNEL); 523 kbuf = kzalloc(count + 1, GFP_NOFS);
524 if (!kbuf) 524 if (!kbuf)
525 return -ENOMEM; 525 return -ENOMEM;
526 526
@@ -546,7 +546,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
546 546
547 /* add 1 after namelen so that the name string is terminated */ 547 /* add 1 after namelen so that the name string is terminated */
548 kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1, 548 kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
549 GFP_KERNEL); 549 GFP_NOFS);
550 if (!kbuf) { 550 if (!kbuf) {
551 kfree(k32buf); 551 kfree(k32buf);
552 return -ENOMEM; 552 return -ENOMEM;
@@ -648,7 +648,7 @@ static int device_open(struct inode *inode, struct file *file)
648 if (!ls) 648 if (!ls)
649 return -ENOENT; 649 return -ENOENT;
650 650
651 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); 651 proc = kzalloc(sizeof(struct dlm_user_proc), GFP_NOFS);
652 if (!proc) { 652 if (!proc) {
653 dlm_put_lockspace(ls); 653 dlm_put_lockspace(ls);
654 return -ENOMEM; 654 return -ENOMEM;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 085c5c063420..366c503f9657 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -251,10 +251,10 @@ ctl_table epoll_table[] = {
251 .data = &max_user_watches, 251 .data = &max_user_watches,
252 .maxlen = sizeof(int), 252 .maxlen = sizeof(int),
253 .mode = 0644, 253 .mode = 0644,
254 .proc_handler = &proc_dointvec_minmax, 254 .proc_handler = proc_dointvec_minmax,
255 .extra1 = &zero, 255 .extra1 = &zero,
256 }, 256 },
257 { .ctl_name = 0 } 257 { }
258}; 258};
259#endif /* CONFIG_SYSCTL */ 259#endif /* CONFIG_SYSCTL */
260 260
diff --git a/fs/exec.c b/fs/exec.c
index ba112bd4a339..c0c636e34f60 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -46,7 +46,6 @@
46#include <linux/proc_fs.h> 46#include <linux/proc_fs.h>
47#include <linux/mount.h> 47#include <linux/mount.h>
48#include <linux/security.h> 48#include <linux/security.h>
49#include <linux/ima.h>
50#include <linux/syscalls.h> 49#include <linux/syscalls.h>
51#include <linux/tsacct_kern.h> 50#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 51#include <linux/cn_proc.h>
@@ -1209,9 +1208,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1209 retval = security_bprm_check(bprm); 1208 retval = security_bprm_check(bprm);
1210 if (retval) 1209 if (retval)
1211 return retval; 1210 return retval;
1212 retval = ima_bprm_check(bprm);
1213 if (retval)
1214 return retval;
1215 1211
1216 /* kernel module loader fixup */ 1212 /* kernel module loader fixup */
1217 /* so we don't try to load run modprobe in kernel space. */ 1213 /* so we don't try to load run modprobe in kernel space. */
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index cc2d22db119c..2d0f757fda3e 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,5 @@
12# Kbuild - Gets included from the Kernels Makefile and build system 12# Kbuild - Gets included from the Kernels Makefile and build system
13# 13#
14 14
15exofs-y := osd.o inode.o file.o symlink.o namei.o dir.o super.o 15exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
16obj-$(CONFIG_EXOFS_FS) += exofs.o 16obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index c6718e4817fe..b1b178e61718 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -49,6 +49,7 @@
49#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */ 49#define EXOFS_MIN_PID 0x10000 /* Smallest partition ID */
50#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */ 50#define EXOFS_OBJ_OFF 0x10000 /* offset for objects */
51#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */ 51#define EXOFS_SUPER_ID 0x10000 /* object ID for on-disk superblock */
52#define EXOFS_DEVTABLE_ID 0x10001 /* object ID for on-disk device table */
52#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
53 54
54/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
@@ -78,17 +79,67 @@ enum {
78#define EXOFS_SUPER_MAGIC 0x5DF5 79#define EXOFS_SUPER_MAGIC 0x5DF5
79 80
80/* 81/*
81 * The file system control block - stored in an object's data (mainly, the one 82 * The file system control block - stored in object EXOFS_SUPER_ID's data.
82 * with ID EXOFS_SUPER_ID). This is where the in-memory superblock is stored 83 * This is where the in-memory superblock is stored on disk.
83 * on disk. Right now it just has a magic value, which is basically a sanity
84 * check on our ability to communicate with the object store.
85 */ 84 */
85enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
86struct exofs_fscb { 86struct exofs_fscb {
87 __le64 s_nextid; /* Highest object ID used */ 87 __le64 s_nextid; /* Highest object ID used */
88 __le32 s_numfiles; /* Number of files on fs */ 88 __le64 s_numfiles; /* Number of files on fs */
89 __le32 s_version; /* == EXOFS_FSCB_VER */
89 __le16 s_magic; /* Magic signature */ 90 __le16 s_magic; /* Magic signature */
90 __le16 s_newfs; /* Non-zero if this is a new fs */ 91 __le16 s_newfs; /* Non-zero if this is a new fs */
91}; 92
93 /* From here on it's a static part, only written by mkexofs */
94 __le64 s_dev_table_oid; /* Resurved, not used */
95 __le64 s_dev_table_count; /* == 0 means no dev_table */
96} __packed;
97
98/*
99 * Describes the raid used in the FS. It is part of the device table.
100 * This here is taken from the pNFS-objects definition. In exofs we
101 * use one raid policy through-out the filesystem. (NOTE: the funny
102 * alignment at begining. We take care of it at exofs_device_table.
103 */
104struct exofs_dt_data_map {
105 __le32 cb_num_comps;
106 __le64 cb_stripe_unit;
107 __le32 cb_group_width;
108 __le32 cb_group_depth;
109 __le32 cb_mirror_cnt;
110 __le32 cb_raid_algorithm;
111} __packed;
112
113/*
114 * This is an osd device information descriptor. It is a single entry in
115 * the exofs device table. It describes an osd target lun which
116 * contains data belonging to this FS. (Same partition_id on all devices)
117 */
118struct exofs_dt_device_info {
119 __le32 systemid_len;
120 u8 systemid[OSD_SYSTEMID_LEN];
121 __le64 long_name_offset; /* If !0 then offset-in-file */
122 __le32 osdname_len; /* */
123 u8 osdname[44]; /* Embbeded, Ususally an asci uuid */
124} __packed;
125
126/*
127 * The EXOFS device table - stored in object EXOFS_DEVTABLE_ID's data.
128 * It contains the raid used for this multy-device FS and an array of
129 * participating devices.
130 */
131struct exofs_device_table {
132 __le32 dt_version; /* == EXOFS_DT_VER */
133 struct exofs_dt_data_map dt_data_map; /* Raid policy to use */
134
135 /* Resurved space For future use. Total includeing this:
136 * (8 * sizeof(le64))
137 */
138 __le64 __Resurved[4];
139
140 __le64 dt_num_devices; /* Array size */
141 struct exofs_dt_device_info dt_dev_table[]; /* Array of devices */
142} __packed;
92 143
93/**************************************************************************** 144/****************************************************************************
94 * inode-related things 145 * inode-related things
@@ -155,22 +206,4 @@ enum {
155 (((name_len) + offsetof(struct exofs_dir_entry, name) + \ 206 (((name_len) + offsetof(struct exofs_dir_entry, name) + \
156 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND) 207 EXOFS_DIR_ROUND) & ~EXOFS_DIR_ROUND)
157 208
158/*************************
159 * function declarations *
160 *************************/
161/* osd.c */
162void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
163 const struct osd_obj_id *obj);
164
165int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid);
166static inline int exofs_check_ok(struct osd_request *or)
167{
168 return exofs_check_ok_resid(or, NULL, NULL);
169}
170int exofs_sync_op(struct osd_request *or, int timeout, u8 *cred);
171int exofs_async_op(struct osd_request *or,
172 osd_req_done_fn *async_done, void *caller_context, u8 *cred);
173
174int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
175
176#endif /*ifndef __EXOFS_COM_H__*/ 209#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 5ec72e020b22..c35fd4623986 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -30,13 +30,17 @@
30 * along with exofs; if not, write to the Free Software 30 * along with exofs; if not, write to the Free Software
31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
32 */ 32 */
33#ifndef __EXOFS_H__
34#define __EXOFS_H__
33 35
34#include <linux/fs.h> 36#include <linux/fs.h>
35#include <linux/time.h> 37#include <linux/time.h>
36#include "common.h" 38#include "common.h"
37 39
38#ifndef __EXOFS_H__ 40/* FIXME: Remove once pnfs hits mainline
39#define __EXOFS_H__ 41 * #include <linux/exportfs/pnfs_osd_xdr.h>
42 */
43#include "pnfs.h"
40 44
41#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 45#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
42 46
@@ -55,7 +59,7 @@
55 * our extension to the in-memory superblock 59 * our extension to the in-memory superblock
56 */ 60 */
57struct exofs_sb_info { 61struct exofs_sb_info {
58 struct osd_dev *s_dev; /* returned by get_osd_dev */ 62 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
59 osd_id s_pid; /* partition ID of file system*/ 63 osd_id s_pid; /* partition ID of file system*/
60 int s_timeout; /* timeout for OSD operations */ 64 int s_timeout; /* timeout for OSD operations */
61 uint64_t s_nextid; /* highest object ID used */ 65 uint64_t s_nextid; /* highest object ID used */
@@ -63,7 +67,11 @@ struct exofs_sb_info {
63 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 67 spinlock_t s_next_gen_lock; /* spinlock for gen # update */
64 u32 s_next_generation; /* next gen # to use */ 68 u32 s_next_generation; /* next gen # to use */
65 atomic_t s_curr_pending; /* number of pending commands */ 69 atomic_t s_curr_pending; /* number of pending commands */
66 uint8_t s_cred[OSD_CAP_LEN]; /* all-powerful credential */ 70 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
71
72 struct pnfs_osd_data_map data_map; /* Default raid to use */
73 unsigned s_numdevs; /* Num of devices in array */
74 struct osd_dev *s_ods[1]; /* Variable length, minimum 1 */
67}; 75};
68 76
69/* 77/*
@@ -79,6 +87,50 @@ struct exofs_i_info {
79 struct inode vfs_inode; /* normal in-memory inode */ 87 struct inode vfs_inode; /* normal in-memory inode */
80}; 88};
81 89
90static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
91{
92 return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
93}
94
95struct exofs_io_state;
96typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
97
98struct exofs_io_state {
99 struct kref kref;
100
101 void *private;
102 exofs_io_done_fn done;
103
104 struct exofs_sb_info *sbi;
105 struct osd_obj_id obj;
106 u8 *cred;
107
108 /* Global read/write IO*/
109 loff_t offset;
110 unsigned long length;
111 void *kern_buff;
112 struct bio *bio;
113
114 /* Attributes */
115 unsigned in_attr_len;
116 struct osd_attr *in_attr;
117 unsigned out_attr_len;
118 struct osd_attr *out_attr;
119
120 /* Variable array of size numdevs */
121 unsigned numdevs;
122 struct exofs_per_dev_state {
123 struct osd_request *or;
124 struct bio *bio;
125 } per_dev[];
126};
127
128static inline unsigned exofs_io_state_size(unsigned numdevs)
129{
130 return sizeof(struct exofs_io_state) +
131 sizeof(struct exofs_per_dev_state) * numdevs;
132}
133
82/* 134/*
83 * our inode flags 135 * our inode flags
84 */ 136 */
@@ -130,6 +182,42 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
130/************************* 182/*************************
131 * function declarations * 183 * function declarations *
132 *************************/ 184 *************************/
185
186/* ios.c */
187void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
188 const struct osd_obj_id *obj);
189int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
190 u64 offset, void *p, unsigned length);
191
192int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios);
193void exofs_put_io_state(struct exofs_io_state *ios);
194
195int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
196
197int exofs_sbi_create(struct exofs_io_state *ios);
198int exofs_sbi_remove(struct exofs_io_state *ios);
199int exofs_sbi_write(struct exofs_io_state *ios);
200int exofs_sbi_read(struct exofs_io_state *ios);
201
202int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
203
204int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
205static inline int exofs_oi_write(struct exofs_i_info *oi,
206 struct exofs_io_state *ios)
207{
208 ios->obj.id = exofs_oi_objno(oi);
209 ios->cred = oi->i_cred;
210 return exofs_sbi_write(ios);
211}
212
213static inline int exofs_oi_read(struct exofs_i_info *oi,
214 struct exofs_io_state *ios)
215{
216 ios->obj.id = exofs_oi_objno(oi);
217 ios->cred = oi->i_cred;
218 return exofs_sbi_read(ios);
219}
220
133/* inode.c */ 221/* inode.c */
134void exofs_truncate(struct inode *inode); 222void exofs_truncate(struct inode *inode);
135int exofs_setattr(struct dentry *, struct iattr *); 223int exofs_setattr(struct dentry *, struct iattr *);
@@ -169,6 +257,7 @@ extern const struct file_operations exofs_file_operations;
169 257
170/* inode.c */ 258/* inode.c */
171extern const struct address_space_operations exofs_aops; 259extern const struct address_space_operations exofs_aops;
260extern const struct osd_attr g_attr_logical_length;
172 261
173/* namei.c */ 262/* namei.c */
174extern const struct inode_operations exofs_dir_inode_operations; 263extern const struct inode_operations exofs_dir_inode_operations;
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 6c10f7476699..698a8636d39c 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,15 +37,18 @@
37 37
38#include "exofs.h" 38#include "exofs.h"
39 39
40#ifdef CONFIG_EXOFS_DEBUG 40#define EXOFS_DBGMSG2(M...) do {} while (0)
41# define EXOFS_DEBUG_OBJ_ISIZE 1 41
42#endif 42enum { BIO_MAX_PAGES_KMALLOC =
43 (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
44};
43 45
44struct page_collect { 46struct page_collect {
45 struct exofs_sb_info *sbi; 47 struct exofs_sb_info *sbi;
46 struct request_queue *req_q; 48 struct request_queue *req_q;
47 struct inode *inode; 49 struct inode *inode;
48 unsigned expected_pages; 50 unsigned expected_pages;
51 struct exofs_io_state *ios;
49 52
50 struct bio *bio; 53 struct bio *bio;
51 unsigned nr_pages; 54 unsigned nr_pages;
@@ -54,22 +57,23 @@ struct page_collect {
54}; 57};
55 58
56static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, 59static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
57 struct inode *inode) 60 struct inode *inode)
58{ 61{
59 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 62 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
60 63
61 pcol->sbi = sbi; 64 pcol->sbi = sbi;
62 pcol->req_q = osd_request_queue(sbi->s_dev); 65 /* Create master bios on first Q, later on cloning, each clone will be
66 * allocated on it's destination Q
67 */
68 pcol->req_q = osd_request_queue(sbi->s_ods[0]);
63 pcol->inode = inode; 69 pcol->inode = inode;
64 pcol->expected_pages = expected_pages; 70 pcol->expected_pages = expected_pages;
65 71
72 pcol->ios = NULL;
66 pcol->bio = NULL; 73 pcol->bio = NULL;
67 pcol->nr_pages = 0; 74 pcol->nr_pages = 0;
68 pcol->length = 0; 75 pcol->length = 0;
69 pcol->pg_first = -1; 76 pcol->pg_first = -1;
70
71 EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
72 expected_pages);
73} 77}
74 78
75static void _pcol_reset(struct page_collect *pcol) 79static void _pcol_reset(struct page_collect *pcol)
@@ -80,35 +84,49 @@ static void _pcol_reset(struct page_collect *pcol)
80 pcol->nr_pages = 0; 84 pcol->nr_pages = 0;
81 pcol->length = 0; 85 pcol->length = 0;
82 pcol->pg_first = -1; 86 pcol->pg_first = -1;
83 EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", 87 pcol->ios = NULL;
84 pcol->inode->i_ino, pcol->expected_pages);
85 88
86 /* this is probably the end of the loop but in writes 89 /* this is probably the end of the loop but in writes
87 * it might not end here. don't be left with nothing 90 * it might not end here. don't be left with nothing
88 */ 91 */
89 if (!pcol->expected_pages) 92 if (!pcol->expected_pages)
90 pcol->expected_pages = 128; 93 pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
91} 94}
92 95
93static int pcol_try_alloc(struct page_collect *pcol) 96static int pcol_try_alloc(struct page_collect *pcol)
94{ 97{
95 int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); 98 int pages = min_t(unsigned, pcol->expected_pages,
99 BIO_MAX_PAGES_KMALLOC);
100
101 if (!pcol->ios) { /* First time allocate io_state */
102 int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
103
104 if (ret)
105 return ret;
106 }
96 107
97 for (; pages; pages >>= 1) { 108 for (; pages; pages >>= 1) {
98 pcol->bio = bio_alloc(GFP_KERNEL, pages); 109 pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
99 if (likely(pcol->bio)) 110 if (likely(pcol->bio))
100 return 0; 111 return 0;
101 } 112 }
102 113
103 EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", 114 EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
104 pcol->expected_pages); 115 pcol->expected_pages);
105 return -ENOMEM; 116 return -ENOMEM;
106} 117}
107 118
108static void pcol_free(struct page_collect *pcol) 119static void pcol_free(struct page_collect *pcol)
109{ 120{
110 bio_put(pcol->bio); 121 if (pcol->bio) {
111 pcol->bio = NULL; 122 bio_put(pcol->bio);
123 pcol->bio = NULL;
124 }
125
126 if (pcol->ios) {
127 exofs_put_io_state(pcol->ios);
128 pcol->ios = NULL;
129 }
112} 130}
113 131
114static int pcol_add_page(struct page_collect *pcol, struct page *page, 132static int pcol_add_page(struct page_collect *pcol, struct page *page,
@@ -161,22 +179,17 @@ static void update_write_page(struct page *page, int ret)
161/* Called at the end of reads, to optionally unlock pages and update their 179/* Called at the end of reads, to optionally unlock pages and update their
162 * status. 180 * status.
163 */ 181 */
164static int __readpages_done(struct osd_request *or, struct page_collect *pcol, 182static int __readpages_done(struct page_collect *pcol, bool do_unlock)
165 bool do_unlock)
166{ 183{
167 struct bio_vec *bvec; 184 struct bio_vec *bvec;
168 int i; 185 int i;
169 u64 resid; 186 u64 resid;
170 u64 good_bytes; 187 u64 good_bytes;
171 u64 length = 0; 188 u64 length = 0;
172 int ret = exofs_check_ok_resid(or, &resid, NULL); 189 int ret = exofs_check_io(pcol->ios, &resid);
173
174 osd_end_request(or);
175 190
176 if (likely(!ret)) 191 if (likely(!ret))
177 good_bytes = pcol->length; 192 good_bytes = pcol->length;
178 else if (!resid)
179 good_bytes = 0;
180 else 193 else
181 good_bytes = pcol->length - resid; 194 good_bytes = pcol->length - resid;
182 195
@@ -198,7 +211,7 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
198 else 211 else
199 page_stat = ret; 212 page_stat = ret;
200 213
201 EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", 214 EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n",
202 inode->i_ino, page->index, 215 inode->i_ino, page->index,
203 page_stat ? "bad_bytes" : "good_bytes"); 216 page_stat ? "bad_bytes" : "good_bytes");
204 217
@@ -214,13 +227,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
214} 227}
215 228
216/* callback of async reads */ 229/* callback of async reads */
217static void readpages_done(struct osd_request *or, void *p) 230static void readpages_done(struct exofs_io_state *ios, void *p)
218{ 231{
219 struct page_collect *pcol = p; 232 struct page_collect *pcol = p;
220 233
221 __readpages_done(or, pcol, true); 234 __readpages_done(pcol, true);
222 atomic_dec(&pcol->sbi->s_curr_pending); 235 atomic_dec(&pcol->sbi->s_curr_pending);
223 kfree(p); 236 kfree(pcol);
224} 237}
225 238
226static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) 239static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
@@ -238,17 +251,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
238 251
239 unlock_page(page); 252 unlock_page(page);
240 } 253 }
241 pcol_free(pcol);
242} 254}
243 255
244static int read_exec(struct page_collect *pcol, bool is_sync) 256static int read_exec(struct page_collect *pcol, bool is_sync)
245{ 257{
246 struct exofs_i_info *oi = exofs_i(pcol->inode); 258 struct exofs_i_info *oi = exofs_i(pcol->inode);
247 struct osd_obj_id obj = {pcol->sbi->s_pid, 259 struct exofs_io_state *ios = pcol->ios;
248 pcol->inode->i_ino + EXOFS_OBJ_OFF};
249 struct osd_request *or = NULL;
250 struct page_collect *pcol_copy = NULL; 260 struct page_collect *pcol_copy = NULL;
251 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
252 int ret; 261 int ret;
253 262
254 if (!pcol->bio) 263 if (!pcol->bio)
@@ -257,17 +266,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
257 /* see comment in _readpage() about sync reads */ 266 /* see comment in _readpage() about sync reads */
258 WARN_ON(is_sync && (pcol->nr_pages != 1)); 267 WARN_ON(is_sync && (pcol->nr_pages != 1));
259 268
260 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); 269 ios->bio = pcol->bio;
261 if (unlikely(!or)) { 270 ios->length = pcol->length;
262 ret = -ENOMEM; 271 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
263 goto err;
264 }
265
266 osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
267 272
268 if (is_sync) { 273 if (is_sync) {
269 exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); 274 exofs_oi_read(oi, pcol->ios);
270 return __readpages_done(or, pcol, false); 275 return __readpages_done(pcol, false);
271 } 276 }
272 277
273 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 278 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -277,14 +282,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
277 } 282 }
278 283
279 *pcol_copy = *pcol; 284 *pcol_copy = *pcol;
280 ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); 285 ios->done = readpages_done;
286 ios->private = pcol_copy;
287 ret = exofs_oi_read(oi, ios);
281 if (unlikely(ret)) 288 if (unlikely(ret))
282 goto err; 289 goto err;
283 290
284 atomic_inc(&pcol->sbi->s_curr_pending); 291 atomic_inc(&pcol->sbi->s_curr_pending);
285 292
286 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 293 EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
287 obj.id, _LLU(i_start), pcol->length); 294 ios->obj.id, _LLU(ios->offset), pcol->length);
288 295
289 /* pages ownership was passed to pcol_copy */ 296 /* pages ownership was passed to pcol_copy */
290 _pcol_reset(pcol); 297 _pcol_reset(pcol);
@@ -293,12 +300,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
293err: 300err:
294 if (!is_sync) 301 if (!is_sync)
295 _unlock_pcol_pages(pcol, ret, READ); 302 _unlock_pcol_pages(pcol, ret, READ);
296 else /* Pages unlocked by caller in sync mode only free bio */ 303
297 pcol_free(pcol); 304 pcol_free(pcol);
298 305
299 kfree(pcol_copy); 306 kfree(pcol_copy);
300 if (or)
301 osd_end_request(or);
302 return ret; 307 return ret;
303} 308}
304 309
@@ -370,12 +375,12 @@ try_again:
370 if (len != PAGE_CACHE_SIZE) 375 if (len != PAGE_CACHE_SIZE)
371 zero_user(page, len, PAGE_CACHE_SIZE - len); 376 zero_user(page, len, PAGE_CACHE_SIZE - len);
372 377
373 EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", 378 EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
374 inode->i_ino, page->index, len); 379 inode->i_ino, page->index, len);
375 380
376 ret = pcol_add_page(pcol, page, len); 381 ret = pcol_add_page(pcol, page, len);
377 if (ret) { 382 if (ret) {
378 EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " 383 EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
379 "this_len=0x%zx nr_pages=%u length=0x%lx\n", 384 "this_len=0x%zx nr_pages=%u length=0x%lx\n",
380 page, len, pcol->nr_pages, pcol->length); 385 page, len, pcol->nr_pages, pcol->length);
381 386
@@ -419,9 +424,8 @@ static int _readpage(struct page *page, bool is_sync)
419 424
420 _pcol_init(&pcol, 1, page->mapping->host); 425 _pcol_init(&pcol, 1, page->mapping->host);
421 426
422 /* readpage_strip might call read_exec(,async) inside at several places 427 /* readpage_strip might call read_exec(,is_sync==false) at several
423 * but this is safe for is_async=0 since read_exec will not do anything 428 * places but not if we have a single page.
424 * when we have a single page.
425 */ 429 */
426 ret = readpage_strip(&pcol, page); 430 ret = readpage_strip(&pcol, page);
427 if (ret) { 431 if (ret) {
@@ -440,8 +444,8 @@ static int exofs_readpage(struct file *file, struct page *page)
440 return _readpage(page, false); 444 return _readpage(page, false);
441} 445}
442 446
443/* Callback for osd_write. All writes are asynchronouse */ 447/* Callback for osd_write. All writes are asynchronous */
444static void writepages_done(struct osd_request *or, void *p) 448static void writepages_done(struct exofs_io_state *ios, void *p)
445{ 449{
446 struct page_collect *pcol = p; 450 struct page_collect *pcol = p;
447 struct bio_vec *bvec; 451 struct bio_vec *bvec;
@@ -449,16 +453,12 @@ static void writepages_done(struct osd_request *or, void *p)
449 u64 resid; 453 u64 resid;
450 u64 good_bytes; 454 u64 good_bytes;
451 u64 length = 0; 455 u64 length = 0;
456 int ret = exofs_check_io(ios, &resid);
452 457
453 int ret = exofs_check_ok_resid(or, NULL, &resid);
454
455 osd_end_request(or);
456 atomic_dec(&pcol->sbi->s_curr_pending); 458 atomic_dec(&pcol->sbi->s_curr_pending);
457 459
458 if (likely(!ret)) 460 if (likely(!ret))
459 good_bytes = pcol->length; 461 good_bytes = pcol->length;
460 else if (!resid)
461 good_bytes = 0;
462 else 462 else
463 good_bytes = pcol->length - resid; 463 good_bytes = pcol->length - resid;
464 464
@@ -482,7 +482,7 @@ static void writepages_done(struct osd_request *or, void *p)
482 482
483 update_write_page(page, page_stat); 483 update_write_page(page, page_stat);
484 unlock_page(page); 484 unlock_page(page);
485 EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", 485 EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
486 inode->i_ino, page->index, page_stat); 486 inode->i_ino, page->index, page_stat);
487 487
488 length += bvec->bv_len; 488 length += bvec->bv_len;
@@ -496,23 +496,13 @@ static void writepages_done(struct osd_request *or, void *p)
496static int write_exec(struct page_collect *pcol) 496static int write_exec(struct page_collect *pcol)
497{ 497{
498 struct exofs_i_info *oi = exofs_i(pcol->inode); 498 struct exofs_i_info *oi = exofs_i(pcol->inode);
499 struct osd_obj_id obj = {pcol->sbi->s_pid, 499 struct exofs_io_state *ios = pcol->ios;
500 pcol->inode->i_ino + EXOFS_OBJ_OFF};
501 struct osd_request *or = NULL;
502 struct page_collect *pcol_copy = NULL; 500 struct page_collect *pcol_copy = NULL;
503 loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
504 int ret; 501 int ret;
505 502
506 if (!pcol->bio) 503 if (!pcol->bio)
507 return 0; 504 return 0;
508 505
509 or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
510 if (unlikely(!or)) {
511 EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
512 ret = -ENOMEM;
513 goto err;
514 }
515
516 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 506 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
517 if (!pcol_copy) { 507 if (!pcol_copy) {
518 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); 508 EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
@@ -523,16 +513,22 @@ static int write_exec(struct page_collect *pcol)
523 *pcol_copy = *pcol; 513 *pcol_copy = *pcol;
524 514
525 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ 515 pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
526 osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); 516
527 ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); 517 ios->bio = pcol_copy->bio;
518 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
519 ios->length = pcol_copy->length;
520 ios->done = writepages_done;
521 ios->private = pcol_copy;
522
523 ret = exofs_oi_write(oi, ios);
528 if (unlikely(ret)) { 524 if (unlikely(ret)) {
529 EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); 525 EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
530 goto err; 526 goto err;
531 } 527 }
532 528
533 atomic_inc(&pcol->sbi->s_curr_pending); 529 atomic_inc(&pcol->sbi->s_curr_pending);
534 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", 530 EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
535 pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), 531 pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
536 pcol->length); 532 pcol->length);
537 /* pages ownership was passed to pcol_copy */ 533 /* pages ownership was passed to pcol_copy */
538 _pcol_reset(pcol); 534 _pcol_reset(pcol);
@@ -540,9 +536,9 @@ static int write_exec(struct page_collect *pcol)
540 536
541err: 537err:
542 _unlock_pcol_pages(pcol, ret, WRITE); 538 _unlock_pcol_pages(pcol, ret, WRITE);
539 pcol_free(pcol);
543 kfree(pcol_copy); 540 kfree(pcol_copy);
544 if (or) 541
545 osd_end_request(or);
546 return ret; 542 return ret;
547} 543}
548 544
@@ -586,6 +582,9 @@ static int writepage_strip(struct page *page,
586 if (PageError(page)) 582 if (PageError(page))
587 ClearPageError(page); 583 ClearPageError(page);
588 unlock_page(page); 584 unlock_page(page);
585 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
586 "outside the limits\n",
587 inode->i_ino, page->index);
589 return 0; 588 return 0;
590 } 589 }
591 } 590 }
@@ -600,6 +599,9 @@ try_again:
600 ret = write_exec(pcol); 599 ret = write_exec(pcol);
601 if (unlikely(ret)) 600 if (unlikely(ret))
602 goto fail; 601 goto fail;
602
603 EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
604 inode->i_ino, page->index);
603 goto try_again; 605 goto try_again;
604 } 606 }
605 607
@@ -609,7 +611,7 @@ try_again:
609 goto fail; 611 goto fail;
610 } 612 }
611 613
612 EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", 614 EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
613 inode->i_ino, page->index, len); 615 inode->i_ino, page->index, len);
614 616
615 ret = pcol_add_page(pcol, page, len); 617 ret = pcol_add_page(pcol, page, len);
@@ -634,6 +636,8 @@ try_again:
634 return 0; 636 return 0;
635 637
636fail: 638fail:
639 EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
640 inode->i_ino, page->index, ret);
637 set_bit(AS_EIO, &page->mapping->flags); 641 set_bit(AS_EIO, &page->mapping->flags);
638 unlock_page(page); 642 unlock_page(page);
639 return ret; 643 return ret;
@@ -652,14 +656,17 @@ static int exofs_writepages(struct address_space *mapping,
652 wbc->range_end >> PAGE_CACHE_SHIFT; 656 wbc->range_end >> PAGE_CACHE_SHIFT;
653 657
654 if (start || end) 658 if (start || end)
655 expected_pages = min(end - start + 1, 32L); 659 expected_pages = end - start + 1;
656 else 660 else
657 expected_pages = mapping->nrpages; 661 expected_pages = mapping->nrpages;
658 662
659 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" 663 if (expected_pages < 32L)
660 " m->nrpages=%lu start=0x%lx end=0x%lx\n", 664 expected_pages = 32L;
665
666 EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
667 "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
661 mapping->host->i_ino, wbc->range_start, wbc->range_end, 668 mapping->host->i_ino, wbc->range_start, wbc->range_end,
662 mapping->nrpages, start, end); 669 mapping->nrpages, start, end, expected_pages);
663 670
664 _pcol_init(&pcol, expected_pages, mapping->host); 671 _pcol_init(&pcol, expected_pages, mapping->host);
665 672
@@ -771,19 +778,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock,
771const struct osd_attr g_attr_logical_length = ATTR_DEF( 778const struct osd_attr g_attr_logical_length = ATTR_DEF(
772 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 779 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
773 780
781static int _do_truncate(struct inode *inode)
782{
783 struct exofs_i_info *oi = exofs_i(inode);
784 loff_t isize = i_size_read(inode);
785 int ret;
786
787 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
788
789 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
790
791 ret = exofs_oi_truncate(oi, (u64)isize);
792 EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
793 return ret;
794}
795
774/* 796/*
775 * Truncate a file to the specified size - all we have to do is set the size 797 * Truncate a file to the specified size - all we have to do is set the size
776 * attribute. We make sure the object exists first. 798 * attribute. We make sure the object exists first.
777 */ 799 */
778void exofs_truncate(struct inode *inode) 800void exofs_truncate(struct inode *inode)
779{ 801{
780 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
781 struct exofs_i_info *oi = exofs_i(inode); 802 struct exofs_i_info *oi = exofs_i(inode);
782 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
783 struct osd_request *or;
784 struct osd_attr attr;
785 loff_t isize = i_size_read(inode);
786 __be64 newsize;
787 int ret; 803 int ret;
788 804
789 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) 805 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
@@ -793,22 +809,6 @@ void exofs_truncate(struct inode *inode)
793 return; 809 return;
794 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 810 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
795 return; 811 return;
796 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
797
798 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
799
800 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
801 if (unlikely(!or)) {
802 EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
803 goto fail;
804 }
805
806 osd_req_set_attributes(or, &obj);
807
808 newsize = cpu_to_be64((u64)isize);
809 attr = g_attr_logical_length;
810 attr.val_ptr = &newsize;
811 osd_req_add_set_attr_list(or, &attr, 1);
812 812
813 /* if we are about to truncate an object, and it hasn't been 813 /* if we are about to truncate an object, and it hasn't been
814 * created yet, wait 814 * created yet, wait
@@ -816,8 +816,7 @@ void exofs_truncate(struct inode *inode)
816 if (unlikely(wait_obj_created(oi))) 816 if (unlikely(wait_obj_created(oi)))
817 goto fail; 817 goto fail;
818 818
819 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 819 ret = _do_truncate(inode);
820 osd_end_request(or);
821 if (ret) 820 if (ret)
822 goto fail; 821 goto fail;
823 822
@@ -847,65 +846,62 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
847 846
848/* 847/*
849 * Read an inode from the OSD, and return it as is. We also return the size 848 * Read an inode from the OSD, and return it as is. We also return the size
850 * attribute in the 'sanity' argument if we got compiled with debugging turned 849 * attribute in the 'obj_size' argument.
851 * on.
852 */ 850 */
853static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, 851static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
854 struct exofs_fcb *inode, uint64_t *sanity) 852 struct exofs_fcb *inode, uint64_t *obj_size)
855{ 853{
856 struct exofs_sb_info *sbi = sb->s_fs_info; 854 struct exofs_sb_info *sbi = sb->s_fs_info;
857 struct osd_request *or; 855 struct osd_attr attrs[2];
858 struct osd_attr attr; 856 struct exofs_io_state *ios;
859 struct osd_obj_id obj = {sbi->s_pid,
860 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
861 int ret; 857 int ret;
862 858
863 exofs_make_credential(oi->i_cred, &obj); 859 *obj_size = ~0;
864 860 ret = exofs_get_io_state(sbi, &ios);
865 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 861 if (unlikely(ret)) {
866 if (unlikely(!or)) { 862 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
867 EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); 863 return ret;
868 return -ENOMEM;
869 } 864 }
870 osd_req_get_attributes(or, &obj);
871 865
872 /* we need the inode attribute */ 866 ios->obj.id = exofs_oi_objno(oi);
873 osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); 867 exofs_make_credential(oi->i_cred, &ios->obj);
868 ios->cred = oi->i_cred;
874 869
875#ifdef EXOFS_DEBUG_OBJ_ISIZE 870 attrs[0] = g_attr_inode_data;
876 /* we get the size attributes to do a sanity check */ 871 attrs[1] = g_attr_logical_length;
877 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); 872 ios->in_attr = attrs;
878#endif 873 ios->in_attr_len = ARRAY_SIZE(attrs);
879 874
880 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); 875 ret = exofs_sbi_read(ios);
881 if (ret) 876 if (ret)
882 goto out; 877 goto out;
883 878
884 attr = g_attr_inode_data; 879 ret = extract_attr_from_ios(ios, &attrs[0]);
885 ret = extract_attr_from_req(or, &attr);
886 if (ret) { 880 if (ret) {
887 EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); 881 EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
888 goto out; 882 goto out;
889 } 883 }
884 WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
885 memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
890 886
891 WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); 887 ret = extract_attr_from_ios(ios, &attrs[1]);
892 memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
893
894#ifdef EXOFS_DEBUG_OBJ_ISIZE
895 attr = g_attr_logical_length;
896 ret = extract_attr_from_req(or, &attr);
897 if (ret) { 888 if (ret) {
898 EXOFS_ERR("ERROR: extract attr from or failed\n"); 889 EXOFS_ERR("%s: extract_attr of logical_length failed\n",
890 __func__);
899 goto out; 891 goto out;
900 } 892 }
901 *sanity = get_unaligned_be64(attr.val_ptr); 893 *obj_size = get_unaligned_be64(attrs[1].val_ptr);
902#endif
903 894
904out: 895out:
905 osd_end_request(or); 896 exofs_put_io_state(ios);
906 return ret; 897 return ret;
907} 898}
908 899
900static void __oi_init(struct exofs_i_info *oi)
901{
902 init_waitqueue_head(&oi->i_wq);
903 oi->i_flags = 0;
904}
909/* 905/*
910 * Fill in an inode read from the OSD and set it up for use 906 * Fill in an inode read from the OSD and set it up for use
911 */ 907 */
@@ -914,7 +910,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
914 struct exofs_i_info *oi; 910 struct exofs_i_info *oi;
915 struct exofs_fcb fcb; 911 struct exofs_fcb fcb;
916 struct inode *inode; 912 struct inode *inode;
917 uint64_t uninitialized_var(sanity); 913 uint64_t obj_size;
918 int ret; 914 int ret;
919 915
920 inode = iget_locked(sb, ino); 916 inode = iget_locked(sb, ino);
@@ -923,13 +919,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
923 if (!(inode->i_state & I_NEW)) 919 if (!(inode->i_state & I_NEW))
924 return inode; 920 return inode;
925 oi = exofs_i(inode); 921 oi = exofs_i(inode);
922 __oi_init(oi);
926 923
927 /* read the inode from the osd */ 924 /* read the inode from the osd */
928 ret = exofs_get_inode(sb, oi, &fcb, &sanity); 925 ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
929 if (ret) 926 if (ret)
930 goto bad_inode; 927 goto bad_inode;
931 928
932 init_waitqueue_head(&oi->i_wq);
933 set_obj_created(oi); 929 set_obj_created(oi);
934 930
935 /* copy stuff from on-disk struct to in-memory struct */ 931 /* copy stuff from on-disk struct to in-memory struct */
@@ -947,14 +943,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
947 inode->i_blkbits = EXOFS_BLKSHIFT; 943 inode->i_blkbits = EXOFS_BLKSHIFT;
948 inode->i_generation = le32_to_cpu(fcb.i_generation); 944 inode->i_generation = le32_to_cpu(fcb.i_generation);
949 945
950#ifdef EXOFS_DEBUG_OBJ_ISIZE 946 if ((inode->i_size != obj_size) &&
951 if ((inode->i_size != sanity) &&
952 (!exofs_inode_is_fast_symlink(inode))) { 947 (!exofs_inode_is_fast_symlink(inode))) {
953 EXOFS_ERR("WARNING: Size of object from inode and " 948 EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
954 "attributes differ (%lld != %llu)\n", 949 inode->i_size, _LLU(obj_size));
955 inode->i_size, _LLU(sanity)); 950 /* FIXME: call exofs_inode_recovery() */
956 } 951 }
957#endif
958 952
959 oi->i_dir_start_lookup = 0; 953 oi->i_dir_start_lookup = 0;
960 954
@@ -1020,23 +1014,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1020 * set the obj_created flag so that other methods know that the object exists on 1014 * set the obj_created flag so that other methods know that the object exists on
1021 * the OSD. 1015 * the OSD.
1022 */ 1016 */
1023static void create_done(struct osd_request *or, void *p) 1017static void create_done(struct exofs_io_state *ios, void *p)
1024{ 1018{
1025 struct inode *inode = p; 1019 struct inode *inode = p;
1026 struct exofs_i_info *oi = exofs_i(inode); 1020 struct exofs_i_info *oi = exofs_i(inode);
1027 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1021 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1028 int ret; 1022 int ret;
1029 1023
1030 ret = exofs_check_ok(or); 1024 ret = exofs_check_io(ios, NULL);
1031 osd_end_request(or); 1025 exofs_put_io_state(ios);
1026
1032 atomic_dec(&sbi->s_curr_pending); 1027 atomic_dec(&sbi->s_curr_pending);
1033 1028
1034 if (unlikely(ret)) { 1029 if (unlikely(ret)) {
1035 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", 1030 EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
1036 _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); 1031 _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
1037 make_bad_inode(inode); 1032 /*TODO: When FS is corrupted creation can fail, object already
1038 } else 1033 * exist. Get rid of this asynchronous creation, if exist
1039 set_obj_created(oi); 1034 * increment the obj counter and try the next object. Until we
1035 * succeed. All these dangling objects will be made into lost
1036 * files by chkfs.exofs
1037 */
1038 }
1039
1040 set_obj_created(oi);
1040 1041
1041 atomic_dec(&inode->i_count); 1042 atomic_dec(&inode->i_count);
1042 wake_up(&oi->i_wq); 1043 wake_up(&oi->i_wq);
@@ -1051,8 +1052,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1051 struct inode *inode; 1052 struct inode *inode;
1052 struct exofs_i_info *oi; 1053 struct exofs_i_info *oi;
1053 struct exofs_sb_info *sbi; 1054 struct exofs_sb_info *sbi;
1054 struct osd_request *or; 1055 struct exofs_io_state *ios;
1055 struct osd_obj_id obj;
1056 int ret; 1056 int ret;
1057 1057
1058 sb = dir->i_sb; 1058 sb = dir->i_sb;
@@ -1061,8 +1061,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1061 return ERR_PTR(-ENOMEM); 1061 return ERR_PTR(-ENOMEM);
1062 1062
1063 oi = exofs_i(inode); 1063 oi = exofs_i(inode);
1064 __oi_init(oi);
1064 1065
1065 init_waitqueue_head(&oi->i_wq);
1066 set_obj_2bcreated(oi); 1066 set_obj_2bcreated(oi);
1067 1067
1068 sbi = sb->s_fs_info; 1068 sbi = sb->s_fs_info;
@@ -1089,28 +1089,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1089 1089
1090 mark_inode_dirty(inode); 1090 mark_inode_dirty(inode);
1091 1091
1092 obj.partition = sbi->s_pid; 1092 ret = exofs_get_io_state(sbi, &ios);
1093 obj.id = inode->i_ino + EXOFS_OBJ_OFF; 1093 if (unlikely(ret)) {
1094 exofs_make_credential(oi->i_cred, &obj); 1094 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
1095 1095 return ERR_PTR(ret);
1096 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
1097 if (unlikely(!or)) {
1098 EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
1099 return ERR_PTR(-ENOMEM);
1100 } 1096 }
1101 1097
1102 osd_req_create_object(or, &obj); 1098 ios->obj.id = exofs_oi_objno(oi);
1099 exofs_make_credential(oi->i_cred, &ios->obj);
1103 1100
1104 /* increment the refcount so that the inode will still be around when we 1101 /* increment the refcount so that the inode will still be around when we
1105 * reach the callback 1102 * reach the callback
1106 */ 1103 */
1107 atomic_inc(&inode->i_count); 1104 atomic_inc(&inode->i_count);
1108 1105
1109 ret = exofs_async_op(or, create_done, inode, oi->i_cred); 1106 ios->done = create_done;
1107 ios->private = inode;
1108 ios->cred = oi->i_cred;
1109 ret = exofs_sbi_create(ios);
1110 if (ret) { 1110 if (ret) {
1111 atomic_dec(&inode->i_count); 1111 atomic_dec(&inode->i_count);
1112 osd_end_request(or); 1112 exofs_put_io_state(ios);
1113 return ERR_PTR(-EIO); 1113 return ERR_PTR(ret);
1114 } 1114 }
1115 atomic_inc(&sbi->s_curr_pending); 1115 atomic_inc(&sbi->s_curr_pending);
1116 1116
@@ -1128,11 +1128,11 @@ struct updatei_args {
1128/* 1128/*
1129 * Callback function from exofs_update_inode(). 1129 * Callback function from exofs_update_inode().
1130 */ 1130 */
1131static void updatei_done(struct osd_request *or, void *p) 1131static void updatei_done(struct exofs_io_state *ios, void *p)
1132{ 1132{
1133 struct updatei_args *args = p; 1133 struct updatei_args *args = p;
1134 1134
1135 osd_end_request(or); 1135 exofs_put_io_state(ios);
1136 1136
1137 atomic_dec(&args->sbi->s_curr_pending); 1137 atomic_dec(&args->sbi->s_curr_pending);
1138 1138
@@ -1148,8 +1148,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1148 struct exofs_i_info *oi = exofs_i(inode); 1148 struct exofs_i_info *oi = exofs_i(inode);
1149 struct super_block *sb = inode->i_sb; 1149 struct super_block *sb = inode->i_sb;
1150 struct exofs_sb_info *sbi = sb->s_fs_info; 1150 struct exofs_sb_info *sbi = sb->s_fs_info;
1151 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1151 struct exofs_io_state *ios;
1152 struct osd_request *or;
1153 struct osd_attr attr; 1152 struct osd_attr attr;
1154 struct exofs_fcb *fcb; 1153 struct exofs_fcb *fcb;
1155 struct updatei_args *args; 1154 struct updatei_args *args;
@@ -1186,18 +1185,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1186 } else 1185 } else
1187 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1186 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1188 1187
1189 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1188 ret = exofs_get_io_state(sbi, &ios);
1190 if (unlikely(!or)) { 1189 if (unlikely(ret)) {
1191 EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); 1190 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
1192 ret = -ENOMEM;
1193 goto free_args; 1191 goto free_args;
1194 } 1192 }
1195 1193
1196 osd_req_set_attributes(or, &obj);
1197
1198 attr = g_attr_inode_data; 1194 attr = g_attr_inode_data;
1199 attr.val_ptr = fcb; 1195 attr.val_ptr = fcb;
1200 osd_req_add_set_attr_list(or, &attr, 1); 1196 ios->out_attr_len = 1;
1197 ios->out_attr = &attr;
1201 1198
1202 if (!obj_created(oi)) { 1199 if (!obj_created(oi)) {
1203 EXOFS_DBGMSG("!obj_created\n"); 1200 EXOFS_DBGMSG("!obj_created\n");
@@ -1206,22 +1203,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1206 EXOFS_DBGMSG("wait_event done\n"); 1203 EXOFS_DBGMSG("wait_event done\n");
1207 } 1204 }
1208 1205
1209 if (do_sync) { 1206 if (!do_sync) {
1210 ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
1211 osd_end_request(or);
1212 goto free_args;
1213 } else {
1214 args->sbi = sbi; 1207 args->sbi = sbi;
1208 ios->done = updatei_done;
1209 ios->private = args;
1210 }
1215 1211
1216 ret = exofs_async_op(or, updatei_done, args, oi->i_cred); 1212 ret = exofs_oi_write(oi, ios);
1217 if (ret) { 1213 if (!do_sync && !ret) {
1218 osd_end_request(or);
1219 goto free_args;
1220 }
1221 atomic_inc(&sbi->s_curr_pending); 1214 atomic_inc(&sbi->s_curr_pending);
1222 goto out; /* deallocation in updatei_done */ 1215 goto out; /* deallocation in updatei_done */
1223 } 1216 }
1224 1217
1218 exofs_put_io_state(ios);
1225free_args: 1219free_args:
1226 kfree(args); 1220 kfree(args);
1227out: 1221out:
@@ -1238,11 +1232,12 @@ int exofs_write_inode(struct inode *inode, int wait)
1238 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1232 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1239 * do. 1233 * do.
1240 */ 1234 */
1241static void delete_done(struct osd_request *or, void *p) 1235static void delete_done(struct exofs_io_state *ios, void *p)
1242{ 1236{
1243 struct exofs_sb_info *sbi; 1237 struct exofs_sb_info *sbi = p;
1244 osd_end_request(or); 1238
1245 sbi = p; 1239 exofs_put_io_state(ios);
1240
1246 atomic_dec(&sbi->s_curr_pending); 1241 atomic_dec(&sbi->s_curr_pending);
1247} 1242}
1248 1243
@@ -1256,8 +1251,7 @@ void exofs_delete_inode(struct inode *inode)
1256 struct exofs_i_info *oi = exofs_i(inode); 1251 struct exofs_i_info *oi = exofs_i(inode);
1257 struct super_block *sb = inode->i_sb; 1252 struct super_block *sb = inode->i_sb;
1258 struct exofs_sb_info *sbi = sb->s_fs_info; 1253 struct exofs_sb_info *sbi = sb->s_fs_info;
1259 struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; 1254 struct exofs_io_state *ios;
1260 struct osd_request *or;
1261 int ret; 1255 int ret;
1262 1256
1263 truncate_inode_pages(&inode->i_data, 0); 1257 truncate_inode_pages(&inode->i_data, 0);
@@ -1274,25 +1268,26 @@ void exofs_delete_inode(struct inode *inode)
1274 1268
1275 clear_inode(inode); 1269 clear_inode(inode);
1276 1270
1277 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 1271 ret = exofs_get_io_state(sbi, &ios);
1278 if (unlikely(!or)) { 1272 if (unlikely(ret)) {
1279 EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); 1273 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1280 return; 1274 return;
1281 } 1275 }
1282 1276
1283 osd_req_remove_object(or, &obj);
1284
1285 /* if we are deleting an obj that hasn't been created yet, wait */ 1277 /* if we are deleting an obj that hasn't been created yet, wait */
1286 if (!obj_created(oi)) { 1278 if (!obj_created(oi)) {
1287 BUG_ON(!obj_2bcreated(oi)); 1279 BUG_ON(!obj_2bcreated(oi));
1288 wait_event(oi->i_wq, obj_created(oi)); 1280 wait_event(oi->i_wq, obj_created(oi));
1289 } 1281 }
1290 1282
1291 ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); 1283 ios->obj.id = exofs_oi_objno(oi);
1284 ios->done = delete_done;
1285 ios->private = sbi;
1286 ios->cred = oi->i_cred;
1287 ret = exofs_sbi_remove(ios);
1292 if (ret) { 1288 if (ret) {
1293 EXOFS_ERR( 1289 EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
1294 "ERROR: @exofs_delete_inode exofs_async_op failed\n"); 1290 exofs_put_io_state(ios);
1295 osd_end_request(or);
1296 return; 1291 return;
1297 } 1292 }
1298 atomic_inc(&sbi->s_curr_pending); 1293 atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
new file mode 100644
index 000000000000..5bad01fa1f9f
--- /dev/null
+++ b/fs/exofs/ios.c
@@ -0,0 +1,421 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
6 *
7 * This file is part of exofs.
8 *
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
14 *
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <scsi/scsi_device.h>
26
27#include "exofs.h"
28
29void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
30{
31 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
32}
33
34int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
35 u64 offset, void *p, unsigned length)
36{
37 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
38/* struct osd_sense_info osi = {.key = 0};*/
39 int ret;
40
41 if (unlikely(!or)) {
42 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
43 return -ENOMEM;
44 }
45 ret = osd_req_read_kern(or, obj, offset, p, length);
46 if (unlikely(ret)) {
47 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
48 goto out;
49 }
50
51 ret = osd_finalize_request(or, 0, cred, NULL);
52 if (unlikely(ret)) {
53 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
54 goto out;
55 }
56
57 ret = osd_execute_request(or);
58 if (unlikely(ret))
59 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
60 /* osd_req_decode_sense(or, ret); */
61
62out:
63 osd_end_request(or);
64 return ret;
65}
66
67int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
68{
69 struct exofs_io_state *ios;
70
71 /*TODO: Maybe use kmem_cach per sbi of size
72 * exofs_io_state_size(sbi->s_numdevs)
73 */
74 ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL);
75 if (unlikely(!ios)) {
76 *pios = NULL;
77 return -ENOMEM;
78 }
79
80 ios->sbi = sbi;
81 ios->obj.partition = sbi->s_pid;
82 *pios = ios;
83 return 0;
84}
85
86void exofs_put_io_state(struct exofs_io_state *ios)
87{
88 if (ios) {
89 unsigned i;
90
91 for (i = 0; i < ios->numdevs; i++) {
92 struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
93
94 if (per_dev->or)
95 osd_end_request(per_dev->or);
96 if (per_dev->bio)
97 bio_put(per_dev->bio);
98 }
99
100 kfree(ios);
101 }
102}
103
104static void _sync_done(struct exofs_io_state *ios, void *p)
105{
106 struct completion *waiting = p;
107
108 complete(waiting);
109}
110
111static void _last_io(struct kref *kref)
112{
113 struct exofs_io_state *ios = container_of(
114 kref, struct exofs_io_state, kref);
115
116 ios->done(ios, ios->private);
117}
118
119static void _done_io(struct osd_request *or, void *p)
120{
121 struct exofs_io_state *ios = p;
122
123 kref_put(&ios->kref, _last_io);
124}
125
126static int exofs_io_execute(struct exofs_io_state *ios)
127{
128 DECLARE_COMPLETION_ONSTACK(wait);
129 bool sync = (ios->done == NULL);
130 int i, ret;
131
132 if (sync) {
133 ios->done = _sync_done;
134 ios->private = &wait;
135 }
136
137 for (i = 0; i < ios->numdevs; i++) {
138 struct osd_request *or = ios->per_dev[i].or;
139 if (unlikely(!or))
140 continue;
141
142 ret = osd_finalize_request(or, 0, ios->cred, NULL);
143 if (unlikely(ret)) {
144 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n",
145 ret);
146 return ret;
147 }
148 }
149
150 kref_init(&ios->kref);
151
152 for (i = 0; i < ios->numdevs; i++) {
153 struct osd_request *or = ios->per_dev[i].or;
154 if (unlikely(!or))
155 continue;
156
157 kref_get(&ios->kref);
158 osd_execute_request_async(or, _done_io, ios);
159 }
160
161 kref_put(&ios->kref, _last_io);
162 ret = 0;
163
164 if (sync) {
165 wait_for_completion(&wait);
166 ret = exofs_check_io(ios, NULL);
167 }
168 return ret;
169}
170
171int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
172{
173 enum osd_err_priority acumulated_osd_err = 0;
174 int acumulated_lin_err = 0;
175 int i;
176
177 for (i = 0; i < ios->numdevs; i++) {
178 struct osd_sense_info osi;
179 int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi);
180
181 if (likely(!ret))
182 continue;
183
184 if (unlikely(ret == -EFAULT)) {
185 EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__);
186 /*FIXME: All the pages in this device range should:
187 * clear_highpage(page);
188 */
189 }
190
191 if (osi.osd_err_pri >= acumulated_osd_err) {
192 acumulated_osd_err = osi.osd_err_pri;
193 acumulated_lin_err = ret;
194 }
195 }
196
197 /* TODO: raid specific residual calculations */
198 if (resid) {
199 if (likely(!acumulated_lin_err))
200 *resid = 0;
201 else
202 *resid = ios->length;
203 }
204
205 return acumulated_lin_err;
206}
207
208int exofs_sbi_create(struct exofs_io_state *ios)
209{
210 int i, ret;
211
212 for (i = 0; i < ios->sbi->s_numdevs; i++) {
213 struct osd_request *or;
214
215 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
216 if (unlikely(!or)) {
217 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
218 ret = -ENOMEM;
219 goto out;
220 }
221 ios->per_dev[i].or = or;
222 ios->numdevs++;
223
224 osd_req_create_object(or, &ios->obj);
225 }
226 ret = exofs_io_execute(ios);
227
228out:
229 return ret;
230}
231
232int exofs_sbi_remove(struct exofs_io_state *ios)
233{
234 int i, ret;
235
236 for (i = 0; i < ios->sbi->s_numdevs; i++) {
237 struct osd_request *or;
238
239 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
240 if (unlikely(!or)) {
241 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
242 ret = -ENOMEM;
243 goto out;
244 }
245 ios->per_dev[i].or = or;
246 ios->numdevs++;
247
248 osd_req_remove_object(or, &ios->obj);
249 }
250 ret = exofs_io_execute(ios);
251
252out:
253 return ret;
254}
255
256int exofs_sbi_write(struct exofs_io_state *ios)
257{
258 int i, ret;
259
260 for (i = 0; i < ios->sbi->s_numdevs; i++) {
261 struct osd_request *or;
262
263 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
264 if (unlikely(!or)) {
265 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
266 ret = -ENOMEM;
267 goto out;
268 }
269 ios->per_dev[i].or = or;
270 ios->numdevs++;
271
272 if (ios->bio) {
273 struct bio *bio;
274
275 if (i != 0) {
276 bio = bio_kmalloc(GFP_KERNEL,
277 ios->bio->bi_max_vecs);
278 if (unlikely(!bio)) {
279 ret = -ENOMEM;
280 goto out;
281 }
282
283 __bio_clone(bio, ios->bio);
284 bio->bi_bdev = NULL;
285 bio->bi_next = NULL;
286 ios->per_dev[i].bio = bio;
287 } else {
288 bio = ios->bio;
289 }
290
291 osd_req_write(or, &ios->obj, ios->offset, bio,
292 ios->length);
293/* EXOFS_DBGMSG("write sync=%d\n", sync);*/
294 } else if (ios->kern_buff) {
295 osd_req_write_kern(or, &ios->obj, ios->offset,
296 ios->kern_buff, ios->length);
297/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/
298 } else {
299 osd_req_set_attributes(or, &ios->obj);
300/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/
301 }
302
303 if (ios->out_attr)
304 osd_req_add_set_attr_list(or, ios->out_attr,
305 ios->out_attr_len);
306
307 if (ios->in_attr)
308 osd_req_add_get_attr_list(or, ios->in_attr,
309 ios->in_attr_len);
310 }
311 ret = exofs_io_execute(ios);
312
313out:
314 return ret;
315}
316
317int exofs_sbi_read(struct exofs_io_state *ios)
318{
319 int i, ret;
320
321 for (i = 0; i < 1; i++) {
322 struct osd_request *or;
323 unsigned first_dev = (unsigned)ios->obj.id;
324
325 first_dev %= ios->sbi->s_numdevs;
326 or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL);
327 if (unlikely(!or)) {
328 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
329 ret = -ENOMEM;
330 goto out;
331 }
332 ios->per_dev[i].or = or;
333 ios->numdevs++;
334
335 if (ios->bio) {
336 osd_req_read(or, &ios->obj, ios->offset, ios->bio,
337 ios->length);
338/* EXOFS_DBGMSG("read sync=%d\n", sync);*/
339 } else if (ios->kern_buff) {
340 osd_req_read_kern(or, &ios->obj, ios->offset,
341 ios->kern_buff, ios->length);
342/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
343 } else {
344 osd_req_get_attributes(or, &ios->obj);
345/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
346 }
347
348 if (ios->out_attr)
349 osd_req_add_set_attr_list(or, ios->out_attr,
350 ios->out_attr_len);
351
352 if (ios->in_attr)
353 osd_req_add_get_attr_list(or, ios->in_attr,
354 ios->in_attr_len);
355 }
356 ret = exofs_io_execute(ios);
357
358out:
359 return ret;
360}
361
362int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
363{
364 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
365 void *iter = NULL;
366 int nelem;
367
368 do {
369 nelem = 1;
370 osd_req_decode_get_attr_list(ios->per_dev[0].or,
371 &cur_attr, &nelem, &iter);
372 if ((cur_attr.attr_page == attr->attr_page) &&
373 (cur_attr.attr_id == attr->attr_id)) {
374 attr->len = cur_attr.len;
375 attr->val_ptr = cur_attr.val_ptr;
376 return 0;
377 }
378 } while (iter);
379
380 return -EIO;
381}
382
383int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
384{
385 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
386 struct exofs_io_state *ios;
387 struct osd_attr attr;
388 __be64 newsize;
389 int i, ret;
390
391 if (exofs_get_io_state(sbi, &ios))
392 return -ENOMEM;
393
394 ios->obj.id = exofs_oi_objno(oi);
395 ios->cred = oi->i_cred;
396
397 newsize = cpu_to_be64(size);
398 attr = g_attr_logical_length;
399 attr.val_ptr = &newsize;
400
401 for (i = 0; i < sbi->s_numdevs; i++) {
402 struct osd_request *or;
403
404 or = osd_start_request(sbi->s_ods[i], GFP_KERNEL);
405 if (unlikely(!or)) {
406 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
407 ret = -ENOMEM;
408 goto out;
409 }
410 ios->per_dev[i].or = or;
411 ios->numdevs++;
412
413 osd_req_set_attributes(or, &ios->obj);
414 osd_req_add_set_attr_list(or, &attr, 1);
415 }
416 ret = exofs_io_execute(ios);
417
418out:
419 exofs_put_io_state(ios);
420 return ret;
421}
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
deleted file mode 100644
index 4372542df284..000000000000
--- a/fs/exofs/osd.c
+++ /dev/null
@@ -1,125 +0,0 @@
1/*
2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
6 *
7 * This file is part of exofs.
8 *
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
14 *
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25#include <scsi/scsi_device.h>
26#include <scsi/osd_sense.h>
27
28#include "exofs.h"
29
30int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid)
31{
32 struct osd_sense_info osi;
33 int ret = osd_req_decode_sense(or, &osi);
34
35 if (ret) { /* translate to Linux codes */
36 if (osi.additional_code == scsi_invalid_field_in_cdb) {
37 if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE)
38 ret = -EFAULT;
39 if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID)
40 ret = -ENOENT;
41 else
42 ret = -EINVAL;
43 } else if (osi.additional_code == osd_quota_error)
44 ret = -ENOSPC;
45 else
46 ret = -EIO;
47 }
48
49 /* FIXME: should be include in osd_sense_info */
50 if (in_resid)
51 *in_resid = or->in.req ? or->in.req->resid_len : 0;
52
53 if (out_resid)
54 *out_resid = or->out.req ? or->out.req->resid_len : 0;
55
56 return ret;
57}
58
59void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
60{
61 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
62}
63
64/*
65 * Perform a synchronous OSD operation.
66 */
67int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
68{
69 int ret;
70
71 or->timeout = timeout;
72 ret = osd_finalize_request(or, 0, credential, NULL);
73 if (ret) {
74 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
75 return ret;
76 }
77
78 ret = osd_execute_request(or);
79
80 if (ret)
81 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
82 /* osd_req_decode_sense(or, ret); */
83 return ret;
84}
85
86/*
87 * Perform an asynchronous OSD operation.
88 */
89int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
90 void *caller_context, u8 *cred)
91{
92 int ret;
93
94 ret = osd_finalize_request(or, 0, cred, NULL);
95 if (ret) {
96 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
97 return ret;
98 }
99
100 ret = osd_execute_request_async(or, async_done, caller_context);
101
102 if (ret)
103 EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret);
104 return ret;
105}
106
107int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
108{
109 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
110 void *iter = NULL;
111 int nelem;
112
113 do {
114 nelem = 1;
115 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
116 if ((cur_attr.attr_page == attr->attr_page) &&
117 (cur_attr.attr_id == attr->attr_id)) {
118 attr->len = cur_attr.len;
119 attr->val_ptr = cur_attr.val_ptr;
120 return 0;
121 }
122 } while (iter);
123
124 return -EIO;
125}
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
new file mode 100644
index 000000000000..423033addd1f
--- /dev/null
+++ b/fs/exofs/pnfs.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (C) 2008, 2009
3 * Boaz Harrosh <bharrosh@panasas.com>
4 *
5 * This file is part of exofs.
6 *
7 * exofs is free software; you can redistribute it and/or modify it under the
8 * terms of the GNU General Public License version 2 as published by the Free
9 * Software Foundation.
10 *
11 */
12
13/* FIXME: Remove this file once pnfs hits mainline */
14
15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__
17
18#if defined(CONFIG_PNFS)
19
20
21/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
22#include "../nfs/objlayout/pnfs_osd_xdr.h"
23
24#else /* defined(CONFIG_PNFS) */
25
26enum pnfs_iomode {
27 IOMODE_READ = 1,
28 IOMODE_RW = 2,
29 IOMODE_ANY = 3,
30};
31
32/* Layout Structure */
33enum pnfs_osd_raid_algorithm4 {
34 PNFS_OSD_RAID_0 = 1,
35 PNFS_OSD_RAID_4 = 2,
36 PNFS_OSD_RAID_5 = 3,
37 PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
38};
39
40struct pnfs_osd_data_map {
41 u32 odm_num_comps;
42 u64 odm_stripe_unit;
43 u32 odm_group_width;
44 u32 odm_group_depth;
45 u32 odm_mirror_cnt;
46 u32 odm_raid_algorithm;
47};
48
49#endif /* else defined(CONFIG_PNFS) */
50
51#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9f500dec3b59..a1d1e77b12eb 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -203,49 +203,45 @@ int exofs_sync_fs(struct super_block *sb, int wait)
203{ 203{
204 struct exofs_sb_info *sbi; 204 struct exofs_sb_info *sbi;
205 struct exofs_fscb *fscb; 205 struct exofs_fscb *fscb;
206 struct osd_request *or; 206 struct exofs_io_state *ios;
207 struct osd_obj_id obj;
208 int ret = -ENOMEM; 207 int ret = -ENOMEM;
209 208
210 fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
211 if (!fscb) {
212 EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
213 return -ENOMEM;
214 }
215
216 lock_super(sb); 209 lock_super(sb);
217 sbi = sb->s_fs_info; 210 sbi = sb->s_fs_info;
211 fscb = &sbi->s_fscb;
212
213 ret = exofs_get_io_state(sbi, &ios);
214 if (ret)
215 goto out;
216
217 /* Note: We only write the changing part of the fscb. .i.e upto the
218 * the fscb->s_dev_table_oid member. There is no read-modify-write
219 * here.
220 */
221 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
222 memset(fscb, 0, ios->length);
218 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 223 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
219 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); 224 fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
220 fscb->s_magic = cpu_to_le16(sb->s_magic); 225 fscb->s_magic = cpu_to_le16(sb->s_magic);
221 fscb->s_newfs = 0; 226 fscb->s_newfs = 0;
227 fscb->s_version = EXOFS_FSCB_VER;
222 228
223 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 229 ios->obj.id = EXOFS_SUPER_ID;
224 if (unlikely(!or)) { 230 ios->offset = 0;
225 EXOFS_ERR("exofs_write_super: osd_start_request failed.\n"); 231 ios->kern_buff = fscb;
226 goto out; 232 ios->cred = sbi->s_cred;
227 }
228 233
229 obj.partition = sbi->s_pid; 234 ret = exofs_sbi_write(ios);
230 obj.id = EXOFS_SUPER_ID;
231 ret = osd_req_write_kern(or, &obj, 0, fscb, sizeof(*fscb));
232 if (unlikely(ret)) { 235 if (unlikely(ret)) {
233 EXOFS_ERR("exofs_write_super: osd_req_write_kern failed.\n"); 236 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
234 goto out;
235 }
236
237 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
238 if (unlikely(ret)) {
239 EXOFS_ERR("exofs_write_super: exofs_sync_op failed.\n");
240 goto out; 237 goto out;
241 } 238 }
242 sb->s_dirt = 0; 239 sb->s_dirt = 0;
243 240
244out: 241out:
245 if (or) 242 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
246 osd_end_request(or); 243 exofs_put_io_state(ios);
247 unlock_super(sb); 244 unlock_super(sb);
248 kfree(fscb);
249 return ret; 245 return ret;
250} 246}
251 247
@@ -257,6 +253,29 @@ static void exofs_write_super(struct super_block *sb)
257 sb->s_dirt = 0; 253 sb->s_dirt = 0;
258} 254}
259 255
256static void _exofs_print_device(const char *msg, const char *dev_path,
257 struct osd_dev *od, u64 pid)
258{
259 const struct osd_dev_info *odi = osduld_device_info(od);
260
261 printk(KERN_NOTICE "exofs: %s %s osd_name-%s pid-0x%llx\n",
262 msg, dev_path ?: "", odi->osdname, _LLU(pid));
263}
264
265void exofs_free_sbi(struct exofs_sb_info *sbi)
266{
267 while (sbi->s_numdevs) {
268 int i = --sbi->s_numdevs;
269 struct osd_dev *od = sbi->s_ods[i];
270
271 if (od) {
272 sbi->s_ods[i] = NULL;
273 osduld_put_device(od);
274 }
275 }
276 kfree(sbi);
277}
278
260/* 279/*
261 * This function is called when the vfs is freeing the superblock. We just 280 * This function is called when the vfs is freeing the superblock. We just
262 * need to free our own part. 281 * need to free our own part.
@@ -279,11 +298,182 @@ static void exofs_put_super(struct super_block *sb)
279 msecs_to_jiffies(100)); 298 msecs_to_jiffies(100));
280 } 299 }
281 300
282 osduld_put_device(sbi->s_dev); 301 _exofs_print_device("Unmounting", NULL, sbi->s_ods[0], sbi->s_pid);
283 kfree(sb->s_fs_info); 302
303 exofs_free_sbi(sbi);
284 sb->s_fs_info = NULL; 304 sb->s_fs_info = NULL;
285} 305}
286 306
307static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
308 struct exofs_device_table *dt)
309{
310 sbi->data_map.odm_num_comps =
311 le32_to_cpu(dt->dt_data_map.cb_num_comps);
312 sbi->data_map.odm_stripe_unit =
313 le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
314 sbi->data_map.odm_group_width =
315 le32_to_cpu(dt->dt_data_map.cb_group_width);
316 sbi->data_map.odm_group_depth =
317 le32_to_cpu(dt->dt_data_map.cb_group_depth);
318 sbi->data_map.odm_mirror_cnt =
319 le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
320 sbi->data_map.odm_raid_algorithm =
321 le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
322
323/* FIXME: Hard coded mirror only for now. if not so do not mount */
324 if ((sbi->data_map.odm_num_comps != numdevs) ||
325 (sbi->data_map.odm_stripe_unit != EXOFS_BLKSIZE) ||
326 (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) ||
327 (sbi->data_map.odm_mirror_cnt != (numdevs - 1)))
328 return -EINVAL;
329 else
330 return 0;
331}
332
333/* @odi is valid only as long as @fscb_dev is valid */
334static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
335 struct osd_dev_info *odi)
336{
337 odi->systemid_len = le32_to_cpu(dt_dev->systemid_len);
338 memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len);
339
340 odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);
341 odi->osdname = dt_dev->osdname;
342
343 /* FIXME support long names. Will need a _put function */
344 if (dt_dev->long_name_offset)
345 return -EINVAL;
346
347 /* Make sure osdname is printable!
348 * mkexofs should give us space for a null-terminator else the
349 * device-table is invalid.
350 */
351 if (unlikely(odi->osdname_len >= sizeof(dt_dev->osdname)))
352 odi->osdname_len = sizeof(dt_dev->osdname) - 1;
353 dt_dev->osdname[odi->osdname_len] = 0;
354
355 /* If it's all zeros something is bad we read past end-of-obj */
356 return !(odi->systemid_len || odi->osdname_len);
357}
358
359static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
360 unsigned table_count)
361{
362 struct exofs_sb_info *sbi = *psbi;
363 struct osd_dev *fscb_od;
364 struct osd_obj_id obj = {.partition = sbi->s_pid,
365 .id = EXOFS_DEVTABLE_ID};
366 struct exofs_device_table *dt;
367 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
368 sizeof(*dt);
369 unsigned numdevs, i;
370 int ret;
371
372 dt = kmalloc(table_bytes, GFP_KERNEL);
373 if (unlikely(!dt)) {
374 EXOFS_ERR("ERROR: allocating %x bytes for device table\n",
375 table_bytes);
376 return -ENOMEM;
377 }
378
379 fscb_od = sbi->s_ods[0];
380 sbi->s_ods[0] = NULL;
381 sbi->s_numdevs = 0;
382 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
383 if (unlikely(ret)) {
384 EXOFS_ERR("ERROR: reading device table\n");
385 goto out;
386 }
387
388 numdevs = le64_to_cpu(dt->dt_num_devices);
389 if (unlikely(!numdevs)) {
390 ret = -EINVAL;
391 goto out;
392 }
393 WARN_ON(table_count != numdevs);
394
395 ret = _read_and_match_data_map(sbi, numdevs, dt);
396 if (unlikely(ret))
397 goto out;
398
399 if (likely(numdevs > 1)) {
400 unsigned size = numdevs * sizeof(sbi->s_ods[0]);
401
402 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
403 if (unlikely(!sbi)) {
404 ret = -ENOMEM;
405 goto out;
406 }
407 memset(&sbi->s_ods[1], 0, size - sizeof(sbi->s_ods[0]));
408 *psbi = sbi;
409 }
410
411 for (i = 0; i < numdevs; i++) {
412 struct exofs_fscb fscb;
413 struct osd_dev_info odi;
414 struct osd_dev *od;
415
416 if (exofs_devs_2_odi(&dt->dt_dev_table[i], &odi)) {
417 EXOFS_ERR("ERROR: Read all-zeros device entry\n");
418 ret = -EINVAL;
419 goto out;
420 }
421
422 printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
423 i, odi.osdname);
424
425 /* On all devices the device table is identical. The user can
426 * specify any one of the participating devices on the command
427 * line. We always keep them in device-table order.
428 */
429 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
430 sbi->s_ods[i] = fscb_od;
431 ++sbi->s_numdevs;
432 fscb_od = NULL;
433 continue;
434 }
435
436 od = osduld_info_lookup(&odi);
437 if (unlikely(IS_ERR(od))) {
438 ret = PTR_ERR(od);
439 EXOFS_ERR("ERROR: device requested is not found "
440 "osd_name-%s =>%d\n", odi.osdname, ret);
441 goto out;
442 }
443
444 sbi->s_ods[i] = od;
445 ++sbi->s_numdevs;
446
447 /* Read the fscb of the other devices to make sure the FS
448 * partition is there.
449 */
450 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
451 sizeof(fscb));
452 if (unlikely(ret)) {
453 EXOFS_ERR("ERROR: Malformed participating device "
454 "error reading fscb osd_name-%s\n",
455 odi.osdname);
456 goto out;
457 }
458
459 /* TODO: verify other information is correct and FS-uuid
460 * matches. Benny what did you say about device table
461 * generation and old devices?
462 */
463 }
464
465out:
466 kfree(dt);
467 if (unlikely(!ret && fscb_od)) {
468 EXOFS_ERR(
469 "ERROR: Bad device-table container device not present\n");
470 osduld_put_device(fscb_od);
471 ret = -EINVAL;
472 }
473
474 return ret;
475}
476
287/* 477/*
288 * Read the superblock from the OSD and fill in the fields 478 * Read the superblock from the OSD and fill in the fields
289 */ 479 */
@@ -292,24 +482,25 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
292 struct inode *root; 482 struct inode *root;
293 struct exofs_mountopt *opts = data; 483 struct exofs_mountopt *opts = data;
294 struct exofs_sb_info *sbi; /*extended info */ 484 struct exofs_sb_info *sbi; /*extended info */
485 struct osd_dev *od; /* Master device */
295 struct exofs_fscb fscb; /*on-disk superblock info */ 486 struct exofs_fscb fscb; /*on-disk superblock info */
296 struct osd_request *or = NULL;
297 struct osd_obj_id obj; 487 struct osd_obj_id obj;
488 unsigned table_count;
298 int ret; 489 int ret;
299 490
300 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 491 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
301 if (!sbi) 492 if (!sbi)
302 return -ENOMEM; 493 return -ENOMEM;
303 sb->s_fs_info = sbi;
304 494
305 /* use mount options to fill superblock */ 495 /* use mount options to fill superblock */
306 sbi->s_dev = osduld_path_lookup(opts->dev_name); 496 od = osduld_path_lookup(opts->dev_name);
307 if (IS_ERR(sbi->s_dev)) { 497 if (IS_ERR(od)) {
308 ret = PTR_ERR(sbi->s_dev); 498 ret = PTR_ERR(od);
309 sbi->s_dev = NULL;
310 goto free_sbi; 499 goto free_sbi;
311 } 500 }
312 501
502 sbi->s_ods[0] = od;
503 sbi->s_numdevs = 1;
313 sbi->s_pid = opts->pid; 504 sbi->s_pid = opts->pid;
314 sbi->s_timeout = opts->timeout; 505 sbi->s_timeout = opts->timeout;
315 506
@@ -323,35 +514,13 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
323 sb->s_bdev = NULL; 514 sb->s_bdev = NULL;
324 sb->s_dev = 0; 515 sb->s_dev = 0;
325 516
326 /* read data from on-disk superblock object */
327 obj.partition = sbi->s_pid; 517 obj.partition = sbi->s_pid;
328 obj.id = EXOFS_SUPER_ID; 518 obj.id = EXOFS_SUPER_ID;
329 exofs_make_credential(sbi->s_cred, &obj); 519 exofs_make_credential(sbi->s_cred, &obj);
330 520
331 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 521 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
332 if (unlikely(!or)) { 522 if (unlikely(ret))
333 if (!silent)
334 EXOFS_ERR(
335 "exofs_fill_super: osd_start_request failed.\n");
336 ret = -ENOMEM;
337 goto free_sbi;
338 }
339 ret = osd_req_read_kern(or, &obj, 0, &fscb, sizeof(fscb));
340 if (unlikely(ret)) {
341 if (!silent)
342 EXOFS_ERR(
343 "exofs_fill_super: osd_req_read_kern failed.\n");
344 ret = -ENOMEM;
345 goto free_sbi;
346 }
347
348 ret = exofs_sync_op(or, sbi->s_timeout, sbi->s_cred);
349 if (unlikely(ret)) {
350 if (!silent)
351 EXOFS_ERR("exofs_fill_super: exofs_sync_op failed.\n");
352 ret = -EIO;
353 goto free_sbi; 523 goto free_sbi;
354 }
355 524
356 sb->s_magic = le16_to_cpu(fscb.s_magic); 525 sb->s_magic = le16_to_cpu(fscb.s_magic);
357 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 526 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
@@ -364,12 +533,26 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
364 ret = -EINVAL; 533 ret = -EINVAL;
365 goto free_sbi; 534 goto free_sbi;
366 } 535 }
536 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
537 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
538 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
539 ret = -EINVAL;
540 goto free_sbi;
541 }
367 542
368 /* start generation numbers from a random point */ 543 /* start generation numbers from a random point */
369 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 544 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
370 spin_lock_init(&sbi->s_next_gen_lock); 545 spin_lock_init(&sbi->s_next_gen_lock);
371 546
547 table_count = le64_to_cpu(fscb.s_dev_table_count);
548 if (table_count) {
549 ret = exofs_read_lookup_dev_table(&sbi, table_count);
550 if (unlikely(ret))
551 goto free_sbi;
552 }
553
372 /* set up operation vectors */ 554 /* set up operation vectors */
555 sb->s_fs_info = sbi;
373 sb->s_op = &exofs_sops; 556 sb->s_op = &exofs_sops;
374 sb->s_export_op = &exofs_export_ops; 557 sb->s_export_op = &exofs_export_ops;
375 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF); 558 root = exofs_iget(sb, EXOFS_ROOT_ID - EXOFS_OBJ_OFF);
@@ -395,16 +578,15 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
395 goto free_sbi; 578 goto free_sbi;
396 } 579 }
397 580
398 ret = 0; 581 _exofs_print_device("Mounting", opts->dev_name, sbi->s_ods[0],
399out: 582 sbi->s_pid);
400 if (or) 583 return 0;
401 osd_end_request(or);
402 return ret;
403 584
404free_sbi: 585free_sbi:
405 osduld_put_device(sbi->s_dev); /* NULL safe */ 586 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
406 kfree(sbi); 587 opts->dev_name, sbi->s_pid, ret);
407 goto out; 588 exofs_free_sbi(sbi);
589 return ret;
408} 590}
409 591
410/* 592/*
@@ -433,7 +615,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
433{ 615{
434 struct super_block *sb = dentry->d_sb; 616 struct super_block *sb = dentry->d_sb;
435 struct exofs_sb_info *sbi = sb->s_fs_info; 617 struct exofs_sb_info *sbi = sb->s_fs_info;
436 struct osd_obj_id obj = {sbi->s_pid, 0}; 618 struct exofs_io_state *ios;
437 struct osd_attr attrs[] = { 619 struct osd_attr attrs[] = {
438 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 620 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
439 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), 621 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -442,32 +624,33 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
442 }; 624 };
443 uint64_t capacity = ULLONG_MAX; 625 uint64_t capacity = ULLONG_MAX;
444 uint64_t used = ULLONG_MAX; 626 uint64_t used = ULLONG_MAX;
445 struct osd_request *or;
446 uint8_t cred_a[OSD_CAP_LEN]; 627 uint8_t cred_a[OSD_CAP_LEN];
447 int ret; 628 int ret;
448 629
449 /* get used/capacity attributes */ 630 ret = exofs_get_io_state(sbi, &ios);
450 exofs_make_credential(cred_a, &obj); 631 if (ret) {
451 632 EXOFS_DBGMSG("exofs_get_io_state failed.\n");
452 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 633 return ret;
453 if (unlikely(!or)) {
454 EXOFS_DBGMSG("exofs_statfs: osd_start_request failed.\n");
455 return -ENOMEM;
456 } 634 }
457 635
458 osd_req_get_attributes(or, &obj); 636 exofs_make_credential(cred_a, &ios->obj);
459 osd_req_add_get_attr_list(or, attrs, ARRAY_SIZE(attrs)); 637 ios->cred = sbi->s_cred;
460 ret = exofs_sync_op(or, sbi->s_timeout, cred_a); 638 ios->in_attr = attrs;
639 ios->in_attr_len = ARRAY_SIZE(attrs);
640
641 ret = exofs_sbi_read(ios);
461 if (unlikely(ret)) 642 if (unlikely(ret))
462 goto out; 643 goto out;
463 644
464 ret = extract_attr_from_req(or, &attrs[0]); 645 ret = extract_attr_from_ios(ios, &attrs[0]);
465 if (likely(!ret)) 646 if (likely(!ret)) {
466 capacity = get_unaligned_be64(attrs[0].val_ptr); 647 capacity = get_unaligned_be64(attrs[0].val_ptr);
467 else 648 if (unlikely(!capacity))
649 capacity = ULLONG_MAX;
650 } else
468 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n"); 651 EXOFS_DBGMSG("exofs_statfs: get capacity failed.\n");
469 652
470 ret = extract_attr_from_req(or, &attrs[1]); 653 ret = extract_attr_from_ios(ios, &attrs[1]);
471 if (likely(!ret)) 654 if (likely(!ret))
472 used = get_unaligned_be64(attrs[1].val_ptr); 655 used = get_unaligned_be64(attrs[1].val_ptr);
473 else 656 else
@@ -476,15 +659,15 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
476 /* fill in the stats buffer */ 659 /* fill in the stats buffer */
477 buf->f_type = EXOFS_SUPER_MAGIC; 660 buf->f_type = EXOFS_SUPER_MAGIC;
478 buf->f_bsize = EXOFS_BLKSIZE; 661 buf->f_bsize = EXOFS_BLKSIZE;
479 buf->f_blocks = (capacity >> EXOFS_BLKSHIFT); 662 buf->f_blocks = capacity >> 9;
480 buf->f_bfree = ((capacity - used) >> EXOFS_BLKSHIFT); 663 buf->f_bfree = (capacity - used) >> 9;
481 buf->f_bavail = buf->f_bfree; 664 buf->f_bavail = buf->f_bfree;
482 buf->f_files = sbi->s_numfiles; 665 buf->f_files = sbi->s_numfiles;
483 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles; 666 buf->f_ffree = EXOFS_MAX_ID - sbi->s_numfiles;
484 buf->f_namelen = EXOFS_NAME_LEN; 667 buf->f_namelen = EXOFS_NAME_LEN;
485 668
486out: 669out:
487 osd_end_request(or); 670 exofs_put_io_state(ios);
488 return ret; 671 return ret;
489} 672}
490 673
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 6cde970b0a1a..fc2bd05d3559 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -353,8 +353,8 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
353 * ext2_find_entry() 353 * ext2_find_entry()
354 * 354 *
355 * finds an entry in the specified directory with the wanted name. It 355 * finds an entry in the specified directory with the wanted name. It
356 * returns the page in which the entry was found, and the entry itself 356 * returns the page in which the entry was found (as a parameter - res_page),
357 * (as a parameter - res_dir). Page is returned mapped and unlocked. 357 * and the entry itself. Page is returned mapped and unlocked.
358 * Entry is guaranteed to be valid. 358 * Entry is guaranteed to be valid.
359 */ 359 */
360struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, 360struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir,
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9a8a8e27a063..da318b0fa637 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -142,7 +142,7 @@ struct dentry *ext2_get_parent(struct dentry *child);
142/* super.c */ 142/* super.c */
143extern void ext2_error (struct super_block *, const char *, const char *, ...) 143extern void ext2_error (struct super_block *, const char *, const char *, ...)
144 __attribute__ ((format (printf, 3, 4))); 144 __attribute__ ((format (printf, 3, 4)));
145extern void ext2_warning (struct super_block *, const char *, const char *, ...) 145extern void ext2_msg(struct super_block *, const char *, const char *, ...)
146 __attribute__ ((format (printf, 3, 4))); 146 __attribute__ ((format (printf, 3, 4)));
147extern void ext2_update_dynamic_rev (struct super_block *sb); 147extern void ext2_update_dynamic_rev (struct super_block *sb);
148extern void ext2_write_super (struct super_block *); 148extern void ext2_write_super (struct super_block *);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index ade634076d0a..71b032c65a02 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -137,7 +137,8 @@ static int ext2_block_to_path(struct inode *inode,
137 int final = 0; 137 int final = 0;
138 138
139 if (i_block < 0) { 139 if (i_block < 0) {
140 ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0"); 140 ext2_msg(inode->i_sb, KERN_WARNING,
141 "warning: %s: block < 0", __func__);
141 } else if (i_block < direct_blocks) { 142 } else if (i_block < direct_blocks) {
142 offsets[n++] = i_block; 143 offsets[n++] = i_block;
143 final = direct_blocks; 144 final = direct_blocks;
@@ -157,7 +158,8 @@ static int ext2_block_to_path(struct inode *inode,
157 offsets[n++] = i_block & (ptrs - 1); 158 offsets[n++] = i_block & (ptrs - 1);
158 final = ptrs; 159 final = ptrs;
159 } else { 160 } else {
160 ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); 161 ext2_msg(inode->i_sb, KERN_WARNING,
162 "warning: %s: block is too big", __func__);
161 } 163 }
162 if (boundary) 164 if (boundary)
163 *boundary = final - 1 - (i_block & (ptrs - 1)); 165 *boundary = final - 1 - (i_block & (ptrs - 1));
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 1a9ffee47d56..1388802b7803 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -58,27 +58,27 @@ void ext2_error (struct super_block * sb, const char * function,
58 } 58 }
59 59
60 va_start(args, fmt); 60 va_start(args, fmt);
61 printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function); 61 printk(KERN_CRIT "EXT2-fs (%s): error: %s: ", sb->s_id, function);
62 vprintk(fmt, args); 62 vprintk(fmt, args);
63 printk("\n"); 63 printk("\n");
64 va_end(args); 64 va_end(args);
65 65
66 if (test_opt(sb, ERRORS_PANIC)) 66 if (test_opt(sb, ERRORS_PANIC))
67 panic("EXT2-fs panic from previous error\n"); 67 panic("EXT2-fs: panic from previous error\n");
68 if (test_opt(sb, ERRORS_RO)) { 68 if (test_opt(sb, ERRORS_RO)) {
69 printk("Remounting filesystem read-only\n"); 69 ext2_msg(sb, KERN_CRIT,
70 "error: remounting filesystem read-only");
70 sb->s_flags |= MS_RDONLY; 71 sb->s_flags |= MS_RDONLY;
71 } 72 }
72} 73}
73 74
74void ext2_warning (struct super_block * sb, const char * function, 75void ext2_msg(struct super_block *sb, const char *prefix,
75 const char * fmt, ...) 76 const char *fmt, ...)
76{ 77{
77 va_list args; 78 va_list args;
78 79
79 va_start(args, fmt); 80 va_start(args, fmt);
80 printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ", 81 printk("%sEXT2-fs (%s): ", prefix, sb->s_id);
81 sb->s_id, function);
82 vprintk(fmt, args); 82 vprintk(fmt, args);
83 printk("\n"); 83 printk("\n");
84 va_end(args); 84 va_end(args);
@@ -91,9 +91,9 @@ void ext2_update_dynamic_rev(struct super_block *sb)
91 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) 91 if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV)
92 return; 92 return;
93 93
94 ext2_warning(sb, __func__, 94 ext2_msg(sb, KERN_WARNING,
95 "updating to rev %d because of new feature flag, " 95 "warning: updating to rev %d because of "
96 "running e2fsck is recommended", 96 "new feature flag, running e2fsck is recommended",
97 EXT2_DYNAMIC_REV); 97 EXT2_DYNAMIC_REV);
98 98
99 es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); 99 es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO);
@@ -419,10 +419,10 @@ static const match_table_t tokens = {
419 {Opt_err, NULL} 419 {Opt_err, NULL}
420}; 420};
421 421
422static int parse_options (char * options, 422static int parse_options(char *options, struct super_block *sb)
423 struct ext2_sb_info *sbi)
424{ 423{
425 char * p; 424 char *p;
425 struct ext2_sb_info *sbi = EXT2_SB(sb);
426 substring_t args[MAX_OPT_ARGS]; 426 substring_t args[MAX_OPT_ARGS];
427 int option; 427 int option;
428 428
@@ -505,7 +505,8 @@ static int parse_options (char * options,
505#else 505#else
506 case Opt_user_xattr: 506 case Opt_user_xattr:
507 case Opt_nouser_xattr: 507 case Opt_nouser_xattr:
508 printk("EXT2 (no)user_xattr options not supported\n"); 508 ext2_msg(sb, KERN_INFO, "(no)user_xattr options"
509 "not supported");
509 break; 510 break;
510#endif 511#endif
511#ifdef CONFIG_EXT2_FS_POSIX_ACL 512#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -518,14 +519,15 @@ static int parse_options (char * options,
518#else 519#else
519 case Opt_acl: 520 case Opt_acl:
520 case Opt_noacl: 521 case Opt_noacl:
521 printk("EXT2 (no)acl options not supported\n"); 522 ext2_msg(sb, KERN_INFO,
523 "(no)acl options not supported");
522 break; 524 break;
523#endif 525#endif
524 case Opt_xip: 526 case Opt_xip:
525#ifdef CONFIG_EXT2_FS_XIP 527#ifdef CONFIG_EXT2_FS_XIP
526 set_opt (sbi->s_mount_opt, XIP); 528 set_opt (sbi->s_mount_opt, XIP);
527#else 529#else
528 printk("EXT2 xip option not supported\n"); 530 ext2_msg(sb, KERN_INFO, "xip option not supported");
529#endif 531#endif
530 break; 532 break;
531 533
@@ -542,19 +544,18 @@ static int parse_options (char * options,
542 case Opt_quota: 544 case Opt_quota:
543 case Opt_usrquota: 545 case Opt_usrquota:
544 case Opt_grpquota: 546 case Opt_grpquota:
545 printk(KERN_ERR 547 ext2_msg(sb, KERN_INFO,
546 "EXT2-fs: quota operations not supported.\n"); 548 "quota operations not supported");
547
548 break; 549 break;
549#endif 550#endif
550 551
551 case Opt_reservation: 552 case Opt_reservation:
552 set_opt(sbi->s_mount_opt, RESERVATION); 553 set_opt(sbi->s_mount_opt, RESERVATION);
553 printk("reservations ON\n"); 554 ext2_msg(sb, KERN_INFO, "reservations ON");
554 break; 555 break;
555 case Opt_noreservation: 556 case Opt_noreservation:
556 clear_opt(sbi->s_mount_opt, RESERVATION); 557 clear_opt(sbi->s_mount_opt, RESERVATION);
557 printk("reservations OFF\n"); 558 ext2_msg(sb, KERN_INFO, "reservations OFF");
558 break; 559 break;
559 case Opt_ignore: 560 case Opt_ignore:
560 break; 561 break;
@@ -573,34 +574,40 @@ static int ext2_setup_super (struct super_block * sb,
573 struct ext2_sb_info *sbi = EXT2_SB(sb); 574 struct ext2_sb_info *sbi = EXT2_SB(sb);
574 575
575 if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { 576 if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) {
576 printk ("EXT2-fs warning: revision level too high, " 577 ext2_msg(sb, KERN_ERR,
577 "forcing read-only mode\n"); 578 "error: revision level too high, "
579 "forcing read-only mode");
578 res = MS_RDONLY; 580 res = MS_RDONLY;
579 } 581 }
580 if (read_only) 582 if (read_only)
581 return res; 583 return res;
582 if (!(sbi->s_mount_state & EXT2_VALID_FS)) 584 if (!(sbi->s_mount_state & EXT2_VALID_FS))
583 printk ("EXT2-fs warning: mounting unchecked fs, " 585 ext2_msg(sb, KERN_WARNING,
584 "running e2fsck is recommended\n"); 586 "warning: mounting unchecked fs, "
587 "running e2fsck is recommended");
585 else if ((sbi->s_mount_state & EXT2_ERROR_FS)) 588 else if ((sbi->s_mount_state & EXT2_ERROR_FS))
586 printk ("EXT2-fs warning: mounting fs with errors, " 589 ext2_msg(sb, KERN_WARNING,
587 "running e2fsck is recommended\n"); 590 "warning: mounting fs with errors, "
591 "running e2fsck is recommended");
588 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 592 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
589 le16_to_cpu(es->s_mnt_count) >= 593 le16_to_cpu(es->s_mnt_count) >=
590 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 594 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
591 printk ("EXT2-fs warning: maximal mount count reached, " 595 ext2_msg(sb, KERN_WARNING,
592 "running e2fsck is recommended\n"); 596 "warning: maximal mount count reached, "
597 "running e2fsck is recommended");
593 else if (le32_to_cpu(es->s_checkinterval) && 598 else if (le32_to_cpu(es->s_checkinterval) &&
594 (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) 599 (le32_to_cpu(es->s_lastcheck) +
595 printk ("EXT2-fs warning: checktime reached, " 600 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
596 "running e2fsck is recommended\n"); 601 ext2_msg(sb, KERN_WARNING,
602 "warning: checktime reached, "
603 "running e2fsck is recommended");
597 if (!le16_to_cpu(es->s_max_mnt_count)) 604 if (!le16_to_cpu(es->s_max_mnt_count))
598 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); 605 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
599 le16_add_cpu(&es->s_mnt_count, 1); 606 le16_add_cpu(&es->s_mnt_count, 1);
600 ext2_write_super(sb); 607 ext2_write_super(sb);
601 if (test_opt (sb, DEBUG)) 608 if (test_opt (sb, DEBUG))
602 printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " 609 ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
603 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 610 "bpg=%lu, ipg=%lu, mo=%04lx]",
604 EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, 611 EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
605 sbi->s_frag_size, 612 sbi->s_frag_size,
606 sbi->s_groups_count, 613 sbi->s_groups_count,
@@ -767,7 +774,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
767 */ 774 */
768 blocksize = sb_min_blocksize(sb, BLOCK_SIZE); 775 blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
769 if (!blocksize) { 776 if (!blocksize) {
770 printk ("EXT2-fs: unable to set blocksize\n"); 777 ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
771 goto failed_sbi; 778 goto failed_sbi;
772 } 779 }
773 780
@@ -783,7 +790,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
783 } 790 }
784 791
785 if (!(bh = sb_bread(sb, logic_sb_block))) { 792 if (!(bh = sb_bread(sb, logic_sb_block))) {
786 printk ("EXT2-fs: unable to read superblock\n"); 793 ext2_msg(sb, KERN_ERR, "error: unable to read superblock");
787 goto failed_sbi; 794 goto failed_sbi;
788 } 795 }
789 /* 796 /*
@@ -826,7 +833,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
826 833
827 set_opt(sbi->s_mount_opt, RESERVATION); 834 set_opt(sbi->s_mount_opt, RESERVATION);
828 835
829 if (!parse_options ((char *) data, sbi)) 836 if (!parse_options((char *) data, sb))
830 goto failed_mount; 837 goto failed_mount;
831 838
832 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 839 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -840,8 +847,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
840 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || 847 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
841 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 848 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
842 EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U))) 849 EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U)))
843 printk("EXT2-fs warning: feature flags set on rev 0 fs, " 850 ext2_msg(sb, KERN_WARNING,
844 "running e2fsck is recommended\n"); 851 "warning: feature flags set on rev 0 fs, "
852 "running e2fsck is recommended");
845 /* 853 /*
846 * Check feature flags regardless of the revision level, since we 854 * Check feature flags regardless of the revision level, since we
847 * previously didn't change the revision level when setting the flags, 855 * previously didn't change the revision level when setting the flags,
@@ -849,16 +857,16 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
849 */ 857 */
850 features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP); 858 features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP);
851 if (features) { 859 if (features) {
852 printk("EXT2-fs: %s: couldn't mount because of " 860 ext2_msg(sb, KERN_ERR, "error: couldn't mount because of "
853 "unsupported optional features (%x).\n", 861 "unsupported optional features (%x)",
854 sb->s_id, le32_to_cpu(features)); 862 le32_to_cpu(features));
855 goto failed_mount; 863 goto failed_mount;
856 } 864 }
857 if (!(sb->s_flags & MS_RDONLY) && 865 if (!(sb->s_flags & MS_RDONLY) &&
858 (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ 866 (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){
859 printk("EXT2-fs: %s: couldn't mount RDWR because of " 867 ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of "
860 "unsupported optional features (%x).\n", 868 "unsupported optional features (%x)",
861 sb->s_id, le32_to_cpu(features)); 869 le32_to_cpu(features));
862 goto failed_mount; 870 goto failed_mount;
863 } 871 }
864 872
@@ -866,7 +874,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
866 874
867 if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { 875 if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) {
868 if (!silent) 876 if (!silent)
869 printk("XIP: Unsupported blocksize\n"); 877 ext2_msg(sb, KERN_ERR,
878 "error: unsupported blocksize for xip");
870 goto failed_mount; 879 goto failed_mount;
871 } 880 }
872 881
@@ -875,7 +884,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
875 brelse(bh); 884 brelse(bh);
876 885
877 if (!sb_set_blocksize(sb, blocksize)) { 886 if (!sb_set_blocksize(sb, blocksize)) {
878 printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n"); 887 ext2_msg(sb, KERN_ERR, "error: blocksize is too small");
879 goto failed_sbi; 888 goto failed_sbi;
880 } 889 }
881 890
@@ -883,14 +892,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
883 offset = (sb_block*BLOCK_SIZE) % blocksize; 892 offset = (sb_block*BLOCK_SIZE) % blocksize;
884 bh = sb_bread(sb, logic_sb_block); 893 bh = sb_bread(sb, logic_sb_block);
885 if(!bh) { 894 if(!bh) {
886 printk("EXT2-fs: Couldn't read superblock on " 895 ext2_msg(sb, KERN_ERR, "error: couldn't read"
887 "2nd try.\n"); 896 "superblock on 2nd try");
888 goto failed_sbi; 897 goto failed_sbi;
889 } 898 }
890 es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); 899 es = (struct ext2_super_block *) (((char *)bh->b_data) + offset);
891 sbi->s_es = es; 900 sbi->s_es = es;
892 if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { 901 if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) {
893 printk ("EXT2-fs: Magic mismatch, very weird !\n"); 902 ext2_msg(sb, KERN_ERR, "error: magic mismatch");
894 goto failed_mount; 903 goto failed_mount;
895 } 904 }
896 } 905 }
@@ -906,7 +915,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
906 if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) || 915 if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) ||
907 !is_power_of_2(sbi->s_inode_size) || 916 !is_power_of_2(sbi->s_inode_size) ||
908 (sbi->s_inode_size > blocksize)) { 917 (sbi->s_inode_size > blocksize)) {
909 printk ("EXT2-fs: unsupported inode size: %d\n", 918 ext2_msg(sb, KERN_ERR,
919 "error: unsupported inode size: %d",
910 sbi->s_inode_size); 920 sbi->s_inode_size);
911 goto failed_mount; 921 goto failed_mount;
912 } 922 }
@@ -943,29 +953,33 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
943 953
944 if (sb->s_blocksize != bh->b_size) { 954 if (sb->s_blocksize != bh->b_size) {
945 if (!silent) 955 if (!silent)
946 printk ("VFS: Unsupported blocksize on dev " 956 ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
947 "%s.\n", sb->s_id);
948 goto failed_mount; 957 goto failed_mount;
949 } 958 }
950 959
951 if (sb->s_blocksize != sbi->s_frag_size) { 960 if (sb->s_blocksize != sbi->s_frag_size) {
952 printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n", 961 ext2_msg(sb, KERN_ERR,
962 "error: fragsize %lu != blocksize %lu"
963 "(not supported yet)",
953 sbi->s_frag_size, sb->s_blocksize); 964 sbi->s_frag_size, sb->s_blocksize);
954 goto failed_mount; 965 goto failed_mount;
955 } 966 }
956 967
957 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { 968 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
958 printk ("EXT2-fs: #blocks per group too big: %lu\n", 969 ext2_msg(sb, KERN_ERR,
970 "error: #blocks per group too big: %lu",
959 sbi->s_blocks_per_group); 971 sbi->s_blocks_per_group);
960 goto failed_mount; 972 goto failed_mount;
961 } 973 }
962 if (sbi->s_frags_per_group > sb->s_blocksize * 8) { 974 if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
963 printk ("EXT2-fs: #fragments per group too big: %lu\n", 975 ext2_msg(sb, KERN_ERR,
976 "error: #fragments per group too big: %lu",
964 sbi->s_frags_per_group); 977 sbi->s_frags_per_group);
965 goto failed_mount; 978 goto failed_mount;
966 } 979 }
967 if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { 980 if (sbi->s_inodes_per_group > sb->s_blocksize * 8) {
968 printk ("EXT2-fs: #inodes per group too big: %lu\n", 981 ext2_msg(sb, KERN_ERR,
982 "error: #inodes per group too big: %lu",
969 sbi->s_inodes_per_group); 983 sbi->s_inodes_per_group);
970 goto failed_mount; 984 goto failed_mount;
971 } 985 }
@@ -979,13 +993,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
979 EXT2_DESC_PER_BLOCK(sb); 993 EXT2_DESC_PER_BLOCK(sb);
980 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); 994 sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL);
981 if (sbi->s_group_desc == NULL) { 995 if (sbi->s_group_desc == NULL) {
982 printk ("EXT2-fs: not enough memory\n"); 996 ext2_msg(sb, KERN_ERR, "error: not enough memory");
983 goto failed_mount; 997 goto failed_mount;
984 } 998 }
985 bgl_lock_init(sbi->s_blockgroup_lock); 999 bgl_lock_init(sbi->s_blockgroup_lock);
986 sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL); 1000 sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL);
987 if (!sbi->s_debts) { 1001 if (!sbi->s_debts) {
988 printk ("EXT2-fs: not enough memory\n"); 1002 ext2_msg(sb, KERN_ERR, "error: not enough memory");
989 goto failed_mount_group_desc; 1003 goto failed_mount_group_desc;
990 } 1004 }
991 for (i = 0; i < db_count; i++) { 1005 for (i = 0; i < db_count; i++) {
@@ -994,12 +1008,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
994 if (!sbi->s_group_desc[i]) { 1008 if (!sbi->s_group_desc[i]) {
995 for (j = 0; j < i; j++) 1009 for (j = 0; j < i; j++)
996 brelse (sbi->s_group_desc[j]); 1010 brelse (sbi->s_group_desc[j]);
997 printk ("EXT2-fs: unable to read group descriptors\n"); 1011 ext2_msg(sb, KERN_ERR,
1012 "error: unable to read group descriptors");
998 goto failed_mount_group_desc; 1013 goto failed_mount_group_desc;
999 } 1014 }
1000 } 1015 }
1001 if (!ext2_check_descriptors (sb)) { 1016 if (!ext2_check_descriptors (sb)) {
1002 printk ("EXT2-fs: group descriptors corrupted!\n"); 1017 ext2_msg(sb, KERN_ERR, "group descriptors corrupted");
1003 goto failed_mount2; 1018 goto failed_mount2;
1004 } 1019 }
1005 sbi->s_gdb_count = db_count; 1020 sbi->s_gdb_count = db_count;
@@ -1032,7 +1047,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1032 ext2_count_dirs(sb)); 1047 ext2_count_dirs(sb));
1033 } 1048 }
1034 if (err) { 1049 if (err) {
1035 printk(KERN_ERR "EXT2-fs: insufficient memory\n"); 1050 ext2_msg(sb, KERN_ERR, "error: insufficient memory");
1036 goto failed_mount3; 1051 goto failed_mount3;
1037 } 1052 }
1038 /* 1053 /*
@@ -1048,27 +1063,28 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1048 } 1063 }
1049 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1064 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1050 iput(root); 1065 iput(root);
1051 printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); 1066 ext2_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
1052 goto failed_mount3; 1067 goto failed_mount3;
1053 } 1068 }
1054 1069
1055 sb->s_root = d_alloc_root(root); 1070 sb->s_root = d_alloc_root(root);
1056 if (!sb->s_root) { 1071 if (!sb->s_root) {
1057 iput(root); 1072 iput(root);
1058 printk(KERN_ERR "EXT2-fs: get root inode failed\n"); 1073 ext2_msg(sb, KERN_ERR, "error: get root inode failed");
1059 ret = -ENOMEM; 1074 ret = -ENOMEM;
1060 goto failed_mount3; 1075 goto failed_mount3;
1061 } 1076 }
1062 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 1077 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1063 ext2_warning(sb, __func__, 1078 ext2_msg(sb, KERN_WARNING,
1064 "mounting ext3 filesystem as ext2"); 1079 "warning: mounting ext3 filesystem as ext2");
1065 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1080 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY);
1066 return 0; 1081 return 0;
1067 1082
1068cantfind_ext2: 1083cantfind_ext2:
1069 if (!silent) 1084 if (!silent)
1070 printk("VFS: Can't find an ext2 filesystem on dev %s.\n", 1085 ext2_msg(sb, KERN_ERR,
1071 sb->s_id); 1086 "error: can't find an ext2 filesystem on dev %s.",
1087 sb->s_id);
1072 goto failed_mount; 1088 goto failed_mount;
1073failed_mount3: 1089failed_mount3:
1074 percpu_counter_destroy(&sbi->s_freeblocks_counter); 1090 percpu_counter_destroy(&sbi->s_freeblocks_counter);
@@ -1121,8 +1137,24 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1121static int ext2_sync_fs(struct super_block *sb, int wait) 1137static int ext2_sync_fs(struct super_block *sb, int wait)
1122{ 1138{
1123 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 1139 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
1140 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1124 1141
1125 lock_kernel(); 1142 lock_kernel();
1143 if (buffer_write_io_error(sbh)) {
1144 /*
1145 * Oh, dear. A previous attempt to write the
1146 * superblock failed. This could happen because the
1147 * USB device was yanked out. Or it could happen to
1148 * be a transient write error and maybe the block will
1149 * be remapped. Nothing we can do but to retry the
1150 * write and hope for the best.
1151 */
1152 ext2_msg(sb, KERN_ERR,
1153 "previous I/O error to superblock detected\n");
1154 clear_buffer_write_io_error(sbh);
1155 set_buffer_uptodate(sbh);
1156 }
1157
1126 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { 1158 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1127 ext2_debug("setting valid to 0\n"); 1159 ext2_debug("setting valid to 0\n");
1128 es->s_state &= cpu_to_le16(~EXT2_VALID_FS); 1160 es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
@@ -1170,7 +1202,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1170 /* 1202 /*
1171 * Allow the "check" option to be passed as a remount option. 1203 * Allow the "check" option to be passed as a remount option.
1172 */ 1204 */
1173 if (!parse_options (data, sbi)) { 1205 if (!parse_options(data, sb)) {
1174 err = -EINVAL; 1206 err = -EINVAL;
1175 goto restore_opts; 1207 goto restore_opts;
1176 } 1208 }
@@ -1182,7 +1214,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1182 EXT2_MOUNT_XIP if not */ 1214 EXT2_MOUNT_XIP if not */
1183 1215
1184 if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { 1216 if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) {
1185 printk("XIP: Unsupported blocksize\n"); 1217 ext2_msg(sb, KERN_WARNING,
1218 "warning: unsupported blocksize for xip");
1186 err = -EINVAL; 1219 err = -EINVAL;
1187 goto restore_opts; 1220 goto restore_opts;
1188 } 1221 }
@@ -1191,8 +1224,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1191 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1224 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
1192 (old_mount_opt & EXT2_MOUNT_XIP)) && 1225 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1193 invalidate_inodes(sb)) { 1226 invalidate_inodes(sb)) {
1194 ext2_warning(sb, __func__, "refusing change of xip flag " 1227 ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
1195 "with busy inodes while remounting"); 1228 "xip flag with busy inodes while remounting");
1196 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; 1229 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
1197 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; 1230 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
1198 } 1231 }
@@ -1216,9 +1249,10 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1216 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, 1249 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
1217 ~EXT2_FEATURE_RO_COMPAT_SUPP); 1250 ~EXT2_FEATURE_RO_COMPAT_SUPP);
1218 if (ret) { 1251 if (ret) {
1219 printk("EXT2-fs: %s: couldn't remount RDWR because of " 1252 ext2_msg(sb, KERN_WARNING,
1220 "unsupported optional features (%x).\n", 1253 "warning: couldn't remount RDWR because of "
1221 sb->s_id, le32_to_cpu(ret)); 1254 "unsupported optional features (%x).",
1255 le32_to_cpu(ret));
1222 err = -EROFS; 1256 err = -EROFS;
1223 goto restore_opts; 1257 goto restore_opts;
1224 } 1258 }
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index c18fbf3e4068..322a56b2dfb1 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -69,8 +69,9 @@ void ext2_xip_verify_sb(struct super_block *sb)
69 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && 69 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) &&
70 !sb->s_bdev->bd_disk->fops->direct_access) { 70 !sb->s_bdev->bd_disk->fops->direct_access) {
71 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); 71 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
72 ext2_warning(sb, __func__, 72 ext2_msg(sb, KERN_WARNING,
73 "ignoring xip option - not supported by bdev"); 73 "warning: ignoring xip option - "
74 "not supported by bdev");
74 } 75 }
75} 76}
76 77
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 354ed3b47b30..ad14227f509e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
1151 return ext3_journal_get_write_access(handle, bh); 1151 return ext3_journal_get_write_access(handle, bh);
1152} 1152}
1153 1153
1154/*
1155 * Truncate blocks that were not used by write. We have to truncate the
1156 * pagecache as well so that corresponding buffers get properly unmapped.
1157 */
1158static void ext3_truncate_failed_write(struct inode *inode)
1159{
1160 truncate_inode_pages(inode->i_mapping, inode->i_size);
1161 ext3_truncate(inode);
1162}
1163
1154static int ext3_write_begin(struct file *file, struct address_space *mapping, 1164static int ext3_write_begin(struct file *file, struct address_space *mapping,
1155 loff_t pos, unsigned len, unsigned flags, 1165 loff_t pos, unsigned len, unsigned flags,
1156 struct page **pagep, void **fsdata) 1166 struct page **pagep, void **fsdata)
@@ -1209,7 +1219,7 @@ write_begin_failed:
1209 unlock_page(page); 1219 unlock_page(page);
1210 page_cache_release(page); 1220 page_cache_release(page);
1211 if (pos + len > inode->i_size) 1221 if (pos + len > inode->i_size)
1212 ext3_truncate(inode); 1222 ext3_truncate_failed_write(inode);
1213 } 1223 }
1214 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1224 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1215 goto retry; 1225 goto retry;
@@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
1304 page_cache_release(page); 1314 page_cache_release(page);
1305 1315
1306 if (pos + len > inode->i_size) 1316 if (pos + len > inode->i_size)
1307 ext3_truncate(inode); 1317 ext3_truncate_failed_write(inode);
1308 return ret ? ret : copied; 1318 return ret ? ret : copied;
1309} 1319}
1310 1320
@@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
1330 page_cache_release(page); 1340 page_cache_release(page);
1331 1341
1332 if (pos + len > inode->i_size) 1342 if (pos + len > inode->i_size)
1333 ext3_truncate(inode); 1343 ext3_truncate_failed_write(inode);
1334 return ret ? ret : copied; 1344 return ret ? ret : copied;
1335} 1345}
1336 1346
@@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
1383 page_cache_release(page); 1393 page_cache_release(page);
1384 1394
1385 if (pos + len > inode->i_size) 1395 if (pos + len > inode->i_size)
1386 ext3_truncate(inode); 1396 ext3_truncate_failed_write(inode);
1387 return ret ? ret : copied; 1397 return ret ? ret : copied;
1388} 1398}
1389 1399
@@ -2033,7 +2043,7 @@ static Indirect *ext3_find_shared(struct inode *inode, int depth,
2033 int k, err; 2043 int k, err;
2034 2044
2035 *top = 0; 2045 *top = 0;
2036 /* Make k index the deepest non-null offest + 1 */ 2046 /* Make k index the deepest non-null offset + 1 */
2037 for (k = depth; k > 1 && !offsets[k-1]; k--) 2047 for (k = depth; k > 1 && !offsets[k-1]; k--)
2038 ; 2048 ;
2039 partial = ext3_get_branch(inode, k, offsets, chain, &err); 2049 partial = ext3_get_branch(inode, k, offsets, chain, &err);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 8359e7b3dc89..5f83b6179178 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -266,7 +266,7 @@ static int setup_new_group_blocks(struct super_block *sb,
266 goto exit_bh; 266 goto exit_bh;
267 267
268 if (IS_ERR(gdb = bclean(handle, sb, block))) { 268 if (IS_ERR(gdb = bclean(handle, sb, block))) {
269 err = PTR_ERR(bh); 269 err = PTR_ERR(gdb);
270 goto exit_bh; 270 goto exit_bh;
271 } 271 }
272 ext3_journal_dirty_metadata(handle, gdb); 272 ext3_journal_dirty_metadata(handle, gdb);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 427496c4767c..7ad1e8c30bd0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -135,12 +135,24 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn,
135 if (is_handle_aborted(handle)) 135 if (is_handle_aborted(handle))
136 return; 136 return;
137 137
138 printk(KERN_ERR "%s: aborting transaction: %s in %s\n", 138 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
139 caller, errstr, err_fn); 139 caller, errstr, err_fn);
140 140
141 journal_abort_handle(handle); 141 journal_abort_handle(handle);
142} 142}
143 143
144void ext3_msg(struct super_block *sb, const char *prefix,
145 const char *fmt, ...)
146{
147 va_list args;
148
149 va_start(args, fmt);
150 printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
151 vprintk(fmt, args);
152 printk("\n");
153 va_end(args);
154}
155
144/* Deal with the reporting of failure conditions on a filesystem such as 156/* Deal with the reporting of failure conditions on a filesystem such as
145 * inconsistencies detected or read IO failures. 157 * inconsistencies detected or read IO failures.
146 * 158 *
@@ -174,12 +186,13 @@ static void ext3_handle_error(struct super_block *sb)
174 journal_abort(journal, -EIO); 186 journal_abort(journal, -EIO);
175 } 187 }
176 if (test_opt (sb, ERRORS_RO)) { 188 if (test_opt (sb, ERRORS_RO)) {
177 printk (KERN_CRIT "Remounting filesystem read-only\n"); 189 ext3_msg(sb, KERN_CRIT,
190 "error: remounting filesystem read-only");
178 sb->s_flags |= MS_RDONLY; 191 sb->s_flags |= MS_RDONLY;
179 } 192 }
180 ext3_commit_super(sb, es, 1); 193 ext3_commit_super(sb, es, 1);
181 if (test_opt(sb, ERRORS_PANIC)) 194 if (test_opt(sb, ERRORS_PANIC))
182 panic("EXT3-fs (device %s): panic forced after error\n", 195 panic("EXT3-fs (%s): panic forced after error\n",
183 sb->s_id); 196 sb->s_id);
184} 197}
185 198
@@ -247,8 +260,7 @@ void __ext3_std_error (struct super_block * sb, const char * function,
247 return; 260 return;
248 261
249 errstr = ext3_decode_error(sb, errno, nbuf); 262 errstr = ext3_decode_error(sb, errno, nbuf);
250 printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", 263 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
251 sb->s_id, function, errstr);
252 264
253 ext3_handle_error(sb); 265 ext3_handle_error(sb);
254} 266}
@@ -268,21 +280,20 @@ void ext3_abort (struct super_block * sb, const char * function,
268{ 280{
269 va_list args; 281 va_list args;
270 282
271 printk (KERN_CRIT "ext3_abort called.\n");
272
273 va_start(args, fmt); 283 va_start(args, fmt);
274 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); 284 printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
275 vprintk(fmt, args); 285 vprintk(fmt, args);
276 printk("\n"); 286 printk("\n");
277 va_end(args); 287 va_end(args);
278 288
279 if (test_opt(sb, ERRORS_PANIC)) 289 if (test_opt(sb, ERRORS_PANIC))
280 panic("EXT3-fs panic from previous error\n"); 290 panic("EXT3-fs: panic from previous error\n");
281 291
282 if (sb->s_flags & MS_RDONLY) 292 if (sb->s_flags & MS_RDONLY)
283 return; 293 return;
284 294
285 printk(KERN_CRIT "Remounting filesystem read-only\n"); 295 ext3_msg(sb, KERN_CRIT,
296 "error: remounting filesystem read-only");
286 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; 297 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
287 sb->s_flags |= MS_RDONLY; 298 sb->s_flags |= MS_RDONLY;
288 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; 299 EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
@@ -296,7 +307,7 @@ void ext3_warning (struct super_block * sb, const char * function,
296 va_list args; 307 va_list args;
297 308
298 va_start(args, fmt); 309 va_start(args, fmt);
299 printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", 310 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
300 sb->s_id, function); 311 sb->s_id, function);
301 vprintk(fmt, args); 312 vprintk(fmt, args);
302 printk("\n"); 313 printk("\n");
@@ -310,10 +321,10 @@ void ext3_update_dynamic_rev(struct super_block *sb)
310 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) 321 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
311 return; 322 return;
312 323
313 ext3_warning(sb, __func__, 324 ext3_msg(sb, KERN_WARNING,
314 "updating to rev %d because of new feature flag, " 325 "warning: updating to rev %d because of "
315 "running e2fsck is recommended", 326 "new feature flag, running e2fsck is recommended",
316 EXT3_DYNAMIC_REV); 327 EXT3_DYNAMIC_REV);
317 328
318 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); 329 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
319 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); 330 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
@@ -331,7 +342,7 @@ void ext3_update_dynamic_rev(struct super_block *sb)
331/* 342/*
332 * Open the external journal device 343 * Open the external journal device
333 */ 344 */
334static struct block_device *ext3_blkdev_get(dev_t dev) 345static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
335{ 346{
336 struct block_device *bdev; 347 struct block_device *bdev;
337 char b[BDEVNAME_SIZE]; 348 char b[BDEVNAME_SIZE];
@@ -342,8 +353,9 @@ static struct block_device *ext3_blkdev_get(dev_t dev)
342 return bdev; 353 return bdev;
343 354
344fail: 355fail:
345 printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n", 356 ext3_msg(sb, "error: failed to open journal device %s: %ld",
346 __bdevname(dev, b), PTR_ERR(bdev)); 357 __bdevname(dev, b), PTR_ERR(bdev));
358
347 return NULL; 359 return NULL;
348} 360}
349 361
@@ -378,13 +390,13 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
378{ 390{
379 struct list_head *l; 391 struct list_head *l;
380 392
381 printk(KERN_ERR "sb orphan head is %d\n", 393 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
382 le32_to_cpu(sbi->s_es->s_last_orphan)); 394 le32_to_cpu(sbi->s_es->s_last_orphan));
383 395
384 printk(KERN_ERR "sb_info orphan list:\n"); 396 ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
385 list_for_each(l, &sbi->s_orphan) { 397 list_for_each(l, &sbi->s_orphan) {
386 struct inode *inode = orphan_list_entry(l); 398 struct inode *inode = orphan_list_entry(l);
387 printk(KERN_ERR " " 399 ext3_msg(sb, KERN_ERR, " "
388 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", 400 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
389 inode->i_sb->s_id, inode->i_ino, inode, 401 inode->i_sb->s_id, inode->i_ino, inode,
390 inode->i_mode, inode->i_nlink, 402 inode->i_mode, inode->i_nlink,
@@ -527,9 +539,22 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl
527#if defined(CONFIG_QUOTA) 539#if defined(CONFIG_QUOTA)
528 struct ext3_sb_info *sbi = EXT3_SB(sb); 540 struct ext3_sb_info *sbi = EXT3_SB(sb);
529 541
530 if (sbi->s_jquota_fmt) 542 if (sbi->s_jquota_fmt) {
531 seq_printf(seq, ",jqfmt=%s", 543 char *fmtname = "";
532 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); 544
545 switch (sbi->s_jquota_fmt) {
546 case QFMT_VFS_OLD:
547 fmtname = "vfsold";
548 break;
549 case QFMT_VFS_V0:
550 fmtname = "vfsv0";
551 break;
552 case QFMT_VFS_V1:
553 fmtname = "vfsv1";
554 break;
555 }
556 seq_printf(seq, ",jqfmt=%s", fmtname);
557 }
533 558
534 if (sbi->s_qf_names[USRQUOTA]) 559 if (sbi->s_qf_names[USRQUOTA])
535 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 560 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -636,6 +661,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
636 if (test_opt(sb, DATA_ERR_ABORT)) 661 if (test_opt(sb, DATA_ERR_ABORT))
637 seq_puts(seq, ",data_err=abort"); 662 seq_puts(seq, ",data_err=abort");
638 663
664 if (test_opt(sb, NOLOAD))
665 seq_puts(seq, ",norecovery");
666
639 ext3_show_quota_options(seq, sb); 667 ext3_show_quota_options(seq, sb);
640 668
641 return 0; 669 return 0;
@@ -787,9 +815,9 @@ enum {
787 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 815 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
788 Opt_data_err_abort, Opt_data_err_ignore, 816 Opt_data_err_abort, Opt_data_err_ignore,
789 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 817 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
790 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 818 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
791 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 819 Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
792 Opt_grpquota 820 Opt_usrquota, Opt_grpquota
793}; 821};
794 822
795static const match_table_t tokens = { 823static const match_table_t tokens = {
@@ -818,6 +846,7 @@ static const match_table_t tokens = {
818 {Opt_reservation, "reservation"}, 846 {Opt_reservation, "reservation"},
819 {Opt_noreservation, "noreservation"}, 847 {Opt_noreservation, "noreservation"},
820 {Opt_noload, "noload"}, 848 {Opt_noload, "noload"},
849 {Opt_noload, "norecovery"},
821 {Opt_nobh, "nobh"}, 850 {Opt_nobh, "nobh"},
822 {Opt_bh, "bh"}, 851 {Opt_bh, "bh"},
823 {Opt_commit, "commit=%u"}, 852 {Opt_commit, "commit=%u"},
@@ -836,6 +865,7 @@ static const match_table_t tokens = {
836 {Opt_grpjquota, "grpjquota=%s"}, 865 {Opt_grpjquota, "grpjquota=%s"},
837 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 866 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
838 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 867 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
868 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
839 {Opt_grpquota, "grpquota"}, 869 {Opt_grpquota, "grpquota"},
840 {Opt_noquota, "noquota"}, 870 {Opt_noquota, "noquota"},
841 {Opt_quota, "quota"}, 871 {Opt_quota, "quota"},
@@ -845,7 +875,7 @@ static const match_table_t tokens = {
845 {Opt_err, NULL}, 875 {Opt_err, NULL},
846}; 876};
847 877
848static ext3_fsblk_t get_sb_block(void **data) 878static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
849{ 879{
850 ext3_fsblk_t sb_block; 880 ext3_fsblk_t sb_block;
851 char *options = (char *) *data; 881 char *options = (char *) *data;
@@ -856,7 +886,7 @@ static ext3_fsblk_t get_sb_block(void **data)
856 /*todo: use simple_strtoll with >32bit ext3 */ 886 /*todo: use simple_strtoll with >32bit ext3 */
857 sb_block = simple_strtoul(options, &options, 0); 887 sb_block = simple_strtoul(options, &options, 0);
858 if (*options && *options != ',') { 888 if (*options && *options != ',') {
859 printk("EXT3-fs: Invalid sb specification: %s\n", 889 ext3_msg(sb, "error: invalid sb specification: %s",
860 (char *) *data); 890 (char *) *data);
861 return 1; 891 return 1;
862 } 892 }
@@ -956,7 +986,8 @@ static int parse_options (char *options, struct super_block *sb,
956#else 986#else
957 case Opt_user_xattr: 987 case Opt_user_xattr:
958 case Opt_nouser_xattr: 988 case Opt_nouser_xattr:
959 printk("EXT3 (no)user_xattr options not supported\n"); 989 ext3_msg(sb, KERN_INFO,
990 "(no)user_xattr options not supported");
960 break; 991 break;
961#endif 992#endif
962#ifdef CONFIG_EXT3_FS_POSIX_ACL 993#ifdef CONFIG_EXT3_FS_POSIX_ACL
@@ -969,7 +1000,8 @@ static int parse_options (char *options, struct super_block *sb,
969#else 1000#else
970 case Opt_acl: 1001 case Opt_acl:
971 case Opt_noacl: 1002 case Opt_noacl:
972 printk("EXT3 (no)acl options not supported\n"); 1003 ext3_msg(sb, KERN_INFO,
1004 "(no)acl options not supported");
973 break; 1005 break;
974#endif 1006#endif
975 case Opt_reservation: 1007 case Opt_reservation:
@@ -985,16 +1017,16 @@ static int parse_options (char *options, struct super_block *sb,
985 user to specify an existing inode to be the 1017 user to specify an existing inode to be the
986 journal file. */ 1018 journal file. */
987 if (is_remount) { 1019 if (is_remount) {
988 printk(KERN_ERR "EXT3-fs: cannot specify " 1020 ext3_msg(sb, KERN_ERR, "error: cannot specify "
989 "journal on remount\n"); 1021 "journal on remount");
990 return 0; 1022 return 0;
991 } 1023 }
992 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1024 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
993 break; 1025 break;
994 case Opt_journal_inum: 1026 case Opt_journal_inum:
995 if (is_remount) { 1027 if (is_remount) {
996 printk(KERN_ERR "EXT3-fs: cannot specify " 1028 ext3_msg(sb, KERN_ERR, "error: cannot specify "
997 "journal on remount\n"); 1029 "journal on remount");
998 return 0; 1030 return 0;
999 } 1031 }
1000 if (match_int(&args[0], &option)) 1032 if (match_int(&args[0], &option))
@@ -1003,8 +1035,8 @@ static int parse_options (char *options, struct super_block *sb,
1003 break; 1035 break;
1004 case Opt_journal_dev: 1036 case Opt_journal_dev:
1005 if (is_remount) { 1037 if (is_remount) {
1006 printk(KERN_ERR "EXT3-fs: cannot specify " 1038 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1007 "journal on remount\n"); 1039 "journal on remount");
1008 return 0; 1040 return 0;
1009 } 1041 }
1010 if (match_int(&args[0], &option)) 1042 if (match_int(&args[0], &option))
@@ -1036,12 +1068,11 @@ static int parse_options (char *options, struct super_block *sb,
1036 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) 1068 if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS)
1037 == data_opt) 1069 == data_opt)
1038 break; 1070 break;
1039 printk(KERN_ERR 1071 ext3_msg(sb, KERN_ERR,
1040 "EXT3-fs (device %s): Cannot change " 1072 "error: cannot change "
1041 "data mode on remount. The filesystem " 1073 "data mode on remount. The filesystem "
1042 "is mounted in data=%s mode and you " 1074 "is mounted in data=%s mode and you "
1043 "try to remount it in data=%s mode.\n", 1075 "try to remount it in data=%s mode.",
1044 sb->s_id,
1045 data_mode_string(sbi->s_mount_opt & 1076 data_mode_string(sbi->s_mount_opt &
1046 EXT3_MOUNT_DATA_FLAGS), 1077 EXT3_MOUNT_DATA_FLAGS),
1047 data_mode_string(data_opt)); 1078 data_mode_string(data_opt));
@@ -1066,31 +1097,31 @@ static int parse_options (char *options, struct super_block *sb,
1066set_qf_name: 1097set_qf_name:
1067 if (sb_any_quota_loaded(sb) && 1098 if (sb_any_quota_loaded(sb) &&
1068 !sbi->s_qf_names[qtype]) { 1099 !sbi->s_qf_names[qtype]) {
1069 printk(KERN_ERR 1100 ext3_msg(sb, KERN_ERR,
1070 "EXT3-fs: Cannot change journaled " 1101 "error: cannot change journaled "
1071 "quota options when quota turned on.\n"); 1102 "quota options when quota turned on.");
1072 return 0; 1103 return 0;
1073 } 1104 }
1074 qname = match_strdup(&args[0]); 1105 qname = match_strdup(&args[0]);
1075 if (!qname) { 1106 if (!qname) {
1076 printk(KERN_ERR 1107 ext3_msg(sb, KERN_ERR,
1077 "EXT3-fs: not enough memory for " 1108 "error: not enough memory for "
1078 "storing quotafile name.\n"); 1109 "storing quotafile name.");
1079 return 0; 1110 return 0;
1080 } 1111 }
1081 if (sbi->s_qf_names[qtype] && 1112 if (sbi->s_qf_names[qtype] &&
1082 strcmp(sbi->s_qf_names[qtype], qname)) { 1113 strcmp(sbi->s_qf_names[qtype], qname)) {
1083 printk(KERN_ERR 1114 ext3_msg(sb, KERN_ERR,
1084 "EXT3-fs: %s quota file already " 1115 "error: %s quota file already "
1085 "specified.\n", QTYPE2NAME(qtype)); 1116 "specified.", QTYPE2NAME(qtype));
1086 kfree(qname); 1117 kfree(qname);
1087 return 0; 1118 return 0;
1088 } 1119 }
1089 sbi->s_qf_names[qtype] = qname; 1120 sbi->s_qf_names[qtype] = qname;
1090 if (strchr(sbi->s_qf_names[qtype], '/')) { 1121 if (strchr(sbi->s_qf_names[qtype], '/')) {
1091 printk(KERN_ERR 1122 ext3_msg(sb, KERN_ERR,
1092 "EXT3-fs: quotafile must be on " 1123 "error: quotafile must be on "
1093 "filesystem root.\n"); 1124 "filesystem root.");
1094 kfree(sbi->s_qf_names[qtype]); 1125 kfree(sbi->s_qf_names[qtype]);
1095 sbi->s_qf_names[qtype] = NULL; 1126 sbi->s_qf_names[qtype] = NULL;
1096 return 0; 1127 return 0;
@@ -1105,9 +1136,9 @@ set_qf_name:
1105clear_qf_name: 1136clear_qf_name:
1106 if (sb_any_quota_loaded(sb) && 1137 if (sb_any_quota_loaded(sb) &&
1107 sbi->s_qf_names[qtype]) { 1138 sbi->s_qf_names[qtype]) {
1108 printk(KERN_ERR "EXT3-fs: Cannot change " 1139 ext3_msg(sb, KERN_ERR, "error: cannot change "
1109 "journaled quota options when " 1140 "journaled quota options when "
1110 "quota turned on.\n"); 1141 "quota turned on.");
1111 return 0; 1142 return 0;
1112 } 1143 }
1113 /* 1144 /*
@@ -1121,12 +1152,15 @@ clear_qf_name:
1121 goto set_qf_format; 1152 goto set_qf_format;
1122 case Opt_jqfmt_vfsv0: 1153 case Opt_jqfmt_vfsv0:
1123 qfmt = QFMT_VFS_V0; 1154 qfmt = QFMT_VFS_V0;
1155 goto set_qf_format;
1156 case Opt_jqfmt_vfsv1:
1157 qfmt = QFMT_VFS_V1;
1124set_qf_format: 1158set_qf_format:
1125 if (sb_any_quota_loaded(sb) && 1159 if (sb_any_quota_loaded(sb) &&
1126 sbi->s_jquota_fmt != qfmt) { 1160 sbi->s_jquota_fmt != qfmt) {
1127 printk(KERN_ERR "EXT3-fs: Cannot change " 1161 ext3_msg(sb, KERN_ERR, "error: cannot change "
1128 "journaled quota options when " 1162 "journaled quota options when "
1129 "quota turned on.\n"); 1163 "quota turned on.");
1130 return 0; 1164 return 0;
1131 } 1165 }
1132 sbi->s_jquota_fmt = qfmt; 1166 sbi->s_jquota_fmt = qfmt;
@@ -1142,8 +1176,8 @@ set_qf_format:
1142 break; 1176 break;
1143 case Opt_noquota: 1177 case Opt_noquota:
1144 if (sb_any_quota_loaded(sb)) { 1178 if (sb_any_quota_loaded(sb)) {
1145 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1179 ext3_msg(sb, KERN_ERR, "error: cannot change "
1146 "options when quota turned on.\n"); 1180 "quota options when quota turned on.");
1147 return 0; 1181 return 0;
1148 } 1182 }
1149 clear_opt(sbi->s_mount_opt, QUOTA); 1183 clear_opt(sbi->s_mount_opt, QUOTA);
@@ -1154,8 +1188,8 @@ set_qf_format:
1154 case Opt_quota: 1188 case Opt_quota:
1155 case Opt_usrquota: 1189 case Opt_usrquota:
1156 case Opt_grpquota: 1190 case Opt_grpquota:
1157 printk(KERN_ERR 1191 ext3_msg(sb, KERN_ERR,
1158 "EXT3-fs: quota options not supported.\n"); 1192 "error: quota options not supported.");
1159 break; 1193 break;
1160 case Opt_usrjquota: 1194 case Opt_usrjquota:
1161 case Opt_grpjquota: 1195 case Opt_grpjquota:
@@ -1163,9 +1197,10 @@ set_qf_format:
1163 case Opt_offgrpjquota: 1197 case Opt_offgrpjquota:
1164 case Opt_jqfmt_vfsold: 1198 case Opt_jqfmt_vfsold:
1165 case Opt_jqfmt_vfsv0: 1199 case Opt_jqfmt_vfsv0:
1166 printk(KERN_ERR 1200 case Opt_jqfmt_vfsv1:
1167 "EXT3-fs: journaled quota options not " 1201 ext3_msg(sb, KERN_ERR,
1168 "supported.\n"); 1202 "error: journaled quota options not "
1203 "supported.");
1169 break; 1204 break;
1170 case Opt_noquota: 1205 case Opt_noquota:
1171 break; 1206 break;
@@ -1185,8 +1220,9 @@ set_qf_format:
1185 break; 1220 break;
1186 case Opt_resize: 1221 case Opt_resize:
1187 if (!is_remount) { 1222 if (!is_remount) {
1188 printk("EXT3-fs: resize option only available " 1223 ext3_msg(sb, KERN_ERR,
1189 "for remount\n"); 1224 "error: resize option only available "
1225 "for remount");
1190 return 0; 1226 return 0;
1191 } 1227 }
1192 if (match_int(&args[0], &option) != 0) 1228 if (match_int(&args[0], &option) != 0)
@@ -1200,9 +1236,9 @@ set_qf_format:
1200 clear_opt(sbi->s_mount_opt, NOBH); 1236 clear_opt(sbi->s_mount_opt, NOBH);
1201 break; 1237 break;
1202 default: 1238 default:
1203 printk (KERN_ERR 1239 ext3_msg(sb, KERN_ERR,
1204 "EXT3-fs: Unrecognized mount option \"%s\" " 1240 "error: unrecognized mount option \"%s\" "
1205 "or missing value\n", p); 1241 "or missing value", p);
1206 return 0; 1242 return 0;
1207 } 1243 }
1208 } 1244 }
@@ -1220,21 +1256,21 @@ set_qf_format:
1220 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || 1256 (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) ||
1221 (sbi->s_qf_names[GRPQUOTA] && 1257 (sbi->s_qf_names[GRPQUOTA] &&
1222 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { 1258 (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) {
1223 printk(KERN_ERR "EXT3-fs: old and new quota " 1259 ext3_msg(sb, KERN_ERR, "error: old and new quota "
1224 "format mixing.\n"); 1260 "format mixing.");
1225 return 0; 1261 return 0;
1226 } 1262 }
1227 1263
1228 if (!sbi->s_jquota_fmt) { 1264 if (!sbi->s_jquota_fmt) {
1229 printk(KERN_ERR "EXT3-fs: journaled quota format " 1265 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1230 "not specified.\n"); 1266 "not specified.");
1231 return 0; 1267 return 0;
1232 } 1268 }
1233 } else { 1269 } else {
1234 if (sbi->s_jquota_fmt) { 1270 if (sbi->s_jquota_fmt) {
1235 printk(KERN_ERR "EXT3-fs: journaled quota format " 1271 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1236 "specified with no journaling " 1272 "specified with no journaling "
1237 "enabled.\n"); 1273 "enabled.");
1238 return 0; 1274 return 0;
1239 } 1275 }
1240 } 1276 }
@@ -1249,31 +1285,33 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1249 int res = 0; 1285 int res = 0;
1250 1286
1251 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { 1287 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
1252 printk (KERN_ERR "EXT3-fs warning: revision level too high, " 1288 ext3_msg(sb, KERN_ERR,
1253 "forcing read-only mode\n"); 1289 "error: revision level too high, "
1290 "forcing read-only mode");
1254 res = MS_RDONLY; 1291 res = MS_RDONLY;
1255 } 1292 }
1256 if (read_only) 1293 if (read_only)
1257 return res; 1294 return res;
1258 if (!(sbi->s_mount_state & EXT3_VALID_FS)) 1295 if (!(sbi->s_mount_state & EXT3_VALID_FS))
1259 printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " 1296 ext3_msg(sb, KERN_WARNING,
1260 "running e2fsck is recommended\n"); 1297 "warning: mounting unchecked fs, "
1298 "running e2fsck is recommended");
1261 else if ((sbi->s_mount_state & EXT3_ERROR_FS)) 1299 else if ((sbi->s_mount_state & EXT3_ERROR_FS))
1262 printk (KERN_WARNING 1300 ext3_msg(sb, KERN_WARNING,
1263 "EXT3-fs warning: mounting fs with errors, " 1301 "warning: mounting fs with errors, "
1264 "running e2fsck is recommended\n"); 1302 "running e2fsck is recommended");
1265 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1303 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1266 le16_to_cpu(es->s_mnt_count) >= 1304 le16_to_cpu(es->s_mnt_count) >=
1267 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1305 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1268 printk (KERN_WARNING 1306 ext3_msg(sb, KERN_WARNING,
1269 "EXT3-fs warning: maximal mount count reached, " 1307 "warning: maximal mount count reached, "
1270 "running e2fsck is recommended\n"); 1308 "running e2fsck is recommended");
1271 else if (le32_to_cpu(es->s_checkinterval) && 1309 else if (le32_to_cpu(es->s_checkinterval) &&
1272 (le32_to_cpu(es->s_lastcheck) + 1310 (le32_to_cpu(es->s_lastcheck) +
1273 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1311 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1274 printk (KERN_WARNING 1312 ext3_msg(sb, KERN_WARNING,
1275 "EXT3-fs warning: checktime reached, " 1313 "warning: checktime reached, "
1276 "running e2fsck is recommended\n"); 1314 "running e2fsck is recommended");
1277#if 0 1315#if 0
1278 /* @@@ We _will_ want to clear the valid bit if we find 1316 /* @@@ We _will_ want to clear the valid bit if we find
1279 inconsistencies, to force a fsck at reboot. But for 1317 inconsistencies, to force a fsck at reboot. But for
@@ -1290,22 +1328,20 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1290 1328
1291 ext3_commit_super(sb, es, 1); 1329 ext3_commit_super(sb, es, 1);
1292 if (test_opt(sb, DEBUG)) 1330 if (test_opt(sb, DEBUG))
1293 printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, " 1331 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
1294 "bpg=%lu, ipg=%lu, mo=%04lx]\n", 1332 "bpg=%lu, ipg=%lu, mo=%04lx]",
1295 sb->s_blocksize, 1333 sb->s_blocksize,
1296 sbi->s_groups_count, 1334 sbi->s_groups_count,
1297 EXT3_BLOCKS_PER_GROUP(sb), 1335 EXT3_BLOCKS_PER_GROUP(sb),
1298 EXT3_INODES_PER_GROUP(sb), 1336 EXT3_INODES_PER_GROUP(sb),
1299 sbi->s_mount_opt); 1337 sbi->s_mount_opt);
1300 1338
1301 printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id);
1302 if (EXT3_SB(sb)->s_journal->j_inode == NULL) { 1339 if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1303 char b[BDEVNAME_SIZE]; 1340 char b[BDEVNAME_SIZE];
1304 1341 ext3_msg(sb, KERN_INFO, "using external journal on %s",
1305 printk("external journal on %s\n",
1306 bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); 1342 bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1307 } else { 1343 } else {
1308 printk("internal journal\n"); 1344 ext3_msg(sb, KERN_INFO, "using internal journal");
1309 } 1345 }
1310 return res; 1346 return res;
1311} 1347}
@@ -1399,8 +1435,8 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1399 } 1435 }
1400 1436
1401 if (bdev_read_only(sb->s_bdev)) { 1437 if (bdev_read_only(sb->s_bdev)) {
1402 printk(KERN_ERR "EXT3-fs: write access " 1438 ext3_msg(sb, KERN_ERR, "error: write access "
1403 "unavailable, skipping orphan cleanup.\n"); 1439 "unavailable, skipping orphan cleanup.");
1404 return; 1440 return;
1405 } 1441 }
1406 1442
@@ -1414,8 +1450,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1414 } 1450 }
1415 1451
1416 if (s_flags & MS_RDONLY) { 1452 if (s_flags & MS_RDONLY) {
1417 printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", 1453 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1418 sb->s_id);
1419 sb->s_flags &= ~MS_RDONLY; 1454 sb->s_flags &= ~MS_RDONLY;
1420 } 1455 }
1421#ifdef CONFIG_QUOTA 1456#ifdef CONFIG_QUOTA
@@ -1426,9 +1461,9 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1426 if (EXT3_SB(sb)->s_qf_names[i]) { 1461 if (EXT3_SB(sb)->s_qf_names[i]) {
1427 int ret = ext3_quota_on_mount(sb, i); 1462 int ret = ext3_quota_on_mount(sb, i);
1428 if (ret < 0) 1463 if (ret < 0)
1429 printk(KERN_ERR 1464 ext3_msg(sb, KERN_ERR,
1430 "EXT3-fs: Cannot turn on journaled " 1465 "error: cannot turn on journaled "
1431 "quota: error %d\n", ret); 1466 "quota: %d", ret);
1432 } 1467 }
1433 } 1468 }
1434#endif 1469#endif
@@ -1466,11 +1501,11 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1466#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1501#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1467 1502
1468 if (nr_orphans) 1503 if (nr_orphans)
1469 printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", 1504 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1470 sb->s_id, PLURAL(nr_orphans)); 1505 PLURAL(nr_orphans));
1471 if (nr_truncates) 1506 if (nr_truncates)
1472 printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", 1507 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1473 sb->s_id, PLURAL(nr_truncates)); 1508 PLURAL(nr_truncates));
1474#ifdef CONFIG_QUOTA 1509#ifdef CONFIG_QUOTA
1475 /* Turn quotas off */ 1510 /* Turn quotas off */
1476 for (i = 0; i < MAXQUOTAS; i++) { 1511 for (i = 0; i < MAXQUOTAS; i++) {
@@ -1554,7 +1589,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1554 struct ext3_super_block *es = NULL; 1589 struct ext3_super_block *es = NULL;
1555 struct ext3_sb_info *sbi; 1590 struct ext3_sb_info *sbi;
1556 ext3_fsblk_t block; 1591 ext3_fsblk_t block;
1557 ext3_fsblk_t sb_block = get_sb_block(&data); 1592 ext3_fsblk_t sb_block = get_sb_block(&data, sb);
1558 ext3_fsblk_t logic_sb_block; 1593 ext3_fsblk_t logic_sb_block;
1559 unsigned long offset = 0; 1594 unsigned long offset = 0;
1560 unsigned int journal_inum = 0; 1595 unsigned int journal_inum = 0;
@@ -1590,7 +1625,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1590 1625
1591 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); 1626 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1592 if (!blocksize) { 1627 if (!blocksize) {
1593 printk(KERN_ERR "EXT3-fs: unable to set blocksize\n"); 1628 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1594 goto out_fail; 1629 goto out_fail;
1595 } 1630 }
1596 1631
@@ -1606,7 +1641,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1606 } 1641 }
1607 1642
1608 if (!(bh = sb_bread(sb, logic_sb_block))) { 1643 if (!(bh = sb_bread(sb, logic_sb_block))) {
1609 printk (KERN_ERR "EXT3-fs: unable to read superblock\n"); 1644 ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1610 goto out_fail; 1645 goto out_fail;
1611 } 1646 }
1612 /* 1647 /*
@@ -1665,9 +1700,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1665 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || 1700 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1666 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 1701 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1667 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) 1702 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1668 printk(KERN_WARNING 1703 ext3_msg(sb, KERN_WARNING,
1669 "EXT3-fs warning: feature flags set on rev 0 fs, " 1704 "warning: feature flags set on rev 0 fs, "
1670 "running e2fsck is recommended\n"); 1705 "running e2fsck is recommended");
1671 /* 1706 /*
1672 * Check feature flags regardless of the revision level, since we 1707 * Check feature flags regardless of the revision level, since we
1673 * previously didn't change the revision level when setting the flags, 1708 * previously didn't change the revision level when setting the flags,
@@ -1675,25 +1710,25 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1675 */ 1710 */
1676 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); 1711 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1677 if (features) { 1712 if (features) {
1678 printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " 1713 ext3_msg(sb, KERN_ERR,
1679 "unsupported optional features (%x).\n", 1714 "error: couldn't mount because of unsupported "
1680 sb->s_id, le32_to_cpu(features)); 1715 "optional features (%x)", le32_to_cpu(features));
1681 goto failed_mount; 1716 goto failed_mount;
1682 } 1717 }
1683 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); 1718 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1684 if (!(sb->s_flags & MS_RDONLY) && features) { 1719 if (!(sb->s_flags & MS_RDONLY) && features) {
1685 printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " 1720 ext3_msg(sb, KERN_ERR,
1686 "unsupported optional features (%x).\n", 1721 "error: couldn't mount RDWR because of unsupported "
1687 sb->s_id, le32_to_cpu(features)); 1722 "optional features (%x)", le32_to_cpu(features));
1688 goto failed_mount; 1723 goto failed_mount;
1689 } 1724 }
1690 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); 1725 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1691 1726
1692 if (blocksize < EXT3_MIN_BLOCK_SIZE || 1727 if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1693 blocksize > EXT3_MAX_BLOCK_SIZE) { 1728 blocksize > EXT3_MAX_BLOCK_SIZE) {
1694 printk(KERN_ERR 1729 ext3_msg(sb, KERN_ERR,
1695 "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", 1730 "error: couldn't mount because of unsupported "
1696 blocksize, sb->s_id); 1731 "filesystem blocksize %d", blocksize);
1697 goto failed_mount; 1732 goto failed_mount;
1698 } 1733 }
1699 1734
@@ -1704,30 +1739,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1704 * than the hardware sectorsize for the machine. 1739 * than the hardware sectorsize for the machine.
1705 */ 1740 */
1706 if (blocksize < hblock) { 1741 if (blocksize < hblock) {
1707 printk(KERN_ERR "EXT3-fs: blocksize %d too small for " 1742 ext3_msg(sb, KERN_ERR,
1708 "device blocksize %d.\n", blocksize, hblock); 1743 "error: fsblocksize %d too small for "
1744 "hardware sectorsize %d", blocksize, hblock);
1709 goto failed_mount; 1745 goto failed_mount;
1710 } 1746 }
1711 1747
1712 brelse (bh); 1748 brelse (bh);
1713 if (!sb_set_blocksize(sb, blocksize)) { 1749 if (!sb_set_blocksize(sb, blocksize)) {
1714 printk(KERN_ERR "EXT3-fs: bad blocksize %d.\n", 1750 ext3_msg(sb, KERN_ERR,
1715 blocksize); 1751 "error: bad blocksize %d", blocksize);
1716 goto out_fail; 1752 goto out_fail;
1717 } 1753 }
1718 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; 1754 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1719 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; 1755 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1720 bh = sb_bread(sb, logic_sb_block); 1756 bh = sb_bread(sb, logic_sb_block);
1721 if (!bh) { 1757 if (!bh) {
1722 printk(KERN_ERR 1758 ext3_msg(sb, KERN_ERR,
1723 "EXT3-fs: Can't read superblock on 2nd try.\n"); 1759 "error: can't read superblock on 2nd try");
1724 goto failed_mount; 1760 goto failed_mount;
1725 } 1761 }
1726 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); 1762 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
1727 sbi->s_es = es; 1763 sbi->s_es = es;
1728 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { 1764 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1729 printk (KERN_ERR 1765 ext3_msg(sb, KERN_ERR,
1730 "EXT3-fs: Magic mismatch, very weird !\n"); 1766 "error: magic mismatch");
1731 goto failed_mount; 1767 goto failed_mount;
1732 } 1768 }
1733 } 1769 }
@@ -1743,8 +1779,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1743 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || 1779 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1744 (!is_power_of_2(sbi->s_inode_size)) || 1780 (!is_power_of_2(sbi->s_inode_size)) ||
1745 (sbi->s_inode_size > blocksize)) { 1781 (sbi->s_inode_size > blocksize)) {
1746 printk (KERN_ERR 1782 ext3_msg(sb, KERN_ERR,
1747 "EXT3-fs: unsupported inode size: %d\n", 1783 "error: unsupported inode size: %d",
1748 sbi->s_inode_size); 1784 sbi->s_inode_size);
1749 goto failed_mount; 1785 goto failed_mount;
1750 } 1786 }
@@ -1752,8 +1788,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1752 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << 1788 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1753 le32_to_cpu(es->s_log_frag_size); 1789 le32_to_cpu(es->s_log_frag_size);
1754 if (blocksize != sbi->s_frag_size) { 1790 if (blocksize != sbi->s_frag_size) {
1755 printk(KERN_ERR 1791 ext3_msg(sb, KERN_ERR,
1756 "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n", 1792 "error: fragsize %lu != blocksize %u (unsupported)",
1757 sbi->s_frag_size, blocksize); 1793 sbi->s_frag_size, blocksize);
1758 goto failed_mount; 1794 goto failed_mount;
1759 } 1795 }
@@ -1789,31 +1825,31 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1789 } 1825 }
1790 1826
1791 if (sbi->s_blocks_per_group > blocksize * 8) { 1827 if (sbi->s_blocks_per_group > blocksize * 8) {
1792 printk (KERN_ERR 1828 ext3_msg(sb, KERN_ERR,
1793 "EXT3-fs: #blocks per group too big: %lu\n", 1829 "#blocks per group too big: %lu",
1794 sbi->s_blocks_per_group); 1830 sbi->s_blocks_per_group);
1795 goto failed_mount; 1831 goto failed_mount;
1796 } 1832 }
1797 if (sbi->s_frags_per_group > blocksize * 8) { 1833 if (sbi->s_frags_per_group > blocksize * 8) {
1798 printk (KERN_ERR 1834 ext3_msg(sb, KERN_ERR,
1799 "EXT3-fs: #fragments per group too big: %lu\n", 1835 "error: #fragments per group too big: %lu",
1800 sbi->s_frags_per_group); 1836 sbi->s_frags_per_group);
1801 goto failed_mount; 1837 goto failed_mount;
1802 } 1838 }
1803 if (sbi->s_inodes_per_group > blocksize * 8) { 1839 if (sbi->s_inodes_per_group > blocksize * 8) {
1804 printk (KERN_ERR 1840 ext3_msg(sb, KERN_ERR,
1805 "EXT3-fs: #inodes per group too big: %lu\n", 1841 "error: #inodes per group too big: %lu",
1806 sbi->s_inodes_per_group); 1842 sbi->s_inodes_per_group);
1807 goto failed_mount; 1843 goto failed_mount;
1808 } 1844 }
1809 1845
1810 if (le32_to_cpu(es->s_blocks_count) > 1846 if (le32_to_cpu(es->s_blocks_count) >
1811 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1847 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1812 printk(KERN_ERR "EXT3-fs: filesystem on %s:" 1848 ext3_msg(sb, KERN_ERR,
1813 " too large to mount safely\n", sb->s_id); 1849 "error: filesystem is too large to mount safely");
1814 if (sizeof(sector_t) < 8) 1850 if (sizeof(sector_t) < 8)
1815 printk(KERN_WARNING "EXT3-fs: CONFIG_LBDAF not " 1851 ext3_msg(sb, KERN_ERR,
1816 "enabled\n"); 1852 "error: CONFIG_LBDAF not enabled");
1817 goto failed_mount; 1853 goto failed_mount;
1818 } 1854 }
1819 1855
@@ -1827,7 +1863,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1827 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 1863 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1828 GFP_KERNEL); 1864 GFP_KERNEL);
1829 if (sbi->s_group_desc == NULL) { 1865 if (sbi->s_group_desc == NULL) {
1830 printk (KERN_ERR "EXT3-fs: not enough memory\n"); 1866 ext3_msg(sb, KERN_ERR,
1867 "error: not enough memory");
1831 goto failed_mount; 1868 goto failed_mount;
1832 } 1869 }
1833 1870
@@ -1837,14 +1874,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1837 block = descriptor_loc(sb, logic_sb_block, i); 1874 block = descriptor_loc(sb, logic_sb_block, i);
1838 sbi->s_group_desc[i] = sb_bread(sb, block); 1875 sbi->s_group_desc[i] = sb_bread(sb, block);
1839 if (!sbi->s_group_desc[i]) { 1876 if (!sbi->s_group_desc[i]) {
1840 printk (KERN_ERR "EXT3-fs: " 1877 ext3_msg(sb, KERN_ERR,
1841 "can't read group descriptor %d\n", i); 1878 "error: can't read group descriptor %d", i);
1842 db_count = i; 1879 db_count = i;
1843 goto failed_mount2; 1880 goto failed_mount2;
1844 } 1881 }
1845 } 1882 }
1846 if (!ext3_check_descriptors (sb)) { 1883 if (!ext3_check_descriptors (sb)) {
1847 printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n"); 1884 ext3_msg(sb, KERN_ERR,
1885 "error: group descriptors corrupted");
1848 goto failed_mount2; 1886 goto failed_mount2;
1849 } 1887 }
1850 sbi->s_gdb_count = db_count; 1888 sbi->s_gdb_count = db_count;
@@ -1862,7 +1900,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1862 ext3_count_dirs(sb)); 1900 ext3_count_dirs(sb));
1863 } 1901 }
1864 if (err) { 1902 if (err) {
1865 printk(KERN_ERR "EXT3-fs: insufficient memory\n"); 1903 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1866 goto failed_mount3; 1904 goto failed_mount3;
1867 } 1905 }
1868 1906
@@ -1910,9 +1948,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1910 goto failed_mount3; 1948 goto failed_mount3;
1911 } else { 1949 } else {
1912 if (!silent) 1950 if (!silent)
1913 printk (KERN_ERR 1951 ext3_msg(sb, KERN_ERR,
1914 "ext3: No journal on filesystem on %s\n", 1952 "error: no journal found. "
1915 sb->s_id); 1953 "mounting ext3 over ext2?");
1916 goto failed_mount3; 1954 goto failed_mount3;
1917 } 1955 }
1918 1956
@@ -1934,8 +1972,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1934 case EXT3_MOUNT_WRITEBACK_DATA: 1972 case EXT3_MOUNT_WRITEBACK_DATA:
1935 if (!journal_check_available_features 1973 if (!journal_check_available_features
1936 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { 1974 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
1937 printk(KERN_ERR "EXT3-fs: Journal does not support " 1975 ext3_msg(sb, KERN_ERR,
1938 "requested data journaling mode\n"); 1976 "error: journal does not support "
1977 "requested data journaling mode");
1939 goto failed_mount4; 1978 goto failed_mount4;
1940 } 1979 }
1941 default: 1980 default:
@@ -1944,8 +1983,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1944 1983
1945 if (test_opt(sb, NOBH)) { 1984 if (test_opt(sb, NOBH)) {
1946 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { 1985 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
1947 printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - " 1986 ext3_msg(sb, KERN_WARNING,
1948 "its supported only with writeback mode\n"); 1987 "warning: ignoring nobh option - "
1988 "it is supported only with writeback mode");
1949 clear_opt(sbi->s_mount_opt, NOBH); 1989 clear_opt(sbi->s_mount_opt, NOBH);
1950 } 1990 }
1951 } 1991 }
@@ -1956,18 +1996,18 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1956 1996
1957 root = ext3_iget(sb, EXT3_ROOT_INO); 1997 root = ext3_iget(sb, EXT3_ROOT_INO);
1958 if (IS_ERR(root)) { 1998 if (IS_ERR(root)) {
1959 printk(KERN_ERR "EXT3-fs: get root inode failed\n"); 1999 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
1960 ret = PTR_ERR(root); 2000 ret = PTR_ERR(root);
1961 goto failed_mount4; 2001 goto failed_mount4;
1962 } 2002 }
1963 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2003 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1964 iput(root); 2004 iput(root);
1965 printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); 2005 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
1966 goto failed_mount4; 2006 goto failed_mount4;
1967 } 2007 }
1968 sb->s_root = d_alloc_root(root); 2008 sb->s_root = d_alloc_root(root);
1969 if (!sb->s_root) { 2009 if (!sb->s_root) {
1970 printk(KERN_ERR "EXT3-fs: get root dentry failed\n"); 2010 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
1971 iput(root); 2011 iput(root);
1972 ret = -ENOMEM; 2012 ret = -ENOMEM;
1973 goto failed_mount4; 2013 goto failed_mount4;
@@ -1986,9 +2026,9 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1986 ext3_orphan_cleanup(sb, es); 2026 ext3_orphan_cleanup(sb, es);
1987 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; 2027 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
1988 if (needs_recovery) 2028 if (needs_recovery)
1989 printk (KERN_INFO "EXT3-fs: recovery complete.\n"); 2029 ext3_msg(sb, KERN_INFO, "recovery complete");
1990 ext3_mark_recovery_complete(sb, es); 2030 ext3_mark_recovery_complete(sb, es);
1991 printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n", 2031 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
1992 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": 2032 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
1993 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": 2033 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1994 "writeback"); 2034 "writeback");
@@ -1998,7 +2038,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1998 2038
1999cantfind_ext3: 2039cantfind_ext3:
2000 if (!silent) 2040 if (!silent)
2001 printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n", 2041 ext3_msg(sb, KERN_INFO,
2042 "error: can't find ext3 filesystem on dev %s.",
2002 sb->s_id); 2043 sb->s_id);
2003 goto failed_mount; 2044 goto failed_mount;
2004 2045
@@ -2066,27 +2107,27 @@ static journal_t *ext3_get_journal(struct super_block *sb,
2066 2107
2067 journal_inode = ext3_iget(sb, journal_inum); 2108 journal_inode = ext3_iget(sb, journal_inum);
2068 if (IS_ERR(journal_inode)) { 2109 if (IS_ERR(journal_inode)) {
2069 printk(KERN_ERR "EXT3-fs: no journal found.\n"); 2110 ext3_msg(sb, KERN_ERR, "error: no journal found");
2070 return NULL; 2111 return NULL;
2071 } 2112 }
2072 if (!journal_inode->i_nlink) { 2113 if (!journal_inode->i_nlink) {
2073 make_bad_inode(journal_inode); 2114 make_bad_inode(journal_inode);
2074 iput(journal_inode); 2115 iput(journal_inode);
2075 printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); 2116 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
2076 return NULL; 2117 return NULL;
2077 } 2118 }
2078 2119
2079 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", 2120 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2080 journal_inode, journal_inode->i_size); 2121 journal_inode, journal_inode->i_size);
2081 if (!S_ISREG(journal_inode->i_mode)) { 2122 if (!S_ISREG(journal_inode->i_mode)) {
2082 printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); 2123 ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
2083 iput(journal_inode); 2124 iput(journal_inode);
2084 return NULL; 2125 return NULL;
2085 } 2126 }
2086 2127
2087 journal = journal_init_inode(journal_inode); 2128 journal = journal_init_inode(journal_inode);
2088 if (!journal) { 2129 if (!journal) {
2089 printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); 2130 ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
2090 iput(journal_inode); 2131 iput(journal_inode);
2091 return NULL; 2132 return NULL;
2092 } 2133 }
@@ -2108,13 +2149,13 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2108 struct ext3_super_block * es; 2149 struct ext3_super_block * es;
2109 struct block_device *bdev; 2150 struct block_device *bdev;
2110 2151
2111 bdev = ext3_blkdev_get(j_dev); 2152 bdev = ext3_blkdev_get(j_dev, sb);
2112 if (bdev == NULL) 2153 if (bdev == NULL)
2113 return NULL; 2154 return NULL;
2114 2155
2115 if (bd_claim(bdev, sb)) { 2156 if (bd_claim(bdev, sb)) {
2116 printk(KERN_ERR 2157 ext3_msg(sb, KERN_ERR,
2117 "EXT3: failed to claim external journal device.\n"); 2158 "error: failed to claim external journal device");
2118 blkdev_put(bdev, FMODE_READ|FMODE_WRITE); 2159 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2119 return NULL; 2160 return NULL;
2120 } 2161 }
@@ -2122,8 +2163,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2122 blocksize = sb->s_blocksize; 2163 blocksize = sb->s_blocksize;
2123 hblock = bdev_logical_block_size(bdev); 2164 hblock = bdev_logical_block_size(bdev);
2124 if (blocksize < hblock) { 2165 if (blocksize < hblock) {
2125 printk(KERN_ERR 2166 ext3_msg(sb, KERN_ERR,
2126 "EXT3-fs: blocksize too small for journal device.\n"); 2167 "error: blocksize too small for journal device");
2127 goto out_bdev; 2168 goto out_bdev;
2128 } 2169 }
2129 2170
@@ -2131,8 +2172,8 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2131 offset = EXT3_MIN_BLOCK_SIZE % blocksize; 2172 offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2132 set_blocksize(bdev, blocksize); 2173 set_blocksize(bdev, blocksize);
2133 if (!(bh = __bread(bdev, sb_block, blocksize))) { 2174 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2134 printk(KERN_ERR "EXT3-fs: couldn't read superblock of " 2175 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2135 "external journal\n"); 2176 "external journal");
2136 goto out_bdev; 2177 goto out_bdev;
2137 } 2178 }
2138 2179
@@ -2140,14 +2181,14 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2140 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || 2181 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2141 !(le32_to_cpu(es->s_feature_incompat) & 2182 !(le32_to_cpu(es->s_feature_incompat) &
2142 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { 2183 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2143 printk(KERN_ERR "EXT3-fs: external journal has " 2184 ext3_msg(sb, KERN_ERR, "error: external journal has "
2144 "bad superblock\n"); 2185 "bad superblock");
2145 brelse(bh); 2186 brelse(bh);
2146 goto out_bdev; 2187 goto out_bdev;
2147 } 2188 }
2148 2189
2149 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 2190 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2150 printk(KERN_ERR "EXT3-fs: journal UUID does not match\n"); 2191 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
2151 brelse(bh); 2192 brelse(bh);
2152 goto out_bdev; 2193 goto out_bdev;
2153 } 2194 }
@@ -2159,19 +2200,21 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2159 journal = journal_init_dev(bdev, sb->s_bdev, 2200 journal = journal_init_dev(bdev, sb->s_bdev,
2160 start, len, blocksize); 2201 start, len, blocksize);
2161 if (!journal) { 2202 if (!journal) {
2162 printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); 2203 ext3_msg(sb, KERN_ERR,
2204 "error: failed to create device journal");
2163 goto out_bdev; 2205 goto out_bdev;
2164 } 2206 }
2165 journal->j_private = sb; 2207 journal->j_private = sb;
2166 ll_rw_block(READ, 1, &journal->j_sb_buffer); 2208 ll_rw_block(READ, 1, &journal->j_sb_buffer);
2167 wait_on_buffer(journal->j_sb_buffer); 2209 wait_on_buffer(journal->j_sb_buffer);
2168 if (!buffer_uptodate(journal->j_sb_buffer)) { 2210 if (!buffer_uptodate(journal->j_sb_buffer)) {
2169 printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); 2211 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2170 goto out_journal; 2212 goto out_journal;
2171 } 2213 }
2172 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { 2214 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2173 printk(KERN_ERR "EXT3-fs: External journal has more than one " 2215 ext3_msg(sb, KERN_ERR,
2174 "user (unsupported) - %d\n", 2216 "error: external journal has more than one "
2217 "user (unsupported) - %d",
2175 be32_to_cpu(journal->j_superblock->s_nr_users)); 2218 be32_to_cpu(journal->j_superblock->s_nr_users));
2176 goto out_journal; 2219 goto out_journal;
2177 } 2220 }
@@ -2197,8 +2240,8 @@ static int ext3_load_journal(struct super_block *sb,
2197 2240
2198 if (journal_devnum && 2241 if (journal_devnum &&
2199 journal_devnum != le32_to_cpu(es->s_journal_dev)) { 2242 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2200 printk(KERN_INFO "EXT3-fs: external journal device major/minor " 2243 ext3_msg(sb, KERN_INFO, "external journal device major/minor "
2201 "numbers have changed\n"); 2244 "numbers have changed");
2202 journal_dev = new_decode_dev(journal_devnum); 2245 journal_dev = new_decode_dev(journal_devnum);
2203 } else 2246 } else
2204 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); 2247 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
@@ -2213,21 +2256,21 @@ static int ext3_load_journal(struct super_block *sb,
2213 2256
2214 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { 2257 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2215 if (sb->s_flags & MS_RDONLY) { 2258 if (sb->s_flags & MS_RDONLY) {
2216 printk(KERN_INFO "EXT3-fs: INFO: recovery " 2259 ext3_msg(sb, KERN_INFO,
2217 "required on readonly filesystem.\n"); 2260 "recovery required on readonly filesystem");
2218 if (really_read_only) { 2261 if (really_read_only) {
2219 printk(KERN_ERR "EXT3-fs: write access " 2262 ext3_msg(sb, KERN_ERR, "error: write access "
2220 "unavailable, cannot proceed.\n"); 2263 "unavailable, cannot proceed");
2221 return -EROFS; 2264 return -EROFS;
2222 } 2265 }
2223 printk (KERN_INFO "EXT3-fs: write access will " 2266 ext3_msg(sb, KERN_INFO,
2224 "be enabled during recovery.\n"); 2267 "write access will be enabled during recovery");
2225 } 2268 }
2226 } 2269 }
2227 2270
2228 if (journal_inum && journal_dev) { 2271 if (journal_inum && journal_dev) {
2229 printk(KERN_ERR "EXT3-fs: filesystem has both journal " 2272 ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
2230 "and inode journals!\n"); 2273 "and inode journals");
2231 return -EINVAL; 2274 return -EINVAL;
2232 } 2275 }
2233 2276
@@ -2242,7 +2285,7 @@ static int ext3_load_journal(struct super_block *sb,
2242 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2285 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2243 err = journal_update_format(journal); 2286 err = journal_update_format(journal);
2244 if (err) { 2287 if (err) {
2245 printk(KERN_ERR "EXT3-fs: error updating journal.\n"); 2288 ext3_msg(sb, KERN_ERR, "error updating journal");
2246 journal_destroy(journal); 2289 journal_destroy(journal);
2247 return err; 2290 return err;
2248 } 2291 }
@@ -2254,7 +2297,7 @@ static int ext3_load_journal(struct super_block *sb,
2254 err = journal_load(journal); 2297 err = journal_load(journal);
2255 2298
2256 if (err) { 2299 if (err) {
2257 printk(KERN_ERR "EXT3-fs: error loading journal.\n"); 2300 ext3_msg(sb, KERN_ERR, "error loading journal");
2258 journal_destroy(journal); 2301 journal_destroy(journal);
2259 return err; 2302 return err;
2260 } 2303 }
@@ -2273,16 +2316,17 @@ static int ext3_load_journal(struct super_block *sb,
2273 return 0; 2316 return 0;
2274} 2317}
2275 2318
2276static int ext3_create_journal(struct super_block * sb, 2319static int ext3_create_journal(struct super_block *sb,
2277 struct ext3_super_block * es, 2320 struct ext3_super_block *es,
2278 unsigned int journal_inum) 2321 unsigned int journal_inum)
2279{ 2322{
2280 journal_t *journal; 2323 journal_t *journal;
2281 int err; 2324 int err;
2282 2325
2283 if (sb->s_flags & MS_RDONLY) { 2326 if (sb->s_flags & MS_RDONLY) {
2284 printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " 2327 ext3_msg(sb, KERN_ERR,
2285 "create journal.\n"); 2328 "error: readonly filesystem when trying to "
2329 "create journal");
2286 return -EROFS; 2330 return -EROFS;
2287 } 2331 }
2288 2332
@@ -2290,12 +2334,12 @@ static int ext3_create_journal(struct super_block * sb,
2290 if (!journal) 2334 if (!journal)
2291 return -EINVAL; 2335 return -EINVAL;
2292 2336
2293 printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", 2337 ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
2294 journal_inum); 2338 journal_inum);
2295 2339
2296 err = journal_create(journal); 2340 err = journal_create(journal);
2297 if (err) { 2341 if (err) {
2298 printk(KERN_ERR "EXT3-fs: error creating journal.\n"); 2342 ext3_msg(sb, KERN_ERR, "error creating journal");
2299 journal_destroy(journal); 2343 journal_destroy(journal);
2300 return -EIO; 2344 return -EIO;
2301 } 2345 }
@@ -2376,8 +2420,8 @@ out:
2376 * has recorded an error from a previous lifetime, move that error to the 2420 * has recorded an error from a previous lifetime, move that error to the
2377 * main filesystem now. 2421 * main filesystem now.
2378 */ 2422 */
2379static void ext3_clear_journal_err(struct super_block * sb, 2423static void ext3_clear_journal_err(struct super_block *sb,
2380 struct ext3_super_block * es) 2424 struct ext3_super_block *es)
2381{ 2425{
2382 journal_t *journal; 2426 journal_t *journal;
2383 int j_errno; 2427 int j_errno;
@@ -2568,10 +2612,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2568 __le32 ret; 2612 __le32 ret;
2569 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, 2613 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2570 ~EXT3_FEATURE_RO_COMPAT_SUPP))) { 2614 ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
2571 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2615 ext3_msg(sb, KERN_WARNING,
2572 "remount RDWR because of unsupported " 2616 "warning: couldn't remount RDWR "
2573 "optional features (%x).\n", 2617 "because of unsupported optional "
2574 sb->s_id, le32_to_cpu(ret)); 2618 "features (%x)", le32_to_cpu(ret));
2575 err = -EROFS; 2619 err = -EROFS;
2576 goto restore_opts; 2620 goto restore_opts;
2577 } 2621 }
@@ -2582,11 +2626,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
2582 * require a full umount/remount for now. 2626 * require a full umount/remount for now.
2583 */ 2627 */
2584 if (es->s_last_orphan) { 2628 if (es->s_last_orphan) {
2585 printk(KERN_WARNING "EXT3-fs: %s: couldn't " 2629 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2586 "remount RDWR because of unprocessed " 2630 "remount RDWR because of unprocessed "
2587 "orphan inode list. Please " 2631 "orphan inode list. Please "
2588 "umount/remount instead.\n", 2632 "umount/remount instead.");
2589 sb->s_id);
2590 err = -EINVAL; 2633 err = -EINVAL;
2591 goto restore_opts; 2634 goto restore_opts;
2592 } 2635 }
@@ -2686,13 +2729,11 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2686 buf->f_bsize = sb->s_blocksize; 2729 buf->f_bsize = sb->s_blocksize;
2687 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; 2730 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
2688 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); 2731 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
2689 es->s_free_blocks_count = cpu_to_le32(buf->f_bfree);
2690 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); 2732 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2691 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) 2733 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2692 buf->f_bavail = 0; 2734 buf->f_bavail = 0;
2693 buf->f_files = le32_to_cpu(es->s_inodes_count); 2735 buf->f_files = le32_to_cpu(es->s_inodes_count);
2694 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 2736 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
2695 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
2696 buf->f_namelen = EXT3_NAME_LEN; 2737 buf->f_namelen = EXT3_NAME_LEN;
2697 fsid = le64_to_cpup((void *)es->s_uuid) ^ 2738 fsid = le64_to_cpup((void *)es->s_uuid) ^
2698 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 2739 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -2837,9 +2878,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2837 if (EXT3_SB(sb)->s_qf_names[type]) { 2878 if (EXT3_SB(sb)->s_qf_names[type]) {
2838 /* Quotafile not of fs root? */ 2879 /* Quotafile not of fs root? */
2839 if (path.dentry->d_parent != sb->s_root) 2880 if (path.dentry->d_parent != sb->s_root)
2840 printk(KERN_WARNING 2881 ext3_msg(sb, KERN_WARNING,
2841 "EXT3-fs: Quota file not on filesystem root. " 2882 "warning: Quota file not on filesystem root. "
2842 "Journaled quota will not work.\n"); 2883 "Journaled quota will not work.");
2843 } 2884 }
2844 2885
2845 /* 2886 /*
@@ -2921,8 +2962,9 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
2921 handle_t *handle = journal_current_handle(); 2962 handle_t *handle = journal_current_handle();
2922 2963
2923 if (!handle) { 2964 if (!handle) {
2924 printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)" 2965 ext3_msg(sb, KERN_WARNING,
2925 " cancelled because transaction is not started.\n", 2966 "warning: quota write (off=%llu, len=%llu)"
2967 " cancelled because transaction is not started.",
2926 (unsigned long long)off, (unsigned long long)len); 2968 (unsigned long long)off, (unsigned long long)len);
2927 return -EIO; 2969 return -EIO;
2928 } 2970 }
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 545e37c4b91e..387d92d00b97 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -960,6 +960,10 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
960 if (error) 960 if (error)
961 goto cleanup; 961 goto cleanup;
962 962
963 error = ext3_journal_get_write_access(handle, is.iloc.bh);
964 if (error)
965 goto cleanup;
966
963 if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { 967 if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
964 struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); 968 struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc);
965 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); 969 memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size);
@@ -985,9 +989,6 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
985 if (flags & XATTR_CREATE) 989 if (flags & XATTR_CREATE)
986 goto cleanup; 990 goto cleanup;
987 } 991 }
988 error = ext3_journal_get_write_access(handle, is.iloc.bh);
989 if (error)
990 goto cleanup;
991 if (!value) { 992 if (!value) {
992 if (!is.s.not_found) 993 if (!is.s.not_found)
993 error = ext3_xattr_ibody_set(handle, inode, &i, &is); 994 error = ext3_xattr_ibody_set(handle, inode, &i, &is);
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9f2d45d75b1a..9acf7e808139 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -26,6 +26,16 @@ config EXT4_FS
26 26
27 If unsure, say N. 27 If unsure, say N.
28 28
29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT3_FS=n || EXT2_FS=n
32 default y
33 help
34 Allow the ext4 file system driver code to be used for ext2 or
35 ext3 file system mounts. This allows users to reduce their
36 compiled kernel size by using one file system driver for
37 ext2, ext3, and ext4 file systems.
38
29config EXT4_FS_XATTR 39config EXT4_FS_XATTR
30 bool "Ext4 extended attributes" 40 bool "Ext4 extended attributes"
31 depends on EXT4_FS 41 depends on EXT4_FS
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1d0418980f8d..22bc7435d913 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -499,44 +499,6 @@ error_return:
499} 499}
500 500
501/** 501/**
502 * ext4_free_blocks() -- Free given blocks and update quota
503 * @handle: handle for this transaction
504 * @inode: inode
505 * @block: start physical block to free
506 * @count: number of blocks to count
507 * @metadata: Are these metadata blocks
508 */
509void ext4_free_blocks(handle_t *handle, struct inode *inode,
510 ext4_fsblk_t block, unsigned long count,
511 int metadata)
512{
513 struct super_block *sb;
514 unsigned long dquot_freed_blocks;
515
516 /* this isn't the right place to decide whether block is metadata
517 * inode.c/extents.c knows better, but for safety ... */
518 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
519 metadata = 1;
520
521 /* We need to make sure we don't reuse
522 * block released untill the transaction commit.
523 * writeback mode have weak data consistency so
524 * don't force data as metadata when freeing block
525 * for writeback mode.
526 */
527 if (metadata == 0 && !ext4_should_writeback_data(inode))
528 metadata = 1;
529
530 sb = inode->i_sb;
531
532 ext4_mb_free_blocks(handle, inode, block, count,
533 metadata, &dquot_freed_blocks);
534 if (dquot_freed_blocks)
535 vfs_dq_free_block(inode, dquot_freed_blocks);
536 return;
537}
538
539/**
540 * ext4_has_free_blocks() 502 * ext4_has_free_blocks()
541 * @sbi: in-core super block structure. 503 * @sbi: in-core super block structure.
542 * @nblocks: number of needed blocks 504 * @nblocks: number of needed blocks
@@ -761,7 +723,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
761static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, 723static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
762 ext4_group_t group) 724 ext4_group_t group)
763{ 725{
764 return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; 726 if (!ext4_bg_has_super(sb, group))
727 return 0;
728
729 if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
730 return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
731 else
732 return EXT4_SB(sb)->s_gdb_count;
765} 733}
766 734
767/** 735/**
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 50784ef07563..4df8621ec31c 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
160 if (ext4_bg_has_super(sb, i) && 160 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0))) 161 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i), 162 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1); 163 ext4_bg_num_gdb(sb, i) + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL); 164 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); 165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret) 166 if (ret)
@@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
228 struct rb_node *n = sbi->system_blks.rb_node; 228 struct rb_node *n = sbi->system_blks.rb_node;
229 229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || 230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count < start_blk) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es))) 232 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0; 233 return 0;
233 while (n) { 234 while (n) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8825515eeddd..ab31e65d46d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -376,6 +376,12 @@ struct ext4_new_group_data {
376 EXT4_GET_BLOCKS_DIO_CREATE_EXT) 376 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
377 377
378/* 378/*
379 * Flags used by ext4_free_blocks
380 */
381#define EXT4_FREE_BLOCKS_METADATA 0x0001
382#define EXT4_FREE_BLOCKS_FORGET 0x0002
383
384/*
379 * ioctl commands 385 * ioctl commands
380 */ 386 */
381#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS 387#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
@@ -703,6 +709,13 @@ struct ext4_inode_info {
703 struct list_head i_aio_dio_complete_list; 709 struct list_head i_aio_dio_complete_list;
704 /* current io_end structure for async DIO write*/ 710 /* current io_end structure for async DIO write*/
705 ext4_io_end_t *cur_aio_dio; 711 ext4_io_end_t *cur_aio_dio;
712
713 /*
714 * Transactions that contain inode's metadata needed to complete
715 * fsync and fdatasync, respectively.
716 */
717 tid_t i_sync_tid;
718 tid_t i_datasync_tid;
706}; 719};
707 720
708/* 721/*
@@ -750,6 +763,7 @@ struct ext4_inode_info {
750#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 763#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
751#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 764#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
752#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 765#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
766#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
753 767
754#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 768#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
755#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 769#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1324,8 +1338,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1324 ext4_fsblk_t goal, unsigned long *count, int *errp); 1338 ext4_fsblk_t goal, unsigned long *count, int *errp);
1325extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1339extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1326extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1340extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1327extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1328 ext4_fsblk_t block, unsigned long count, int metadata);
1329extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1341extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1330 ext4_fsblk_t block, unsigned long count); 1342 ext4_fsblk_t block, unsigned long count);
1331extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1343extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1384,16 +1396,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
1384extern void ext4_discard_preallocations(struct inode *); 1396extern void ext4_discard_preallocations(struct inode *);
1385extern int __init init_ext4_mballoc(void); 1397extern int __init init_ext4_mballoc(void);
1386extern void exit_ext4_mballoc(void); 1398extern void exit_ext4_mballoc(void);
1387extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1399extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1388 ext4_fsblk_t, unsigned long, int, unsigned long *); 1400 struct buffer_head *bh, ext4_fsblk_t block,
1401 unsigned long count, int flags);
1389extern int ext4_mb_add_groupinfo(struct super_block *sb, 1402extern int ext4_mb_add_groupinfo(struct super_block *sb,
1390 ext4_group_t i, struct ext4_group_desc *desc); 1403 ext4_group_t i, struct ext4_group_desc *desc);
1391extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1404extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
1392extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1405extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
1393 ext4_group_t, int); 1406 ext4_group_t, int);
1394/* inode.c */ 1407/* inode.c */
1395int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1396 struct buffer_head *bh, ext4_fsblk_t blocknr);
1397struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1408struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1398 ext4_lblk_t, int, int *); 1409 ext4_lblk_t, int, int *);
1399struct buffer_head *ext4_bread(handle_t *, struct inode *, 1410struct buffer_head *ext4_bread(handle_t *, struct inode *,
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 6a9409920dee..b57e5c711b6d 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -4,6 +4,8 @@
4 4
5#include "ext4_jbd2.h" 5#include "ext4_jbd2.h"
6 6
7#include <trace/events/ext4.h>
8
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 9int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 10 struct buffer_head *bh)
9{ 11{
@@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
32 return err; 34 return err;
33} 35}
34 36
35int __ext4_journal_forget(const char *where, handle_t *handle, 37/*
36 struct buffer_head *bh) 38 * The ext4 forget function must perform a revoke if we are freeing data
39 * which has been journaled. Metadata (eg. indirect blocks) must be
40 * revoked in all cases.
41 *
42 * "bh" may be NULL: a metadata block may have been freed from memory
43 * but there may still be a record of it in the journal, and that record
44 * still needs to be revoked.
45 *
46 * If the handle isn't valid we're not journaling, but we still need to
47 * call into ext4_journal_revoke() to put the buffer head.
48 */
49int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
50 struct inode *inode, struct buffer_head *bh,
51 ext4_fsblk_t blocknr)
37{ 52{
38 int err = 0; 53 int err;
39 54
40 if (ext4_handle_valid(handle)) { 55 might_sleep();
41 err = jbd2_journal_forget(handle, bh); 56
42 if (err) 57 trace_ext4_forget(inode, is_metadata, blocknr);
43 ext4_journal_abort_handle(where, __func__, bh, 58 BUFFER_TRACE(bh, "enter");
44 handle, err); 59
45 } 60 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
46 else 61 "data mode %x\n",
62 bh, is_metadata, inode->i_mode,
63 test_opt(inode->i_sb, DATA_FLAGS));
64
65 /* In the no journal case, we can just do a bforget and return */
66 if (!ext4_handle_valid(handle)) {
47 bforget(bh); 67 bforget(bh);
48 return err; 68 return 0;
49} 69 }
50 70
51int __ext4_journal_revoke(const char *where, handle_t *handle, 71 /* Never use the revoke function if we are doing full data
52 ext4_fsblk_t blocknr, struct buffer_head *bh) 72 * journaling: there is no need to, and a V1 superblock won't
53{ 73 * support it. Otherwise, only skip the revoke on un-journaled
54 int err = 0; 74 * data blocks. */
55 75
56 if (ext4_handle_valid(handle)) { 76 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
57 err = jbd2_journal_revoke(handle, blocknr, bh); 77 (!is_metadata && !ext4_should_journal_data(inode))) {
58 if (err) 78 if (bh) {
59 ext4_journal_abort_handle(where, __func__, bh, 79 BUFFER_TRACE(bh, "call jbd2_journal_forget");
60 handle, err); 80 err = jbd2_journal_forget(handle, bh);
81 if (err)
82 ext4_journal_abort_handle(where, __func__, bh,
83 handle, err);
84 return err;
85 }
86 return 0;
61 } 87 }
62 else 88
63 bforget(bh); 89 /*
90 * data!=journal && (is_metadata || should_journal_data(inode))
91 */
92 BUFFER_TRACE(bh, "call jbd2_journal_revoke");
93 err = jbd2_journal_revoke(handle, blocknr, bh);
94 if (err) {
95 ext4_journal_abort_handle(where, __func__, bh, handle, err);
96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err);
98 }
99 BUFFER_TRACE(bh, "exit");
64 return err; 100 return err;
65} 101}
66 102
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index a2865980342f..05eca817d704 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -49,7 +49,7 @@
49 49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ 50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
53 53
54/* 54/*
55 * Define the number of metadata blocks we need to account to modify data. 55 * Define the number of metadata blocks we need to account to modify data.
@@ -57,7 +57,7 @@
57 * This include super block, inode block, quota blocks and xattr blocks 57 * This include super block, inode block, quota blocks and xattr blocks
58 */ 58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
@@ -92,6 +92,7 @@
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) 94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
95
95#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ 96#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
96 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) 97 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
97#else 98#else
@@ -99,6 +100,9 @@
99#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 100#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
100#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 101#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
101#endif 102#endif
103#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
104#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
105#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
102 106
103int 107int
104ext4_mark_iloc_dirty(handle_t *handle, 108ext4_mark_iloc_dirty(handle_t *handle,
@@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
116int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); 120int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
117 121
118/* 122/*
119 * Wrapper functions with which ext4 calls into JBD. The intent here is 123 * Wrapper functions with which ext4 calls into JBD.
120 * to allow these to be turned into appropriate stubs so ext4 can control
121 * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
122 * been done yet.
123 */ 124 */
124
125void ext4_journal_abort_handle(const char *caller, const char *err_fn, 125void ext4_journal_abort_handle(const char *caller, const char *err_fn,
126 struct buffer_head *bh, handle_t *handle, int err); 126 struct buffer_head *bh, handle_t *handle, int err);
127 127
@@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 131int __ext4_journal_get_write_access(const char *where, handle_t *handle,
132 struct buffer_head *bh); 132 struct buffer_head *bh);
133 133
134/* When called with an invalid handle, this will still do a put on the BH */ 134int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
135int __ext4_journal_forget(const char *where, handle_t *handle, 135 struct inode *inode, struct buffer_head *bh,
136 struct buffer_head *bh); 136 ext4_fsblk_t blocknr);
137
138/* When called with an invalid handle, this will still do a put on the BH */
139int __ext4_journal_revoke(const char *where, handle_t *handle,
140 ext4_fsblk_t blocknr, struct buffer_head *bh);
141 137
142int __ext4_journal_get_create_access(const char *where, 138int __ext4_journal_get_create_access(const char *where,
143 handle_t *handle, struct buffer_head *bh); 139 handle_t *handle, struct buffer_head *bh);
@@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
149 __ext4_journal_get_undo_access(__func__, (handle), (bh)) 145 __ext4_journal_get_undo_access(__func__, (handle), (bh))
150#define ext4_journal_get_write_access(handle, bh) \ 146#define ext4_journal_get_write_access(handle, bh) \
151 __ext4_journal_get_write_access(__func__, (handle), (bh)) 147 __ext4_journal_get_write_access(__func__, (handle), (bh))
152#define ext4_journal_revoke(handle, blocknr, bh) \ 148#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
153 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) 149 __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
150 (block_nr))
154#define ext4_journal_get_create_access(handle, bh) \ 151#define ext4_journal_get_create_access(handle, bh) \
155 __ext4_journal_get_create_access(__func__, (handle), (bh)) 152 __ext4_journal_get_create_access(__func__, (handle), (bh))
156#define ext4_journal_forget(handle, bh) \
157 __ext4_journal_forget(__func__, (handle), (bh))
158#define ext4_handle_dirty_metadata(handle, inode, bh) \ 153#define ext4_handle_dirty_metadata(handle, inode, bh) \
159 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) 154 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
160 155
@@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
254 return 0; 249 return 0;
255} 250}
256 251
252static inline void ext4_update_inode_fsync_trans(handle_t *handle,
253 struct inode *inode,
254 int datasync)
255{
256 struct ext4_inode_info *ei = EXT4_I(inode);
257
258 if (ext4_handle_valid(handle)) {
259 ei->i_sync_tid = handle->h_transaction->t_tid;
260 if (datasync)
261 ei->i_datasync_tid = handle->h_transaction->t_tid;
262 }
263}
264
257/* super.c */ 265/* super.c */
258int ext4_force_commit(struct super_block *sb); 266int ext4_force_commit(struct super_block *sb);
259 267
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 715264b4bae4..3a7928f825e4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1007,7 +1007,8 @@ cleanup:
1007 for (i = 0; i < depth; i++) { 1007 for (i = 0; i < depth; i++) {
1008 if (!ablocks[i]) 1008 if (!ablocks[i])
1009 continue; 1009 continue;
1010 ext4_free_blocks(handle, inode, ablocks[i], 1, 1); 1010 ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
1011 EXT4_FREE_BLOCKS_METADATA);
1011 } 1012 }
1012 } 1013 }
1013 kfree(ablocks); 1014 kfree(ablocks);
@@ -1761,7 +1762,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1761 while (block < last && block != EXT_MAX_BLOCK) { 1762 while (block < last && block != EXT_MAX_BLOCK) {
1762 num = last - block; 1763 num = last - block;
1763 /* find extent for this block */ 1764 /* find extent for this block */
1765 down_read(&EXT4_I(inode)->i_data_sem);
1764 path = ext4_ext_find_extent(inode, block, path); 1766 path = ext4_ext_find_extent(inode, block, path);
1767 up_read(&EXT4_I(inode)->i_data_sem);
1765 if (IS_ERR(path)) { 1768 if (IS_ERR(path)) {
1766 err = PTR_ERR(path); 1769 err = PTR_ERR(path);
1767 path = NULL; 1770 path = NULL;
@@ -1957,7 +1960,6 @@ errout:
1957static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 1960static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1958 struct ext4_ext_path *path) 1961 struct ext4_ext_path *path)
1959{ 1962{
1960 struct buffer_head *bh;
1961 int err; 1963 int err;
1962 ext4_fsblk_t leaf; 1964 ext4_fsblk_t leaf;
1963 1965
@@ -1973,9 +1975,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1973 if (err) 1975 if (err)
1974 return err; 1976 return err;
1975 ext_debug("index is empty, remove it, free block %llu\n", leaf); 1977 ext_debug("index is empty, remove it, free block %llu\n", leaf);
1976 bh = sb_find_get_block(inode->i_sb, leaf); 1978 ext4_free_blocks(handle, inode, 0, leaf, 1,
1977 ext4_forget(handle, 1, inode, bh, leaf); 1979 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1978 ext4_free_blocks(handle, inode, leaf, 1, 1);
1979 return err; 1980 return err;
1980} 1981}
1981 1982
@@ -2042,12 +2043,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2042 struct ext4_extent *ex, 2043 struct ext4_extent *ex,
2043 ext4_lblk_t from, ext4_lblk_t to) 2044 ext4_lblk_t from, ext4_lblk_t to)
2044{ 2045{
2045 struct buffer_head *bh;
2046 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2046 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2047 int i, metadata = 0; 2047 int flags = EXT4_FREE_BLOCKS_FORGET;
2048 2048
2049 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2049 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2050 metadata = 1; 2050 flags |= EXT4_FREE_BLOCKS_METADATA;
2051#ifdef EXTENTS_STATS 2051#ifdef EXTENTS_STATS
2052 { 2052 {
2053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2072,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2072 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2072 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2073 start = ext_pblock(ex) + ee_len - num; 2073 start = ext_pblock(ex) + ee_len - num;
2074 ext_debug("free last %u blocks starting %llu\n", num, start); 2074 ext_debug("free last %u blocks starting %llu\n", num, start);
2075 for (i = 0; i < num; i++) { 2075 ext4_free_blocks(handle, inode, 0, start, num, flags);
2076 bh = sb_find_get_block(inode->i_sb, start + i);
2077 ext4_forget(handle, 0, inode, bh, start + i);
2078 }
2079 ext4_free_blocks(handle, inode, start, num, metadata);
2080 } else if (from == le32_to_cpu(ex->ee_block) 2076 } else if (from == le32_to_cpu(ex->ee_block)
2081 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2077 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2082 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2078 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2167,7 +2163,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2167 correct_index = 1; 2163 correct_index = 1;
2168 credits += (ext_depth(inode)) + 1; 2164 credits += (ext_depth(inode)) + 1;
2169 } 2165 }
2170 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 2166 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2171 2167
2172 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2168 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
2173 if (err) 2169 if (err)
@@ -3064,6 +3060,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3064 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { 3060 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
3065 ret = ext4_convert_unwritten_extents_dio(handle, inode, 3061 ret = ext4_convert_unwritten_extents_dio(handle, inode,
3066 path); 3062 path);
3063 if (ret >= 0)
3064 ext4_update_inode_fsync_trans(handle, inode, 1);
3067 goto out2; 3065 goto out2;
3068 } 3066 }
3069 /* buffered IO case */ 3067 /* buffered IO case */
@@ -3091,6 +3089,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3091 ret = ext4_ext_convert_to_initialized(handle, inode, 3089 ret = ext4_ext_convert_to_initialized(handle, inode,
3092 path, iblock, 3090 path, iblock,
3093 max_blocks); 3091 max_blocks);
3092 if (ret >= 0)
3093 ext4_update_inode_fsync_trans(handle, inode, 1);
3094out: 3094out:
3095 if (ret <= 0) { 3095 if (ret <= 0) {
3096 err = ret; 3096 err = ret;
@@ -3319,8 +3319,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3319 /* not a good idea to call discard here directly, 3319 /* not a good idea to call discard here directly,
3320 * but otherwise we'd need to call it every free() */ 3320 * but otherwise we'd need to call it every free() */
3321 ext4_discard_preallocations(inode); 3321 ext4_discard_preallocations(inode);
3322 ext4_free_blocks(handle, inode, ext_pblock(&newex), 3322 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
3323 ext4_ext_get_actual_len(&newex), 0); 3323 ext4_ext_get_actual_len(&newex), 0);
3324 goto out2; 3324 goto out2;
3325 } 3325 }
3326 3326
@@ -3329,10 +3329,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3329 allocated = ext4_ext_get_actual_len(&newex); 3329 allocated = ext4_ext_get_actual_len(&newex);
3330 set_buffer_new(bh_result); 3330 set_buffer_new(bh_result);
3331 3331
3332 /* Cache only when it is _not_ an uninitialized extent */ 3332 /*
3333 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) 3333 * Cache the extent and update transaction to commit on fdatasync only
3334 * when it is _not_ an uninitialized extent.
3335 */
3336 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3334 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 3337 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
3335 EXT4_EXT_CACHE_EXTENT); 3338 EXT4_EXT_CACHE_EXTENT);
3339 ext4_update_inode_fsync_trans(handle, inode, 1);
3340 } else
3341 ext4_update_inode_fsync_trans(handle, inode, 0);
3336out: 3342out:
3337 if (allocated > max_blocks) 3343 if (allocated > max_blocks)
3338 allocated = max_blocks; 3344 allocated = max_blocks;
@@ -3720,10 +3726,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3720 * Walk the extent tree gathering extent information. 3726 * Walk the extent tree gathering extent information.
3721 * ext4_ext_fiemap_cb will push extents back to user. 3727 * ext4_ext_fiemap_cb will push extents back to user.
3722 */ 3728 */
3723 down_read(&EXT4_I(inode)->i_data_sem);
3724 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3729 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3725 ext4_ext_fiemap_cb, fieinfo); 3730 ext4_ext_fiemap_cb, fieinfo);
3726 up_read(&EXT4_I(inode)->i_data_sem);
3727 } 3731 }
3728 3732
3729 return error; 3733 return error;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2b1531266ee2..0b22497d92e1 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -51,25 +51,30 @@
51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) 51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 struct ext4_inode_info *ei = EXT4_I(inode);
54 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 55 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
55 int err, ret = 0; 56 int ret;
57 tid_t commit_tid;
56 58
57 J_ASSERT(ext4_journal_current_handle() == NULL); 59 J_ASSERT(ext4_journal_current_handle() == NULL);
58 60
59 trace_ext4_sync_file(file, dentry, datasync); 61 trace_ext4_sync_file(file, dentry, datasync);
60 62
63 if (inode->i_sb->s_flags & MS_RDONLY)
64 return 0;
65
61 ret = flush_aio_dio_completed_IO(inode); 66 ret = flush_aio_dio_completed_IO(inode);
62 if (ret < 0) 67 if (ret < 0)
63 goto out; 68 return ret;
69
70 if (!journal)
71 return simple_fsync(file, dentry, datasync);
72
64 /* 73 /*
65 * data=writeback: 74 * data=writeback,ordered:
66 * The caller's filemap_fdatawrite()/wait will sync the data. 75 * The caller's filemap_fdatawrite()/wait will sync the data.
67 * sync_inode() will sync the metadata 76 * Metadata is in the journal, we wait for proper transaction to
68 * 77 * commit here.
69 * data=ordered:
70 * The caller's filemap_fdatawrite() will write the data and
71 * sync_inode() will write the inode if it is dirty. Then the caller's
72 * filemap_fdatawait() will wait on the pages.
73 * 78 *
74 * data=journal: 79 * data=journal:
75 * filemap_fdatawrite won't do anything (the buffers are clean). 80 * filemap_fdatawrite won't do anything (the buffers are clean).
@@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
79 * (they were dirtied by commit). But that's OK - the blocks are 84 * (they were dirtied by commit). But that's OK - the blocks are
80 * safe in-journal, which is all fsync() needs to ensure. 85 * safe in-journal, which is all fsync() needs to ensure.
81 */ 86 */
82 if (ext4_should_journal_data(inode)) { 87 if (ext4_should_journal_data(inode))
83 ret = ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
84 goto out;
85 }
86 89
87 if (!journal) 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
88 ret = sync_mapping_buffers(inode->i_mapping); 91 if (jbd2_log_start_commit(journal, commit_tid))
89 92 jbd2_log_wait_commit(journal, commit_tid);
90 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 93 else if (journal->j_flags & JBD2_BARRIER)
91 goto out;
92
93 /*
94 * The VFS has written the file data. If the inode is unaltered
95 * then we need not start a commit.
96 */
97 if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
98 struct writeback_control wbc = {
99 .sync_mode = WB_SYNC_ALL,
100 .nr_to_write = 0, /* sys_fsync did this */
101 };
102 err = sync_inode(inode, &wbc);
103 if (ret == 0)
104 ret = err;
105 }
106out:
107 if (journal && (journal->j_flags & JBD2_BARRIER))
108 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
109 return ret; 95 return ret;
110} 96}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2c8caa51addb..5352db1a3086 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,58 +71,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
71} 71}
72 72
73/* 73/*
74 * The ext4 forget function must perform a revoke if we are freeing data
75 * which has been journaled. Metadata (eg. indirect blocks) must be
76 * revoked in all cases.
77 *
78 * "bh" may be NULL: a metadata block may have been freed from memory
79 * but there may still be a record of it in the journal, and that record
80 * still needs to be revoked.
81 *
82 * If the handle isn't valid we're not journaling, but we still need to
83 * call into ext4_journal_revoke() to put the buffer head.
84 */
85int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
86 struct buffer_head *bh, ext4_fsblk_t blocknr)
87{
88 int err;
89
90 might_sleep();
91
92 BUFFER_TRACE(bh, "enter");
93
94 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
95 "data mode %x\n",
96 bh, is_metadata, inode->i_mode,
97 test_opt(inode->i_sb, DATA_FLAGS));
98
99 /* Never use the revoke function if we are doing full data
100 * journaling: there is no need to, and a V1 superblock won't
101 * support it. Otherwise, only skip the revoke on un-journaled
102 * data blocks. */
103
104 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
105 (!is_metadata && !ext4_should_journal_data(inode))) {
106 if (bh) {
107 BUFFER_TRACE(bh, "call jbd2_journal_forget");
108 return ext4_journal_forget(handle, bh);
109 }
110 return 0;
111 }
112
113 /*
114 * data!=journal && (is_metadata || should_journal_data(inode))
115 */
116 BUFFER_TRACE(bh, "call ext4_journal_revoke");
117 err = ext4_journal_revoke(handle, blocknr, bh);
118 if (err)
119 ext4_abort(inode->i_sb, __func__,
120 "error %d when attempting revoke", err);
121 BUFFER_TRACE(bh, "exit");
122 return err;
123}
124
125/*
126 * Work out how many blocks we need to proceed with the next chunk of a 74 * Work out how many blocks we need to proceed with the next chunk of a
127 * truncate transaction. 75 * truncate transaction.
128 */ 76 */
@@ -721,7 +669,7 @@ allocated:
721 return ret; 669 return ret;
722failed_out: 670failed_out:
723 for (i = 0; i < index; i++) 671 for (i = 0; i < index; i++)
724 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 672 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
725 return ret; 673 return ret;
726} 674}
727 675
@@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
817 return err; 765 return err;
818failed: 766failed:
819 /* Allocation failed, free what we already allocated */ 767 /* Allocation failed, free what we already allocated */
768 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
820 for (i = 1; i <= n ; i++) { 769 for (i = 1; i <= n ; i++) {
821 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 770 /*
822 ext4_journal_forget(handle, branch[i].bh); 771 * branch[i].bh is newly allocated, so there is no
772 * need to revoke the block, which is why we don't
773 * need to set EXT4_FREE_BLOCKS_METADATA.
774 */
775 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
776 EXT4_FREE_BLOCKS_FORGET);
823 } 777 }
824 for (i = 0; i < indirect_blks; i++) 778 for (i = n+1; i < indirect_blks; i++)
825 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 779 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
826 780
827 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 781 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
828 782
829 return err; 783 return err;
830} 784}
@@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
903 857
904err_out: 858err_out:
905 for (i = 1; i <= num; i++) { 859 for (i = 1; i <= num; i++) {
906 BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); 860 /*
907 ext4_journal_forget(handle, where[i].bh); 861 * branch[i].bh is newly allocated, so there is no
908 ext4_free_blocks(handle, inode, 862 * need to revoke the block, which is why we don't
909 le32_to_cpu(where[i-1].key), 1, 0); 863 * need to set EXT4_FREE_BLOCKS_METADATA.
864 */
865 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
866 EXT4_FREE_BLOCKS_FORGET);
910 } 867 }
911 ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); 868 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
869 blks, 0);
912 870
913 return err; 871 return err;
914} 872}
@@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
1021 if (!err) 979 if (!err)
1022 err = ext4_splice_branch(handle, inode, iblock, 980 err = ext4_splice_branch(handle, inode, iblock,
1023 partial, indirect_blks, count); 981 partial, indirect_blks, count);
1024 else 982 if (err)
1025 goto cleanup; 983 goto cleanup;
1026 984
1027 set_buffer_new(bh_result); 985 set_buffer_new(bh_result);
986
987 ext4_update_inode_fsync_trans(handle, inode, 1);
1028got_it: 988got_it:
1029 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 989 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
1030 if (count > blocks_to_boundary) 990 if (count > blocks_to_boundary)
@@ -1052,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
1052 EXT4_I(inode)->i_reserved_meta_blocks; 1012 EXT4_I(inode)->i_reserved_meta_blocks;
1053 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1013 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1054 1014
1055 return total; 1015 return (total << inode->i_blkbits);
1056} 1016}
1057/* 1017/*
1058 * Calculate the number of metadata blocks need to reserve 1018 * Calculate the number of metadata blocks need to reserve
@@ -1534,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle,
1534 return ext4_journal_get_write_access(handle, bh); 1494 return ext4_journal_get_write_access(handle, bh);
1535} 1495}
1536 1496
1497/*
1498 * Truncate blocks that were not used by write. We have to truncate the
1499 * pagecache as well so that corresponding buffers get properly unmapped.
1500 */
1501static void ext4_truncate_failed_write(struct inode *inode)
1502{
1503 truncate_inode_pages(inode->i_mapping, inode->i_size);
1504 ext4_truncate(inode);
1505}
1506
1537static int ext4_write_begin(struct file *file, struct address_space *mapping, 1507static int ext4_write_begin(struct file *file, struct address_space *mapping,
1538 loff_t pos, unsigned len, unsigned flags, 1508 loff_t pos, unsigned len, unsigned flags,
1539 struct page **pagep, void **fsdata) 1509 struct page **pagep, void **fsdata)
@@ -1599,7 +1569,7 @@ retry:
1599 1569
1600 ext4_journal_stop(handle); 1570 ext4_journal_stop(handle);
1601 if (pos + len > inode->i_size) { 1571 if (pos + len > inode->i_size) {
1602 ext4_truncate(inode); 1572 ext4_truncate_failed_write(inode);
1603 /* 1573 /*
1604 * If truncate failed early the inode might 1574 * If truncate failed early the inode might
1605 * still be on the orphan list; we need to 1575 * still be on the orphan list; we need to
@@ -1709,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file,
1709 ret = ret2; 1679 ret = ret2;
1710 1680
1711 if (pos + len > inode->i_size) { 1681 if (pos + len > inode->i_size) {
1712 ext4_truncate(inode); 1682 ext4_truncate_failed_write(inode);
1713 /* 1683 /*
1714 * If truncate failed early the inode might still be 1684 * If truncate failed early the inode might still be
1715 * on the orphan list; we need to make sure the inode 1685 * on the orphan list; we need to make sure the inode
@@ -1751,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file,
1751 ret = ret2; 1721 ret = ret2;
1752 1722
1753 if (pos + len > inode->i_size) { 1723 if (pos + len > inode->i_size) {
1754 ext4_truncate(inode); 1724 ext4_truncate_failed_write(inode);
1755 /* 1725 /*
1756 * If truncate failed early the inode might still be 1726 * If truncate failed early the inode might still be
1757 * on the orphan list; we need to make sure the inode 1727 * on the orphan list; we need to make sure the inode
@@ -1814,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file,
1814 if (!ret) 1784 if (!ret)
1815 ret = ret2; 1785 ret = ret2;
1816 if (pos + len > inode->i_size) { 1786 if (pos + len > inode->i_size) {
1817 ext4_truncate(inode); 1787 ext4_truncate_failed_write(inode);
1818 /* 1788 /*
1819 * If truncate failed early the inode might still be 1789 * If truncate failed early the inode might still be
1820 * on the orphan list; we need to make sure the inode 1790 * on the orphan list; we need to make sure the inode
@@ -2600,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
2600} 2570}
2601 2571
2602static int __ext4_journalled_writepage(struct page *page, 2572static int __ext4_journalled_writepage(struct page *page,
2603 struct writeback_control *wbc,
2604 unsigned int len) 2573 unsigned int len)
2605{ 2574{
2606 struct address_space *mapping = page->mapping; 2575 struct address_space *mapping = page->mapping;
@@ -2758,7 +2727,7 @@ static int ext4_writepage(struct page *page,
2758 * doesn't seem much point in redirtying the page here. 2727 * doesn't seem much point in redirtying the page here.
2759 */ 2728 */
2760 ClearPageChecked(page); 2729 ClearPageChecked(page);
2761 return __ext4_journalled_writepage(page, wbc, len); 2730 return __ext4_journalled_writepage(page, len);
2762 } 2731 }
2763 2732
2764 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2733 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2788,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2788 * number of contiguous block. So we will limit 2757 * number of contiguous block. So we will limit
2789 * number of contiguous block to a sane value 2758 * number of contiguous block to a sane value
2790 */ 2759 */
2791 if (!(inode->i_flags & EXT4_EXTENTS_FL) && 2760 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
2792 (max_blocks > EXT4_MAX_TRANS_DATA)) 2761 (max_blocks > EXT4_MAX_TRANS_DATA))
2793 max_blocks = EXT4_MAX_TRANS_DATA; 2762 max_blocks = EXT4_MAX_TRANS_DATA;
2794 2763
@@ -2933,7 +2902,7 @@ retry:
2933 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, 2902 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
2934 &mpd); 2903 &mpd);
2935 /* 2904 /*
2936 * If we have a contigous extent of pages and we 2905 * If we have a contiguous extent of pages and we
2937 * haven't done the I/O yet, map the blocks and submit 2906 * haven't done the I/O yet, map the blocks and submit
2938 * them for I/O. 2907 * them for I/O.
2939 */ 2908 */
@@ -3091,7 +3060,7 @@ retry:
3091 * i_size_read because we hold i_mutex. 3060 * i_size_read because we hold i_mutex.
3092 */ 3061 */
3093 if (pos + len > inode->i_size) 3062 if (pos + len > inode->i_size)
3094 ext4_truncate(inode); 3063 ext4_truncate_failed_write(inode);
3095 } 3064 }
3096 3065
3097 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3066 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -4064,7 +4033,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
4064 int k, err; 4033 int k, err;
4065 4034
4066 *top = 0; 4035 *top = 0;
4067 /* Make k index the deepest non-null offest + 1 */ 4036 /* Make k index the deepest non-null offset + 1 */
4068 for (k = depth; k > 1 && !offsets[k-1]; k--) 4037 for (k = depth; k > 1 && !offsets[k-1]; k--)
4069 ; 4038 ;
4070 partial = ext4_get_branch(inode, k, offsets, chain, &err); 4039 partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4120,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4120 __le32 *last) 4089 __le32 *last)
4121{ 4090{
4122 __le32 *p; 4091 __le32 *p;
4092 int flags = EXT4_FREE_BLOCKS_FORGET;
4093
4094 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4095 flags |= EXT4_FREE_BLOCKS_METADATA;
4096
4123 if (try_to_extend_transaction(handle, inode)) { 4097 if (try_to_extend_transaction(handle, inode)) {
4124 if (bh) { 4098 if (bh) {
4125 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4099 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4134,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4134 } 4108 }
4135 } 4109 }
4136 4110
4137 /* 4111 for (p = first; p < last; p++)
4138 * Any buffers which are on the journal will be in memory. We 4112 *p = 0;
4139 * find them on the hash table so jbd2_journal_revoke() will
4140 * run jbd2_journal_forget() on them. We've already detached
4141 * each block from the file, so bforget() in
4142 * jbd2_journal_forget() should be safe.
4143 *
4144 * AKPM: turn on bforget in jbd2_journal_forget()!!!
4145 */
4146 for (p = first; p < last; p++) {
4147 u32 nr = le32_to_cpu(*p);
4148 if (nr) {
4149 struct buffer_head *tbh;
4150
4151 *p = 0;
4152 tbh = sb_find_get_block(inode->i_sb, nr);
4153 ext4_forget(handle, 0, inode, tbh, nr);
4154 }
4155 }
4156 4113
4157 ext4_free_blocks(handle, inode, block_to_free, count, 0); 4114 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4158} 4115}
4159 4116
4160/** 4117/**
@@ -4342,7 +4299,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4342 blocks_for_truncate(inode)); 4299 blocks_for_truncate(inode));
4343 } 4300 }
4344 4301
4345 ext4_free_blocks(handle, inode, nr, 1, 1); 4302 ext4_free_blocks(handle, inode, 0, nr, 1,
4303 EXT4_FREE_BLOCKS_METADATA);
4346 4304
4347 if (parent_bh) { 4305 if (parent_bh) {
4348 /* 4306 /*
@@ -4781,8 +4739,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4781 struct ext4_iloc iloc; 4739 struct ext4_iloc iloc;
4782 struct ext4_inode *raw_inode; 4740 struct ext4_inode *raw_inode;
4783 struct ext4_inode_info *ei; 4741 struct ext4_inode_info *ei;
4784 struct buffer_head *bh;
4785 struct inode *inode; 4742 struct inode *inode;
4743 journal_t *journal = EXT4_SB(sb)->s_journal;
4786 long ret; 4744 long ret;
4787 int block; 4745 int block;
4788 4746
@@ -4793,11 +4751,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4793 return inode; 4751 return inode;
4794 4752
4795 ei = EXT4_I(inode); 4753 ei = EXT4_I(inode);
4754 iloc.bh = 0;
4796 4755
4797 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4756 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4798 if (ret < 0) 4757 if (ret < 0)
4799 goto bad_inode; 4758 goto bad_inode;
4800 bh = iloc.bh;
4801 raw_inode = ext4_raw_inode(&iloc); 4759 raw_inode = ext4_raw_inode(&iloc);
4802 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4760 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4803 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4761 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4820,7 +4778,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4820 if (inode->i_mode == 0 || 4778 if (inode->i_mode == 0 ||
4821 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 4779 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
4822 /* this inode is deleted */ 4780 /* this inode is deleted */
4823 brelse(bh);
4824 ret = -ESTALE; 4781 ret = -ESTALE;
4825 goto bad_inode; 4782 goto bad_inode;
4826 } 4783 }
@@ -4848,11 +4805,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4848 ei->i_data[block] = raw_inode->i_block[block]; 4805 ei->i_data[block] = raw_inode->i_block[block];
4849 INIT_LIST_HEAD(&ei->i_orphan); 4806 INIT_LIST_HEAD(&ei->i_orphan);
4850 4807
4808 /*
4809 * Set transaction id's of transactions that have to be committed
4810 * to finish f[data]sync. We set them to currently running transaction
4811 * as we cannot be sure that the inode or some of its metadata isn't
4812 * part of the transaction - the inode could have been reclaimed and
4813 * now it is reread from disk.
4814 */
4815 if (journal) {
4816 transaction_t *transaction;
4817 tid_t tid;
4818
4819 spin_lock(&journal->j_state_lock);
4820 if (journal->j_running_transaction)
4821 transaction = journal->j_running_transaction;
4822 else
4823 transaction = journal->j_committing_transaction;
4824 if (transaction)
4825 tid = transaction->t_tid;
4826 else
4827 tid = journal->j_commit_sequence;
4828 spin_unlock(&journal->j_state_lock);
4829 ei->i_sync_tid = tid;
4830 ei->i_datasync_tid = tid;
4831 }
4832
4851 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4833 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4852 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4834 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4853 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4835 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4854 EXT4_INODE_SIZE(inode->i_sb)) { 4836 EXT4_INODE_SIZE(inode->i_sb)) {
4855 brelse(bh);
4856 ret = -EIO; 4837 ret = -EIO;
4857 goto bad_inode; 4838 goto bad_inode;
4858 } 4839 }
@@ -4884,10 +4865,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4884 4865
4885 ret = 0; 4866 ret = 0;
4886 if (ei->i_file_acl && 4867 if (ei->i_file_acl &&
4887 ((ei->i_file_acl < 4868 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4888 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4889 EXT4_SB(sb)->s_gdb_count)) ||
4890 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4891 ext4_error(sb, __func__, 4869 ext4_error(sb, __func__,
4892 "bad extended attribute block %llu in inode #%lu", 4870 "bad extended attribute block %llu in inode #%lu",
4893 ei->i_file_acl, inode->i_ino); 4871 ei->i_file_acl, inode->i_ino);
@@ -4905,10 +4883,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4905 /* Validate block references which are part of inode */ 4883 /* Validate block references which are part of inode */
4906 ret = ext4_check_inode_blockref(inode); 4884 ret = ext4_check_inode_blockref(inode);
4907 } 4885 }
4908 if (ret) { 4886 if (ret)
4909 brelse(bh);
4910 goto bad_inode; 4887 goto bad_inode;
4911 }
4912 4888
4913 if (S_ISREG(inode->i_mode)) { 4889 if (S_ISREG(inode->i_mode)) {
4914 inode->i_op = &ext4_file_inode_operations; 4890 inode->i_op = &ext4_file_inode_operations;
@@ -4936,7 +4912,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4936 init_special_inode(inode, inode->i_mode, 4912 init_special_inode(inode, inode->i_mode,
4937 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4913 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4938 } else { 4914 } else {
4939 brelse(bh);
4940 ret = -EIO; 4915 ret = -EIO;
4941 ext4_error(inode->i_sb, __func__, 4916 ext4_error(inode->i_sb, __func__,
4942 "bogus i_mode (%o) for inode=%lu", 4917 "bogus i_mode (%o) for inode=%lu",
@@ -4949,6 +4924,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4949 return inode; 4924 return inode;
4950 4925
4951bad_inode: 4926bad_inode:
4927 brelse(iloc.bh);
4952 iget_failed(inode); 4928 iget_failed(inode);
4953 return ERR_PTR(ret); 4929 return ERR_PTR(ret);
4954} 4930}
@@ -5108,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
5108 err = rc; 5084 err = rc;
5109 ei->i_state &= ~EXT4_STATE_NEW; 5085 ei->i_state &= ~EXT4_STATE_NEW;
5110 5086
5087 ext4_update_inode_fsync_trans(handle, inode, 0);
5111out_brelse: 5088out_brelse:
5112 brelse(bh); 5089 brelse(bh);
5113 ext4_std_error(inode->i_sb, err); 5090 ext4_std_error(inode->i_sb, err);
@@ -5227,8 +5204,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5227 5204
5228 /* (user+group)*(old+new) structure, inode write (sb, 5205 /* (user+group)*(old+new) structure, inode write (sb,
5229 * inode block, ? - but truncate inode update has it) */ 5206 * inode block, ? - but truncate inode update has it) */
5230 handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ 5207 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
5231 EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 5208 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
5232 if (IS_ERR(handle)) { 5209 if (IS_ERR(handle)) {
5233 error = PTR_ERR(handle); 5210 error = PTR_ERR(handle);
5234 goto err_out; 5211 goto err_out;
@@ -5376,7 +5353,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5376 * worse case, the indexs blocks spread over different block groups 5353 * worse case, the indexs blocks spread over different block groups
5377 * 5354 *
5378 * If datablocks are discontiguous, they are possible to spread over 5355 * If datablocks are discontiguous, they are possible to spread over
5379 * different block groups too. If they are contiugous, with flexbg, 5356 * different block groups too. If they are contiguous, with flexbg,
5380 * they could still across block group boundary. 5357 * they could still across block group boundary.
5381 * 5358 *
5382 * Also account for superblock, inode, quota and xattr blocks 5359 * Also account for superblock, inode, quota and xattr blocks
@@ -5452,7 +5429,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
5452 * Calculate the journal credits for a chunk of data modification. 5429 * Calculate the journal credits for a chunk of data modification.
5453 * 5430 *
5454 * This is called from DIO, fallocate or whoever calling 5431 * This is called from DIO, fallocate or whoever calling
5455 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. 5432 * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
5456 * 5433 *
5457 * journal buffers for data blocks are not included here, as DIO 5434 * journal buffers for data blocks are not included here, as DIO
5458 * and fallocate do no need to journal data buffers. 5435 * and fallocate do no need to journal data buffers.
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c1cdf613e725..b63d193126db 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -221,31 +221,38 @@ setversion_out:
221 struct file *donor_filp; 221 struct file *donor_filp;
222 int err; 222 int err;
223 223
224 if (!(filp->f_mode & FMODE_READ) ||
225 !(filp->f_mode & FMODE_WRITE))
226 return -EBADF;
227
224 if (copy_from_user(&me, 228 if (copy_from_user(&me,
225 (struct move_extent __user *)arg, sizeof(me))) 229 (struct move_extent __user *)arg, sizeof(me)))
226 return -EFAULT; 230 return -EFAULT;
231 me.moved_len = 0;
227 232
228 donor_filp = fget(me.donor_fd); 233 donor_filp = fget(me.donor_fd);
229 if (!donor_filp) 234 if (!donor_filp)
230 return -EBADF; 235 return -EBADF;
231 236
232 if (!capable(CAP_DAC_OVERRIDE)) { 237 if (!(donor_filp->f_mode & FMODE_WRITE)) {
233 if ((current->real_cred->fsuid != inode->i_uid) || 238 err = -EBADF;
234 !(inode->i_mode & S_IRUSR) || 239 goto mext_out;
235 !(donor_filp->f_dentry->d_inode->i_mode &
236 S_IRUSR)) {
237 fput(donor_filp);
238 return -EACCES;
239 }
240 } 240 }
241 241
242 err = mnt_want_write(filp->f_path.mnt);
243 if (err)
244 goto mext_out;
245
242 err = ext4_move_extents(filp, donor_filp, me.orig_start, 246 err = ext4_move_extents(filp, donor_filp, me.orig_start,
243 me.donor_start, me.len, &me.moved_len); 247 me.donor_start, me.len, &me.moved_len);
244 fput(donor_filp); 248 mnt_drop_write(filp->f_path.mnt);
249 if (me.moved_len > 0)
250 file_remove_suid(donor_filp);
245 251
246 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) 252 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
247 return -EFAULT; 253 err = -EFAULT;
248 254mext_out:
255 fput(donor_filp);
249 return err; 256 return err;
250 } 257 }
251 258
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824defa..c1e19d5b5985 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -142,7 +142,7 @@
142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
143 * value of s_mb_order2_reqs can be tuned via 143 * value of s_mb_order2_reqs can be tuned via
144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to 144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
145 * stripe size (sbi->s_stripe), we try to search for contigous block in 145 * stripe size (sbi->s_stripe), we try to search for contiguous block in
146 * stripe size. This should result in better allocation on RAID setups. If 146 * stripe size. This should result in better allocation on RAID setups. If
147 * not, we search in the specific group using bitmap for best extents. The 147 * not, we search in the specific group using bitmap for best extents. The
148 * tunable min_to_scan and max_to_scan control the behaviour here. 148 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2529 struct ext4_group_info *db; 2529 struct ext4_group_info *db;
2530 int err, count = 0, count2 = 0; 2530 int err, count = 0, count2 = 0;
2531 struct ext4_free_data *entry; 2531 struct ext4_free_data *entry;
2532 ext4_fsblk_t discard_block;
2533 struct list_head *l, *ltmp; 2532 struct list_head *l, *ltmp;
2534 2533
2535 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2534 list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2559 page_cache_release(e4b.bd_bitmap_page); 2558 page_cache_release(e4b.bd_bitmap_page);
2560 } 2559 }
2561 ext4_unlock_group(sb, entry->group); 2560 ext4_unlock_group(sb, entry->group);
2562 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2561 if (test_opt(sb, DISCARD)) {
2563 + entry->start_blk 2562 ext4_fsblk_t discard_block;
2564 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2563 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
2565 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, 2564
2566 entry->count); 2565 discard_block = (ext4_fsblk_t)entry->group *
2567 sb_issue_discard(sb, discard_block, entry->count); 2566 EXT4_BLOCKS_PER_GROUP(sb)
2568 2567 + entry->start_blk
2568 + le32_to_cpu(es->s_first_data_block);
2569 trace_ext4_discard_blocks(sb,
2570 (unsigned long long)discard_block,
2571 entry->count);
2572 sb_issue_discard(sb, discard_block, entry->count);
2573 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2574 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2575 ext4_mb_release_desc(&e4b);
2571 } 2576 }
@@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3006} 3011}
3007 3012
3008/* 3013/*
3014 * Called on failure; free up any blocks from the inode PA for this
3015 * context. We don't need this for MB_GROUP_PA because we only change
3016 * pa_free in ext4_mb_release_context(), but on failure, we've already
3017 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3018 */
3019static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3020{
3021 struct ext4_prealloc_space *pa = ac->ac_pa;
3022 int len;
3023
3024 if (pa && pa->pa_type == MB_INODE_PA) {
3025 len = ac->ac_b_ex.fe_len;
3026 pa->pa_free += len;
3027 }
3028
3029}
3030
3031/*
3009 * use blocks preallocated to inode 3032 * use blocks preallocated to inode
3010 */ 3033 */
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3034static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -4290,6 +4313,7 @@ repeat:
4290 ac->ac_status = AC_STATUS_CONTINUE; 4313 ac->ac_status = AC_STATUS_CONTINUE;
4291 goto repeat; 4314 goto repeat;
4292 } else if (*errp) { 4315 } else if (*errp) {
4316 ext4_discard_allocated_blocks(ac);
4293 ac->ac_b_ex.fe_len = 0; 4317 ac->ac_b_ex.fe_len = 0;
4294 ar->len = 0; 4318 ar->len = 0;
4295 ext4_mb_show_ac(ac); 4319 ext4_mb_show_ac(ac);
@@ -4422,18 +4446,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 return 0; 4446 return 0;
4423} 4447}
4424 4448
4425/* 4449/**
4426 * Main entry point into mballoc to free blocks 4450 * ext4_free_blocks() -- Free given blocks and update quota
4451 * @handle: handle for this transaction
4452 * @inode: inode
4453 * @block: start physical block to free
4454 * @count: number of blocks to count
4455 * @metadata: Are these metadata blocks
4427 */ 4456 */
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4457void ext4_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count, 4458 struct buffer_head *bh, ext4_fsblk_t block,
4430 int metadata, unsigned long *freed) 4459 unsigned long count, int flags)
4431{ 4460{
4432 struct buffer_head *bitmap_bh = NULL; 4461 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb; 4462 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL; 4463 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp; 4464 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es; 4465 struct ext4_super_block *es;
4466 unsigned long freed = 0;
4437 unsigned int overflow; 4467 unsigned int overflow;
4438 ext4_grpblk_t bit; 4468 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh; 4469 struct buffer_head *gd_bh;
@@ -4443,13 +4473,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4443 int err = 0; 4473 int err = 0;
4444 int ret; 4474 int ret;
4445 4475
4446 *freed = 0; 4476 if (bh) {
4477 if (block)
4478 BUG_ON(block != bh->b_blocknr);
4479 else
4480 block = bh->b_blocknr;
4481 }
4447 4482
4448 sbi = EXT4_SB(sb); 4483 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es; 4484 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) || 4485 if (!ext4_data_block_valid(sbi, block, count)) {
4451 block + count < block ||
4452 block + count > ext4_blocks_count(es)) {
4453 ext4_error(sb, __func__, 4486 ext4_error(sb, __func__,
4454 "Freeing blocks not in datazone - " 4487 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count); 4488 "block = %llu, count = %lu", block, count);
@@ -4457,7 +4490,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4457 } 4490 }
4458 4491
4459 ext4_debug("freeing block %llu\n", block); 4492 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata); 4493 trace_ext4_free_blocks(inode, block, count, flags);
4494
4495 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4496 struct buffer_head *tbh = bh;
4497 int i;
4498
4499 BUG_ON(bh && (count > 1));
4500
4501 for (i = 0; i < count; i++) {
4502 if (!bh)
4503 tbh = sb_find_get_block(inode->i_sb,
4504 block + i);
4505 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4506 inode, tbh, block + i);
4507 }
4508 }
4509
4510 /*
4511 * We need to make sure we don't reuse the freed block until
4512 * after the transaction is committed, which we can do by
4513 * treating the block as metadata, below. We make an
4514 * exception if the inode is to be written in writeback mode
4515 * since writeback mode has weak data consistency guarantees.
4516 */
4517 if (!ext4_should_writeback_data(inode))
4518 flags |= EXT4_FREE_BLOCKS_METADATA;
4461 4519
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4520 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) { 4521 if (ac) {
@@ -4533,7 +4591,8 @@ do_more:
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4591 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err) 4592 if (err)
4535 goto error_return; 4593 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) { 4594
4595 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry; 4596 struct ext4_free_data *new_entry;
4538 /* 4597 /*
4539 * blocks being freed are metadata. these blocks shouldn't 4598 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4631,7 @@ do_more:
4572 4631
4573 ext4_mb_release_desc(&e4b); 4632 ext4_mb_release_desc(&e4b);
4574 4633
4575 *freed += count; 4634 freed += count;
4576 4635
4577 /* We dirtied the bitmap block */ 4636 /* We dirtied the bitmap block */
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4637 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4651,8 @@ do_more:
4592 } 4651 }
4593 sb->s_dirt = 1; 4652 sb->s_dirt = 1;
4594error_return: 4653error_return:
4654 if (freed)
4655 vfs_dq_free_block(inode, freed);
4595 brelse(bitmap_bh); 4656 brelse(bitmap_bh);
4596 ext4_std_error(sb, err); 4657 ext4_std_error(sb, err);
4597 if (ac) 4658 if (ac)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a93d5b80f3e2..81415814b00b 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
238 * So allocate a credit of 3. We may update 238 * So allocate a credit of 3. We may update
239 * quota (user and group). 239 * quota (user and group).
240 */ 240 */
241 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 241 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
242 242
243 if (ext4_journal_extend(handle, needed) != 0) 243 if (ext4_journal_extend(handle, needed) != 0)
244 retval = ext4_journal_restart(handle, needed); 244 retval = ext4_journal_restart(handle, needed);
@@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
262 for (i = 0; i < max_entries; i++) { 262 for (i = 0; i < max_entries; i++) {
263 if (tmp_idata[i]) { 263 if (tmp_idata[i]) {
264 extend_credit_for_blkdel(handle, inode); 264 extend_credit_for_blkdel(handle, inode);
265 ext4_free_blocks(handle, inode, 265 ext4_free_blocks(handle, inode, 0,
266 le32_to_cpu(tmp_idata[i]), 1, 1); 266 le32_to_cpu(tmp_idata[i]), 1,
267 EXT4_FREE_BLOCKS_METADATA |
268 EXT4_FREE_BLOCKS_FORGET);
267 } 269 }
268 } 270 }
269 put_bh(bh); 271 put_bh(bh);
270 extend_credit_for_blkdel(handle, inode); 272 extend_credit_for_blkdel(handle, inode);
271 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 273 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
274 EXT4_FREE_BLOCKS_METADATA |
275 EXT4_FREE_BLOCKS_FORGET);
272 return 0; 276 return 0;
273} 277}
274 278
@@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
297 } 301 }
298 put_bh(bh); 302 put_bh(bh);
299 extend_credit_for_blkdel(handle, inode); 303 extend_credit_for_blkdel(handle, inode);
300 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 304 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
305 EXT4_FREE_BLOCKS_METADATA |
306 EXT4_FREE_BLOCKS_FORGET);
301 return 0; 307 return 0;
302} 308}
303 309
@@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
308 /* ei->i_data[EXT4_IND_BLOCK] */ 314 /* ei->i_data[EXT4_IND_BLOCK] */
309 if (i_data[0]) { 315 if (i_data[0]) {
310 extend_credit_for_blkdel(handle, inode); 316 extend_credit_for_blkdel(handle, inode);
311 ext4_free_blocks(handle, inode, 317 ext4_free_blocks(handle, inode, 0,
312 le32_to_cpu(i_data[0]), 1, 1); 318 le32_to_cpu(i_data[0]), 1,
319 EXT4_FREE_BLOCKS_METADATA |
320 EXT4_FREE_BLOCKS_FORGET);
313 } 321 }
314 322
315 /* ei->i_data[EXT4_DIND_BLOCK] */ 323 /* ei->i_data[EXT4_DIND_BLOCK] */
@@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
419 } 427 }
420 put_bh(bh); 428 put_bh(bh);
421 extend_credit_for_blkdel(handle, inode); 429 extend_credit_for_blkdel(handle, inode);
422 ext4_free_blocks(handle, inode, block, 1, 1); 430 ext4_free_blocks(handle, inode, 0, block, 1,
431 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
423 return retval; 432 return retval;
424} 433}
425 434
@@ -477,7 +486,7 @@ int ext4_ext_migrate(struct inode *inode)
477 handle = ext4_journal_start(inode, 486 handle = ext4_journal_start(inode,
478 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 487 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
479 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 488 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
480 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 489 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
481 + 1); 490 + 1);
482 if (IS_ERR(handle)) { 491 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 492 retval = PTR_ERR(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 25b6b1457360..82c415be87a4 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -77,12 +77,14 @@ static int
77mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 77mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
78 struct ext4_extent **extent) 78 struct ext4_extent **extent)
79{ 79{
80 struct ext4_extent_header *eh;
80 int ppos, leaf_ppos = path->p_depth; 81 int ppos, leaf_ppos = path->p_depth;
81 82
82 ppos = leaf_ppos; 83 ppos = leaf_ppos;
83 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 84 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
84 /* leaf block */ 85 /* leaf block */
85 *extent = ++path[ppos].p_ext; 86 *extent = ++path[ppos].p_ext;
87 path[ppos].p_block = ext_pblock(path[ppos].p_ext);
86 return 0; 88 return 0;
87 } 89 }
88 90
@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
119 ext_block_hdr(path[cur_ppos+1].p_bh); 121 ext_block_hdr(path[cur_ppos+1].p_bh);
120 } 122 }
121 123
124 path[leaf_ppos].p_ext = *extent = NULL;
125
126 eh = path[leaf_ppos].p_hdr;
127 if (le16_to_cpu(eh->eh_entries) == 0)
128 /* empty leaf is found */
129 return -ENODATA;
130
122 /* leaf block */ 131 /* leaf block */
123 path[leaf_ppos].p_ext = *extent = 132 path[leaf_ppos].p_ext = *extent =
124 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 133 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
134 path[leaf_ppos].p_block =
135 ext_pblock(path[leaf_ppos].p_ext);
125 return 0; 136 return 0;
126 } 137 }
127 } 138 }
@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
155} 166}
156 167
157/** 168/**
158 * mext_double_down_read - Acquire two inodes' read semaphore 169 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
159 *
160 * @orig_inode: original inode structure
161 * @donor_inode: donor inode structure
162 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
163 */
164static void
165mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
166{
167 struct inode *first = orig_inode, *second = donor_inode;
168
169 /*
170 * Use the inode number to provide the stable locking order instead
171 * of its address, because the C language doesn't guarantee you can
172 * compare pointers that don't come from the same array.
173 */
174 if (donor_inode->i_ino < orig_inode->i_ino) {
175 first = donor_inode;
176 second = orig_inode;
177 }
178
179 down_read(&EXT4_I(first)->i_data_sem);
180 down_read(&EXT4_I(second)->i_data_sem);
181}
182
183/**
184 * mext_double_down_write - Acquire two inodes' write semaphore
185 * 170 *
186 * @orig_inode: original inode structure 171 * @orig_inode: original inode structure
187 * @donor_inode: donor inode structure 172 * @donor_inode: donor inode structure
188 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. 173 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
174 * i_ino order.
189 */ 175 */
190static void 176static void
191mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) 177double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
192{ 178{
193 struct inode *first = orig_inode, *second = donor_inode; 179 struct inode *first = orig_inode, *second = donor_inode;
194 180
@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
203 } 189 }
204 190
205 down_write(&EXT4_I(first)->i_data_sem); 191 down_write(&EXT4_I(first)->i_data_sem);
206 down_write(&EXT4_I(second)->i_data_sem); 192 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
207} 193}
208 194
209/** 195/**
210 * mext_double_up_read - Release two inodes' read semaphore 196 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
211 * 197 *
212 * @orig_inode: original inode structure to be released its lock first 198 * @orig_inode: original inode structure to be released its lock first
213 * @donor_inode: donor inode structure to be released its lock second 199 * @donor_inode: donor inode structure to be released its lock second
214 * Release read semaphore of two inodes (orig and donor). 200 * Release write lock of i_data_sem of two inodes (orig and donor).
215 */ 201 */
216static void 202static void
217mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) 203double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
218{
219 up_read(&EXT4_I(orig_inode)->i_data_sem);
220 up_read(&EXT4_I(donor_inode)->i_data_sem);
221}
222
223/**
224 * mext_double_up_write - Release two inodes' write semaphore
225 *
226 * @orig_inode: original inode structure to be released its lock first
227 * @donor_inode: donor inode structure to be released its lock second
228 * Release write semaphore of two inodes (orig and donor).
229 */
230static void
231mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
232{ 204{
233 up_write(&EXT4_I(orig_inode)->i_data_sem); 205 up_write(&EXT4_I(orig_inode)->i_data_sem);
234 up_write(&EXT4_I(donor_inode)->i_data_sem); 206 up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -596,7 +568,7 @@ out:
596 * @tmp_oext: the extent that will belong to the donor inode 568 * @tmp_oext: the extent that will belong to the donor inode
597 * @orig_off: block offset of original inode 569 * @orig_off: block offset of original inode
598 * @donor_off: block offset of donor inode 570 * @donor_off: block offset of donor inode
599 * @max_count: the maximun length of extents 571 * @max_count: the maximum length of extents
600 * 572 *
601 * Return 0 on success, or a negative error value on failure. 573 * Return 0 on success, or a negative error value on failure.
602 */ 574 */
@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
661 * @donor_inode: donor inode 633 * @donor_inode: donor inode
662 * @from: block offset of orig_inode 634 * @from: block offset of orig_inode
663 * @count: block count to be replaced 635 * @count: block count to be replaced
636 * @err: pointer to save return value
664 * 637 *
665 * Replace original inode extents and donor inode extents page by page. 638 * Replace original inode extents and donor inode extents page by page.
666 * We implement this replacement in the following three steps: 639 * We implement this replacement in the following three steps:
@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
671 * 3. Change the block information of donor inode to point at the saved 644 * 3. Change the block information of donor inode to point at the saved
672 * original inode blocks in the dummy extents. 645 * original inode blocks in the dummy extents.
673 * 646 *
674 * Return 0 on success, or a negative error value on failure. 647 * Return replaced block count.
675 */ 648 */
676static int 649static int
677mext_replace_branches(handle_t *handle, struct inode *orig_inode, 650mext_replace_branches(handle_t *handle, struct inode *orig_inode,
678 struct inode *donor_inode, ext4_lblk_t from, 651 struct inode *donor_inode, ext4_lblk_t from,
679 ext4_lblk_t count) 652 ext4_lblk_t count, int *err)
680{ 653{
681 struct ext4_ext_path *orig_path = NULL; 654 struct ext4_ext_path *orig_path = NULL;
682 struct ext4_ext_path *donor_path = NULL; 655 struct ext4_ext_path *donor_path = NULL;
683 struct ext4_extent *oext, *dext; 656 struct ext4_extent *oext, *dext;
684 struct ext4_extent tmp_dext, tmp_oext; 657 struct ext4_extent tmp_dext, tmp_oext;
685 ext4_lblk_t orig_off = from, donor_off = from; 658 ext4_lblk_t orig_off = from, donor_off = from;
686 int err = 0;
687 int depth; 659 int depth;
688 int replaced_count = 0; 660 int replaced_count = 0;
689 int dext_alen; 661 int dext_alen;
690 662
691 mext_double_down_write(orig_inode, donor_inode); 663 /* Protect extent trees against block allocations via delalloc */
664 double_down_write_data_sem(orig_inode, donor_inode);
692 665
693 /* Get the original extent for the block "orig_off" */ 666 /* Get the original extent for the block "orig_off" */
694 err = get_ext_path(orig_inode, orig_off, &orig_path); 667 *err = get_ext_path(orig_inode, orig_off, &orig_path);
695 if (err) 668 if (*err)
696 goto out; 669 goto out;
697 670
698 /* Get the donor extent for the head */ 671 /* Get the donor extent for the head */
699 err = get_ext_path(donor_inode, donor_off, &donor_path); 672 *err = get_ext_path(donor_inode, donor_off, &donor_path);
700 if (err) 673 if (*err)
701 goto out; 674 goto out;
702 depth = ext_depth(orig_inode); 675 depth = ext_depth(orig_inode);
703 oext = orig_path[depth].p_ext; 676 oext = orig_path[depth].p_ext;
@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
707 dext = donor_path[depth].p_ext; 680 dext = donor_path[depth].p_ext;
708 tmp_dext = *dext; 681 tmp_dext = *dext;
709 682
710 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 683 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
711 donor_off, count); 684 donor_off, count);
712 if (err) 685 if (*err)
713 goto out; 686 goto out;
714 687
715 /* Loop for the donor extents */ 688 /* Loop for the donor extents */
@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
718 if (!dext) { 691 if (!dext) {
719 ext4_error(donor_inode->i_sb, __func__, 692 ext4_error(donor_inode->i_sb, __func__,
720 "The extent for donor must be found"); 693 "The extent for donor must be found");
721 err = -EIO; 694 *err = -EIO;
722 goto out; 695 goto out;
723 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 696 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
724 ext4_error(donor_inode->i_sb, __func__, 697 ext4_error(donor_inode->i_sb, __func__,
@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
726 "extent(%u) should be equal", 699 "extent(%u) should be equal",
727 donor_off, 700 donor_off,
728 le32_to_cpu(tmp_dext.ee_block)); 701 le32_to_cpu(tmp_dext.ee_block));
729 err = -EIO; 702 *err = -EIO;
730 goto out; 703 goto out;
731 } 704 }
732 705
733 /* Set donor extent to orig extent */ 706 /* Set donor extent to orig extent */
734 err = mext_leaf_block(handle, orig_inode, 707 *err = mext_leaf_block(handle, orig_inode,
735 orig_path, &tmp_dext, &orig_off); 708 orig_path, &tmp_dext, &orig_off);
736 if (err < 0) 709 if (*err)
737 goto out; 710 goto out;
738 711
739 /* Set orig extent to donor extent */ 712 /* Set orig extent to donor extent */
740 err = mext_leaf_block(handle, donor_inode, 713 *err = mext_leaf_block(handle, donor_inode,
741 donor_path, &tmp_oext, &donor_off); 714 donor_path, &tmp_oext, &donor_off);
742 if (err < 0) 715 if (*err)
743 goto out; 716 goto out;
744 717
745 dext_alen = ext4_ext_get_actual_len(&tmp_dext); 718 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
753 726
754 if (orig_path) 727 if (orig_path)
755 ext4_ext_drop_refs(orig_path); 728 ext4_ext_drop_refs(orig_path);
756 err = get_ext_path(orig_inode, orig_off, &orig_path); 729 *err = get_ext_path(orig_inode, orig_off, &orig_path);
757 if (err) 730 if (*err)
758 goto out; 731 goto out;
759 depth = ext_depth(orig_inode); 732 depth = ext_depth(orig_inode);
760 oext = orig_path[depth].p_ext; 733 oext = orig_path[depth].p_ext;
761 if (le32_to_cpu(oext->ee_block) +
762 ext4_ext_get_actual_len(oext) <= orig_off) {
763 err = 0;
764 goto out;
765 }
766 tmp_oext = *oext; 734 tmp_oext = *oext;
767 735
768 if (donor_path) 736 if (donor_path)
769 ext4_ext_drop_refs(donor_path); 737 ext4_ext_drop_refs(donor_path);
770 err = get_ext_path(donor_inode, donor_off, &donor_path); 738 *err = get_ext_path(donor_inode, donor_off, &donor_path);
771 if (err) 739 if (*err)
772 goto out; 740 goto out;
773 depth = ext_depth(donor_inode); 741 depth = ext_depth(donor_inode);
774 dext = donor_path[depth].p_ext; 742 dext = donor_path[depth].p_ext;
775 if (le32_to_cpu(dext->ee_block) +
776 ext4_ext_get_actual_len(dext) <= donor_off) {
777 err = 0;
778 goto out;
779 }
780 tmp_dext = *dext; 743 tmp_dext = *dext;
781 744
782 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 745 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
783 donor_off, count - replaced_count); 746 donor_off, count - replaced_count);
784 if (err) 747 if (*err)
785 goto out; 748 goto out;
786 } 749 }
787 750
@@ -795,8 +758,12 @@ out:
795 kfree(donor_path); 758 kfree(donor_path);
796 } 759 }
797 760
798 mext_double_up_write(orig_inode, donor_inode); 761 ext4_ext_invalidate_cache(orig_inode);
799 return err; 762 ext4_ext_invalidate_cache(donor_inode);
763
764 double_up_write_data_sem(orig_inode, donor_inode);
765
766 return replaced_count;
800} 767}
801 768
802/** 769/**
@@ -808,16 +775,17 @@ out:
808 * @data_offset_in_page: block index where data swapping starts 775 * @data_offset_in_page: block index where data swapping starts
809 * @block_len_in_page: the number of blocks to be swapped 776 * @block_len_in_page: the number of blocks to be swapped
810 * @uninit: orig extent is uninitialized or not 777 * @uninit: orig extent is uninitialized or not
778 * @err: pointer to save return value
811 * 779 *
812 * Save the data in original inode blocks and replace original inode extents 780 * Save the data in original inode blocks and replace original inode extents
813 * with donor inode extents by calling mext_replace_branches(). 781 * with donor inode extents by calling mext_replace_branches().
814 * Finally, write out the saved data in new original inode blocks. Return 0 782 * Finally, write out the saved data in new original inode blocks. Return
815 * on success, or a negative error value on failure. 783 * replaced block count.
816 */ 784 */
817static int 785static int
818move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 786move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
819 pgoff_t orig_page_offset, int data_offset_in_page, 787 pgoff_t orig_page_offset, int data_offset_in_page,
820 int block_len_in_page, int uninit) 788 int block_len_in_page, int uninit, int *err)
821{ 789{
822 struct inode *orig_inode = o_filp->f_dentry->d_inode; 790 struct inode *orig_inode = o_filp->f_dentry->d_inode;
823 struct address_space *mapping = orig_inode->i_mapping; 791 struct address_space *mapping = orig_inode->i_mapping;
@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
829 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 797 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
830 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 798 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
831 unsigned int w_flags = 0; 799 unsigned int w_flags = 0;
832 unsigned int tmp_data_len, data_len; 800 unsigned int tmp_data_size, data_size, replaced_size;
833 void *fsdata; 801 void *fsdata;
834 int ret, i, jblocks; 802 int i, jblocks;
803 int err2 = 0;
804 int replaced_count = 0;
835 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 805 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
836 806
837 /* 807 /*
@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
841 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 811 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
842 handle = ext4_journal_start(orig_inode, jblocks); 812 handle = ext4_journal_start(orig_inode, jblocks);
843 if (IS_ERR(handle)) { 813 if (IS_ERR(handle)) {
844 ret = PTR_ERR(handle); 814 *err = PTR_ERR(handle);
845 return ret; 815 return 0;
846 } 816 }
847 817
848 if (segment_eq(get_fs(), KERNEL_DS)) 818 if (segment_eq(get_fs(), KERNEL_DS))
@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 * Just swap data blocks between orig and donor. 828 * Just swap data blocks between orig and donor.
859 */ 829 */
860 if (uninit) { 830 if (uninit) {
861 ret = mext_replace_branches(handle, orig_inode, 831 replaced_count = mext_replace_branches(handle, orig_inode,
862 donor_inode, orig_blk_offset, 832 donor_inode, orig_blk_offset,
863 block_len_in_page); 833 block_len_in_page, err);
864
865 /* Clear the inode cache not to refer to the old data */
866 ext4_ext_invalidate_cache(orig_inode);
867 ext4_ext_invalidate_cache(donor_inode);
868 goto out2; 834 goto out2;
869 } 835 }
870 836
871 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 837 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
872 838
873 /* Calculate data_len */ 839 /* Calculate data_size */
874 if ((orig_blk_offset + block_len_in_page - 1) == 840 if ((orig_blk_offset + block_len_in_page - 1) ==
875 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 841 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
876 /* Replace the last block */ 842 /* Replace the last block */
877 tmp_data_len = orig_inode->i_size & (blocksize - 1); 843 tmp_data_size = orig_inode->i_size & (blocksize - 1);
878 /* 844 /*
879 * If data_len equal zero, it shows data_len is multiples of 845 * If data_size equal zero, it shows data_size is multiples of
880 * blocksize. So we set appropriate value. 846 * blocksize. So we set appropriate value.
881 */ 847 */
882 if (tmp_data_len == 0) 848 if (tmp_data_size == 0)
883 tmp_data_len = blocksize; 849 tmp_data_size = blocksize;
884 850
885 data_len = tmp_data_len + 851 data_size = tmp_data_size +
886 ((block_len_in_page - 1) << orig_inode->i_blkbits); 852 ((block_len_in_page - 1) << orig_inode->i_blkbits);
887 } else { 853 } else
888 data_len = block_len_in_page << orig_inode->i_blkbits; 854 data_size = block_len_in_page << orig_inode->i_blkbits;
889 } 855
856 replaced_size = data_size;
890 857
891 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, 858 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
892 &page, &fsdata); 859 &page, &fsdata);
893 if (unlikely(ret < 0)) 860 if (unlikely(*err < 0))
894 goto out; 861 goto out;
895 862
896 if (!PageUptodate(page)) { 863 if (!PageUptodate(page)) {
@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
911 /* Release old bh and drop refs */ 878 /* Release old bh and drop refs */
912 try_to_release_page(page, 0); 879 try_to_release_page(page, 0);
913 880
914 ret = mext_replace_branches(handle, orig_inode, donor_inode, 881 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
915 orig_blk_offset, block_len_in_page); 882 orig_blk_offset, block_len_in_page,
916 if (ret < 0) 883 &err2);
917 goto out; 884 if (err2) {
918 885 if (replaced_count) {
919 /* Clear the inode cache not to refer to the old data */ 886 block_len_in_page = replaced_count;
920 ext4_ext_invalidate_cache(orig_inode); 887 replaced_size =
921 ext4_ext_invalidate_cache(donor_inode); 888 block_len_in_page << orig_inode->i_blkbits;
889 } else
890 goto out;
891 }
922 892
923 if (!page_has_buffers(page)) 893 if (!page_has_buffers(page))
924 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 894 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
928 bh = bh->b_this_page; 898 bh = bh->b_this_page;
929 899
930 for (i = 0; i < block_len_in_page; i++) { 900 for (i = 0; i < block_len_in_page; i++) {
931 ret = ext4_get_block(orig_inode, 901 *err = ext4_get_block(orig_inode,
932 (sector_t)(orig_blk_offset + i), bh, 0); 902 (sector_t)(orig_blk_offset + i), bh, 0);
933 if (ret < 0) 903 if (*err < 0)
934 goto out; 904 goto out;
935 905
936 if (bh->b_this_page != NULL) 906 if (bh->b_this_page != NULL)
937 bh = bh->b_this_page; 907 bh = bh->b_this_page;
938 } 908 }
939 909
940 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, 910 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
941 page, fsdata); 911 page, fsdata);
942 page = NULL; 912 page = NULL;
943 913
@@ -951,7 +921,10 @@ out:
951out2: 921out2:
952 ext4_journal_stop(handle); 922 ext4_journal_stop(handle);
953 923
954 return ret < 0 ? ret : 0; 924 if (err2)
925 *err = err2;
926
927 return replaced_count;
955} 928}
956 929
957/** 930/**
@@ -962,7 +935,6 @@ out2:
962 * @orig_start: logical start offset in block for orig 935 * @orig_start: logical start offset in block for orig
963 * @donor_start: logical start offset in block for donor 936 * @donor_start: logical start offset in block for donor
964 * @len: the number of blocks to be moved 937 * @len: the number of blocks to be moved
965 * @moved_len: moved block length
966 * 938 *
967 * Check the arguments of ext4_move_extents() whether the files can be 939 * Check the arguments of ext4_move_extents() whether the files can be
968 * exchanged with each other. 940 * exchanged with each other.
@@ -970,8 +942,8 @@ out2:
970 */ 942 */
971static int 943static int
972mext_check_arguments(struct inode *orig_inode, 944mext_check_arguments(struct inode *orig_inode,
973 struct inode *donor_inode, __u64 orig_start, 945 struct inode *donor_inode, __u64 orig_start,
974 __u64 donor_start, __u64 *len, __u64 moved_len) 946 __u64 donor_start, __u64 *len)
975{ 947{
976 ext4_lblk_t orig_blocks, donor_blocks; 948 ext4_lblk_t orig_blocks, donor_blocks;
977 unsigned int blkbits = orig_inode->i_blkbits; 949 unsigned int blkbits = orig_inode->i_blkbits;
@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
985 return -EINVAL; 957 return -EINVAL;
986 } 958 }
987 959
960 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
961 ext4_debug("ext4 move extent: suid or sgid is set"
962 " to donor file [ino:orig %lu, donor %lu]\n",
963 orig_inode->i_ino, donor_inode->i_ino);
964 return -EINVAL;
965 }
966
988 /* Ext4 move extent does not support swapfile */ 967 /* Ext4 move extent does not support swapfile */
989 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 968 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
990 ext4_debug("ext4 move extent: The argument files should " 969 ext4_debug("ext4 move extent: The argument files should "
@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
1025 return -EINVAL; 1004 return -EINVAL;
1026 } 1005 }
1027 1006
1028 if (moved_len) {
1029 ext4_debug("ext4 move extent: moved_len should be 0 "
1030 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1031 donor_inode->i_ino);
1032 return -EINVAL;
1033 }
1034
1035 if ((orig_start > EXT_MAX_BLOCK) || 1007 if ((orig_start > EXT_MAX_BLOCK) ||
1036 (donor_start > EXT_MAX_BLOCK) || 1008 (donor_start > EXT_MAX_BLOCK) ||
1037 (*len > EXT_MAX_BLOCK) || 1009 (*len > EXT_MAX_BLOCK) ||
@@ -1088,7 +1060,7 @@ mext_check_arguments(struct inode *orig_inode,
1088 } 1060 }
1089 1061
1090 if (!*len) { 1062 if (!*len) {
1091 ext4_debug("ext4 move extent: len shoudld not be 0 " 1063 ext4_debug("ext4 move extent: len should not be 0 "
1092 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 1064 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1093 donor_inode->i_ino); 1065 donor_inode->i_ino);
1094 return -EINVAL; 1066 return -EINVAL;
@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1232 return -EINVAL; 1204 return -EINVAL;
1233 } 1205 }
1234 1206
1235 /* protect orig and donor against a truncate */ 1207 /* Protect orig and donor inodes against a truncate */
1236 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1208 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1237 if (ret1 < 0) 1209 if (ret1 < 0)
1238 return ret1; 1210 return ret1;
1239 1211
1240 mext_double_down_read(orig_inode, donor_inode); 1212 /* Protect extent tree against block allocations via delalloc */
1213 double_down_write_data_sem(orig_inode, donor_inode);
1241 /* Check the filesystem environment whether move_extent can be done */ 1214 /* Check the filesystem environment whether move_extent can be done */
1242 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1215 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1243 donor_start, &len, *moved_len); 1216 donor_start, &len);
1244 mext_double_up_read(orig_inode, donor_inode);
1245 if (ret1) 1217 if (ret1)
1246 goto out; 1218 goto out;
1247 1219
@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1355 seq_start = le32_to_cpu(ext_cur->ee_block); 1327 seq_start = le32_to_cpu(ext_cur->ee_block);
1356 rest_blocks = seq_blocks; 1328 rest_blocks = seq_blocks;
1357 1329
1358 /* Discard preallocations of two inodes */ 1330 /*
1359 down_write(&EXT4_I(orig_inode)->i_data_sem); 1331 * Up semaphore to avoid following problems:
1360 ext4_discard_preallocations(orig_inode); 1332 * a. transaction deadlock among ext4_journal_start,
1361 up_write(&EXT4_I(orig_inode)->i_data_sem); 1333 * ->write_begin via pagefault, and jbd2_journal_commit
1362 1334 * b. racing with ->readpage, ->write_begin, and ext4_get_block
1363 down_write(&EXT4_I(donor_inode)->i_data_sem); 1335 * in move_extent_per_page
1364 ext4_discard_preallocations(donor_inode); 1336 */
1365 up_write(&EXT4_I(donor_inode)->i_data_sem); 1337 double_up_write_data_sem(orig_inode, donor_inode);
1366 1338
1367 while (orig_page_offset <= seq_end_page) { 1339 while (orig_page_offset <= seq_end_page) {
1368 1340
1369 /* Swap original branches with new branches */ 1341 /* Swap original branches with new branches */
1370 ret1 = move_extent_per_page(o_filp, donor_inode, 1342 block_len_in_page = move_extent_per_page(
1343 o_filp, donor_inode,
1371 orig_page_offset, 1344 orig_page_offset,
1372 data_offset_in_page, 1345 data_offset_in_page,
1373 block_len_in_page, uninit); 1346 block_len_in_page, uninit,
1374 if (ret1 < 0) 1347 &ret1);
1375 goto out; 1348
1376 orig_page_offset++;
1377 /* Count how many blocks we have exchanged */ 1349 /* Count how many blocks we have exchanged */
1378 *moved_len += block_len_in_page; 1350 *moved_len += block_len_in_page;
1351 if (ret1 < 0)
1352 break;
1379 if (*moved_len > len) { 1353 if (*moved_len > len) {
1380 ext4_error(orig_inode->i_sb, __func__, 1354 ext4_error(orig_inode->i_sb, __func__,
1381 "We replaced blocks too much! " 1355 "We replaced blocks too much! "
1382 "sum of replaced: %llu requested: %llu", 1356 "sum of replaced: %llu requested: %llu",
1383 *moved_len, len); 1357 *moved_len, len);
1384 ret1 = -EIO; 1358 ret1 = -EIO;
1385 goto out; 1359 break;
1386 } 1360 }
1387 1361
1362 orig_page_offset++;
1388 data_offset_in_page = 0; 1363 data_offset_in_page = 0;
1389 rest_blocks -= block_len_in_page; 1364 rest_blocks -= block_len_in_page;
1390 if (rest_blocks > blocks_per_page) 1365 if (rest_blocks > blocks_per_page)
@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1393 block_len_in_page = rest_blocks; 1368 block_len_in_page = rest_blocks;
1394 } 1369 }
1395 1370
1371 double_down_write_data_sem(orig_inode, donor_inode);
1372 if (ret1 < 0)
1373 break;
1374
1396 /* Decrease buffer counter */ 1375 /* Decrease buffer counter */
1397 if (holecheck_path) 1376 if (holecheck_path)
1398 ext4_ext_drop_refs(holecheck_path); 1377 ext4_ext_drop_refs(holecheck_path);
@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1414 1393
1415 } 1394 }
1416out: 1395out:
1396 if (*moved_len) {
1397 ext4_discard_preallocations(orig_inode);
1398 ext4_discard_preallocations(donor_inode);
1399 }
1400
1417 if (orig_path) { 1401 if (orig_path) {
1418 ext4_ext_drop_refs(orig_path); 1402 ext4_ext_drop_refs(orig_path);
1419 kfree(orig_path); 1403 kfree(orig_path);
@@ -1422,7 +1406,7 @@ out:
1422 ext4_ext_drop_refs(holecheck_path); 1406 ext4_ext_drop_refs(holecheck_path);
1423 kfree(holecheck_path); 1407 kfree(holecheck_path);
1424 } 1408 }
1425 1409 double_up_write_data_sem(orig_inode, donor_inode);
1426 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1410 ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1427 1411
1428 if (ret1) 1412 if (ret1)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6d2c1b897fc7..17a17e10dd60 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1292,9 +1292,6 @@ errout:
1292 * add_dirent_to_buf will attempt search the directory block for 1292 * add_dirent_to_buf will attempt search the directory block for
1293 * space. It will return -ENOSPC if no space is available, and -EIO 1293 * space. It will return -ENOSPC if no space is available, and -EIO
1294 * and -EEXIST if directory entry already exists. 1294 * and -EEXIST if directory entry already exists.
1295 *
1296 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
1297 * all other cases bh is released.
1298 */ 1295 */
1299static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, 1296static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1300 struct inode *inode, struct ext4_dir_entry_2 *de, 1297 struct inode *inode, struct ext4_dir_entry_2 *de,
@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1315 top = bh->b_data + blocksize - reclen; 1312 top = bh->b_data + blocksize - reclen;
1316 while ((char *) de <= top) { 1313 while ((char *) de <= top) {
1317 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1314 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1318 bh, offset)) { 1315 bh, offset))
1319 brelse(bh);
1320 return -EIO; 1316 return -EIO;
1321 } 1317 if (ext4_match(namelen, name, de))
1322 if (ext4_match(namelen, name, de)) {
1323 brelse(bh);
1324 return -EEXIST; 1318 return -EEXIST;
1325 }
1326 nlen = EXT4_DIR_REC_LEN(de->name_len); 1319 nlen = EXT4_DIR_REC_LEN(de->name_len);
1327 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); 1320 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1328 if ((de->inode? rlen - nlen: rlen) >= reclen) 1321 if ((de->inode? rlen - nlen: rlen) >= reclen)
@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1337 err = ext4_journal_get_write_access(handle, bh); 1330 err = ext4_journal_get_write_access(handle, bh);
1338 if (err) { 1331 if (err) {
1339 ext4_std_error(dir->i_sb, err); 1332 ext4_std_error(dir->i_sb, err);
1340 brelse(bh);
1341 return err; 1333 return err;
1342 } 1334 }
1343 1335
@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1377 err = ext4_handle_dirty_metadata(handle, dir, bh); 1369 err = ext4_handle_dirty_metadata(handle, dir, bh);
1378 if (err) 1370 if (err)
1379 ext4_std_error(dir->i_sb, err); 1371 ext4_std_error(dir->i_sb, err);
1380 brelse(bh);
1381 return 0; 1372 return 0;
1382} 1373}
1383 1374
@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1471 if (!(de)) 1462 if (!(de))
1472 return retval; 1463 return retval;
1473 1464
1474 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1465 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1466 brelse(bh);
1467 return retval;
1475} 1468}
1476 1469
1477/* 1470/*
@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1514 if(!bh) 1507 if(!bh)
1515 return retval; 1508 return retval;
1516 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1509 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1517 if (retval != -ENOSPC) 1510 if (retval != -ENOSPC) {
1511 brelse(bh);
1518 return retval; 1512 return retval;
1513 }
1519 1514
1520 if (blocks == 1 && !dx_fallback && 1515 if (blocks == 1 && !dx_fallback &&
1521 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) 1516 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1528 de = (struct ext4_dir_entry_2 *) bh->b_data; 1523 de = (struct ext4_dir_entry_2 *) bh->b_data;
1529 de->inode = 0; 1524 de->inode = 0;
1530 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); 1525 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1531 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1526 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1527 brelse(bh);
1528 return retval;
1532} 1529}
1533 1530
1534/* 1531/*
@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1561 goto journal_error; 1558 goto journal_error;
1562 1559
1563 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1560 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1564 if (err != -ENOSPC) { 1561 if (err != -ENOSPC)
1565 bh = NULL;
1566 goto cleanup; 1562 goto cleanup;
1567 }
1568 1563
1569 /* Block full, should compress but for now just split */ 1564 /* Block full, should compress but for now just split */
1570 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", 1565 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1657 if (!de) 1652 if (!de)
1658 goto cleanup; 1653 goto cleanup;
1659 err = add_dirent_to_buf(handle, dentry, inode, de, bh); 1654 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
1660 bh = NULL;
1661 goto cleanup; 1655 goto cleanup;
1662 1656
1663journal_error: 1657journal_error:
@@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1775retry: 1769retry:
1776 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1770 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1777 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1771 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1778 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1772 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1779 if (IS_ERR(handle)) 1773 if (IS_ERR(handle))
1780 return PTR_ERR(handle); 1774 return PTR_ERR(handle);
1781 1775
@@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1809retry: 1803retry:
1810 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1804 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1811 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1805 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1812 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1806 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1813 if (IS_ERR(handle)) 1807 if (IS_ERR(handle))
1814 return PTR_ERR(handle); 1808 return PTR_ERR(handle);
1815 1809
@@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1846retry: 1840retry:
1847 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1841 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1848 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1842 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1849 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1843 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1850 if (IS_ERR(handle)) 1844 if (IS_ERR(handle))
1851 return PTR_ERR(handle); 1845 return PTR_ERR(handle);
1852 1846
@@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
2259retry: 2253retry:
2260 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2254 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2261 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2255 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2262 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 2256 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2263 if (IS_ERR(handle)) 2257 if (IS_ERR(handle))
2264 return PTR_ERR(handle); 2258 return PTR_ERR(handle);
2265 2259
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3cfc343c41b5..3b2c5541d8a6 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
247 goto exit_bh; 247 goto exit_bh;
248 248
249 if (IS_ERR(gdb = bclean(handle, sb, block))) { 249 if (IS_ERR(gdb = bclean(handle, sb, block))) {
250 err = PTR_ERR(bh); 250 err = PTR_ERR(gdb);
251 goto exit_bh; 251 goto exit_bh;
252 } 252 }
253 ext4_handle_dirty_metadata(handle, NULL, gdb); 253 ext4_handle_dirty_metadata(handle, NULL, gdb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d4ca92aab514..768c111a77ec 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
603 if (sb->s_dirt) 603 if (sb->s_dirt)
604 ext4_commit_super(sb, 1); 604 ext4_commit_super(sb, 1);
605 605
606 ext4_release_system_zone(sb);
607 ext4_mb_release(sb);
608 ext4_ext_release(sb);
609 ext4_xattr_put_super(sb);
610 if (sbi->s_journal) { 606 if (sbi->s_journal) {
611 err = jbd2_journal_destroy(sbi->s_journal); 607 err = jbd2_journal_destroy(sbi->s_journal);
612 sbi->s_journal = NULL; 608 sbi->s_journal = NULL;
@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
614 ext4_abort(sb, __func__, 610 ext4_abort(sb, __func__,
615 "Couldn't clean up the journal"); 611 "Couldn't clean up the journal");
616 } 612 }
613
614 ext4_release_system_zone(sb);
615 ext4_mb_release(sb);
616 ext4_ext_release(sb);
617 ext4_xattr_put_super(sb);
618
617 if (!(sb->s_flags & MS_RDONLY)) { 619 if (!(sb->s_flags & MS_RDONLY)) {
618 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 620 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
619 es->s_state = cpu_to_le16(sbi->s_mount_state); 621 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
704 spin_lock_init(&(ei->i_block_reservation_lock)); 706 spin_lock_init(&(ei->i_block_reservation_lock));
705 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 707 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
706 ei->cur_aio_dio = NULL; 708 ei->cur_aio_dio = NULL;
709 ei->i_sync_tid = 0;
710 ei->i_datasync_tid = 0;
707 711
708 return &ei->vfs_inode; 712 return &ei->vfs_inode;
709} 713}
@@ -765,9 +769,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
765#if defined(CONFIG_QUOTA) 769#if defined(CONFIG_QUOTA)
766 struct ext4_sb_info *sbi = EXT4_SB(sb); 770 struct ext4_sb_info *sbi = EXT4_SB(sb);
767 771
768 if (sbi->s_jquota_fmt) 772 if (sbi->s_jquota_fmt) {
769 seq_printf(seq, ",jqfmt=%s", 773 char *fmtname = "";
770 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 774
775 switch (sbi->s_jquota_fmt) {
776 case QFMT_VFS_OLD:
777 fmtname = "vfsold";
778 break;
779 case QFMT_VFS_V0:
780 fmtname = "vfsv0";
781 break;
782 case QFMT_VFS_V1:
783 fmtname = "vfsv1";
784 break;
785 }
786 seq_printf(seq, ",jqfmt=%s", fmtname);
787 }
771 788
772 if (sbi->s_qf_names[USRQUOTA]) 789 if (sbi->s_qf_names[USRQUOTA])
773 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 790 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -899,6 +916,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
899 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 916 if (test_opt(sb, NO_AUTO_DA_ALLOC))
900 seq_puts(seq, ",noauto_da_alloc"); 917 seq_puts(seq, ",noauto_da_alloc");
901 918
919 if (test_opt(sb, DISCARD))
920 seq_puts(seq, ",discard");
921
922 if (test_opt(sb, NOLOAD))
923 seq_puts(seq, ",norecovery");
924
902 ext4_show_quota_options(seq, sb); 925 ext4_show_quota_options(seq, sb);
903 926
904 return 0; 927 return 0;
@@ -1074,12 +1097,13 @@ enum {
1074 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1097 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1075 Opt_data_err_abort, Opt_data_err_ignore, 1098 Opt_data_err_abort, Opt_data_err_ignore,
1076 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1099 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1077 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1100 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1078 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1101 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1079 Opt_usrquota, Opt_grpquota, Opt_i_version, 1102 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1080 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1103 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1081 Opt_block_validity, Opt_noblock_validity, 1104 Opt_block_validity, Opt_noblock_validity,
1082 Opt_inode_readahead_blks, Opt_journal_ioprio 1105 Opt_inode_readahead_blks, Opt_journal_ioprio,
1106 Opt_discard, Opt_nodiscard,
1083}; 1107};
1084 1108
1085static const match_table_t tokens = { 1109static const match_table_t tokens = {
@@ -1104,6 +1128,7 @@ static const match_table_t tokens = {
1104 {Opt_acl, "acl"}, 1128 {Opt_acl, "acl"},
1105 {Opt_noacl, "noacl"}, 1129 {Opt_noacl, "noacl"},
1106 {Opt_noload, "noload"}, 1130 {Opt_noload, "noload"},
1131 {Opt_noload, "norecovery"},
1107 {Opt_nobh, "nobh"}, 1132 {Opt_nobh, "nobh"},
1108 {Opt_bh, "bh"}, 1133 {Opt_bh, "bh"},
1109 {Opt_commit, "commit=%u"}, 1134 {Opt_commit, "commit=%u"},
@@ -1125,6 +1150,7 @@ static const match_table_t tokens = {
1125 {Opt_grpjquota, "grpjquota=%s"}, 1150 {Opt_grpjquota, "grpjquota=%s"},
1126 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1151 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1127 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1152 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1153 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1128 {Opt_grpquota, "grpquota"}, 1154 {Opt_grpquota, "grpquota"},
1129 {Opt_noquota, "noquota"}, 1155 {Opt_noquota, "noquota"},
1130 {Opt_quota, "quota"}, 1156 {Opt_quota, "quota"},
@@ -1144,6 +1170,8 @@ static const match_table_t tokens = {
1144 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1170 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1145 {Opt_auto_da_alloc, "auto_da_alloc"}, 1171 {Opt_auto_da_alloc, "auto_da_alloc"},
1146 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1172 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1173 {Opt_discard, "discard"},
1174 {Opt_nodiscard, "nodiscard"},
1147 {Opt_err, NULL}, 1175 {Opt_err, NULL},
1148}; 1176};
1149 1177
@@ -1425,6 +1453,9 @@ clear_qf_name:
1425 goto set_qf_format; 1453 goto set_qf_format;
1426 case Opt_jqfmt_vfsv0: 1454 case Opt_jqfmt_vfsv0:
1427 qfmt = QFMT_VFS_V0; 1455 qfmt = QFMT_VFS_V0;
1456 goto set_qf_format;
1457 case Opt_jqfmt_vfsv1:
1458 qfmt = QFMT_VFS_V1;
1428set_qf_format: 1459set_qf_format:
1429 if (sb_any_quota_loaded(sb) && 1460 if (sb_any_quota_loaded(sb) &&
1430 sbi->s_jquota_fmt != qfmt) { 1461 sbi->s_jquota_fmt != qfmt) {
@@ -1467,6 +1498,7 @@ set_qf_format:
1467 case Opt_offgrpjquota: 1498 case Opt_offgrpjquota:
1468 case Opt_jqfmt_vfsold: 1499 case Opt_jqfmt_vfsold:
1469 case Opt_jqfmt_vfsv0: 1500 case Opt_jqfmt_vfsv0:
1501 case Opt_jqfmt_vfsv1:
1470 ext4_msg(sb, KERN_ERR, 1502 ext4_msg(sb, KERN_ERR,
1471 "journaled quota options not supported"); 1503 "journaled quota options not supported");
1472 break; 1504 break;
@@ -1565,6 +1597,12 @@ set_qf_format:
1565 else 1597 else
1566 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1598 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1567 break; 1599 break;
1600 case Opt_discard:
1601 set_opt(sbi->s_mount_opt, DISCARD);
1602 break;
1603 case Opt_nodiscard:
1604 clear_opt(sbi->s_mount_opt, DISCARD);
1605 break;
1568 default: 1606 default:
1569 ext4_msg(sb, KERN_ERR, 1607 ext4_msg(sb, KERN_ERR,
1570 "Unrecognized mount option \"%s\" " 1608 "Unrecognized mount option \"%s\" "
@@ -1673,14 +1711,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
1673 size_t size; 1711 size_t size;
1674 int i; 1712 int i;
1675 1713
1676 if (!sbi->s_es->s_log_groups_per_flex) { 1714 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1715 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1716
1717 if (groups_per_flex < 2) {
1677 sbi->s_log_groups_per_flex = 0; 1718 sbi->s_log_groups_per_flex = 0;
1678 return 1; 1719 return 1;
1679 } 1720 }
1680 1721
1681 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1682 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1683
1684 /* We allocate both existing and potentially added groups */ 1722 /* We allocate both existing and potentially added groups */
1685 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1723 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1686 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1724 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
@@ -2721,26 +2759,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2721 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2759 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2722 if (ext4_load_journal(sb, es, journal_devnum)) 2760 if (ext4_load_journal(sb, es, journal_devnum))
2723 goto failed_mount3; 2761 goto failed_mount3;
2724 if (!(sb->s_flags & MS_RDONLY) &&
2725 EXT4_SB(sb)->s_journal->j_failed_commit) {
2726 ext4_msg(sb, KERN_CRIT, "error: "
2727 "ext4_fill_super: Journal transaction "
2728 "%u is corrupt",
2729 EXT4_SB(sb)->s_journal->j_failed_commit);
2730 if (test_opt(sb, ERRORS_RO)) {
2731 ext4_msg(sb, KERN_CRIT,
2732 "Mounting filesystem read-only");
2733 sb->s_flags |= MS_RDONLY;
2734 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2735 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2736 }
2737 if (test_opt(sb, ERRORS_PANIC)) {
2738 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2739 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2740 ext4_commit_super(sb, 1);
2741 goto failed_mount4;
2742 }
2743 }
2744 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2762 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2745 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2763 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2746 ext4_msg(sb, KERN_ERR, "required journal recovery " 2764 ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -3668,13 +3686,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3668 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3686 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3669 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3687 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3670 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3688 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3671 ext4_free_blocks_count_set(es, buf->f_bfree);
3672 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3689 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3673 if (buf->f_bfree < ext4_r_blocks_count(es)) 3690 if (buf->f_bfree < ext4_r_blocks_count(es))
3674 buf->f_bavail = 0; 3691 buf->f_bavail = 0;
3675 buf->f_files = le32_to_cpu(es->s_inodes_count); 3692 buf->f_files = le32_to_cpu(es->s_inodes_count);
3676 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3693 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3677 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3678 buf->f_namelen = EXT4_NAME_LEN; 3694 buf->f_namelen = EXT4_NAME_LEN;
3679 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3695 fsid = le64_to_cpup((void *)es->s_uuid) ^
3680 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3696 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -3966,6 +3982,58 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3966 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 3982 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3967} 3983}
3968 3984
3985#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
3986static struct file_system_type ext2_fs_type = {
3987 .owner = THIS_MODULE,
3988 .name = "ext2",
3989 .get_sb = ext4_get_sb,
3990 .kill_sb = kill_block_super,
3991 .fs_flags = FS_REQUIRES_DEV,
3992};
3993
3994static inline void register_as_ext2(void)
3995{
3996 int err = register_filesystem(&ext2_fs_type);
3997 if (err)
3998 printk(KERN_WARNING
3999 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
4000}
4001
4002static inline void unregister_as_ext2(void)
4003{
4004 unregister_filesystem(&ext2_fs_type);
4005}
4006#else
4007static inline void register_as_ext2(void) { }
4008static inline void unregister_as_ext2(void) { }
4009#endif
4010
4011#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4012static struct file_system_type ext3_fs_type = {
4013 .owner = THIS_MODULE,
4014 .name = "ext3",
4015 .get_sb = ext4_get_sb,
4016 .kill_sb = kill_block_super,
4017 .fs_flags = FS_REQUIRES_DEV,
4018};
4019
4020static inline void register_as_ext3(void)
4021{
4022 int err = register_filesystem(&ext3_fs_type);
4023 if (err)
4024 printk(KERN_WARNING
4025 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
4026}
4027
4028static inline void unregister_as_ext3(void)
4029{
4030 unregister_filesystem(&ext3_fs_type);
4031}
4032#else
4033static inline void register_as_ext3(void) { }
4034static inline void unregister_as_ext3(void) { }
4035#endif
4036
3969static struct file_system_type ext4_fs_type = { 4037static struct file_system_type ext4_fs_type = {
3970 .owner = THIS_MODULE, 4038 .owner = THIS_MODULE,
3971 .name = "ext4", 4039 .name = "ext4",
@@ -3995,11 +4063,15 @@ static int __init init_ext4_fs(void)
3995 err = init_inodecache(); 4063 err = init_inodecache();
3996 if (err) 4064 if (err)
3997 goto out1; 4065 goto out1;
4066 register_as_ext2();
4067 register_as_ext3();
3998 err = register_filesystem(&ext4_fs_type); 4068 err = register_filesystem(&ext4_fs_type);
3999 if (err) 4069 if (err)
4000 goto out; 4070 goto out;
4001 return 0; 4071 return 0;
4002out: 4072out:
4073 unregister_as_ext2();
4074 unregister_as_ext3();
4003 destroy_inodecache(); 4075 destroy_inodecache();
4004out1: 4076out1:
4005 exit_ext4_xattr(); 4077 exit_ext4_xattr();
@@ -4015,6 +4087,8 @@ out4:
4015 4087
4016static void __exit exit_ext4_fs(void) 4088static void __exit exit_ext4_fs(void)
4017{ 4089{
4090 unregister_as_ext2();
4091 unregister_as_ext3();
4018 unregister_filesystem(&ext4_fs_type); 4092 unregister_filesystem(&ext4_fs_type);
4019 destroy_inodecache(); 4093 destroy_inodecache();
4020 exit_ext4_xattr(); 4094 exit_ext4_xattr();
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fed5b01d7a8d..910bf9a59cb3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
482 ea_bdebug(bh, "refcount now=0; freeing"); 482 ea_bdebug(bh, "refcount now=0; freeing");
483 if (ce) 483 if (ce)
484 mb_cache_entry_free(ce); 484 mb_cache_entry_free(ce);
485 ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
486 get_bh(bh); 485 get_bh(bh);
487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 486 ext4_free_blocks(handle, inode, bh, 0, 1,
487 EXT4_FREE_BLOCKS_METADATA |
488 EXT4_FREE_BLOCKS_FORGET);
488 } else { 489 } else {
489 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 490 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
490 error = ext4_handle_dirty_metadata(handle, inode, bh); 491 error = ext4_handle_dirty_metadata(handle, inode, bh);
@@ -832,7 +833,8 @@ inserted:
832 new_bh = sb_getblk(sb, block); 833 new_bh = sb_getblk(sb, block);
833 if (!new_bh) { 834 if (!new_bh) {
834getblk_failed: 835getblk_failed:
835 ext4_free_blocks(handle, inode, block, 1, 1); 836 ext4_free_blocks(handle, inode, 0, block, 1,
837 EXT4_FREE_BLOCKS_METADATA);
836 error = -EIO; 838 error = -EIO;
837 goto cleanup; 839 goto cleanup;
838 } 840 }
@@ -988,6 +990,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
988 if (error) 990 if (error)
989 goto cleanup; 991 goto cleanup;
990 992
993 error = ext4_journal_get_write_access(handle, is.iloc.bh);
994 if (error)
995 goto cleanup;
996
991 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { 997 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
992 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); 998 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
993 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 999 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -1013,9 +1019,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1013 if (flags & XATTR_CREATE) 1019 if (flags & XATTR_CREATE)
1014 goto cleanup; 1020 goto cleanup;
1015 } 1021 }
1016 error = ext4_journal_get_write_access(handle, is.iloc.bh);
1017 if (error)
1018 goto cleanup;
1019 if (!value) { 1022 if (!value) {
1020 if (!is.s.not_found) 1023 if (!is.s.not_found)
1021 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 1024 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
diff --git a/fs/file_table.c b/fs/file_table.c
index 8eb44042e009..4bef4c01ec6f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -13,7 +13,6 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/ima.h>
17#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
18#include <linux/rcupdate.h> 17#include <linux/rcupdate.h>
19#include <linux/mount.h> 18#include <linux/mount.h>
@@ -280,7 +279,6 @@ void __fput(struct file *file)
280 if (file->f_op && file->f_op->release) 279 if (file->f_op && file->f_op->release)
281 file->f_op->release(inode, file); 280 file->f_op->release(inode, file);
282 security_file_free(file); 281 security_file_free(file);
283 ima_file_free(file);
284 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 282 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
285 cdev_put(inode->i_cdev); 283 cdev_put(inode->i_cdev);
286 fops_put(file->f_op); 284 fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9d5360c4c2af..49bc1b8e8f19 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -614,7 +614,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
614 struct writeback_control *wbc) 614 struct writeback_control *wbc)
615{ 615{
616 struct super_block *sb = wbc->sb, *pin_sb = NULL; 616 struct super_block *sb = wbc->sb, *pin_sb = NULL;
617 const int is_blkdev_sb = sb_is_blkdev_sb(sb);
618 const unsigned long start = jiffies; /* livelock avoidance */ 617 const unsigned long start = jiffies; /* livelock avoidance */
619 618
620 spin_lock(&inode_lock); 619 spin_lock(&inode_lock);
@@ -635,36 +634,11 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
635 continue; 634 continue;
636 } 635 }
637 636
638 if (!bdi_cap_writeback_dirty(wb->bdi)) {
639 redirty_tail(inode);
640 if (is_blkdev_sb) {
641 /*
642 * Dirty memory-backed blockdev: the ramdisk
643 * driver does this. Skip just this inode
644 */
645 continue;
646 }
647 /*
648 * Dirty memory-backed inode against a filesystem other
649 * than the kernel-internal bdev filesystem. Skip the
650 * entire superblock.
651 */
652 break;
653 }
654
655 if (inode->i_state & (I_NEW | I_WILL_FREE)) { 637 if (inode->i_state & (I_NEW | I_WILL_FREE)) {
656 requeue_io(inode); 638 requeue_io(inode);
657 continue; 639 continue;
658 } 640 }
659 641
660 if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
661 wbc->encountered_congestion = 1;
662 if (!is_blkdev_sb)
663 break; /* Skip a congested fs */
664 requeue_io(inode);
665 continue; /* Skip a congested blockdev */
666 }
667
668 /* 642 /*
669 * Was this inode dirtied after sync_sb_inodes was called? 643 * Was this inode dirtied after sync_sb_inodes was called?
670 * This keeps sync from extra jobs and livelock. 644 * This keeps sync from extra jobs and livelock.
@@ -756,6 +730,7 @@ static long wb_writeback(struct bdi_writeback *wb,
756 .sync_mode = args->sync_mode, 730 .sync_mode = args->sync_mode,
757 .older_than_this = NULL, 731 .older_than_this = NULL,
758 .for_kupdate = args->for_kupdate, 732 .for_kupdate = args->for_kupdate,
733 .for_background = args->for_background,
759 .range_cyclic = args->range_cyclic, 734 .range_cyclic = args->range_cyclic,
760 }; 735 };
761 unsigned long oldest_jif; 736 unsigned long oldest_jif;
@@ -787,7 +762,6 @@ static long wb_writeback(struct bdi_writeback *wb,
787 break; 762 break;
788 763
789 wbc.more_io = 0; 764 wbc.more_io = 0;
790 wbc.encountered_congestion = 0;
791 wbc.nr_to_write = MAX_WRITEBACK_PAGES; 765 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
792 wbc.pages_skipped = 0; 766 wbc.pages_skipped = 0;
793 writeback_inodes_wb(wb, &wbc); 767 writeback_inodes_wb(wb, &wbc);
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 5971359d2090..4dcddf83326f 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -8,6 +8,8 @@ config GFS2_FS
8 select FS_POSIX_ACL 8 select FS_POSIX_ACL
9 select CRC32 9 select CRC32
10 select SLOW_WORK 10 select SLOW_WORK
11 select QUOTA
12 select QUOTACTL
11 help 13 help
12 A cluster filesystem. 14 A cluster filesystem.
13 15
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3fc4e3ac7d84..3eb1ea846173 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -12,6 +12,7 @@
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/completion.h> 13#include <linux/completion.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/xattr.h>
15#include <linux/posix_acl.h> 16#include <linux/posix_acl.h>
16#include <linux/posix_acl_xattr.h> 17#include <linux/posix_acl_xattr.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
@@ -26,108 +27,44 @@
26#include "trans.h" 27#include "trans.h"
27#include "util.h" 28#include "util.h"
28 29
29#define ACL_ACCESS 1 30static const char *gfs2_acl_name(int type)
30#define ACL_DEFAULT 0
31
32int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
33 struct gfs2_ea_request *er, int *remove, mode_t *mode)
34{ 31{
35 struct posix_acl *acl; 32 switch (type) {
36 int error; 33 case ACL_TYPE_ACCESS:
37 34 return GFS2_POSIX_ACL_ACCESS;
38 error = gfs2_acl_validate_remove(ip, access); 35 case ACL_TYPE_DEFAULT:
39 if (error) 36 return GFS2_POSIX_ACL_DEFAULT;
40 return error;
41
42 if (!er->er_data)
43 return -EINVAL;
44
45 acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
46 if (IS_ERR(acl))
47 return PTR_ERR(acl);
48 if (!acl) {
49 *remove = 1;
50 return 0;
51 }
52
53 error = posix_acl_valid(acl);
54 if (error)
55 goto out;
56
57 if (access) {
58 error = posix_acl_equiv_mode(acl, mode);
59 if (!error)
60 *remove = 1;
61 else if (error > 0)
62 error = 0;
63 } 37 }
64 38 return NULL;
65out:
66 posix_acl_release(acl);
67 return error;
68}
69
70int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
71{
72 if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
73 return -EOPNOTSUPP;
74 if (!is_owner_or_cap(&ip->i_inode))
75 return -EPERM;
76 if (S_ISLNK(ip->i_inode.i_mode))
77 return -EOPNOTSUPP;
78 if (!access && !S_ISDIR(ip->i_inode.i_mode))
79 return -EACCES;
80
81 return 0;
82} 39}
83 40
84static int acl_get(struct gfs2_inode *ip, const char *name, 41static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
85 struct posix_acl **acl, struct gfs2_ea_location *el,
86 char **datap, unsigned int *lenp)
87{ 42{
43 struct posix_acl *acl;
44 const char *name;
88 char *data; 45 char *data;
89 unsigned int len; 46 int len;
90 int error;
91
92 el->el_bh = NULL;
93 47
94 if (!ip->i_eattr) 48 if (!ip->i_eattr)
95 return 0; 49 return NULL;
96
97 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el);
98 if (error)
99 return error;
100 if (!el->el_ea)
101 return 0;
102 if (!GFS2_EA_DATA_LEN(el->el_ea))
103 goto out;
104 50
105 len = GFS2_EA_DATA_LEN(el->el_ea); 51 acl = get_cached_acl(&ip->i_inode, type);
106 data = kmalloc(len, GFP_NOFS); 52 if (acl != ACL_NOT_CACHED)
107 error = -ENOMEM; 53 return acl;
108 if (!data)
109 goto out;
110 54
111 error = gfs2_ea_get_copy(ip, el, data, len); 55 name = gfs2_acl_name(type);
112 if (error < 0) 56 if (name == NULL)
113 goto out_kfree; 57 return ERR_PTR(-EINVAL);
114 error = 0;
115 58
116 if (acl) { 59 len = gfs2_xattr_acl_get(ip, name, &data);
117 *acl = posix_acl_from_xattr(data, len); 60 if (len < 0)
118 if (IS_ERR(*acl)) 61 return ERR_PTR(len);
119 error = PTR_ERR(*acl); 62 if (len == 0)
120 } 63 return NULL;
121 64
122out_kfree: 65 acl = posix_acl_from_xattr(data, len);
123 if (error || !datap) { 66 kfree(data);
124 kfree(data); 67 return acl;
125 } else {
126 *datap = data;
127 *lenp = len;
128 }
129out:
130 return error;
131} 68}
132 69
133/** 70/**
@@ -140,14 +77,12 @@ out:
140 77
141int gfs2_check_acl(struct inode *inode, int mask) 78int gfs2_check_acl(struct inode *inode, int mask)
142{ 79{
143 struct gfs2_ea_location el; 80 struct posix_acl *acl;
144 struct posix_acl *acl = NULL;
145 int error; 81 int error;
146 82
147 error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL); 83 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
148 brelse(el.el_bh); 84 if (IS_ERR(acl))
149 if (error) 85 return PTR_ERR(acl);
150 return error;
151 86
152 if (acl) { 87 if (acl) {
153 error = posix_acl_permission(inode, acl, mask); 88 error = posix_acl_permission(inode, acl, mask);
@@ -158,57 +93,75 @@ int gfs2_check_acl(struct inode *inode, int mask)
158 return -EAGAIN; 93 return -EAGAIN;
159} 94}
160 95
161static int munge_mode(struct gfs2_inode *ip, mode_t mode) 96static int gfs2_set_mode(struct inode *inode, mode_t mode)
162{ 97{
163 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 98 int error = 0;
164 struct buffer_head *dibh;
165 int error;
166 99
167 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 100 if (mode != inode->i_mode) {
168 if (error) 101 struct iattr iattr;
169 return error;
170 102
171 error = gfs2_meta_inode_buffer(ip, &dibh); 103 iattr.ia_valid = ATTR_MODE;
172 if (!error) { 104 iattr.ia_mode = mode;
173 gfs2_assert_withdraw(sdp, 105
174 (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); 106 error = gfs2_setattr_simple(GFS2_I(inode), &iattr);
175 ip->i_inode.i_mode = mode;
176 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
177 gfs2_dinode_out(ip, dibh->b_data);
178 brelse(dibh);
179 } 107 }
180 108
181 gfs2_trans_end(sdp); 109 return error;
110}
111
112static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
113{
114 int error;
115 int len;
116 char *data;
117 const char *name = gfs2_acl_name(type);
182 118
183 return 0; 119 BUG_ON(name == NULL);
120 len = posix_acl_to_xattr(acl, NULL, 0);
121 if (len == 0)
122 return 0;
123 data = kmalloc(len, GFP_NOFS);
124 if (data == NULL)
125 return -ENOMEM;
126 error = posix_acl_to_xattr(acl, data, len);
127 if (error < 0)
128 goto out;
129 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0);
130 if (!error)
131 set_cached_acl(inode, type, acl);
132out:
133 kfree(data);
134 return error;
184} 135}
185 136
186int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) 137int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
187{ 138{
188 struct gfs2_ea_location el;
189 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 139 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
190 struct posix_acl *acl = NULL, *clone; 140 struct posix_acl *acl, *clone;
191 mode_t mode = ip->i_inode.i_mode; 141 mode_t mode = inode->i_mode;
192 char *data = NULL; 142 int error = 0;
193 unsigned int len;
194 int error;
195 143
196 if (!sdp->sd_args.ar_posix_acl) 144 if (!sdp->sd_args.ar_posix_acl)
197 return 0; 145 return 0;
198 if (S_ISLNK(ip->i_inode.i_mode)) 146 if (S_ISLNK(inode->i_mode))
199 return 0; 147 return 0;
200 148
201 error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len); 149 acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT);
202 brelse(el.el_bh); 150 if (IS_ERR(acl))
203 if (error) 151 return PTR_ERR(acl);
204 return error;
205 if (!acl) { 152 if (!acl) {
206 mode &= ~current_umask(); 153 mode &= ~current_umask();
207 if (mode != ip->i_inode.i_mode) 154 if (mode != inode->i_mode)
208 error = munge_mode(ip, mode); 155 error = gfs2_set_mode(inode, mode);
209 return error; 156 return error;
210 } 157 }
211 158
159 if (S_ISDIR(inode->i_mode)) {
160 error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl);
161 if (error)
162 goto out;
163 }
164
212 clone = posix_acl_clone(acl, GFP_NOFS); 165 clone = posix_acl_clone(acl, GFP_NOFS);
213 error = -ENOMEM; 166 error = -ENOMEM;
214 if (!clone) 167 if (!clone)
@@ -216,43 +169,32 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
216 posix_acl_release(acl); 169 posix_acl_release(acl);
217 acl = clone; 170 acl = clone;
218 171
219 if (S_ISDIR(ip->i_inode.i_mode)) {
220 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
221 GFS2_POSIX_ACL_DEFAULT, data, len, 0);
222 if (error)
223 goto out;
224 }
225
226 error = posix_acl_create_masq(acl, &mode); 172 error = posix_acl_create_masq(acl, &mode);
227 if (error < 0) 173 if (error < 0)
228 goto out; 174 goto out;
229 if (error == 0) 175 if (error == 0)
230 goto munge; 176 goto munge;
231 177
232 posix_acl_to_xattr(acl, data, len); 178 error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl);
233 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
234 GFS2_POSIX_ACL_ACCESS, data, len, 0);
235 if (error) 179 if (error)
236 goto out; 180 goto out;
237munge: 181munge:
238 error = munge_mode(ip, mode); 182 error = gfs2_set_mode(inode, mode);
239out: 183out:
240 posix_acl_release(acl); 184 posix_acl_release(acl);
241 kfree(data);
242 return error; 185 return error;
243} 186}
244 187
245int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) 188int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
246{ 189{
247 struct posix_acl *acl = NULL, *clone; 190 struct posix_acl *acl, *clone;
248 struct gfs2_ea_location el;
249 char *data; 191 char *data;
250 unsigned int len; 192 unsigned int len;
251 int error; 193 int error;
252 194
253 error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len); 195 acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS);
254 if (error) 196 if (IS_ERR(acl))
255 goto out_brelse; 197 return PTR_ERR(acl);
256 if (!acl) 198 if (!acl)
257 return gfs2_setattr_simple(ip, attr); 199 return gfs2_setattr_simple(ip, attr);
258 200
@@ -265,15 +207,134 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
265 207
266 error = posix_acl_chmod_masq(acl, attr->ia_mode); 208 error = posix_acl_chmod_masq(acl, attr->ia_mode);
267 if (!error) { 209 if (!error) {
210 len = posix_acl_to_xattr(acl, NULL, 0);
211 data = kmalloc(len, GFP_NOFS);
212 error = -ENOMEM;
213 if (data == NULL)
214 goto out;
268 posix_acl_to_xattr(acl, data, len); 215 posix_acl_to_xattr(acl, data, len);
269 error = gfs2_ea_acl_chmod(ip, &el, attr, data); 216 error = gfs2_xattr_acl_chmod(ip, attr, data);
217 kfree(data);
218 set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl);
270 } 219 }
271 220
272out: 221out:
273 posix_acl_release(acl); 222 posix_acl_release(acl);
274 kfree(data);
275out_brelse:
276 brelse(el.el_bh);
277 return error; 223 return error;
278} 224}
279 225
226static int gfs2_acl_type(const char *name)
227{
228 if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0)
229 return ACL_TYPE_ACCESS;
230 if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0)
231 return ACL_TYPE_DEFAULT;
232 return -EINVAL;
233}
234
235static int gfs2_xattr_system_get(struct inode *inode, const char *name,
236 void *buffer, size_t size)
237{
238 struct posix_acl *acl;
239 int type;
240 int error;
241
242 type = gfs2_acl_type(name);
243 if (type < 0)
244 return type;
245
246 acl = gfs2_acl_get(GFS2_I(inode), type);
247 if (IS_ERR(acl))
248 return PTR_ERR(acl);
249 if (acl == NULL)
250 return -ENODATA;
251
252 error = posix_acl_to_xattr(acl, buffer, size);
253 posix_acl_release(acl);
254
255 return error;
256}
257
258static int gfs2_xattr_system_set(struct inode *inode, const char *name,
259 const void *value, size_t size, int flags)
260{
261 struct gfs2_sbd *sdp = GFS2_SB(inode);
262 struct posix_acl *acl = NULL;
263 int error = 0, type;
264
265 if (!sdp->sd_args.ar_posix_acl)
266 return -EOPNOTSUPP;
267
268 type = gfs2_acl_type(name);
269 if (type < 0)
270 return type;
271 if (flags & XATTR_CREATE)
272 return -EINVAL;
273 if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
274 return value ? -EACCES : 0;
275 if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
276 return -EPERM;
277 if (S_ISLNK(inode->i_mode))
278 return -EOPNOTSUPP;
279
280 if (!value)
281 goto set_acl;
282
283 acl = posix_acl_from_xattr(value, size);
284 if (!acl) {
285 /*
286 * acl_set_file(3) may request that we set default ACLs with
287 * zero length -- defend (gracefully) against that here.
288 */
289 goto out;
290 }
291 if (IS_ERR(acl)) {
292 error = PTR_ERR(acl);
293 goto out;
294 }
295
296 error = posix_acl_valid(acl);
297 if (error)
298 goto out_release;
299
300 error = -EINVAL;
301 if (acl->a_count > GFS2_ACL_MAX_ENTRIES)
302 goto out_release;
303
304 if (type == ACL_TYPE_ACCESS) {
305 mode_t mode = inode->i_mode;
306 error = posix_acl_equiv_mode(acl, &mode);
307
308 if (error <= 0) {
309 posix_acl_release(acl);
310 acl = NULL;
311
312 if (error < 0)
313 return error;
314 }
315
316 error = gfs2_set_mode(inode, mode);
317 if (error)
318 goto out_release;
319 }
320
321set_acl:
322 error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0);
323 if (!error) {
324 if (acl)
325 set_cached_acl(inode, type, acl);
326 else
327 forget_cached_acl(inode, type);
328 }
329out_release:
330 posix_acl_release(acl);
331out:
332 return error;
333}
334
335struct xattr_handler gfs2_xattr_system_handler = {
336 .prefix = XATTR_SYSTEM_PREFIX,
337 .get = gfs2_xattr_system_get,
338 .set = gfs2_xattr_system_set,
339};
340
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index 6751930bfb64..9306a2e6620c 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -13,26 +13,12 @@
13#include "incore.h" 13#include "incore.h"
14 14
15#define GFS2_POSIX_ACL_ACCESS "posix_acl_access" 15#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
16#define GFS2_POSIX_ACL_ACCESS_LEN 16
17#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
18#define GFS2_POSIX_ACL_DEFAULT_LEN 17 17#define GFS2_ACL_MAX_ENTRIES 25
19 18
20#define GFS2_ACL_IS_ACCESS(name, len) \ 19extern int gfs2_check_acl(struct inode *inode, int mask);
21 ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
22 !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
23 22extern struct xattr_handler gfs2_xattr_system_handler;
24#define GFS2_ACL_IS_DEFAULT(name, len) \
25 ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
26 !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))
27
28struct gfs2_ea_request;
29
30int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
31 struct gfs2_ea_request *er,
32 int *remove, mode_t *mode);
33int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
34int gfs2_check_acl(struct inode *inode, int mask);
35int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
36int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
37 23
38#endif /* __ACL_DOT_H__ */ 24#endif /* __ACL_DOT_H__ */
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 694b5d48f036..7b8da9415267 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -269,7 +269,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
269 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 269 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
270 unsigned offset = i_size & (PAGE_CACHE_SIZE-1); 270 unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
271 unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); 271 unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
272 struct backing_dev_info *bdi = mapping->backing_dev_info;
273 int i; 272 int i;
274 int ret; 273 int ret;
275 274
@@ -313,11 +312,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
313 312
314 if (ret || (--(wbc->nr_to_write) <= 0)) 313 if (ret || (--(wbc->nr_to_write) <= 0))
315 ret = 1; 314 ret = 1;
316 if (wbc->nonblocking && bdi_write_congested(bdi)) {
317 wbc->encountered_congestion = 1;
318 ret = 1;
319 }
320
321 } 315 }
322 gfs2_trans_end(sdp); 316 gfs2_trans_end(sdp);
323 return ret; 317 return ret;
@@ -338,7 +332,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
338static int gfs2_write_cache_jdata(struct address_space *mapping, 332static int gfs2_write_cache_jdata(struct address_space *mapping,
339 struct writeback_control *wbc) 333 struct writeback_control *wbc)
340{ 334{
341 struct backing_dev_info *bdi = mapping->backing_dev_info;
342 int ret = 0; 335 int ret = 0;
343 int done = 0; 336 int done = 0;
344 struct pagevec pvec; 337 struct pagevec pvec;
@@ -348,11 +341,6 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
348 int scanned = 0; 341 int scanned = 0;
349 int range_whole = 0; 342 int range_whole = 0;
350 343
351 if (wbc->nonblocking && bdi_write_congested(bdi)) {
352 wbc->encountered_congestion = 1;
353 return 0;
354 }
355
356 pagevec_init(&pvec, 0); 344 pagevec_init(&pvec, 0);
357 if (wbc->range_cyclic) { 345 if (wbc->range_cyclic) {
358 index = mapping->writeback_index; /* Start from prev offset */ 346 index = mapping->writeback_index; /* Start from prev offset */
@@ -819,8 +807,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
819 mark_inode_dirty(inode); 807 mark_inode_dirty(inode);
820 } 808 }
821 809
822 if (inode == sdp->sd_rindex) 810 if (inode == sdp->sd_rindex) {
823 adjust_fs_space(inode); 811 adjust_fs_space(inode);
812 ip->i_gh.gh_flags |= GL_NOCACHE;
813 }
824 814
825 brelse(dibh); 815 brelse(dibh);
826 gfs2_trans_end(sdp); 816 gfs2_trans_end(sdp);
@@ -889,8 +879,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
889 mark_inode_dirty(inode); 879 mark_inode_dirty(inode);
890 } 880 }
891 881
892 if (inode == sdp->sd_rindex) 882 if (inode == sdp->sd_rindex) {
893 adjust_fs_space(inode); 883 adjust_fs_space(inode);
884 ip->i_gh.gh_flags |= GL_NOCACHE;
885 }
894 886
895 brelse(dibh); 887 brelse(dibh);
896 gfs2_trans_end(sdp); 888 gfs2_trans_end(sdp);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 297d7e5cebad..25fddc100f18 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -525,38 +525,6 @@ consist_inode:
525 return ERR_PTR(-EIO); 525 return ERR_PTR(-EIO);
526} 526}
527 527
528
529/**
530 * dirent_first - Return the first dirent
531 * @dip: the directory
532 * @bh: The buffer
533 * @dent: Pointer to list of dirents
534 *
535 * return first dirent whether bh points to leaf or stuffed dinode
536 *
537 * Returns: IS_LEAF, IS_DINODE, or -errno
538 */
539
540static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
541 struct gfs2_dirent **dent)
542{
543 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
544
545 if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
546 if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh))
547 return -EIO;
548 *dent = (struct gfs2_dirent *)(bh->b_data +
549 sizeof(struct gfs2_leaf));
550 return IS_LEAF;
551 } else {
552 if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI))
553 return -EIO;
554 *dent = (struct gfs2_dirent *)(bh->b_data +
555 sizeof(struct gfs2_dinode));
556 return IS_DINODE;
557 }
558}
559
560static int dirent_check_reclen(struct gfs2_inode *dip, 528static int dirent_check_reclen(struct gfs2_inode *dip,
561 const struct gfs2_dirent *d, const void *end_p) 529 const struct gfs2_dirent *d, const void *end_p)
562{ 530{
@@ -1006,7 +974,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
1006 divider = (start + half_len) << (32 - dip->i_depth); 974 divider = (start + half_len) << (32 - dip->i_depth);
1007 975
1008 /* Copy the entries */ 976 /* Copy the entries */
1009 dirent_first(dip, obh, &dent); 977 dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf));
1010 978
1011 do { 979 do {
1012 next = dent; 980 next = dent;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8b674b1f3a55..f455a03a09e2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -241,15 +241,14 @@ int gfs2_glock_put(struct gfs2_glock *gl)
241 int rv = 0; 241 int rv = 0;
242 242
243 write_lock(gl_lock_addr(gl->gl_hash)); 243 write_lock(gl_lock_addr(gl->gl_hash));
244 if (atomic_dec_and_test(&gl->gl_ref)) { 244 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
245 hlist_del(&gl->gl_list); 245 hlist_del(&gl->gl_list);
246 write_unlock(gl_lock_addr(gl->gl_hash));
247 spin_lock(&lru_lock);
248 if (!list_empty(&gl->gl_lru)) { 246 if (!list_empty(&gl->gl_lru)) {
249 list_del_init(&gl->gl_lru); 247 list_del_init(&gl->gl_lru);
250 atomic_dec(&lru_count); 248 atomic_dec(&lru_count);
251 } 249 }
252 spin_unlock(&lru_lock); 250 spin_unlock(&lru_lock);
251 write_unlock(gl_lock_addr(gl->gl_hash));
253 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 252 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
254 glock_free(gl); 253 glock_free(gl);
255 rv = 1; 254 rv = 1;
@@ -513,7 +512,6 @@ retry:
513 GLOCK_BUG_ON(gl, 1); 512 GLOCK_BUG_ON(gl, 1);
514 } 513 }
515 spin_unlock(&gl->gl_spin); 514 spin_unlock(&gl->gl_spin);
516 gfs2_glock_put(gl);
517 return; 515 return;
518 } 516 }
519 517
@@ -524,8 +522,6 @@ retry:
524 if (glops->go_xmote_bh) { 522 if (glops->go_xmote_bh) {
525 spin_unlock(&gl->gl_spin); 523 spin_unlock(&gl->gl_spin);
526 rv = glops->go_xmote_bh(gl, gh); 524 rv = glops->go_xmote_bh(gl, gh);
527 if (rv == -EAGAIN)
528 return;
529 spin_lock(&gl->gl_spin); 525 spin_lock(&gl->gl_spin);
530 if (rv) { 526 if (rv) {
531 do_error(gl, rv); 527 do_error(gl, rv);
@@ -540,7 +536,6 @@ out:
540 clear_bit(GLF_LOCK, &gl->gl_flags); 536 clear_bit(GLF_LOCK, &gl->gl_flags);
541out_locked: 537out_locked:
542 spin_unlock(&gl->gl_spin); 538 spin_unlock(&gl->gl_spin);
543 gfs2_glock_put(gl);
544} 539}
545 540
546static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, 541static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
@@ -600,7 +595,6 @@ __acquires(&gl->gl_spin)
600 595
601 if (!(ret & LM_OUT_ASYNC)) { 596 if (!(ret & LM_OUT_ASYNC)) {
602 finish_xmote(gl, ret); 597 finish_xmote(gl, ret);
603 gfs2_glock_hold(gl);
604 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 598 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
605 gfs2_glock_put(gl); 599 gfs2_glock_put(gl);
606 } else { 600 } else {
@@ -672,12 +666,17 @@ out:
672 return; 666 return;
673 667
674out_sched: 668out_sched:
669 clear_bit(GLF_LOCK, &gl->gl_flags);
670 smp_mb__after_clear_bit();
675 gfs2_glock_hold(gl); 671 gfs2_glock_hold(gl);
676 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 672 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
677 gfs2_glock_put_nolock(gl); 673 gfs2_glock_put_nolock(gl);
674 return;
675
678out_unlock: 676out_unlock:
679 clear_bit(GLF_LOCK, &gl->gl_flags); 677 clear_bit(GLF_LOCK, &gl->gl_flags);
680 goto out; 678 smp_mb__after_clear_bit();
679 return;
681} 680}
682 681
683static void delete_work_func(struct work_struct *work) 682static void delete_work_func(struct work_struct *work)
@@ -707,9 +706,12 @@ static void glock_work_func(struct work_struct *work)
707{ 706{
708 unsigned long delay = 0; 707 unsigned long delay = 0;
709 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
709 int drop_ref = 0;
710 710
711 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 711 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
712 finish_xmote(gl, gl->gl_reply); 712 finish_xmote(gl, gl->gl_reply);
713 drop_ref = 1;
714 }
713 down_read(&gfs2_umount_flush_sem); 715 down_read(&gfs2_umount_flush_sem);
714 spin_lock(&gl->gl_spin); 716 spin_lock(&gl->gl_spin);
715 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 717 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
@@ -727,6 +729,8 @@ static void glock_work_func(struct work_struct *work)
727 if (!delay || 729 if (!delay ||
728 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 730 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
729 gfs2_glock_put(gl); 731 gfs2_glock_put(gl);
732 if (drop_ref)
733 gfs2_glock_put(gl);
730} 734}
731 735
732/** 736/**
@@ -1361,10 +1365,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1361 list_del_init(&gl->gl_lru); 1365 list_del_init(&gl->gl_lru);
1362 atomic_dec(&lru_count); 1366 atomic_dec(&lru_count);
1363 1367
1364 /* Check if glock is about to be freed */
1365 if (atomic_read(&gl->gl_ref) == 0)
1366 continue;
1367
1368 /* Test for being demotable */ 1368 /* Test for being demotable */
1369 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1369 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1370 gfs2_glock_hold(gl); 1370 gfs2_glock_hold(gl);
@@ -1375,10 +1375,11 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1375 handle_callback(gl, LM_ST_UNLOCKED, 0); 1375 handle_callback(gl, LM_ST_UNLOCKED, 0);
1376 nr--; 1376 nr--;
1377 } 1377 }
1378 clear_bit(GLF_LOCK, &gl->gl_flags);
1379 smp_mb__after_clear_bit();
1378 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1380 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1379 gfs2_glock_put_nolock(gl); 1381 gfs2_glock_put_nolock(gl);
1380 spin_unlock(&gl->gl_spin); 1382 spin_unlock(&gl->gl_spin);
1381 clear_bit(GLF_LOCK, &gl->gl_flags);
1382 spin_lock(&lru_lock); 1383 spin_lock(&lru_lock);
1383 continue; 1384 continue;
1384 } 1385 }
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index c609894ec0d0..13f0bd228132 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -180,15 +180,6 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
180 return gl->gl_state == LM_ST_SHARED; 180 return gl->gl_state == LM_ST_SHARED;
181} 181}
182 182
183static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
184{
185 int ret;
186 spin_lock(&gl->gl_spin);
187 ret = test_bit(GLF_DEMOTE, &gl->gl_flags);
188 spin_unlock(&gl->gl_spin);
189 return ret;
190}
191
192int gfs2_glock_get(struct gfs2_sbd *sdp, 183int gfs2_glock_get(struct gfs2_sbd *sdp,
193 u64 number, const struct gfs2_glock_operations *glops, 184 u64 number, const struct gfs2_glock_operations *glops,
194 int create, struct gfs2_glock **glp); 185 int create, struct gfs2_glock **glp);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 6985eef06c39..78554acc0605 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -13,6 +13,7 @@
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/gfs2_ondisk.h> 14#include <linux/gfs2_ondisk.h>
15#include <linux/bio.h> 15#include <linux/bio.h>
16#include <linux/posix_acl.h>
16 17
17#include "gfs2.h" 18#include "gfs2.h"
18#include "incore.h" 19#include "incore.h"
@@ -184,8 +185,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
184 if (flags & DIO_METADATA) { 185 if (flags & DIO_METADATA) {
185 struct address_space *mapping = gl->gl_aspace->i_mapping; 186 struct address_space *mapping = gl->gl_aspace->i_mapping;
186 truncate_inode_pages(mapping, 0); 187 truncate_inode_pages(mapping, 0);
187 if (ip) 188 if (ip) {
188 set_bit(GIF_INVALID, &ip->i_flags); 189 set_bit(GIF_INVALID, &ip->i_flags);
190 forget_all_cached_acls(&ip->i_inode);
191 }
189 } 192 }
190 193
191 if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) 194 if (ip == GFS2_I(gl->gl_sbd->sd_rindex))
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6edb423f90b3..4792200978c8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -429,7 +429,11 @@ struct gfs2_args {
429 unsigned int ar_meta:1; /* mount metafs */ 429 unsigned int ar_meta:1; /* mount metafs */
430 unsigned int ar_discard:1; /* discard requests */ 430 unsigned int ar_discard:1; /* discard requests */
431 unsigned int ar_errors:2; /* errors=withdraw | panic */ 431 unsigned int ar_errors:2; /* errors=withdraw | panic */
432 unsigned int ar_nobarrier:1; /* do not send barriers */
432 int ar_commit; /* Commit interval */ 433 int ar_commit; /* Commit interval */
434 int ar_statfs_quantum; /* The fast statfs interval */
435 int ar_quota_quantum; /* The quota interval */
436 int ar_statfs_percent; /* The % change to force sync */
433}; 437};
434 438
435struct gfs2_tune { 439struct gfs2_tune {
@@ -558,6 +562,7 @@ struct gfs2_sbd {
558 spinlock_t sd_statfs_spin; 562 spinlock_t sd_statfs_spin;
559 struct gfs2_statfs_change_host sd_statfs_master; 563 struct gfs2_statfs_change_host sd_statfs_master;
560 struct gfs2_statfs_change_host sd_statfs_local; 564 struct gfs2_statfs_change_host sd_statfs_local;
565 int sd_statfs_force_sync;
561 566
562 /* Resource group stuff */ 567 /* Resource group stuff */
563 568
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index fb15d3b1f409..26ba2a4c4a2d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -871,7 +871,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
871 if (error) 871 if (error)
872 goto fail_gunlock2; 872 goto fail_gunlock2;
873 873
874 error = gfs2_acl_create(dip, GFS2_I(inode)); 874 error = gfs2_acl_create(dip, inode);
875 if (error) 875 if (error)
876 goto fail_gunlock2; 876 goto fail_gunlock2;
877 877
@@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
947 947
948 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 948 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
949 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 949 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
950 str->di_header.__pad0 = 0;
951 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 950 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
952 str->di_header.__pad1 = 0;
953 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 951 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
954 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 952 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
955 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 953 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 13c6237c5f67..4511b08fc451 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
596 memset(lh, 0, sizeof(struct gfs2_log_header)); 596 memset(lh, 0, sizeof(struct gfs2_log_header));
597 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 597 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
598 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 598 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
599 lh->lh_header.__pad0 = cpu_to_be64(0);
599 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); 600 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
601 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
600 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); 602 lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
601 lh->lh_flags = cpu_to_be32(flags); 603 lh->lh_flags = cpu_to_be32(flags);
602 lh->lh_tail = cpu_to_be32(tail); 604 lh->lh_tail = cpu_to_be32(tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9969ff062c5b..de97632ba32f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
132static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 132static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
133{ 133{
134 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 134 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
135 struct gfs2_meta_header *mh;
135 struct gfs2_trans *tr; 136 struct gfs2_trans *tr;
136 137
137 lock_buffer(bd->bd_bh); 138 lock_buffer(bd->bd_bh);
@@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
148 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 149 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
149 gfs2_meta_check(sdp, bd->bd_bh); 150 gfs2_meta_check(sdp, bd->bd_bh);
150 gfs2_pin(sdp, bd->bd_bh); 151 gfs2_pin(sdp, bd->bd_bh);
152 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
153 mh->__pad0 = cpu_to_be64(0);
154 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
151 sdp->sd_log_num_buf++; 155 sdp->sd_log_num_buf++;
152 list_add(&le->le_list, &sdp->sd_log_le_buf); 156 list_add(&le->le_list, &sdp->sd_log_le_buf);
153 tr->tr_num_buf_new++; 157 tr->tr_num_buf_new++;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 52fb6c048981..edfee24f3636 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -18,6 +18,7 @@
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/gfs2_ondisk.h> 19#include <linux/gfs2_ondisk.h>
20#include <linux/slow-work.h> 20#include <linux/slow-work.h>
21#include <linux/quotaops.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -62,13 +63,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
62 gt->gt_quota_warn_period = 10; 63 gt->gt_quota_warn_period = 10;
63 gt->gt_quota_scale_num = 1; 64 gt->gt_quota_scale_num = 1;
64 gt->gt_quota_scale_den = 1; 65 gt->gt_quota_scale_den = 1;
65 gt->gt_quota_quantum = 60;
66 gt->gt_new_files_jdata = 0; 66 gt->gt_new_files_jdata = 0;
67 gt->gt_max_readahead = 1 << 18; 67 gt->gt_max_readahead = 1 << 18;
68 gt->gt_stall_secs = 600; 68 gt->gt_stall_secs = 600;
69 gt->gt_complain_secs = 10; 69 gt->gt_complain_secs = 10;
70 gt->gt_statfs_quantum = 30;
71 gt->gt_statfs_slow = 0;
72} 70}
73 71
74static struct gfs2_sbd *init_sbd(struct super_block *sb) 72static struct gfs2_sbd *init_sbd(struct super_block *sb)
@@ -1114,7 +1112,7 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp)
1114 * Returns: errno 1112 * Returns: errno
1115 */ 1113 */
1116 1114
1117static int fill_super(struct super_block *sb, void *data, int silent) 1115static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent)
1118{ 1116{
1119 struct gfs2_sbd *sdp; 1117 struct gfs2_sbd *sdp;
1120 struct gfs2_holder mount_gh; 1118 struct gfs2_holder mount_gh;
@@ -1125,17 +1123,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1125 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); 1123 printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
1126 return -ENOMEM; 1124 return -ENOMEM;
1127 } 1125 }
1128 1126 sdp->sd_args = *args;
1129 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
1130 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
1131 sdp->sd_args.ar_commit = 60;
1132 sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT;
1133
1134 error = gfs2_mount_args(sdp, &sdp->sd_args, data);
1135 if (error) {
1136 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1137 goto fail;
1138 }
1139 1127
1140 if (sdp->sd_args.ar_spectator) { 1128 if (sdp->sd_args.ar_spectator) {
1141 sb->s_flags |= MS_RDONLY; 1129 sb->s_flags |= MS_RDONLY;
@@ -1143,11 +1131,15 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1143 } 1131 }
1144 if (sdp->sd_args.ar_posix_acl) 1132 if (sdp->sd_args.ar_posix_acl)
1145 sb->s_flags |= MS_POSIXACL; 1133 sb->s_flags |= MS_POSIXACL;
1134 if (sdp->sd_args.ar_nobarrier)
1135 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1146 1136
1147 sb->s_magic = GFS2_MAGIC; 1137 sb->s_magic = GFS2_MAGIC;
1148 sb->s_op = &gfs2_super_ops; 1138 sb->s_op = &gfs2_super_ops;
1149 sb->s_export_op = &gfs2_export_ops; 1139 sb->s_export_op = &gfs2_export_ops;
1150 sb->s_xattr = gfs2_xattr_handlers; 1140 sb->s_xattr = gfs2_xattr_handlers;
1141 sb->s_qcop = &gfs2_quotactl_ops;
1142 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
1151 sb->s_time_gran = 1; 1143 sb->s_time_gran = 1;
1152 sb->s_maxbytes = MAX_LFS_FILESIZE; 1144 sb->s_maxbytes = MAX_LFS_FILESIZE;
1153 1145
@@ -1160,6 +1152,15 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1160 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 1152 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
1161 1153
1162 sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; 1154 sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit;
1155 sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
1156 if (sdp->sd_args.ar_statfs_quantum) {
1157 sdp->sd_tune.gt_statfs_slow = 0;
1158 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1159 }
1160 else {
1161 sdp->sd_tune.gt_statfs_slow = 1;
1162 sdp->sd_tune.gt_statfs_quantum = 30;
1163 }
1163 1164
1164 error = init_names(sdp, silent); 1165 error = init_names(sdp, silent);
1165 if (error) 1166 if (error)
@@ -1243,18 +1244,127 @@ fail:
1243 return error; 1244 return error;
1244} 1245}
1245 1246
1246static int gfs2_get_sb(struct file_system_type *fs_type, int flags, 1247static int set_gfs2_super(struct super_block *s, void *data)
1247 const char *dev_name, void *data, struct vfsmount *mnt)
1248{ 1248{
1249 return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); 1249 s->s_bdev = data;
1250 s->s_dev = s->s_bdev->bd_dev;
1251
1252 /*
1253 * We set the bdi here to the queue backing, file systems can
1254 * overwrite this in ->fill_super()
1255 */
1256 s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
1257 return 0;
1250} 1258}
1251 1259
1252static int test_meta_super(struct super_block *s, void *ptr) 1260static int test_gfs2_super(struct super_block *s, void *ptr)
1253{ 1261{
1254 struct block_device *bdev = ptr; 1262 struct block_device *bdev = ptr;
1255 return (bdev == s->s_bdev); 1263 return (bdev == s->s_bdev);
1256} 1264}
1257 1265
1266/**
1267 * gfs2_get_sb - Get the GFS2 superblock
1268 * @fs_type: The GFS2 filesystem type
1269 * @flags: Mount flags
1270 * @dev_name: The name of the device
1271 * @data: The mount arguments
1272 * @mnt: The vfsmnt for this mount
1273 *
1274 * Q. Why not use get_sb_bdev() ?
1275 * A. We need to select one of two root directories to mount, independent
1276 * of whether this is the initial, or subsequent, mount of this sb
1277 *
1278 * Returns: 0 or -ve on error
1279 */
1280
1281static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1282 const char *dev_name, void *data, struct vfsmount *mnt)
1283{
1284 struct block_device *bdev;
1285 struct super_block *s;
1286 fmode_t mode = FMODE_READ;
1287 int error;
1288 struct gfs2_args args;
1289 struct gfs2_sbd *sdp;
1290
1291 if (!(flags & MS_RDONLY))
1292 mode |= FMODE_WRITE;
1293
1294 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1295 if (IS_ERR(bdev))
1296 return PTR_ERR(bdev);
1297
1298 /*
1299 * once the super is inserted into the list by sget, s_umount
1300 * will protect the lockfs code from trying to start a snapshot
1301 * while we are mounting
1302 */
1303 mutex_lock(&bdev->bd_fsfreeze_mutex);
1304 if (bdev->bd_fsfreeze_count > 0) {
1305 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1306 error = -EBUSY;
1307 goto error_bdev;
1308 }
1309 s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev);
1310 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1311 error = PTR_ERR(s);
1312 if (IS_ERR(s))
1313 goto error_bdev;
1314
1315 memset(&args, 0, sizeof(args));
1316 args.ar_quota = GFS2_QUOTA_DEFAULT;
1317 args.ar_data = GFS2_DATA_DEFAULT;
1318 args.ar_commit = 60;
1319 args.ar_statfs_quantum = 30;
1320 args.ar_quota_quantum = 60;
1321 args.ar_errors = GFS2_ERRORS_DEFAULT;
1322
1323 error = gfs2_mount_args(&args, data);
1324 if (error) {
1325 printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
1326 if (s->s_root)
1327 goto error_super;
1328 deactivate_locked_super(s);
1329 return error;
1330 }
1331
1332 if (s->s_root) {
1333 error = -EBUSY;
1334 if ((flags ^ s->s_flags) & MS_RDONLY)
1335 goto error_super;
1336 close_bdev_exclusive(bdev, mode);
1337 } else {
1338 char b[BDEVNAME_SIZE];
1339
1340 s->s_flags = flags;
1341 s->s_mode = mode;
1342 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1343 sb_set_blocksize(s, block_size(bdev));
1344 error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
1345 if (error) {
1346 deactivate_locked_super(s);
1347 return error;
1348 }
1349 s->s_flags |= MS_ACTIVE;
1350 bdev->bd_super = s;
1351 }
1352
1353 sdp = s->s_fs_info;
1354 mnt->mnt_sb = s;
1355 if (args.ar_meta)
1356 mnt->mnt_root = dget(sdp->sd_master_dir);
1357 else
1358 mnt->mnt_root = dget(sdp->sd_root_dir);
1359 return 0;
1360
1361error_super:
1362 deactivate_locked_super(s);
1363error_bdev:
1364 close_bdev_exclusive(bdev, mode);
1365 return error;
1366}
1367
1258static int set_meta_super(struct super_block *s, void *ptr) 1368static int set_meta_super(struct super_block *s, void *ptr)
1259{ 1369{
1260 return -EINVAL; 1370 return -EINVAL;
@@ -1274,13 +1384,17 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
1274 dev_name, error); 1384 dev_name, error);
1275 return error; 1385 return error;
1276 } 1386 }
1277 s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, 1387 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
1278 path.dentry->d_inode->i_sb->s_bdev); 1388 path.dentry->d_inode->i_sb->s_bdev);
1279 path_put(&path); 1389 path_put(&path);
1280 if (IS_ERR(s)) { 1390 if (IS_ERR(s)) {
1281 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); 1391 printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
1282 return PTR_ERR(s); 1392 return PTR_ERR(s);
1283 } 1393 }
1394 if ((flags ^ s->s_flags) & MS_RDONLY) {
1395 deactivate_locked_super(s);
1396 return -EBUSY;
1397 }
1284 sdp = s->s_fs_info; 1398 sdp = s->s_fs_info;
1285 mnt->mnt_sb = s; 1399 mnt->mnt_sb = s;
1286 mnt->mnt_root = dget(sdp->sd_master_dir); 1400 mnt->mnt_root = dget(sdp->sd_master_dir);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 2e9b9326bfc9..e3bf6eab8750 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -15,7 +15,7 @@
15 * fuzziness in the current usage value of IDs that are being used on different 15 * fuzziness in the current usage value of IDs that are being used on different
16 * nodes in the cluster simultaneously. So, it is possible for a user on 16 * nodes in the cluster simultaneously. So, it is possible for a user on
17 * multiple nodes to overrun their quota, but that overrun is controlable. 17 * multiple nodes to overrun their quota, but that overrun is controlable.
18 * Since quota tags are part of transactions, there is no need to a quota check 18 * Since quota tags are part of transactions, there is no need for a quota check
19 * program to be run on node crashes or anything like that. 19 * program to be run on node crashes or anything like that.
20 * 20 *
21 * There are couple of knobs that let the administrator manage the quota 21 * There are couple of knobs that let the administrator manage the quota
@@ -47,6 +47,8 @@
47#include <linux/gfs2_ondisk.h> 47#include <linux/gfs2_ondisk.h>
48#include <linux/kthread.h> 48#include <linux/kthread.h>
49#include <linux/freezer.h> 49#include <linux/freezer.h>
50#include <linux/quota.h>
51#include <linux/dqblk_xfs.h>
50 52
51#include "gfs2.h" 53#include "gfs2.h"
52#include "incore.h" 54#include "incore.h"
@@ -65,13 +67,6 @@
65#define QUOTA_USER 1 67#define QUOTA_USER 1
66#define QUOTA_GROUP 0 68#define QUOTA_GROUP 0
67 69
68struct gfs2_quota_host {
69 u64 qu_limit;
70 u64 qu_warn;
71 s64 qu_value;
72 u32 qu_ll_next;
73};
74
75struct gfs2_quota_change_host { 70struct gfs2_quota_change_host {
76 u64 qc_change; 71 u64 qc_change;
77 u32 qc_flags; /* GFS2_QCF_... */ 72 u32 qc_flags; /* GFS2_QCF_... */
@@ -164,7 +159,7 @@ fail:
164 return error; 159 return error;
165} 160}
166 161
167static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, 162static int qd_get(struct gfs2_sbd *sdp, int user, u32 id,
168 struct gfs2_quota_data **qdp) 163 struct gfs2_quota_data **qdp)
169{ 164{
170 struct gfs2_quota_data *qd = NULL, *new_qd = NULL; 165 struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
@@ -202,7 +197,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create,
202 197
203 spin_unlock(&qd_lru_lock); 198 spin_unlock(&qd_lru_lock);
204 199
205 if (qd || !create) { 200 if (qd) {
206 if (new_qd) { 201 if (new_qd) {
207 gfs2_glock_put(new_qd->qd_gl); 202 gfs2_glock_put(new_qd->qd_gl);
208 kmem_cache_free(gfs2_quotad_cachep, new_qd); 203 kmem_cache_free(gfs2_quotad_cachep, new_qd);
@@ -461,12 +456,12 @@ static void qd_unlock(struct gfs2_quota_data *qd)
461 qd_put(qd); 456 qd_put(qd);
462} 457}
463 458
464static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create, 459static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id,
465 struct gfs2_quota_data **qdp) 460 struct gfs2_quota_data **qdp)
466{ 461{
467 int error; 462 int error;
468 463
469 error = qd_get(sdp, user, id, create, qdp); 464 error = qd_get(sdp, user, id, qdp);
470 if (error) 465 if (error)
471 return error; 466 return error;
472 467
@@ -508,20 +503,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
508 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) 503 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
509 return 0; 504 return 0;
510 505
511 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); 506 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
512 if (error) 507 if (error)
513 goto out; 508 goto out;
514 al->al_qd_num++; 509 al->al_qd_num++;
515 qd++; 510 qd++;
516 511
517 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); 512 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
518 if (error) 513 if (error)
519 goto out; 514 goto out;
520 al->al_qd_num++; 515 al->al_qd_num++;
521 qd++; 516 qd++;
522 517
523 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 518 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
524 error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); 519 error = qdsb_get(sdp, QUOTA_USER, uid, qd);
525 if (error) 520 if (error)
526 goto out; 521 goto out;
527 al->al_qd_num++; 522 al->al_qd_num++;
@@ -529,7 +524,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
529 } 524 }
530 525
531 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { 526 if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) {
532 error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); 527 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
533 if (error) 528 if (error)
534 goto out; 529 goto out;
535 al->al_qd_num++; 530 al->al_qd_num++;
@@ -617,48 +612,36 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
617 mutex_unlock(&sdp->sd_quota_mutex); 612 mutex_unlock(&sdp->sd_quota_mutex);
618} 613}
619 614
620static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
621{
622 const struct gfs2_quota *str = buf;
623
624 qu->qu_limit = be64_to_cpu(str->qu_limit);
625 qu->qu_warn = be64_to_cpu(str->qu_warn);
626 qu->qu_value = be64_to_cpu(str->qu_value);
627 qu->qu_ll_next = be32_to_cpu(str->qu_ll_next);
628}
629
630static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
631{
632 struct gfs2_quota *str = buf;
633
634 str->qu_limit = cpu_to_be64(qu->qu_limit);
635 str->qu_warn = cpu_to_be64(qu->qu_warn);
636 str->qu_value = cpu_to_be64(qu->qu_value);
637 str->qu_ll_next = cpu_to_be32(qu->qu_ll_next);
638 memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
639}
640
641/** 615/**
642 * gfs2_adjust_quota 616 * gfs2_adjust_quota - adjust record of current block usage
617 * @ip: The quota inode
618 * @loc: Offset of the entry in the quota file
619 * @change: The amount of usage change to record
620 * @qd: The quota data
621 * @fdq: The updated limits to record
643 * 622 *
644 * This function was mostly borrowed from gfs2_block_truncate_page which was 623 * This function was mostly borrowed from gfs2_block_truncate_page which was
645 * in turn mostly borrowed from ext3 624 * in turn mostly borrowed from ext3
625 *
626 * Returns: 0 or -ve on error
646 */ 627 */
628
647static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, 629static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
648 s64 change, struct gfs2_quota_data *qd) 630 s64 change, struct gfs2_quota_data *qd,
631 struct fs_disk_quota *fdq)
649{ 632{
650 struct inode *inode = &ip->i_inode; 633 struct inode *inode = &ip->i_inode;
651 struct address_space *mapping = inode->i_mapping; 634 struct address_space *mapping = inode->i_mapping;
652 unsigned long index = loc >> PAGE_CACHE_SHIFT; 635 unsigned long index = loc >> PAGE_CACHE_SHIFT;
653 unsigned offset = loc & (PAGE_CACHE_SIZE - 1); 636 unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
654 unsigned blocksize, iblock, pos; 637 unsigned blocksize, iblock, pos;
655 struct buffer_head *bh; 638 struct buffer_head *bh, *dibh;
656 struct page *page; 639 struct page *page;
657 void *kaddr; 640 void *kaddr;
658 char *ptr; 641 struct gfs2_quota *qp;
659 struct gfs2_quota_host qp;
660 s64 value; 642 s64 value;
661 int err = -EIO; 643 int err = -EIO;
644 u64 size;
662 645
663 if (gfs2_is_stuffed(ip)) 646 if (gfs2_is_stuffed(ip))
664 gfs2_unstuff_dinode(ip, NULL); 647 gfs2_unstuff_dinode(ip, NULL);
@@ -700,18 +683,38 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
700 gfs2_trans_add_bh(ip->i_gl, bh, 0); 683 gfs2_trans_add_bh(ip->i_gl, bh, 0);
701 684
702 kaddr = kmap_atomic(page, KM_USER0); 685 kaddr = kmap_atomic(page, KM_USER0);
703 ptr = kaddr + offset; 686 qp = kaddr + offset;
704 gfs2_quota_in(&qp, ptr); 687 value = (s64)be64_to_cpu(qp->qu_value) + change;
705 qp.qu_value += change; 688 qp->qu_value = cpu_to_be64(value);
706 value = qp.qu_value; 689 qd->qd_qb.qb_value = qp->qu_value;
707 gfs2_quota_out(&qp, ptr); 690 if (fdq) {
691 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
692 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
693 qd->qd_qb.qb_warn = qp->qu_warn;
694 }
695 if (fdq->d_fieldmask & FS_DQ_BHARD) {
696 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
697 qd->qd_qb.qb_limit = qp->qu_limit;
698 }
699 }
708 flush_dcache_page(page); 700 flush_dcache_page(page);
709 kunmap_atomic(kaddr, KM_USER0); 701 kunmap_atomic(kaddr, KM_USER0);
710 err = 0; 702
711 qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); 703 err = gfs2_meta_inode_buffer(ip, &dibh);
712 qd->qd_qb.qb_value = cpu_to_be64(value); 704 if (err)
713 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC); 705 goto unlock;
714 ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value); 706
707 size = loc + sizeof(struct gfs2_quota);
708 if (size > inode->i_size) {
709 ip->i_disksize = size;
710 i_size_write(inode, size);
711 }
712 inode->i_mtime = inode->i_atime = CURRENT_TIME;
713 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
714 gfs2_dinode_out(ip, dibh->b_data);
715 brelse(dibh);
716 mark_inode_dirty(inode);
717
715unlock: 718unlock:
716 unlock_page(page); 719 unlock_page(page);
717 page_cache_release(page); 720 page_cache_release(page);
@@ -739,9 +742,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
739 return -ENOMEM; 742 return -ENOMEM;
740 743
741 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); 744 sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
745 mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA);
742 for (qx = 0; qx < num_qd; qx++) { 746 for (qx = 0; qx < num_qd; qx++) {
743 error = gfs2_glock_nq_init(qda[qx]->qd_gl, 747 error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
744 LM_ST_EXCLUSIVE,
745 GL_NOCACHE, &ghs[qx]); 748 GL_NOCACHE, &ghs[qx]);
746 if (error) 749 if (error)
747 goto out; 750 goto out;
@@ -795,9 +798,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
795 for (x = 0; x < num_qd; x++) { 798 for (x = 0; x < num_qd; x++) {
796 qd = qda[x]; 799 qd = qda[x];
797 offset = qd2offset(qd); 800 offset = qd2offset(qd);
798 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, 801 error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
799 (struct gfs2_quota_data *)
800 qd);
801 if (error) 802 if (error)
802 goto out_end_trans; 803 goto out_end_trans;
803 804
@@ -817,21 +818,44 @@ out_gunlock:
817out: 818out:
818 while (qx--) 819 while (qx--)
819 gfs2_glock_dq_uninit(&ghs[qx]); 820 gfs2_glock_dq_uninit(&ghs[qx]);
821 mutex_unlock(&ip->i_inode.i_mutex);
820 kfree(ghs); 822 kfree(ghs);
821 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); 823 gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl);
822 return error; 824 return error;
823} 825}
824 826
827static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
828{
829 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
830 struct gfs2_quota q;
831 struct gfs2_quota_lvb *qlvb;
832 loff_t pos;
833 int error;
834
835 memset(&q, 0, sizeof(struct gfs2_quota));
836 pos = qd2offset(qd);
837 error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q));
838 if (error < 0)
839 return error;
840
841 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
842 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
843 qlvb->__pad = 0;
844 qlvb->qb_limit = q.qu_limit;
845 qlvb->qb_warn = q.qu_warn;
846 qlvb->qb_value = q.qu_value;
847 qd->qd_qb = *qlvb;
848
849 return 0;
850}
851
825static int do_glock(struct gfs2_quota_data *qd, int force_refresh, 852static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
826 struct gfs2_holder *q_gh) 853 struct gfs2_holder *q_gh)
827{ 854{
828 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 855 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
829 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); 856 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
830 struct gfs2_holder i_gh; 857 struct gfs2_holder i_gh;
831 struct gfs2_quota_host q;
832 char buf[sizeof(struct gfs2_quota)];
833 int error; 858 int error;
834 struct gfs2_quota_lvb *qlvb;
835 859
836restart: 860restart:
837 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); 861 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
@@ -841,11 +865,9 @@ restart:
841 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 865 qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
842 866
843 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { 867 if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
844 loff_t pos;
845 gfs2_glock_dq_uninit(q_gh); 868 gfs2_glock_dq_uninit(q_gh);
846 error = gfs2_glock_nq_init(qd->qd_gl, 869 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE,
847 LM_ST_EXCLUSIVE, GL_NOCACHE, 870 GL_NOCACHE, q_gh);
848 q_gh);
849 if (error) 871 if (error)
850 return error; 872 return error;
851 873
@@ -853,29 +875,14 @@ restart:
853 if (error) 875 if (error)
854 goto fail; 876 goto fail;
855 877
856 memset(buf, 0, sizeof(struct gfs2_quota)); 878 error = update_qd(sdp, qd);
857 pos = qd2offset(qd); 879 if (error)
858 error = gfs2_internal_read(ip, NULL, buf, &pos,
859 sizeof(struct gfs2_quota));
860 if (error < 0)
861 goto fail_gunlock; 880 goto fail_gunlock;
862 881
863 gfs2_glock_dq_uninit(&i_gh); 882 gfs2_glock_dq_uninit(&i_gh);
864 883 gfs2_glock_dq_uninit(q_gh);
865 gfs2_quota_in(&q, buf); 884 force_refresh = 0;
866 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; 885 goto restart;
867 qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
868 qlvb->__pad = 0;
869 qlvb->qb_limit = cpu_to_be64(q.qu_limit);
870 qlvb->qb_warn = cpu_to_be64(q.qu_warn);
871 qlvb->qb_value = cpu_to_be64(q.qu_value);
872 qd->qd_qb = *qlvb;
873
874 if (gfs2_glock_is_blocking(qd->qd_gl)) {
875 gfs2_glock_dq_uninit(q_gh);
876 force_refresh = 0;
877 goto restart;
878 }
879 } 886 }
880 887
881 return 0; 888 return 0;
@@ -995,7 +1002,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
995{ 1002{
996 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; 1003 struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
997 1004
998 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n", 1005 printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n",
999 sdp->sd_fsname, type, 1006 sdp->sd_fsname, type,
1000 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", 1007 (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
1001 qd->qd_id); 1008 qd->qd_id);
@@ -1032,6 +1039,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1032 1039
1033 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { 1040 if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
1034 print_message(qd, "exceeded"); 1041 print_message(qd, "exceeded");
1042 quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
1043 USRQUOTA : GRPQUOTA, qd->qd_id,
1044 sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
1045
1035 error = -EDQUOT; 1046 error = -EDQUOT;
1036 break; 1047 break;
1037 } else if (be64_to_cpu(qd->qd_qb.qb_warn) && 1048 } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
@@ -1039,6 +1050,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1039 time_after_eq(jiffies, qd->qd_last_warn + 1050 time_after_eq(jiffies, qd->qd_last_warn +
1040 gfs2_tune_get(sdp, 1051 gfs2_tune_get(sdp,
1041 gt_quota_warn_period) * HZ)) { 1052 gt_quota_warn_period) * HZ)) {
1053 quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ?
1054 USRQUOTA : GRPQUOTA, qd->qd_id,
1055 sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
1042 error = print_message(qd, "warning"); 1056 error = print_message(qd, "warning");
1043 qd->qd_last_warn = jiffies; 1057 qd->qd_last_warn = jiffies;
1044 } 1058 }
@@ -1069,8 +1083,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1069 } 1083 }
1070} 1084}
1071 1085
1072int gfs2_quota_sync(struct gfs2_sbd *sdp) 1086int gfs2_quota_sync(struct super_block *sb, int type)
1073{ 1087{
1088 struct gfs2_sbd *sdp = sb->s_fs_info;
1074 struct gfs2_quota_data **qda; 1089 struct gfs2_quota_data **qda;
1075 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); 1090 unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync);
1076 unsigned int num_qd; 1091 unsigned int num_qd;
@@ -1118,7 +1133,7 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1118 struct gfs2_holder q_gh; 1133 struct gfs2_holder q_gh;
1119 int error; 1134 int error;
1120 1135
1121 error = qd_get(sdp, user, id, CREATE, &qd); 1136 error = qd_get(sdp, user, id, &qd);
1122 if (error) 1137 if (error)
1123 return error; 1138 return error;
1124 1139
@@ -1127,7 +1142,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
1127 gfs2_glock_dq_uninit(&q_gh); 1142 gfs2_glock_dq_uninit(&q_gh);
1128 1143
1129 qd_put(qd); 1144 qd_put(qd);
1130
1131 return error; 1145 return error;
1132} 1146}
1133 1147
@@ -1298,12 +1312,12 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
1298} 1312}
1299 1313
1300static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, 1314static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg,
1301 int (*fxn)(struct gfs2_sbd *sdp), 1315 int (*fxn)(struct super_block *sb, int type),
1302 unsigned long t, unsigned long *timeo, 1316 unsigned long t, unsigned long *timeo,
1303 unsigned int *new_timeo) 1317 unsigned int *new_timeo)
1304{ 1318{
1305 if (t >= *timeo) { 1319 if (t >= *timeo) {
1306 int error = fxn(sdp); 1320 int error = fxn(sdp->sd_vfs, 0);
1307 quotad_error(sdp, msg, error); 1321 quotad_error(sdp, msg, error);
1308 *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; 1322 *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ;
1309 } else { 1323 } else {
@@ -1330,6 +1344,14 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
1330 } 1344 }
1331} 1345}
1332 1346
1347void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) {
1348 if (!sdp->sd_statfs_force_sync) {
1349 sdp->sd_statfs_force_sync = 1;
1350 wake_up(&sdp->sd_quota_wait);
1351 }
1352}
1353
1354
1333/** 1355/**
1334 * gfs2_quotad - Write cached quota changes into the quota file 1356 * gfs2_quotad - Write cached quota changes into the quota file
1335 * @sdp: Pointer to GFS2 superblock 1357 * @sdp: Pointer to GFS2 superblock
@@ -1349,8 +1371,15 @@ int gfs2_quotad(void *data)
1349 while (!kthread_should_stop()) { 1371 while (!kthread_should_stop()) {
1350 1372
1351 /* Update the master statfs file */ 1373 /* Update the master statfs file */
1352 quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, 1374 if (sdp->sd_statfs_force_sync) {
1353 &statfs_timeo, &tune->gt_statfs_quantum); 1375 int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
1376 quotad_error(sdp, "statfs", error);
1377 statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
1378 }
1379 else
1380 quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t,
1381 &statfs_timeo,
1382 &tune->gt_statfs_quantum);
1354 1383
1355 /* Update quota file */ 1384 /* Update quota file */
1356 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, 1385 quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t,
@@ -1367,7 +1396,7 @@ int gfs2_quotad(void *data)
1367 spin_lock(&sdp->sd_trunc_lock); 1396 spin_lock(&sdp->sd_trunc_lock);
1368 empty = list_empty(&sdp->sd_trunc_list); 1397 empty = list_empty(&sdp->sd_trunc_list);
1369 spin_unlock(&sdp->sd_trunc_lock); 1398 spin_unlock(&sdp->sd_trunc_lock);
1370 if (empty) 1399 if (empty && !sdp->sd_statfs_force_sync)
1371 t -= schedule_timeout(t); 1400 t -= schedule_timeout(t);
1372 else 1401 else
1373 t = 0; 1402 t = 0;
@@ -1377,3 +1406,181 @@ int gfs2_quotad(void *data)
1377 return 0; 1406 return 0;
1378} 1407}
1379 1408
1409static int gfs2_quota_get_xstate(struct super_block *sb,
1410 struct fs_quota_stat *fqs)
1411{
1412 struct gfs2_sbd *sdp = sb->s_fs_info;
1413
1414 memset(fqs, 0, sizeof(struct fs_quota_stat));
1415 fqs->qs_version = FS_QSTAT_VERSION;
1416 if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON)
1417 fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD);
1418 else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT)
1419 fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT);
1420 if (sdp->sd_quota_inode) {
1421 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
1422 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
1423 }
1424 fqs->qs_uquota.qfs_nextents = 1; /* unsupported */
1425 fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */
1426 fqs->qs_incoredqs = atomic_read(&qd_lru_count);
1427 return 0;
1428}
1429
1430static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id,
1431 struct fs_disk_quota *fdq)
1432{
1433 struct gfs2_sbd *sdp = sb->s_fs_info;
1434 struct gfs2_quota_lvb *qlvb;
1435 struct gfs2_quota_data *qd;
1436 struct gfs2_holder q_gh;
1437 int error;
1438
1439 memset(fdq, 0, sizeof(struct fs_disk_quota));
1440
1441 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1442 return -ESRCH; /* Crazy XFS error code */
1443
1444 if (type == USRQUOTA)
1445 type = QUOTA_USER;
1446 else if (type == GRPQUOTA)
1447 type = QUOTA_GROUP;
1448 else
1449 return -EINVAL;
1450
1451 error = qd_get(sdp, type, id, &qd);
1452 if (error)
1453 return error;
1454 error = do_glock(qd, FORCE, &q_gh);
1455 if (error)
1456 goto out;
1457
1458 qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
1459 fdq->d_version = FS_DQUOT_VERSION;
1460 fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA;
1461 fdq->d_id = id;
1462 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
1463 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
1464 fdq->d_bcount = be64_to_cpu(qlvb->qb_value);
1465
1466 gfs2_glock_dq_uninit(&q_gh);
1467out:
1468 qd_put(qd);
1469 return error;
1470}
1471
1472/* GFS2 only supports a subset of the XFS fields */
1473#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
1474
1475static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id,
1476 struct fs_disk_quota *fdq)
1477{
1478 struct gfs2_sbd *sdp = sb->s_fs_info;
1479 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
1480 struct gfs2_quota_data *qd;
1481 struct gfs2_holder q_gh, i_gh;
1482 unsigned int data_blocks, ind_blocks;
1483 unsigned int blocks = 0;
1484 int alloc_required;
1485 struct gfs2_alloc *al;
1486 loff_t offset;
1487 int error;
1488
1489 if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
1490 return -ESRCH; /* Crazy XFS error code */
1491
1492 switch(type) {
1493 case USRQUOTA:
1494 type = QUOTA_USER;
1495 if (fdq->d_flags != XFS_USER_QUOTA)
1496 return -EINVAL;
1497 break;
1498 case GRPQUOTA:
1499 type = QUOTA_GROUP;
1500 if (fdq->d_flags != XFS_GROUP_QUOTA)
1501 return -EINVAL;
1502 break;
1503 default:
1504 return -EINVAL;
1505 }
1506
1507 if (fdq->d_fieldmask & ~GFS2_FIELDMASK)
1508 return -EINVAL;
1509 if (fdq->d_id != id)
1510 return -EINVAL;
1511
1512 error = qd_get(sdp, type, id, &qd);
1513 if (error)
1514 return error;
1515
1516 mutex_lock(&ip->i_inode.i_mutex);
1517 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
1518 if (error)
1519 goto out_put;
1520 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1521 if (error)
1522 goto out_q;
1523
1524 /* Check for existing entry, if none then alloc new blocks */
1525 error = update_qd(sdp, qd);
1526 if (error)
1527 goto out_i;
1528
1529 /* If nothing has changed, this is a no-op */
1530 if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
1531 (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn)))
1532 fdq->d_fieldmask ^= FS_DQ_BSOFT;
1533 if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
1534 (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit)))
1535 fdq->d_fieldmask ^= FS_DQ_BHARD;
1536 if (fdq->d_fieldmask == 0)
1537 goto out_i;
1538
1539 offset = qd2offset(qd);
1540 error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota),
1541 &alloc_required);
1542 if (error)
1543 goto out_i;
1544 if (alloc_required) {
1545 al = gfs2_alloc_get(ip);
1546 if (al == NULL)
1547 goto out_i;
1548 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1549 &data_blocks, &ind_blocks);
1550 blocks = al->al_requested = 1 + data_blocks + ind_blocks;
1551 error = gfs2_inplace_reserve(ip);
1552 if (error)
1553 goto out_alloc;
1554 }
1555
1556 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0);
1557 if (error)
1558 goto out_release;
1559
1560 /* Apply changes */
1561 error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);
1562
1563 gfs2_trans_end(sdp);
1564out_release:
1565 if (alloc_required) {
1566 gfs2_inplace_release(ip);
1567out_alloc:
1568 gfs2_alloc_put(ip);
1569 }
1570out_i:
1571 gfs2_glock_dq_uninit(&i_gh);
1572out_q:
1573 gfs2_glock_dq_uninit(&q_gh);
1574out_put:
1575 mutex_unlock(&ip->i_inode.i_mutex);
1576 qd_put(qd);
1577 return error;
1578}
1579
1580const struct quotactl_ops gfs2_quotactl_ops = {
1581 .quota_sync = gfs2_quota_sync,
1582 .get_xstate = gfs2_quota_get_xstate,
1583 .get_xquota = gfs2_xquota_get,
1584 .set_xquota = gfs2_xquota_set,
1585};
1586
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 0fa5fa63d0e8..e271fa07ad02 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -25,13 +25,15 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
25extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 25extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
26 u32 uid, u32 gid); 26 u32 uid, u32 gid);
27 27
28extern int gfs2_quota_sync(struct gfs2_sbd *sdp); 28extern int gfs2_quota_sync(struct super_block *sb, int type);
29extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 29extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
30 30
31extern int gfs2_quota_init(struct gfs2_sbd *sdp); 31extern int gfs2_quota_init(struct gfs2_sbd *sdp);
32extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); 32extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
33extern int gfs2_quotad(void *data); 33extern int gfs2_quotad(void *data);
34 34
35extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
36
35static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) 37static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
36{ 38{
37 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 39 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -50,5 +52,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
50} 52}
51 53
52extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); 54extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
55extern const struct quotactl_ops gfs2_quotactl_ops;
53 56
54#endif /* __QUOTA_DOT_H__ */ 57#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 09fa31965576..4b9bece3d437 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
410 memset(lh, 0, sizeof(struct gfs2_log_header)); 410 memset(lh, 0, sizeof(struct gfs2_log_header));
411 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 411 lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
412 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); 412 lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
413 lh->lh_header.__pad0 = cpu_to_be64(0);
413 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); 414 lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
415 lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
414 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); 416 lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
415 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); 417 lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
416 lh->lh_blkno = cpu_to_be32(lblock); 418 lh->lh_blkno = cpu_to_be32(lblock);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8f1cfb02a6cb..0608f490c295 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1710,11 +1710,16 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1710{ 1710{
1711 struct gfs2_rgrpd *rgd; 1711 struct gfs2_rgrpd *rgd;
1712 struct gfs2_holder ri_gh, rgd_gh; 1712 struct gfs2_holder ri_gh, rgd_gh;
1713 struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
1714 int ri_locked = 0;
1713 int error; 1715 int error;
1714 1716
1715 error = gfs2_rindex_hold(sdp, &ri_gh); 1717 if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
1716 if (error) 1718 error = gfs2_rindex_hold(sdp, &ri_gh);
1717 goto fail; 1719 if (error)
1720 goto fail;
1721 ri_locked = 1;
1722 }
1718 1723
1719 error = -EINVAL; 1724 error = -EINVAL;
1720 rgd = gfs2_blk2rgrpd(sdp, no_addr); 1725 rgd = gfs2_blk2rgrpd(sdp, no_addr);
@@ -1730,7 +1735,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1730 1735
1731 gfs2_glock_dq_uninit(&rgd_gh); 1736 gfs2_glock_dq_uninit(&rgd_gh);
1732fail_rindex: 1737fail_rindex:
1733 gfs2_glock_dq_uninit(&ri_gh); 1738 if (ri_locked)
1739 gfs2_glock_dq_uninit(&ri_gh);
1734fail: 1740fail:
1735 return error; 1741 return error;
1736} 1742}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0ec3ec672de1..c282ad41f3d1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -70,6 +70,11 @@ enum {
70 Opt_commit, 70 Opt_commit,
71 Opt_err_withdraw, 71 Opt_err_withdraw,
72 Opt_err_panic, 72 Opt_err_panic,
73 Opt_statfs_quantum,
74 Opt_statfs_percent,
75 Opt_quota_quantum,
76 Opt_barrier,
77 Opt_nobarrier,
73 Opt_error, 78 Opt_error,
74}; 79};
75 80
@@ -101,18 +106,23 @@ static const match_table_t tokens = {
101 {Opt_commit, "commit=%d"}, 106 {Opt_commit, "commit=%d"},
102 {Opt_err_withdraw, "errors=withdraw"}, 107 {Opt_err_withdraw, "errors=withdraw"},
103 {Opt_err_panic, "errors=panic"}, 108 {Opt_err_panic, "errors=panic"},
109 {Opt_statfs_quantum, "statfs_quantum=%d"},
110 {Opt_statfs_percent, "statfs_percent=%d"},
111 {Opt_quota_quantum, "quota_quantum=%d"},
112 {Opt_barrier, "barrier"},
113 {Opt_nobarrier, "nobarrier"},
104 {Opt_error, NULL} 114 {Opt_error, NULL}
105}; 115};
106 116
107/** 117/**
108 * gfs2_mount_args - Parse mount options 118 * gfs2_mount_args - Parse mount options
109 * @sdp: 119 * @args: The structure into which the parsed options will be written
110 * @data: 120 * @options: The options to parse
111 * 121 *
112 * Return: errno 122 * Return: errno
113 */ 123 */
114 124
115int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) 125int gfs2_mount_args(struct gfs2_args *args, char *options)
116{ 126{
117 char *o; 127 char *o;
118 int token; 128 int token;
@@ -157,7 +167,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
157 break; 167 break;
158 case Opt_debug: 168 case Opt_debug:
159 if (args->ar_errors == GFS2_ERRORS_PANIC) { 169 if (args->ar_errors == GFS2_ERRORS_PANIC) {
160 fs_info(sdp, "-o debug and -o errors=panic " 170 printk(KERN_WARNING "GFS2: -o debug and -o errors=panic "
161 "are mutually exclusive.\n"); 171 "are mutually exclusive.\n");
162 return -EINVAL; 172 return -EINVAL;
163 } 173 }
@@ -210,7 +220,29 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
210 case Opt_commit: 220 case Opt_commit:
211 rv = match_int(&tmp[0], &args->ar_commit); 221 rv = match_int(&tmp[0], &args->ar_commit);
212 if (rv || args->ar_commit <= 0) { 222 if (rv || args->ar_commit <= 0) {
213 fs_info(sdp, "commit mount option requires a positive numeric argument\n"); 223 printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n");
224 return rv ? rv : -EINVAL;
225 }
226 break;
227 case Opt_statfs_quantum:
228 rv = match_int(&tmp[0], &args->ar_statfs_quantum);
229 if (rv || args->ar_statfs_quantum < 0) {
230 printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n");
231 return rv ? rv : -EINVAL;
232 }
233 break;
234 case Opt_quota_quantum:
235 rv = match_int(&tmp[0], &args->ar_quota_quantum);
236 if (rv || args->ar_quota_quantum <= 0) {
237 printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n");
238 return rv ? rv : -EINVAL;
239 }
240 break;
241 case Opt_statfs_percent:
242 rv = match_int(&tmp[0], &args->ar_statfs_percent);
243 if (rv || args->ar_statfs_percent < 0 ||
244 args->ar_statfs_percent > 100) {
245 printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n");
214 return rv ? rv : -EINVAL; 246 return rv ? rv : -EINVAL;
215 } 247 }
216 break; 248 break;
@@ -219,15 +251,21 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
219 break; 251 break;
220 case Opt_err_panic: 252 case Opt_err_panic:
221 if (args->ar_debug) { 253 if (args->ar_debug) {
222 fs_info(sdp, "-o debug and -o errors=panic " 254 printk(KERN_WARNING "GFS2: -o debug and -o errors=panic "
223 "are mutually exclusive.\n"); 255 "are mutually exclusive.\n");
224 return -EINVAL; 256 return -EINVAL;
225 } 257 }
226 args->ar_errors = GFS2_ERRORS_PANIC; 258 args->ar_errors = GFS2_ERRORS_PANIC;
227 break; 259 break;
260 case Opt_barrier:
261 args->ar_nobarrier = 0;
262 break;
263 case Opt_nobarrier:
264 args->ar_nobarrier = 1;
265 break;
228 case Opt_error: 266 case Opt_error:
229 default: 267 default:
230 fs_info(sdp, "invalid mount option: %s\n", o); 268 printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
231 return -EINVAL; 269 return -EINVAL;
232 } 270 }
233 } 271 }
@@ -442,7 +480,10 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
442{ 480{
443 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); 481 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
444 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 482 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
483 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
445 struct buffer_head *l_bh; 484 struct buffer_head *l_bh;
485 s64 x, y;
486 int need_sync = 0;
446 int error; 487 int error;
447 488
448 error = gfs2_meta_inode_buffer(l_ip, &l_bh); 489 error = gfs2_meta_inode_buffer(l_ip, &l_bh);
@@ -456,9 +497,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
456 l_sc->sc_free += free; 497 l_sc->sc_free += free;
457 l_sc->sc_dinodes += dinodes; 498 l_sc->sc_dinodes += dinodes;
458 gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); 499 gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
500 if (sdp->sd_args.ar_statfs_percent) {
501 x = 100 * l_sc->sc_free;
502 y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
503 if (x >= y || x <= -y)
504 need_sync = 1;
505 }
459 spin_unlock(&sdp->sd_statfs_spin); 506 spin_unlock(&sdp->sd_statfs_spin);
460 507
461 brelse(l_bh); 508 brelse(l_bh);
509 if (need_sync)
510 gfs2_wake_up_statfs(sdp);
462} 511}
463 512
464void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, 513void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
@@ -484,8 +533,9 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
484 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); 533 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
485} 534}
486 535
487int gfs2_statfs_sync(struct gfs2_sbd *sdp) 536int gfs2_statfs_sync(struct super_block *sb, int type)
488{ 537{
538 struct gfs2_sbd *sdp = sb->s_fs_info;
489 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 539 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
490 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); 540 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
491 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 541 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
@@ -521,6 +571,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
521 goto out_bh2; 571 goto out_bh2;
522 572
523 update_statfs(sdp, m_bh, l_bh); 573 update_statfs(sdp, m_bh, l_bh);
574 sdp->sd_statfs_force_sync = 0;
524 575
525 gfs2_trans_end(sdp); 576 gfs2_trans_end(sdp);
526 577
@@ -712,8 +763,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
712 int error; 763 int error;
713 764
714 flush_workqueue(gfs2_delete_workqueue); 765 flush_workqueue(gfs2_delete_workqueue);
715 gfs2_quota_sync(sdp); 766 gfs2_quota_sync(sdp->sd_vfs, 0);
716 gfs2_statfs_sync(sdp); 767 gfs2_statfs_sync(sdp->sd_vfs, 0);
717 768
718 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, 769 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
719 &t_gh); 770 &t_gh);
@@ -1061,8 +1112,13 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1061 1112
1062 spin_lock(&gt->gt_spin); 1113 spin_lock(&gt->gt_spin);
1063 args.ar_commit = gt->gt_log_flush_secs; 1114 args.ar_commit = gt->gt_log_flush_secs;
1115 args.ar_quota_quantum = gt->gt_quota_quantum;
1116 if (gt->gt_statfs_slow)
1117 args.ar_statfs_quantum = 0;
1118 else
1119 args.ar_statfs_quantum = gt->gt_statfs_quantum;
1064 spin_unlock(&gt->gt_spin); 1120 spin_unlock(&gt->gt_spin);
1065 error = gfs2_mount_args(sdp, &args, data); 1121 error = gfs2_mount_args(&args, data);
1066 if (error) 1122 if (error)
1067 return error; 1123 return error;
1068 1124
@@ -1097,8 +1153,21 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1097 sb->s_flags |= MS_POSIXACL; 1153 sb->s_flags |= MS_POSIXACL;
1098 else 1154 else
1099 sb->s_flags &= ~MS_POSIXACL; 1155 sb->s_flags &= ~MS_POSIXACL;
1156 if (sdp->sd_args.ar_nobarrier)
1157 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1158 else
1159 clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1100 spin_lock(&gt->gt_spin); 1160 spin_lock(&gt->gt_spin);
1101 gt->gt_log_flush_secs = args.ar_commit; 1161 gt->gt_log_flush_secs = args.ar_commit;
1162 gt->gt_quota_quantum = args.ar_quota_quantum;
1163 if (args.ar_statfs_quantum) {
1164 gt->gt_statfs_slow = 0;
1165 gt->gt_statfs_quantum = args.ar_statfs_quantum;
1166 }
1167 else {
1168 gt->gt_statfs_slow = 1;
1169 gt->gt_statfs_quantum = 30;
1170 }
1102 spin_unlock(&gt->gt_spin); 1171 spin_unlock(&gt->gt_spin);
1103 1172
1104 gfs2_online_uevent(sdp); 1173 gfs2_online_uevent(sdp);
@@ -1179,7 +1248,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1179{ 1248{
1180 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; 1249 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
1181 struct gfs2_args *args = &sdp->sd_args; 1250 struct gfs2_args *args = &sdp->sd_args;
1182 int lfsecs; 1251 int val;
1183 1252
1184 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) 1253 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
1185 seq_printf(s, ",meta"); 1254 seq_printf(s, ",meta");
@@ -1240,9 +1309,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1240 } 1309 }
1241 if (args->ar_discard) 1310 if (args->ar_discard)
1242 seq_printf(s, ",discard"); 1311 seq_printf(s, ",discard");
1243 lfsecs = sdp->sd_tune.gt_log_flush_secs; 1312 val = sdp->sd_tune.gt_log_flush_secs;
1244 if (lfsecs != 60) 1313 if (val != 60)
1245 seq_printf(s, ",commit=%d", lfsecs); 1314 seq_printf(s, ",commit=%d", val);
1315 val = sdp->sd_tune.gt_statfs_quantum;
1316 if (val != 30)
1317 seq_printf(s, ",statfs_quantum=%d", val);
1318 val = sdp->sd_tune.gt_quota_quantum;
1319 if (val != 60)
1320 seq_printf(s, ",quota_quantum=%d", val);
1321 if (args->ar_statfs_percent)
1322 seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
1246 if (args->ar_errors != GFS2_ERRORS_DEFAULT) { 1323 if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
1247 const char *state; 1324 const char *state;
1248 1325
@@ -1259,6 +1336,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1259 } 1336 }
1260 seq_printf(s, ",errors=%s", state); 1337 seq_printf(s, ",errors=%s", state);
1261 } 1338 }
1339 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1340 seq_printf(s, ",nobarrier");
1341
1262 return 0; 1342 return 0;
1263} 1343}
1264 1344
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 235db3682885..3df60f2d84e3 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -27,7 +27,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
27 27
28extern void gfs2_jindex_free(struct gfs2_sbd *sdp); 28extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
29 29
30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); 30extern int gfs2_mount_args(struct gfs2_args *args, char *data);
31 31
32extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); 32extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
33extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); 33extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
@@ -44,7 +44,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
44 const void *buf); 44 const void *buf);
45extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, 45extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh); 46 struct buffer_head *l_bh);
47extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); 47extern int gfs2_statfs_sync(struct super_block *sb, int type);
48 48
49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); 49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); 50extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 446329728d52..c5dad1eb7b91 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -158,7 +158,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
158 if (simple_strtol(buf, NULL, 0) != 1) 158 if (simple_strtol(buf, NULL, 0) != 1)
159 return -EINVAL; 159 return -EINVAL;
160 160
161 gfs2_statfs_sync(sdp); 161 gfs2_statfs_sync(sdp->sd_vfs, 0);
162 return len; 162 return len;
163} 163}
164 164
@@ -171,13 +171,14 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
171 if (simple_strtol(buf, NULL, 0) != 1) 171 if (simple_strtol(buf, NULL, 0) != 1)
172 return -EINVAL; 172 return -EINVAL;
173 173
174 gfs2_quota_sync(sdp); 174 gfs2_quota_sync(sdp->sd_vfs, 0);
175 return len; 175 return len;
176} 176}
177 177
178static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, 178static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
179 size_t len) 179 size_t len)
180{ 180{
181 int error;
181 u32 id; 182 u32 id;
182 183
183 if (!capable(CAP_SYS_ADMIN)) 184 if (!capable(CAP_SYS_ADMIN))
@@ -185,13 +186,14 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
185 186
186 id = simple_strtoul(buf, NULL, 0); 187 id = simple_strtoul(buf, NULL, 0);
187 188
188 gfs2_quota_refresh(sdp, 1, id); 189 error = gfs2_quota_refresh(sdp, 1, id);
189 return len; 190 return error ? error : len;
190} 191}
191 192
192static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, 193static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
193 size_t len) 194 size_t len)
194{ 195{
196 int error;
195 u32 id; 197 u32 id;
196 198
197 if (!capable(CAP_SYS_ADMIN)) 199 if (!capable(CAP_SYS_ADMIN))
@@ -199,8 +201,8 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
199 201
200 id = simple_strtoul(buf, NULL, 0); 202 id = simple_strtoul(buf, NULL, 0);
201 203
202 gfs2_quota_refresh(sdp, 0, id); 204 error = gfs2_quota_refresh(sdp, 0, id);
203 return len; 205 return error ? error : len;
204} 206}
205 207
206static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len) 208static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 8a0f8ef6ee27..912f5cbc4740 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -186,8 +186,8 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
186 return 0; 186 return 0;
187} 187}
188 188
189int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, 189static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
190 struct gfs2_ea_location *el) 190 struct gfs2_ea_location *el)
191{ 191{
192 struct ea_find ef; 192 struct ea_find ef;
193 int error; 193 int error;
@@ -516,8 +516,8 @@ out:
516 return error; 516 return error;
517} 517}
518 518
519int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, 519static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
520 char *data, size_t size) 520 char *data, size_t size)
521{ 521{
522 int ret; 522 int ret;
523 size_t len = GFS2_EA_DATA_LEN(el->el_ea); 523 size_t len = GFS2_EA_DATA_LEN(el->el_ea);
@@ -534,6 +534,36 @@ int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
534 return len; 534 return len;
535} 535}
536 536
537int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata)
538{
539 struct gfs2_ea_location el;
540 int error;
541 int len;
542 char *data;
543
544 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el);
545 if (error)
546 return error;
547 if (!el.el_ea)
548 goto out;
549 if (!GFS2_EA_DATA_LEN(el.el_ea))
550 goto out;
551
552 len = GFS2_EA_DATA_LEN(el.el_ea);
553 data = kmalloc(len, GFP_NOFS);
554 error = -ENOMEM;
555 if (data == NULL)
556 goto out;
557
558 error = gfs2_ea_get_copy(ip, &el, data, len);
559 if (error == 0)
560 error = len;
561 *ppdata = data;
562out:
563 brelse(el.el_bh);
564 return error;
565}
566
537/** 567/**
538 * gfs2_xattr_get - Get a GFS2 extended attribute 568 * gfs2_xattr_get - Get a GFS2 extended attribute
539 * @inode: The inode 569 * @inode: The inode
@@ -1259,22 +1289,26 @@ fail:
1259 return error; 1289 return error;
1260} 1290}
1261 1291
1262int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, 1292int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1263 struct iattr *attr, char *data)
1264{ 1293{
1294 struct gfs2_ea_location el;
1265 struct buffer_head *dibh; 1295 struct buffer_head *dibh;
1266 int error; 1296 int error;
1267 1297
1268 if (GFS2_EA_IS_STUFFED(el->el_ea)) { 1298 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
1299 if (error)
1300 return error;
1301
1302 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1269 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); 1303 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
1270 if (error) 1304 if (error)
1271 return error; 1305 return error;
1272 1306
1273 gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); 1307 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
1274 memcpy(GFS2_EA2DATA(el->el_ea), data, 1308 memcpy(GFS2_EA2DATA(el.el_ea), data,
1275 GFS2_EA_DATA_LEN(el->el_ea)); 1309 GFS2_EA_DATA_LEN(el.el_ea));
1276 } else 1310 } else
1277 error = ea_acl_chmod_unstuffed(ip, el->el_ea, data); 1311 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
1278 1312
1279 if (error) 1313 if (error)
1280 return error; 1314 return error;
@@ -1507,18 +1541,6 @@ static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1507 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags); 1541 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1508} 1542}
1509 1543
1510static int gfs2_xattr_system_get(struct inode *inode, const char *name,
1511 void *buffer, size_t size)
1512{
1513 return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size);
1514}
1515
1516static int gfs2_xattr_system_set(struct inode *inode, const char *name,
1517 const void *value, size_t size, int flags)
1518{
1519 return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags);
1520}
1521
1522static int gfs2_xattr_security_get(struct inode *inode, const char *name, 1544static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1523 void *buffer, size_t size) 1545 void *buffer, size_t size)
1524{ 1546{
@@ -1543,12 +1565,6 @@ static struct xattr_handler gfs2_xattr_security_handler = {
1543 .set = gfs2_xattr_security_set, 1565 .set = gfs2_xattr_security_set,
1544}; 1566};
1545 1567
1546static struct xattr_handler gfs2_xattr_system_handler = {
1547 .prefix = XATTR_SYSTEM_PREFIX,
1548 .get = gfs2_xattr_system_get,
1549 .set = gfs2_xattr_system_set,
1550};
1551
1552struct xattr_handler *gfs2_xattr_handlers[] = { 1568struct xattr_handler *gfs2_xattr_handlers[] = {
1553 &gfs2_xattr_user_handler, 1569 &gfs2_xattr_user_handler,
1554 &gfs2_xattr_security_handler, 1570 &gfs2_xattr_security_handler,
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index cbdfd7743733..8d6ae5813c4d 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -62,11 +62,7 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
62 62
63/* Exported to acl.c */ 63/* Exported to acl.c */
64 64
65extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, 65extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data);
66 struct gfs2_ea_location *el); 66extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data);
67extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
68 char *data, size_t size);
69extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
70 struct iattr *attr, char *data);
71 67
72#endif /* __EATTR_DOT_H__ */ 68#endif /* __EATTR_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 4d8e3be55976..06c1f02de611 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -18,7 +18,6 @@
18#include <linux/hash.h> 18#include <linux/hash.h>
19#include <linux/swap.h> 19#include <linux/swap.h>
20#include <linux/security.h> 20#include <linux/security.h>
21#include <linux/ima.h>
22#include <linux/pagemap.h> 21#include <linux/pagemap.h>
23#include <linux/cdev.h> 22#include <linux/cdev.h>
24#include <linux/bootmem.h> 23#include <linux/bootmem.h>
@@ -157,11 +156,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
157 156
158 if (security_inode_alloc(inode)) 157 if (security_inode_alloc(inode))
159 goto out; 158 goto out;
160
161 /* allocate and initialize an i_integrity */
162 if (ima_inode_alloc(inode))
163 goto out_free_security;
164
165 spin_lock_init(&inode->i_lock); 159 spin_lock_init(&inode->i_lock);
166 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); 160 lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
167 161
@@ -201,9 +195,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
201#endif 195#endif
202 196
203 return 0; 197 return 0;
204
205out_free_security:
206 security_inode_free(inode);
207out: 198out:
208 return -ENOMEM; 199 return -ENOMEM;
209} 200}
@@ -235,7 +226,6 @@ static struct inode *alloc_inode(struct super_block *sb)
235void __destroy_inode(struct inode *inode) 226void __destroy_inode(struct inode *inode)
236{ 227{
237 BUG_ON(inode_has_buffers(inode)); 228 BUG_ON(inode_has_buffers(inode));
238 ima_inode_free(inode);
239 security_inode_free(inode); 229 security_inode_free(inode);
240 fsnotify_inode_delete(inode); 230 fsnotify_inode_delete(inode);
241#ifdef CONFIG_FS_POSIX_ACL 231#ifdef CONFIG_FS_POSIX_ACL
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index defb932eee9a..0b3fa7974fa8 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -36,286 +36,323 @@ static void *zisofs_zlib_workspace;
36static DEFINE_MUTEX(zisofs_zlib_lock); 36static DEFINE_MUTEX(zisofs_zlib_lock);
37 37
38/* 38/*
39 * When decompressing, we typically obtain more than one page 39 * Read data of @inode from @block_start to @block_end and uncompress
40 * per reference. We inject the additional pages into the page 40 * to one zisofs block. Store the data in the @pages array with @pcount
41 * cache as a form of readahead. 41 * entries. Start storing at offset @poffset of the first page.
42 */ 42 */
43static int zisofs_readpage(struct file *file, struct page *page) 43static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
44 loff_t block_end, int pcount,
45 struct page **pages, unsigned poffset,
46 int *errp)
44{ 47{
45 struct inode *inode = file->f_path.dentry->d_inode;
46 struct address_space *mapping = inode->i_mapping;
47 unsigned int maxpage, xpage, fpage, blockindex;
48 unsigned long offset;
49 unsigned long blockptr, blockendptr, cstart, cend, csize;
50 struct buffer_head *bh, *ptrbh[2];
51 unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
52 unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
53 unsigned long bufmask = bufsize - 1;
54 int err = -EIO;
55 int i;
56 unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
57 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1]; 48 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
58 /* unsigned long zisofs_block_size = 1UL << zisofs_block_shift; */ 49 unsigned int bufsize = ISOFS_BUFFER_SIZE(inode);
59 unsigned int zisofs_block_page_shift = zisofs_block_shift-PAGE_CACHE_SHIFT; 50 unsigned int bufshift = ISOFS_BUFFER_BITS(inode);
60 unsigned long zisofs_block_pages = 1UL << zisofs_block_page_shift; 51 unsigned int bufmask = bufsize - 1;
61 unsigned long zisofs_block_page_mask = zisofs_block_pages-1; 52 int i, block_size = block_end - block_start;
62 struct page *pages[zisofs_block_pages]; 53 z_stream stream = { .total_out = 0,
63 unsigned long index = page->index; 54 .avail_in = 0,
64 int indexblocks; 55 .avail_out = 0, };
65 56 int zerr;
66 /* We have already been given one page, this is the one 57 int needblocks = (block_size + (block_start & bufmask) + bufmask)
67 we must do. */ 58 >> bufshift;
68 xpage = index & zisofs_block_page_mask; 59 int haveblocks;
69 pages[xpage] = page; 60 blkcnt_t blocknum;
70 61 struct buffer_head *bhs[needblocks + 1];
71 /* The remaining pages need to be allocated and inserted */ 62 int curbh, curpage;
72 offset = index & ~zisofs_block_page_mask; 63
73 blockindex = offset >> zisofs_block_page_shift; 64 if (block_size > deflateBound(1UL << zisofs_block_shift)) {
74 maxpage = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 65 *errp = -EIO;
75
76 /*
77 * If this page is wholly outside i_size we just return zero;
78 * do_generic_file_read() will handle this for us
79 */
80 if (page->index >= maxpage) {
81 SetPageUptodate(page);
82 unlock_page(page);
83 return 0; 66 return 0;
84 } 67 }
85 68 /* Empty block? */
86 maxpage = min(zisofs_block_pages, maxpage-offset); 69 if (block_size == 0) {
87 70 for ( i = 0 ; i < pcount ; i++ ) {
88 for ( i = 0 ; i < maxpage ; i++, offset++ ) { 71 if (!pages[i])
89 if ( i != xpage ) { 72 continue;
90 pages[i] = grab_cache_page_nowait(mapping, offset); 73 memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE);
91 } 74 flush_dcache_page(pages[i]);
92 page = pages[i]; 75 SetPageUptodate(pages[i]);
93 if ( page ) {
94 ClearPageError(page);
95 kmap(page);
96 } 76 }
77 return ((loff_t)pcount) << PAGE_CACHE_SHIFT;
97 } 78 }
98 79
99 /* This is the last page filled, plus one; used in case of abort. */ 80 /* Because zlib is not thread-safe, do all the I/O at the top. */
100 fpage = 0; 81 blocknum = block_start >> bufshift;
82 memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
83 haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
84 ll_rw_block(READ, haveblocks, bhs);
101 85
102 /* Find the pointer to this specific chunk */ 86 curbh = 0;
103 /* Note: we're not using isonum_731() here because the data is known aligned */ 87 curpage = 0;
104 /* Note: header_size is in 32-bit words (4 bytes) */ 88 /*
105 blockptr = (header_size + blockindex) << 2; 89 * First block is special since it may be fractional. We also wait for
106 blockendptr = blockptr + 4; 90 * it before grabbing the zlib mutex; odds are that the subsequent
91 * blocks are going to come in in short order so we don't hold the zlib
92 * mutex longer than necessary.
93 */
107 94
108 indexblocks = ((blockptr^blockendptr) >> bufshift) ? 2 : 1; 95 if (!bhs[0])
109 ptrbh[0] = ptrbh[1] = NULL; 96 goto b_eio;
110 97
111 if ( isofs_get_blocks(inode, blockptr >> bufshift, ptrbh, indexblocks) != indexblocks ) { 98 wait_on_buffer(bhs[0]);
112 if ( ptrbh[0] ) brelse(ptrbh[0]); 99 if (!buffer_uptodate(bhs[0])) {
113 printk(KERN_DEBUG "zisofs: Null buffer on reading block table, inode = %lu, block = %lu\n", 100 *errp = -EIO;
114 inode->i_ino, blockptr >> bufshift); 101 goto b_eio;
115 goto eio;
116 }
117 ll_rw_block(READ, indexblocks, ptrbh);
118
119 bh = ptrbh[0];
120 if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
121 printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
122 inode->i_ino, blockptr >> bufshift);
123 if ( ptrbh[1] )
124 brelse(ptrbh[1]);
125 goto eio;
126 }
127 cstart = le32_to_cpu(*(__le32 *)(bh->b_data + (blockptr & bufmask)));
128
129 if ( indexblocks == 2 ) {
130 /* We just crossed a block boundary. Switch to the next block */
131 brelse(bh);
132 bh = ptrbh[1];
133 if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
134 printk(KERN_DEBUG "zisofs: Failed to read block table, inode = %lu, block = %lu\n",
135 inode->i_ino, blockendptr >> bufshift);
136 goto eio;
137 }
138 } 102 }
139 cend = le32_to_cpu(*(__le32 *)(bh->b_data + (blockendptr & bufmask)));
140 brelse(bh);
141 103
142 if (cstart > cend) 104 stream.workspace = zisofs_zlib_workspace;
143 goto eio; 105 mutex_lock(&zisofs_zlib_lock);
144 106
145 csize = cend-cstart; 107 zerr = zlib_inflateInit(&stream);
146 108 if (zerr != Z_OK) {
147 if (csize > deflateBound(1UL << zisofs_block_shift)) 109 if (zerr == Z_MEM_ERROR)
148 goto eio; 110 *errp = -ENOMEM;
149 111 else
150 /* Now page[] contains an array of pages, any of which can be NULL, 112 *errp = -EIO;
151 and the locks on which we hold. We should now read the data and 113 printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
152 release the pages. If the pages are NULL the decompressed data 114 zerr);
153 for that particular page should be discarded. */ 115 goto z_eio;
154 116 }
155 if ( csize == 0 ) { 117
156 /* This data block is empty. */ 118 while (curpage < pcount && curbh < haveblocks &&
157 119 zerr != Z_STREAM_END) {
158 for ( fpage = 0 ; fpage < maxpage ; fpage++ ) { 120 if (!stream.avail_out) {
159 if ( (page = pages[fpage]) != NULL ) { 121 if (pages[curpage]) {
160 memset(page_address(page), 0, PAGE_CACHE_SIZE); 122 stream.next_out = page_address(pages[curpage])
161 123 + poffset;
162 flush_dcache_page(page); 124 stream.avail_out = PAGE_CACHE_SIZE - poffset;
163 SetPageUptodate(page); 125 poffset = 0;
164 kunmap(page); 126 } else {
165 unlock_page(page); 127 stream.next_out = (void *)&zisofs_sink_page;
166 if ( fpage == xpage ) 128 stream.avail_out = PAGE_CACHE_SIZE;
167 err = 0; /* The critical page */
168 else
169 page_cache_release(page);
170 } 129 }
171 } 130 }
172 } else { 131 if (!stream.avail_in) {
173 /* This data block is compressed. */ 132 wait_on_buffer(bhs[curbh]);
174 z_stream stream; 133 if (!buffer_uptodate(bhs[curbh])) {
175 int bail = 0, left_out = -1; 134 *errp = -EIO;
176 int zerr; 135 break;
177 int needblocks = (csize + (cstart & bufmask) + bufmask) >> bufshift; 136 }
178 int haveblocks; 137 stream.next_in = bhs[curbh]->b_data +
179 struct buffer_head *bhs[needblocks+1]; 138 (block_start & bufmask);
180 struct buffer_head **bhptr; 139 stream.avail_in = min_t(unsigned, bufsize -
181 140 (block_start & bufmask),
182 /* Because zlib is not thread-safe, do all the I/O at the top. */ 141 block_size);
183 142 block_size -= stream.avail_in;
184 blockptr = cstart >> bufshift; 143 block_start = 0;
185 memset(bhs, 0, (needblocks+1)*sizeof(struct buffer_head *));
186 haveblocks = isofs_get_blocks(inode, blockptr, bhs, needblocks);
187 ll_rw_block(READ, haveblocks, bhs);
188
189 bhptr = &bhs[0];
190 bh = *bhptr++;
191
192 /* First block is special since it may be fractional.
193 We also wait for it before grabbing the zlib
194 mutex; odds are that the subsequent blocks are
195 going to come in in short order so we don't hold
196 the zlib mutex longer than necessary. */
197
198 if ( !bh || (wait_on_buffer(bh), !buffer_uptodate(bh)) ) {
199 printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n",
200 fpage, xpage, csize);
201 goto b_eio;
202 }
203 stream.next_in = bh->b_data + (cstart & bufmask);
204 stream.avail_in = min(bufsize-(cstart & bufmask), csize);
205 csize -= stream.avail_in;
206
207 stream.workspace = zisofs_zlib_workspace;
208 mutex_lock(&zisofs_zlib_lock);
209
210 zerr = zlib_inflateInit(&stream);
211 if ( zerr != Z_OK ) {
212 if ( err && zerr == Z_MEM_ERROR )
213 err = -ENOMEM;
214 printk(KERN_DEBUG "zisofs: zisofs_inflateInit returned %d\n",
215 zerr);
216 goto z_eio;
217 } 144 }
218 145
219 while ( !bail && fpage < maxpage ) { 146 while (stream.avail_out && stream.avail_in) {
220 page = pages[fpage]; 147 zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
221 if ( page ) 148 if (zerr == Z_BUF_ERROR && stream.avail_in == 0)
222 stream.next_out = page_address(page); 149 break;
223 else 150 if (zerr == Z_STREAM_END)
224 stream.next_out = (void *)&zisofs_sink_page; 151 break;
225 stream.avail_out = PAGE_CACHE_SIZE; 152 if (zerr != Z_OK) {
226 153 /* EOF, error, or trying to read beyond end of input */
227 while ( stream.avail_out ) { 154 if (zerr == Z_MEM_ERROR)
228 int ao, ai; 155 *errp = -ENOMEM;
229 if ( stream.avail_in == 0 && left_out ) { 156 else {
230 if ( !csize ) { 157 printk(KERN_DEBUG
231 printk(KERN_WARNING "zisofs: ZF read beyond end of input\n"); 158 "zisofs: zisofs_inflate returned"
232 bail = 1; 159 " %d, inode = %lu,"
233 break; 160 " page idx = %d, bh idx = %d,"
234 } else { 161 " avail_in = %d,"
235 bh = *bhptr++; 162 " avail_out = %d\n",
236 if ( !bh || 163 zerr, inode->i_ino, curpage,
237 (wait_on_buffer(bh), !buffer_uptodate(bh)) ) { 164 curbh, stream.avail_in,
238 /* Reached an EIO */ 165 stream.avail_out);
239 printk(KERN_DEBUG "zisofs: Hit null buffer, fpage = %d, xpage = %d, csize = %ld\n", 166 *errp = -EIO;
240 fpage, xpage, csize);
241
242 bail = 1;
243 break;
244 }
245 stream.next_in = bh->b_data;
246 stream.avail_in = min(csize,bufsize);
247 csize -= stream.avail_in;
248 }
249 }
250 ao = stream.avail_out; ai = stream.avail_in;
251 zerr = zlib_inflate(&stream, Z_SYNC_FLUSH);
252 left_out = stream.avail_out;
253 if ( zerr == Z_BUF_ERROR && stream.avail_in == 0 )
254 continue;
255 if ( zerr != Z_OK ) {
256 /* EOF, error, or trying to read beyond end of input */
257 if ( err && zerr == Z_MEM_ERROR )
258 err = -ENOMEM;
259 if ( zerr != Z_STREAM_END )
260 printk(KERN_DEBUG "zisofs: zisofs_inflate returned %d, inode = %lu, index = %lu, fpage = %d, xpage = %d, avail_in = %d, avail_out = %d, ai = %d, ao = %d\n",
261 zerr, inode->i_ino, index,
262 fpage, xpage,
263 stream.avail_in, stream.avail_out,
264 ai, ao);
265 bail = 1;
266 break;
267 } 167 }
168 goto inflate_out;
268 } 169 }
170 }
269 171
270 if ( stream.avail_out && zerr == Z_STREAM_END ) { 172 if (!stream.avail_out) {
271 /* Fractional page written before EOF. This may 173 /* This page completed */
272 be the last page in the file. */ 174 if (pages[curpage]) {
273 memset(stream.next_out, 0, stream.avail_out); 175 flush_dcache_page(pages[curpage]);
274 stream.avail_out = 0; 176 SetPageUptodate(pages[curpage]);
275 } 177 }
178 curpage++;
179 }
180 if (!stream.avail_in)
181 curbh++;
182 }
183inflate_out:
184 zlib_inflateEnd(&stream);
276 185
277 if ( !stream.avail_out ) { 186z_eio:
278 /* This page completed */ 187 mutex_unlock(&zisofs_zlib_lock);
279 if ( page ) { 188
280 flush_dcache_page(page); 189b_eio:
281 SetPageUptodate(page); 190 for (i = 0; i < haveblocks; i++)
282 kunmap(page); 191 brelse(bhs[i]);
283 unlock_page(page); 192 return stream.total_out;
284 if ( fpage == xpage ) 193}
285 err = 0; /* The critical page */ 194
286 else 195/*
287 page_cache_release(page); 196 * Uncompress data so that pages[full_page] is fully uptodate and possibly
288 } 197 * fills in other pages if we have data for them.
289 fpage++; 198 */
290 } 199static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
200 struct page **pages)
201{
202 loff_t start_off, end_off;
203 loff_t block_start, block_end;
204 unsigned int header_size = ISOFS_I(inode)->i_format_parm[0];
205 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
206 unsigned int blockptr;
207 loff_t poffset = 0;
208 blkcnt_t cstart_block, cend_block;
209 struct buffer_head *bh;
210 unsigned int blkbits = ISOFS_BUFFER_BITS(inode);
211 unsigned int blksize = 1 << blkbits;
212 int err;
213 loff_t ret;
214
215 BUG_ON(!pages[full_page]);
216
217 /*
218 * We want to read at least 'full_page' page. Because we have to
219 * uncompress the whole compression block anyway, fill the surrounding
220 * pages with the data we have anyway...
221 */
222 start_off = page_offset(pages[full_page]);
223 end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size);
224
225 cstart_block = start_off >> zisofs_block_shift;
226 cend_block = (end_off + (1 << zisofs_block_shift) - 1)
227 >> zisofs_block_shift;
228
229 WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) !=
230 ((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK));
231
232 /* Find the pointer to this specific chunk */
233 /* Note: we're not using isonum_731() here because the data is known aligned */
234 /* Note: header_size is in 32-bit words (4 bytes) */
235 blockptr = (header_size + cstart_block) << 2;
236 bh = isofs_bread(inode, blockptr >> blkbits);
237 if (!bh)
238 return -EIO;
239 block_start = le32_to_cpu(*(__le32 *)
240 (bh->b_data + (blockptr & (blksize - 1))));
241
242 while (cstart_block < cend_block && pcount > 0) {
243 /* Load end of the compressed block in the file */
244 blockptr += 4;
245 /* Traversed to next block? */
246 if (!(blockptr & (blksize - 1))) {
247 brelse(bh);
248
249 bh = isofs_bread(inode, blockptr >> blkbits);
250 if (!bh)
251 return -EIO;
252 }
253 block_end = le32_to_cpu(*(__le32 *)
254 (bh->b_data + (blockptr & (blksize - 1))));
255 if (block_start > block_end) {
256 brelse(bh);
257 return -EIO;
258 }
259 err = 0;
260 ret = zisofs_uncompress_block(inode, block_start, block_end,
261 pcount, pages, poffset, &err);
262 poffset += ret;
263 pages += poffset >> PAGE_CACHE_SHIFT;
264 pcount -= poffset >> PAGE_CACHE_SHIFT;
265 full_page -= poffset >> PAGE_CACHE_SHIFT;
266 poffset &= ~PAGE_CACHE_MASK;
267
268 if (err) {
269 brelse(bh);
270 /*
271 * Did we finish reading the page we really wanted
272 * to read?
273 */
274 if (full_page < 0)
275 return 0;
276 return err;
291 } 277 }
292 zlib_inflateEnd(&stream);
293 278
294 z_eio: 279 block_start = block_end;
295 mutex_unlock(&zisofs_zlib_lock); 280 cstart_block++;
281 }
282
283 if (poffset && *pages) {
284 memset(page_address(*pages) + poffset, 0,
285 PAGE_CACHE_SIZE - poffset);
286 flush_dcache_page(*pages);
287 SetPageUptodate(*pages);
288 }
289 return 0;
290}
296 291
297 b_eio: 292/*
298 for ( i = 0 ; i < haveblocks ; i++ ) { 293 * When decompressing, we typically obtain more than one page
299 if ( bhs[i] ) 294 * per reference. We inject the additional pages into the page
300 brelse(bhs[i]); 295 * cache as a form of readahead.
296 */
297static int zisofs_readpage(struct file *file, struct page *page)
298{
299 struct inode *inode = file->f_path.dentry->d_inode;
300 struct address_space *mapping = inode->i_mapping;
301 int err;
302 int i, pcount, full_page;
303 unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
304 unsigned int zisofs_pages_per_cblock =
305 PAGE_CACHE_SHIFT <= zisofs_block_shift ?
306 (1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0;
307 struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
308 pgoff_t index = page->index, end_index;
309
310 end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
311 /*
312 * If this page is wholly outside i_size we just return zero;
313 * do_generic_file_read() will handle this for us
314 */
315 if (index >= end_index) {
316 SetPageUptodate(page);
317 unlock_page(page);
318 return 0;
319 }
320
321 if (PAGE_CACHE_SHIFT <= zisofs_block_shift) {
322 /* We have already been given one page, this is the one
323 we must do. */
324 full_page = index & (zisofs_pages_per_cblock - 1);
325 pcount = min_t(int, zisofs_pages_per_cblock,
326 end_index - (index & ~(zisofs_pages_per_cblock - 1)));
327 index -= full_page;
328 } else {
329 full_page = 0;
330 pcount = 1;
331 }
332 pages[full_page] = page;
333
334 for (i = 0; i < pcount; i++, index++) {
335 if (i != full_page)
336 pages[i] = grab_cache_page_nowait(mapping, index);
337 if (pages[i]) {
338 ClearPageError(pages[i]);
339 kmap(pages[i]);
301 } 340 }
302 } 341 }
303 342
304eio: 343 err = zisofs_fill_pages(inode, full_page, pcount, pages);
305 344
306 /* Release any residual pages, do not SetPageUptodate */ 345 /* Release any residual pages, do not SetPageUptodate */
307 while ( fpage < maxpage ) { 346 for (i = 0; i < pcount; i++) {
308 page = pages[fpage]; 347 if (pages[i]) {
309 if ( page ) { 348 flush_dcache_page(pages[i]);
310 flush_dcache_page(page); 349 if (i == full_page && err)
311 if ( fpage == xpage ) 350 SetPageError(pages[i]);
312 SetPageError(page); 351 kunmap(pages[i]);
313 kunmap(page); 352 unlock_page(pages[i]);
314 unlock_page(page); 353 if (i != full_page)
315 if ( fpage != xpage ) 354 page_cache_release(pages[i]);
316 page_cache_release(page);
317 } 355 }
318 fpage++;
319 } 356 }
320 357
321 /* At this point, err contains 0 or -EIO depending on the "critical" page */ 358 /* At this point, err contains 0 or -EIO depending on the "critical" page */
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c2fb2dd0131f..96a685c550fd 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -518,8 +518,7 @@ repeat:
518 if (algo == SIG('p', 'z')) { 518 if (algo == SIG('p', 'z')) {
519 int block_shift = 519 int block_shift =
520 isonum_711(&rr->u.ZF.parms[1]); 520 isonum_711(&rr->u.ZF.parms[1]);
521 if (block_shift < PAGE_CACHE_SHIFT 521 if (block_shift > 17) {
522 || block_shift > 17) {
523 printk(KERN_WARNING "isofs: " 522 printk(KERN_WARNING "isofs: "
524 "Can't handle ZF block " 523 "Can't handle ZF block "
525 "size of 2^%d\n", 524 "size of 2^%d\n",
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index d4cfd6d2779e..6a10238d2c63 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -286,7 +286,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
286 if (err) { 286 if (err) {
287 /* 287 /*
288 * Because AS_EIO is cleared by 288 * Because AS_EIO is cleared by
289 * wait_on_page_writeback_range(), set it again so 289 * filemap_fdatawait_range(), set it again so
290 * that user process can get -EIO from fsync(). 290 * that user process can get -EIO from fsync().
291 */ 291 */
292 set_bit(AS_EIO, 292 set_bit(AS_EIO,
@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
636 JBUFFER_TRACE(jh, "ph3: write metadata"); 636 JBUFFER_TRACE(jh, "ph3: write metadata");
637 flags = jbd2_journal_write_metadata_buffer(commit_transaction, 637 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
638 jh, &new_jh, blocknr); 638 jh, &new_jh, blocknr);
639 if (flags < 0) {
640 jbd2_journal_abort(journal, flags);
641 continue;
642 }
639 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 643 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
640 wbuf[bufs++] = jh2bh(new_jh); 644 wbuf[bufs++] = jh2bh(new_jh);
641 645
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fed85388ee86..b7ca3a92a4db 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
78EXPORT_SYMBOL(jbd2_journal_ack_err); 78EXPORT_SYMBOL(jbd2_journal_ack_err);
79EXPORT_SYMBOL(jbd2_journal_clear_err); 79EXPORT_SYMBOL(jbd2_journal_clear_err);
80EXPORT_SYMBOL(jbd2_log_wait_commit); 80EXPORT_SYMBOL(jbd2_log_wait_commit);
81EXPORT_SYMBOL(jbd2_log_start_commit);
81EXPORT_SYMBOL(jbd2_journal_start_commit); 82EXPORT_SYMBOL(jbd2_journal_start_commit);
82EXPORT_SYMBOL(jbd2_journal_force_commit_nested); 83EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
83EXPORT_SYMBOL(jbd2_journal_wipe); 84EXPORT_SYMBOL(jbd2_journal_wipe);
@@ -358,6 +359,10 @@ repeat:
358 359
359 jbd_unlock_bh_state(bh_in); 360 jbd_unlock_bh_state(bh_in);
360 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 361 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
362 if (!tmp) {
363 jbd2_journal_put_journal_head(new_jh);
364 return -ENOMEM;
365 }
361 jbd_lock_bh_state(bh_in); 366 jbd_lock_bh_state(bh_in);
362 if (jh_in->b_frozen_data) { 367 if (jh_in->b_frozen_data) {
363 jbd2_free(tmp, bh_in->b_size); 368 jbd2_free(tmp, bh_in->b_size);
@@ -1248,6 +1253,13 @@ int jbd2_journal_load(journal_t *journal)
1248 if (jbd2_journal_recover(journal)) 1253 if (jbd2_journal_recover(journal))
1249 goto recovery_error; 1254 goto recovery_error;
1250 1255
1256 if (journal->j_failed_commit) {
1257 printk(KERN_ERR "JBD2: journal transaction %u on %s "
1258 "is corrupt.\n", journal->j_failed_commit,
1259 journal->j_devname);
1260 return -EIO;
1261 }
1262
1251 /* OK, we've finished with the dynamic journal bits: 1263 /* OK, we've finished with the dynamic journal bits:
1252 * reinitialise the dynamic contents of the superblock in memory 1264 * reinitialise the dynamic contents of the superblock in memory
1253 * and reset them on disk. */ 1265 * and reset them on disk. */
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index f25e70c1b51c..f0294410868d 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -177,7 +177,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
177 spin_unlock(&jffs2_compressor_list_lock); 177 spin_unlock(&jffs2_compressor_list_lock);
178 break; 178 break;
179 default: 179 default:
180 printk(KERN_ERR "JFFS2: unknow compression mode.\n"); 180 printk(KERN_ERR "JFFS2: unknown compression mode.\n");
181 } 181 }
182 out: 182 out:
183 if (ret == JFFS2_COMPR_NONE) { 183 if (ret == JFFS2_COMPR_NONE) {
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1a80301004b8..378991cfe40f 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -931,7 +931,7 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
931 * Helper function for jffs2_get_inode_nodes(). 931 * Helper function for jffs2_get_inode_nodes().
932 * The function detects whether more data should be read and reads it if yes. 932 * The function detects whether more data should be read and reads it if yes.
933 * 933 *
934 * Returns: 0 on succes; 934 * Returns: 0 on success;
935 * negative error code on failure. 935 * negative error code on failure.
936 */ 936 */
937static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 937static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 082e844ab2db..4b107881acd5 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -31,7 +31,7 @@
31 * is used to release xattr name/value pair and detach from c->xattrindex. 31 * is used to release xattr name/value pair and detach from c->xattrindex.
32 * reclaim_xattr_datum(c) 32 * reclaim_xattr_datum(c)
33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when 33 * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when
34 * memory usage by cache is over c->xdatum_mem_threshold. Currentry, this threshold 34 * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold
35 * is hard coded as 32KiB. 35 * is hard coded as 32KiB.
36 * do_verify_xattr_datum(c, xd) 36 * do_verify_xattr_datum(c, xd)
37 * is used to load the xdatum informations without name/value pair from the medium. 37 * is used to load the xdatum informations without name/value pair from the medium.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2bc7d8aa5740..d9b031cf69f5 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -755,7 +755,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
755 * allocation group. 755 * allocation group.
756 */ 756 */
757 if ((blkno & (bmp->db_agsize - 1)) == 0) 757 if ((blkno & (bmp->db_agsize - 1)) == 0)
758 /* check if the AG is currenly being written to. 758 /* check if the AG is currently being written to.
759 * if so, call dbNextAG() to find a non-busy 759 * if so, call dbNextAG() to find a non-busy
760 * AG with sufficient free space. 760 * AG with sufficient free space.
761 */ 761 */
@@ -3337,7 +3337,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3337 for (i = 0, n = 0; i < agno; n++) { 3337 for (i = 0, n = 0; i < agno; n++) {
3338 bmp->db_agfree[n] = 0; /* init collection point */ 3338 bmp->db_agfree[n] = 0; /* init collection point */
3339 3339
3340 /* coalesce cotiguous k AGs; */ 3340 /* coalesce contiguous k AGs; */
3341 for (j = 0; j < k && i < agno; j++, i++) { 3341 for (j = 0; j < k && i < agno; j++, i++) {
3342 /* merge AGi to AGn */ 3342 /* merge AGi to AGn */
3343 bmp->db_agfree[n] += bmp->db_agfree[i]; 3343 bmp->db_agfree[n] += bmp->db_agfree[i];
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1a54ae14a192..e50cfa3d9654 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -371,82 +371,74 @@ EXPORT_SYMBOL_GPL(lockd_down);
371 371
372static ctl_table nlm_sysctls[] = { 372static ctl_table nlm_sysctls[] = {
373 { 373 {
374 .ctl_name = CTL_UNNUMBERED,
375 .procname = "nlm_grace_period", 374 .procname = "nlm_grace_period",
376 .data = &nlm_grace_period, 375 .data = &nlm_grace_period,
377 .maxlen = sizeof(unsigned long), 376 .maxlen = sizeof(unsigned long),
378 .mode = 0644, 377 .mode = 0644,
379 .proc_handler = &proc_doulongvec_minmax, 378 .proc_handler = proc_doulongvec_minmax,
380 .extra1 = (unsigned long *) &nlm_grace_period_min, 379 .extra1 = (unsigned long *) &nlm_grace_period_min,
381 .extra2 = (unsigned long *) &nlm_grace_period_max, 380 .extra2 = (unsigned long *) &nlm_grace_period_max,
382 }, 381 },
383 { 382 {
384 .ctl_name = CTL_UNNUMBERED,
385 .procname = "nlm_timeout", 383 .procname = "nlm_timeout",
386 .data = &nlm_timeout, 384 .data = &nlm_timeout,
387 .maxlen = sizeof(unsigned long), 385 .maxlen = sizeof(unsigned long),
388 .mode = 0644, 386 .mode = 0644,
389 .proc_handler = &proc_doulongvec_minmax, 387 .proc_handler = proc_doulongvec_minmax,
390 .extra1 = (unsigned long *) &nlm_timeout_min, 388 .extra1 = (unsigned long *) &nlm_timeout_min,
391 .extra2 = (unsigned long *) &nlm_timeout_max, 389 .extra2 = (unsigned long *) &nlm_timeout_max,
392 }, 390 },
393 { 391 {
394 .ctl_name = CTL_UNNUMBERED,
395 .procname = "nlm_udpport", 392 .procname = "nlm_udpport",
396 .data = &nlm_udpport, 393 .data = &nlm_udpport,
397 .maxlen = sizeof(int), 394 .maxlen = sizeof(int),
398 .mode = 0644, 395 .mode = 0644,
399 .proc_handler = &proc_dointvec_minmax, 396 .proc_handler = proc_dointvec_minmax,
400 .extra1 = (int *) &nlm_port_min, 397 .extra1 = (int *) &nlm_port_min,
401 .extra2 = (int *) &nlm_port_max, 398 .extra2 = (int *) &nlm_port_max,
402 }, 399 },
403 { 400 {
404 .ctl_name = CTL_UNNUMBERED,
405 .procname = "nlm_tcpport", 401 .procname = "nlm_tcpport",
406 .data = &nlm_tcpport, 402 .data = &nlm_tcpport,
407 .maxlen = sizeof(int), 403 .maxlen = sizeof(int),
408 .mode = 0644, 404 .mode = 0644,
409 .proc_handler = &proc_dointvec_minmax, 405 .proc_handler = proc_dointvec_minmax,
410 .extra1 = (int *) &nlm_port_min, 406 .extra1 = (int *) &nlm_port_min,
411 .extra2 = (int *) &nlm_port_max, 407 .extra2 = (int *) &nlm_port_max,
412 }, 408 },
413 { 409 {
414 .ctl_name = CTL_UNNUMBERED,
415 .procname = "nsm_use_hostnames", 410 .procname = "nsm_use_hostnames",
416 .data = &nsm_use_hostnames, 411 .data = &nsm_use_hostnames,
417 .maxlen = sizeof(int), 412 .maxlen = sizeof(int),
418 .mode = 0644, 413 .mode = 0644,
419 .proc_handler = &proc_dointvec, 414 .proc_handler = proc_dointvec,
420 }, 415 },
421 { 416 {
422 .ctl_name = CTL_UNNUMBERED,
423 .procname = "nsm_local_state", 417 .procname = "nsm_local_state",
424 .data = &nsm_local_state, 418 .data = &nsm_local_state,
425 .maxlen = sizeof(int), 419 .maxlen = sizeof(int),
426 .mode = 0644, 420 .mode = 0644,
427 .proc_handler = &proc_dointvec, 421 .proc_handler = proc_dointvec,
428 }, 422 },
429 { .ctl_name = 0 } 423 { }
430}; 424};
431 425
432static ctl_table nlm_sysctl_dir[] = { 426static ctl_table nlm_sysctl_dir[] = {
433 { 427 {
434 .ctl_name = CTL_UNNUMBERED,
435 .procname = "nfs", 428 .procname = "nfs",
436 .mode = 0555, 429 .mode = 0555,
437 .child = nlm_sysctls, 430 .child = nlm_sysctls,
438 }, 431 },
439 { .ctl_name = 0 } 432 { }
440}; 433};
441 434
442static ctl_table nlm_sysctl_root[] = { 435static ctl_table nlm_sysctl_root[] = {
443 { 436 {
444 .ctl_name = CTL_FS,
445 .procname = "fs", 437 .procname = "fs",
446 .mode = 0555, 438 .mode = 0555,
447 .child = nlm_sysctl_dir, 439 .child = nlm_sysctl_dir,
448 }, 440 },
449 { .ctl_name = 0 } 441 { }
450}; 442};
451 443
452#endif /* CONFIG_SYSCTL */ 444#endif /* CONFIG_SYSCTL */
diff --git a/fs/namei.c b/fs/namei.c
index d11f404667e9..87f97ba90ad1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1279,28 +1279,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1279 return __lookup_hash(&this, base, NULL); 1279 return __lookup_hash(&this, base, NULL);
1280} 1280}
1281 1281
1282/**
1283 * lookup_one_noperm - bad hack for sysfs
1284 * @name: pathname component to lookup
1285 * @base: base directory to lookup from
1286 *
1287 * This is a variant of lookup_one_len that doesn't perform any permission
1288 * checks. It's a horrible hack to work around the braindead sysfs
1289 * architecture and should not be used anywhere else.
1290 *
1291 * DON'T USE THIS FUNCTION EVER, thanks.
1292 */
1293struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
1294{
1295 int err;
1296 struct qstr this;
1297
1298 err = __lookup_one_len(name, &this, base, strlen(name));
1299 if (err)
1300 return ERR_PTR(err);
1301 return __lookup_hash(&this, base, NULL);
1302}
1303
1304int user_path_at(int dfd, const char __user *name, unsigned flags, 1282int user_path_at(int dfd, const char __user *name, unsigned flags,
1305 struct path *path) 1283 struct path *path)
1306{ 1284{
@@ -1678,6 +1656,15 @@ struct file *do_filp_open(int dfd, const char *pathname,
1678 int will_write; 1656 int will_write;
1679 int flag = open_to_namei_flags(open_flag); 1657 int flag = open_to_namei_flags(open_flag);
1680 1658
1659 /*
1660 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1661 * check for O_DSYNC if the need any syncing at all we enforce it's
1662 * always set instead of having to deal with possibly weird behaviour
1663 * for malicious applications setting only __O_SYNC.
1664 */
1665 if (open_flag & __O_SYNC)
1666 open_flag |= O_DSYNC;
1667
1681 if (!acc_mode) 1668 if (!acc_mode)
1682 acc_mode = MAY_OPEN | ACC_MODE(flag); 1669 acc_mode = MAY_OPEN | ACC_MODE(flag);
1683 1670
diff --git a/fs/namespace.c b/fs/namespace.c
index bdc3cb4fd222..7d70d63ceb29 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1921,6 +1921,16 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1921 if (data_page) 1921 if (data_page)
1922 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1922 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1923 1923
1924 /* ... and get the mountpoint */
1925 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1926 if (retval)
1927 return retval;
1928
1929 retval = security_sb_mount(dev_name, &path,
1930 type_page, flags, data_page);
1931 if (retval)
1932 goto dput_out;
1933
1924 /* Default to relatime unless overriden */ 1934 /* Default to relatime unless overriden */
1925 if (!(flags & MS_NOATIME)) 1935 if (!(flags & MS_NOATIME))
1926 mnt_flags |= MNT_RELATIME; 1936 mnt_flags |= MNT_RELATIME;
@@ -1945,16 +1955,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1945 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 1955 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1946 MS_STRICTATIME); 1956 MS_STRICTATIME);
1947 1957
1948 /* ... and get the mountpoint */
1949 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1950 if (retval)
1951 return retval;
1952
1953 retval = security_sb_mount(dev_name, &path,
1954 type_page, flags, data_page);
1955 if (retval)
1956 goto dput_out;
1957
1958 if (flags & MS_REMOUNT) 1958 if (flags & MS_REMOUNT)
1959 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, 1959 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
1960 data_page); 1960 data_page);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 0d58caf4a6e1..ec8f45f12e05 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -835,7 +835,7 @@ static int ncp_ioctl_need_write(unsigned int cmd)
835 case NCP_IOC_SETROOT: 835 case NCP_IOC_SETROOT:
836 return 0; 836 return 0;
837 default: 837 default:
838 /* unkown IOCTL command, assume write */ 838 /* unknown IOCTL command, assume write */
839 return 1; 839 return 1;
840 } 840 }
841} 841}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f5fdd39e037a..6b891328f332 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -581,7 +581,7 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
581{ 581{
582 struct nfs_open_context *ctx; 582 struct nfs_open_context *ctx;
583 583
584 if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) 584 if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
585 return 1; 585 return 1;
586 ctx = nfs_file_open_context(filp); 586 ctx = nfs_file_open_context(filp);
587 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) 587 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
@@ -622,7 +622,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
622 622
623 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); 623 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
624 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 624 result = generic_file_aio_write(iocb, iov, nr_segs, pos);
625 /* Return error values for O_SYNC and IS_SYNC() */ 625 /* Return error values for O_DSYNC and IS_SYNC() */
626 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 626 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
627 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 627 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
628 if (err < 0) 628 if (err < 0)
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index b62481dabae9..70e1fbbaaeab 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -22,63 +22,55 @@ static struct ctl_table_header *nfs_callback_sysctl_table;
22static ctl_table nfs_cb_sysctls[] = { 22static ctl_table nfs_cb_sysctls[] = {
23#ifdef CONFIG_NFS_V4 23#ifdef CONFIG_NFS_V4
24 { 24 {
25 .ctl_name = CTL_UNNUMBERED,
26 .procname = "nfs_callback_tcpport", 25 .procname = "nfs_callback_tcpport",
27 .data = &nfs_callback_set_tcpport, 26 .data = &nfs_callback_set_tcpport,
28 .maxlen = sizeof(int), 27 .maxlen = sizeof(int),
29 .mode = 0644, 28 .mode = 0644,
30 .proc_handler = &proc_dointvec_minmax, 29 .proc_handler = proc_dointvec_minmax,
31 .extra1 = (int *)&nfs_set_port_min, 30 .extra1 = (int *)&nfs_set_port_min,
32 .extra2 = (int *)&nfs_set_port_max, 31 .extra2 = (int *)&nfs_set_port_max,
33 }, 32 },
34 { 33 {
35 .ctl_name = CTL_UNNUMBERED,
36 .procname = "idmap_cache_timeout", 34 .procname = "idmap_cache_timeout",
37 .data = &nfs_idmap_cache_timeout, 35 .data = &nfs_idmap_cache_timeout,
38 .maxlen = sizeof(int), 36 .maxlen = sizeof(int),
39 .mode = 0644, 37 .mode = 0644,
40 .proc_handler = &proc_dointvec_jiffies, 38 .proc_handler = proc_dointvec_jiffies,
41 .strategy = &sysctl_jiffies,
42 }, 39 },
43#endif 40#endif
44 { 41 {
45 .ctl_name = CTL_UNNUMBERED,
46 .procname = "nfs_mountpoint_timeout", 42 .procname = "nfs_mountpoint_timeout",
47 .data = &nfs_mountpoint_expiry_timeout, 43 .data = &nfs_mountpoint_expiry_timeout,
48 .maxlen = sizeof(nfs_mountpoint_expiry_timeout), 44 .maxlen = sizeof(nfs_mountpoint_expiry_timeout),
49 .mode = 0644, 45 .mode = 0644,
50 .proc_handler = &proc_dointvec_jiffies, 46 .proc_handler = proc_dointvec_jiffies,
51 .strategy = &sysctl_jiffies,
52 }, 47 },
53 { 48 {
54 .ctl_name = CTL_UNNUMBERED,
55 .procname = "nfs_congestion_kb", 49 .procname = "nfs_congestion_kb",
56 .data = &nfs_congestion_kb, 50 .data = &nfs_congestion_kb,
57 .maxlen = sizeof(nfs_congestion_kb), 51 .maxlen = sizeof(nfs_congestion_kb),
58 .mode = 0644, 52 .mode = 0644,
59 .proc_handler = &proc_dointvec, 53 .proc_handler = proc_dointvec,
60 }, 54 },
61 { .ctl_name = 0 } 55 { }
62}; 56};
63 57
64static ctl_table nfs_cb_sysctl_dir[] = { 58static ctl_table nfs_cb_sysctl_dir[] = {
65 { 59 {
66 .ctl_name = CTL_UNNUMBERED,
67 .procname = "nfs", 60 .procname = "nfs",
68 .mode = 0555, 61 .mode = 0555,
69 .child = nfs_cb_sysctls, 62 .child = nfs_cb_sysctls,
70 }, 63 },
71 { .ctl_name = 0 } 64 { }
72}; 65};
73 66
74static ctl_table nfs_cb_sysctl_root[] = { 67static ctl_table nfs_cb_sysctl_root[] = {
75 { 68 {
76 .ctl_name = CTL_FS,
77 .procname = "fs", 69 .procname = "fs",
78 .mode = 0555, 70 .mode = 0555,
79 .child = nfs_cb_sysctl_dir, 71 .child = nfs_cb_sysctl_dir,
80 }, 72 },
81 { .ctl_name = 0 } 73 { }
82}; 74};
83 75
84int nfs_register_sysctl(void) 76int nfs_register_sysctl(void)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6d40a538e3db..d171696017f4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -178,7 +178,7 @@ static int wb_priority(struct writeback_control *wbc)
178{ 178{
179 if (wbc->for_reclaim) 179 if (wbc->for_reclaim)
180 return FLUSH_HIGHPRI | FLUSH_STABLE; 180 return FLUSH_HIGHPRI | FLUSH_STABLE;
181 if (wbc->for_kupdate) 181 if (wbc->for_kupdate || wbc->for_background)
182 return FLUSH_LOWPRI; 182 return FLUSH_LOWPRI;
183 return 0; 183 return 0;
184} 184}
@@ -774,7 +774,7 @@ int nfs_updatepage(struct file *file, struct page *page,
774 */ 774 */
775 if (nfs_write_pageuptodate(page, inode) && 775 if (nfs_write_pageuptodate(page, inode) &&
776 inode->i_flock == NULL && 776 inode->i_flock == NULL &&
777 !(file->f_flags & O_SYNC)) { 777 !(file->f_flags & O_DSYNC)) {
778 count = max(count + offset, nfs_page_length(page)); 778 count = max(count + offset, nfs_page_length(page));
779 offset = 0; 779 offset = 0;
780 } 780 }
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index d69e6ae59251..3f959f1879d8 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -142,29 +142,75 @@ static void nilfs_palloc_desc_block_init(struct inode *inode,
142 } 142 }
143} 143}
144 144
145static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
146 int create,
147 void (*init_block)(struct inode *,
148 struct buffer_head *,
149 void *),
150 struct buffer_head **bhp,
151 struct nilfs_bh_assoc *prev,
152 spinlock_t *lock)
153{
154 int ret;
155
156 spin_lock(lock);
157 if (prev->bh && blkoff == prev->blkoff) {
158 get_bh(prev->bh);
159 *bhp = prev->bh;
160 spin_unlock(lock);
161 return 0;
162 }
163 spin_unlock(lock);
164
165 ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp);
166 if (!ret) {
167 spin_lock(lock);
168 /*
169 * The following code must be safe for change of the
170 * cache contents during the get block call.
171 */
172 brelse(prev->bh);
173 get_bh(*bhp);
174 prev->bh = *bhp;
175 prev->blkoff = blkoff;
176 spin_unlock(lock);
177 }
178 return ret;
179}
180
145static int nilfs_palloc_get_desc_block(struct inode *inode, 181static int nilfs_palloc_get_desc_block(struct inode *inode,
146 unsigned long group, 182 unsigned long group,
147 int create, struct buffer_head **bhp) 183 int create, struct buffer_head **bhp)
148{ 184{
149 return nilfs_mdt_get_block(inode, 185 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
150 nilfs_palloc_desc_blkoff(inode, group), 186
151 create, nilfs_palloc_desc_block_init, bhp); 187 return nilfs_palloc_get_block(inode,
188 nilfs_palloc_desc_blkoff(inode, group),
189 create, nilfs_palloc_desc_block_init,
190 bhp, &cache->prev_desc, &cache->lock);
152} 191}
153 192
154static int nilfs_palloc_get_bitmap_block(struct inode *inode, 193static int nilfs_palloc_get_bitmap_block(struct inode *inode,
155 unsigned long group, 194 unsigned long group,
156 int create, struct buffer_head **bhp) 195 int create, struct buffer_head **bhp)
157{ 196{
158 return nilfs_mdt_get_block(inode, 197 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
159 nilfs_palloc_bitmap_blkoff(inode, group), 198
160 create, NULL, bhp); 199 return nilfs_palloc_get_block(inode,
200 nilfs_palloc_bitmap_blkoff(inode, group),
201 create, NULL, bhp,
202 &cache->prev_bitmap, &cache->lock);
161} 203}
162 204
163int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, 205int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
164 int create, struct buffer_head **bhp) 206 int create, struct buffer_head **bhp)
165{ 207{
166 return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), 208 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
167 create, NULL, bhp); 209
210 return nilfs_palloc_get_block(inode,
211 nilfs_palloc_entry_blkoff(inode, nr),
212 create, NULL, bhp,
213 &cache->prev_entry, &cache->lock);
168} 214}
169 215
170static struct nilfs_palloc_group_desc * 216static struct nilfs_palloc_group_desc *
@@ -176,13 +222,6 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode,
176 group % nilfs_palloc_groups_per_desc_block(inode); 222 group % nilfs_palloc_groups_per_desc_block(inode);
177} 223}
178 224
179static unsigned char *
180nilfs_palloc_block_get_bitmap(const struct inode *inode,
181 const struct buffer_head *bh, void *kaddr)
182{
183 return (unsigned char *)(kaddr + bh_offset(bh));
184}
185
186void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, 225void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
187 const struct buffer_head *bh, void *kaddr) 226 const struct buffer_head *bh, void *kaddr)
188{ 227{
@@ -289,8 +328,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
289 if (ret < 0) 328 if (ret < 0)
290 goto out_desc; 329 goto out_desc;
291 bitmap_kaddr = kmap(bitmap_bh->b_page); 330 bitmap_kaddr = kmap(bitmap_bh->b_page);
292 bitmap = nilfs_palloc_block_get_bitmap( 331 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
293 inode, bitmap_bh, bitmap_kaddr);
294 pos = nilfs_palloc_find_available_slot( 332 pos = nilfs_palloc_find_available_slot(
295 inode, group, group_offset, bitmap, 333 inode, group, group_offset, bitmap,
296 entries_per_group); 334 entries_per_group);
@@ -351,8 +389,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
351 desc = nilfs_palloc_block_get_group_desc(inode, group, 389 desc = nilfs_palloc_block_get_group_desc(inode, group,
352 req->pr_desc_bh, desc_kaddr); 390 req->pr_desc_bh, desc_kaddr);
353 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 391 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
354 bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, 392 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
355 bitmap_kaddr);
356 393
357 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 394 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
358 group_offset, bitmap)) 395 group_offset, bitmap))
@@ -385,8 +422,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
385 desc = nilfs_palloc_block_get_group_desc(inode, group, 422 desc = nilfs_palloc_block_get_group_desc(inode, group,
386 req->pr_desc_bh, desc_kaddr); 423 req->pr_desc_bh, desc_kaddr);
387 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); 424 bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
388 bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, 425 bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
389 bitmap_kaddr);
390 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), 426 if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group),
391 group_offset, bitmap)) 427 group_offset, bitmap))
392 printk(KERN_WARNING "%s: entry numer %llu already freed\n", 428 printk(KERN_WARNING "%s: entry numer %llu already freed\n",
@@ -472,8 +508,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
472 desc = nilfs_palloc_block_get_group_desc( 508 desc = nilfs_palloc_block_get_group_desc(
473 inode, group, desc_bh, desc_kaddr); 509 inode, group, desc_bh, desc_kaddr);
474 bitmap_kaddr = kmap(bitmap_bh->b_page); 510 bitmap_kaddr = kmap(bitmap_bh->b_page);
475 bitmap = nilfs_palloc_block_get_bitmap( 511 bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
476 inode, bitmap_bh, bitmap_kaddr);
477 for (j = i, n = 0; 512 for (j = i, n = 0;
478 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 513 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
479 entry_nrs[j]); 514 entry_nrs[j]);
@@ -502,3 +537,30 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
502 } 537 }
503 return 0; 538 return 0;
504} 539}
540
541void nilfs_palloc_setup_cache(struct inode *inode,
542 struct nilfs_palloc_cache *cache)
543{
544 NILFS_MDT(inode)->mi_palloc_cache = cache;
545 spin_lock_init(&cache->lock);
546}
547
548void nilfs_palloc_clear_cache(struct inode *inode)
549{
550 struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache;
551
552 spin_lock(&cache->lock);
553 brelse(cache->prev_desc.bh);
554 brelse(cache->prev_bitmap.bh);
555 brelse(cache->prev_entry.bh);
556 cache->prev_desc.bh = NULL;
557 cache->prev_bitmap.bh = NULL;
558 cache->prev_entry.bh = NULL;
559 spin_unlock(&cache->lock);
560}
561
562void nilfs_palloc_destroy_cache(struct inode *inode)
563{
564 nilfs_palloc_clear_cache(inode);
565 NILFS_MDT(inode)->mi_palloc_cache = NULL;
566}
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 4ace5475c2c7..f4543ac4f560 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -69,4 +69,25 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
69#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 69#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
70#define nilfs_find_next_zero_bit ext2_find_next_zero_bit 70#define nilfs_find_next_zero_bit ext2_find_next_zero_bit
71 71
72/*
73 * persistent object allocator cache
74 */
75
76struct nilfs_bh_assoc {
77 unsigned long blkoff;
78 struct buffer_head *bh;
79};
80
81struct nilfs_palloc_cache {
82 spinlock_t lock;
83 struct nilfs_bh_assoc prev_desc;
84 struct nilfs_bh_assoc prev_bitmap;
85 struct nilfs_bh_assoc prev_entry;
86};
87
88void nilfs_palloc_setup_cache(struct inode *inode,
89 struct nilfs_palloc_cache *cache);
90void nilfs_palloc_clear_cache(struct inode *inode);
91void nilfs_palloc_destroy_cache(struct inode *inode);
92
72#endif /* _NILFS_ALLOC_H */ 93#endif /* _NILFS_ALLOC_H */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 08834df6ec68..f4a14ea2ed9c 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -402,19 +402,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
402void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) 402void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
403{ 403{
404 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); 404 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
405 if (NILFS_MDT(bmap->b_inode))
406 nilfs_mdt_mark_dirty(bmap->b_inode);
407 else
408 mark_inode_dirty(bmap->b_inode);
409} 405}
410 406
411void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) 407void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
412{ 408{
413 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); 409 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
414 if (NILFS_MDT(bmap->b_inode))
415 nilfs_mdt_mark_dirty(bmap->b_inode);
416 else
417 mark_inode_dirty(bmap->b_inode);
418} 410}
419 411
420__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, 412__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 84c25382f8e3..471e269536ae 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -68,9 +68,34 @@ void nilfs_btnode_cache_clear(struct address_space *btnc)
68 truncate_inode_pages(btnc, 0); 68 truncate_inode_pages(btnc, 0);
69} 69}
70 70
71struct buffer_head *
72nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
73{
74 struct inode *inode = NILFS_BTNC_I(btnc);
75 struct buffer_head *bh;
76
77 bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
78 if (unlikely(!bh))
79 return NULL;
80
81 if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
82 buffer_dirty(bh))) {
83 brelse(bh);
84 BUG();
85 }
86 memset(bh->b_data, 0, 1 << inode->i_blkbits);
87 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
88 bh->b_blocknr = blocknr;
89 set_buffer_mapped(bh);
90 set_buffer_uptodate(bh);
91
92 unlock_page(bh->b_page);
93 page_cache_release(bh->b_page);
94 return bh;
95}
96
71int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, 97int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
72 sector_t pblocknr, struct buffer_head **pbh, 98 sector_t pblocknr, struct buffer_head **pbh)
73 int newblk)
74{ 99{
75 struct buffer_head *bh; 100 struct buffer_head *bh;
76 struct inode *inode = NILFS_BTNC_I(btnc); 101 struct inode *inode = NILFS_BTNC_I(btnc);
@@ -81,19 +106,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
81 return -ENOMEM; 106 return -ENOMEM;
82 107
83 err = -EEXIST; /* internal code */ 108 err = -EEXIST; /* internal code */
84 if (newblk) {
85 if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
86 buffer_dirty(bh))) {
87 brelse(bh);
88 BUG();
89 }
90 memset(bh->b_data, 0, 1 << inode->i_blkbits);
91 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
92 bh->b_blocknr = blocknr;
93 set_buffer_mapped(bh);
94 set_buffer_uptodate(bh);
95 goto found;
96 }
97 109
98 if (buffer_uptodate(bh) || buffer_dirty(bh)) 110 if (buffer_uptodate(bh) || buffer_dirty(bh))
99 goto found; 111 goto found;
@@ -135,27 +147,6 @@ out_locked:
135 return err; 147 return err;
136} 148}
137 149
138int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
139 sector_t pblocknr, struct buffer_head **pbh, int newblk)
140{
141 struct buffer_head *bh;
142 int err;
143
144 err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
145 if (err == -EEXIST) /* internal code (cache hit) */
146 return 0;
147 if (unlikely(err))
148 return err;
149
150 bh = *pbh;
151 wait_on_buffer(bh);
152 if (!buffer_uptodate(bh)) {
153 brelse(bh);
154 return -EIO;
155 }
156 return 0;
157}
158
159/** 150/**
160 * nilfs_btnode_delete - delete B-tree node buffer 151 * nilfs_btnode_delete - delete B-tree node buffer
161 * @bh: buffer to be deleted 152 * @bh: buffer to be deleted
@@ -244,12 +235,13 @@ retry:
244 unlock_page(obh->b_page); 235 unlock_page(obh->b_page);
245 } 236 }
246 237
247 err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); 238 nbh = nilfs_btnode_create_block(btnc, newkey);
248 if (likely(!err)) { 239 if (!nbh)
249 BUG_ON(nbh == obh); 240 return -ENOMEM;
250 ctxt->newbh = nbh; 241
251 } 242 BUG_ON(nbh == obh);
252 return err; 243 ctxt->newbh = nbh;
244 return 0;
253 245
254 failed_unlock: 246 failed_unlock:
255 unlock_page(obh->b_page); 247 unlock_page(obh->b_page);
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 3e2275172ed6..07da83f07712 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -40,10 +40,10 @@ struct nilfs_btnode_chkey_ctxt {
40void nilfs_btnode_cache_init_once(struct address_space *); 40void nilfs_btnode_cache_init_once(struct address_space *);
41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); 41void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
42void nilfs_btnode_cache_clear(struct address_space *); 42void nilfs_btnode_cache_clear(struct address_space *);
43struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
44 __u64 blocknr);
43int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, 45int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
44 struct buffer_head **, int); 46 struct buffer_head **);
45int nilfs_btnode_get(struct address_space *, __u64, sector_t,
46 struct buffer_head **, int);
47void nilfs_btnode_delete(struct buffer_head *); 47void nilfs_btnode_delete(struct buffer_head *);
48int nilfs_btnode_prepare_change_key(struct address_space *, 48int nilfs_btnode_prepare_change_key(struct address_space *,
49 struct nilfs_btnode_chkey_ctxt *); 49 struct nilfs_btnode_chkey_ctxt *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index e25b507a474f..7cdd98b8d514 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -114,7 +114,18 @@ static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
114{ 114{
115 struct address_space *btnc = 115 struct address_space *btnc =
116 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; 116 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117 return nilfs_btnode_get(btnc, ptr, 0, bhp, 0); 117 int err;
118
119 err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
120 if (err)
121 return err == -EEXIST ? 0 : err;
122
123 wait_on_buffer(*bhp);
124 if (!buffer_uptodate(*bhp)) {
125 brelse(*bhp);
126 return -EIO;
127 }
128 return 0;
118} 129}
119 130
120static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, 131static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
@@ -122,12 +133,15 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
122{ 133{
123 struct address_space *btnc = 134 struct address_space *btnc =
124 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; 135 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
125 int ret; 136 struct buffer_head *bh;
126 137
127 ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1); 138 bh = nilfs_btnode_create_block(btnc, ptr);
128 if (!ret) 139 if (!bh)
129 set_buffer_nilfs_volatile(*bhp); 140 return -ENOMEM;
130 return ret; 141
142 set_buffer_nilfs_volatile(bh);
143 *bhp = bh;
144 return 0;
131} 145}
132 146
133static inline int 147static inline int
@@ -444,6 +458,18 @@ nilfs_btree_get_node(const struct nilfs_btree *btree,
444 nilfs_btree_get_nonroot_node(path, level); 458 nilfs_btree_get_nonroot_node(path, level);
445} 459}
446 460
461static inline int
462nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
463{
464 if (unlikely(nilfs_btree_node_get_level(node) != level)) {
465 dump_stack();
466 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
467 nilfs_btree_node_get_level(node), level);
468 return 1;
469 }
470 return 0;
471}
472
447static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, 473static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
448 struct nilfs_btree_path *path, 474 struct nilfs_btree_path *path,
449 __u64 key, __u64 *ptrp, int minlevel) 475 __u64 key, __u64 *ptrp, int minlevel)
@@ -467,7 +493,8 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
467 if (ret < 0) 493 if (ret < 0)
468 return ret; 494 return ret;
469 node = nilfs_btree_get_nonroot_node(path, level); 495 node = nilfs_btree_get_nonroot_node(path, level);
470 BUG_ON(level != nilfs_btree_node_get_level(node)); 496 if (nilfs_btree_bad_node(node, level))
497 return -EINVAL;
471 if (!found) 498 if (!found)
472 found = nilfs_btree_node_lookup(node, key, &index); 499 found = nilfs_btree_node_lookup(node, key, &index);
473 else 500 else
@@ -512,7 +539,8 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
512 if (ret < 0) 539 if (ret < 0)
513 return ret; 540 return ret;
514 node = nilfs_btree_get_nonroot_node(path, level); 541 node = nilfs_btree_get_nonroot_node(path, level);
515 BUG_ON(level != nilfs_btree_node_get_level(node)); 542 if (nilfs_btree_bad_node(node, level))
543 return -EINVAL;
516 index = nilfs_btree_node_get_nchildren(node) - 1; 544 index = nilfs_btree_node_get_nchildren(node) - 1;
517 ptr = nilfs_btree_node_get_ptr(btree, node, index); 545 ptr = nilfs_btree_node_get_ptr(btree, node, index);
518 path[level].bp_index = index; 546 path[level].bp_index = index;
@@ -638,13 +666,11 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
638{ 666{
639 if (level < nilfs_btree_height(btree) - 1) { 667 if (level < nilfs_btree_height(btree) - 1) {
640 do { 668 do {
641 lock_buffer(path[level].bp_bh);
642 nilfs_btree_node_set_key( 669 nilfs_btree_node_set_key(
643 nilfs_btree_get_nonroot_node(path, level), 670 nilfs_btree_get_nonroot_node(path, level),
644 path[level].bp_index, key); 671 path[level].bp_index, key);
645 if (!buffer_dirty(path[level].bp_bh)) 672 if (!buffer_dirty(path[level].bp_bh))
646 nilfs_btnode_mark_dirty(path[level].bp_bh); 673 nilfs_btnode_mark_dirty(path[level].bp_bh);
647 unlock_buffer(path[level].bp_bh);
648 } while ((path[level].bp_index == 0) && 674 } while ((path[level].bp_index == 0) &&
649 (++level < nilfs_btree_height(btree) - 1)); 675 (++level < nilfs_btree_height(btree) - 1));
650 } 676 }
@@ -663,13 +689,11 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
663 struct nilfs_btree_node *node; 689 struct nilfs_btree_node *node;
664 690
665 if (level < nilfs_btree_height(btree) - 1) { 691 if (level < nilfs_btree_height(btree) - 1) {
666 lock_buffer(path[level].bp_bh);
667 node = nilfs_btree_get_nonroot_node(path, level); 692 node = nilfs_btree_get_nonroot_node(path, level);
668 nilfs_btree_node_insert(btree, node, *keyp, *ptrp, 693 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
669 path[level].bp_index); 694 path[level].bp_index);
670 if (!buffer_dirty(path[level].bp_bh)) 695 if (!buffer_dirty(path[level].bp_bh))
671 nilfs_btnode_mark_dirty(path[level].bp_bh); 696 nilfs_btnode_mark_dirty(path[level].bp_bh);
672 unlock_buffer(path[level].bp_bh);
673 697
674 if (path[level].bp_index == 0) 698 if (path[level].bp_index == 0)
675 nilfs_btree_promote_key(btree, path, level + 1, 699 nilfs_btree_promote_key(btree, path, level + 1,
@@ -689,9 +713,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
689 struct nilfs_btree_node *node, *left; 713 struct nilfs_btree_node *node, *left;
690 int nchildren, lnchildren, n, move; 714 int nchildren, lnchildren, n, move;
691 715
692 lock_buffer(path[level].bp_bh);
693 lock_buffer(path[level].bp_sib_bh);
694
695 node = nilfs_btree_get_nonroot_node(path, level); 716 node = nilfs_btree_get_nonroot_node(path, level);
696 left = nilfs_btree_get_sib_node(path, level); 717 left = nilfs_btree_get_sib_node(path, level);
697 nchildren = nilfs_btree_node_get_nchildren(node); 718 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -712,9 +733,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
712 if (!buffer_dirty(path[level].bp_sib_bh)) 733 if (!buffer_dirty(path[level].bp_sib_bh))
713 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 734 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
714 735
715 unlock_buffer(path[level].bp_bh);
716 unlock_buffer(path[level].bp_sib_bh);
717
718 nilfs_btree_promote_key(btree, path, level + 1, 736 nilfs_btree_promote_key(btree, path, level + 1,
719 nilfs_btree_node_get_key(node, 0)); 737 nilfs_btree_node_get_key(node, 0));
720 738
@@ -740,9 +758,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
740 struct nilfs_btree_node *node, *right; 758 struct nilfs_btree_node *node, *right;
741 int nchildren, rnchildren, n, move; 759 int nchildren, rnchildren, n, move;
742 760
743 lock_buffer(path[level].bp_bh);
744 lock_buffer(path[level].bp_sib_bh);
745
746 node = nilfs_btree_get_nonroot_node(path, level); 761 node = nilfs_btree_get_nonroot_node(path, level);
747 right = nilfs_btree_get_sib_node(path, level); 762 right = nilfs_btree_get_sib_node(path, level);
748 nchildren = nilfs_btree_node_get_nchildren(node); 763 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -763,9 +778,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
763 if (!buffer_dirty(path[level].bp_sib_bh)) 778 if (!buffer_dirty(path[level].bp_sib_bh))
764 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 779 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
765 780
766 unlock_buffer(path[level].bp_bh);
767 unlock_buffer(path[level].bp_sib_bh);
768
769 path[level + 1].bp_index++; 781 path[level + 1].bp_index++;
770 nilfs_btree_promote_key(btree, path, level + 1, 782 nilfs_btree_promote_key(btree, path, level + 1,
771 nilfs_btree_node_get_key(right, 0)); 783 nilfs_btree_node_get_key(right, 0));
@@ -794,9 +806,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
794 __u64 newptr; 806 __u64 newptr;
795 int nchildren, n, move; 807 int nchildren, n, move;
796 808
797 lock_buffer(path[level].bp_bh);
798 lock_buffer(path[level].bp_sib_bh);
799
800 node = nilfs_btree_get_nonroot_node(path, level); 809 node = nilfs_btree_get_nonroot_node(path, level);
801 right = nilfs_btree_get_sib_node(path, level); 810 right = nilfs_btree_get_sib_node(path, level);
802 nchildren = nilfs_btree_node_get_nchildren(node); 811 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -815,9 +824,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
815 if (!buffer_dirty(path[level].bp_sib_bh)) 824 if (!buffer_dirty(path[level].bp_sib_bh))
816 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 825 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
817 826
818 unlock_buffer(path[level].bp_bh);
819 unlock_buffer(path[level].bp_sib_bh);
820
821 newkey = nilfs_btree_node_get_key(right, 0); 827 newkey = nilfs_btree_node_get_key(right, 0);
822 newptr = path[level].bp_newreq.bpr_ptr; 828 newptr = path[level].bp_newreq.bpr_ptr;
823 829
@@ -852,8 +858,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
852 struct nilfs_btree_node *root, *child; 858 struct nilfs_btree_node *root, *child;
853 int n; 859 int n;
854 860
855 lock_buffer(path[level].bp_sib_bh);
856
857 root = nilfs_btree_get_root(btree); 861 root = nilfs_btree_get_root(btree);
858 child = nilfs_btree_get_sib_node(path, level); 862 child = nilfs_btree_get_sib_node(path, level);
859 863
@@ -865,8 +869,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
865 if (!buffer_dirty(path[level].bp_sib_bh)) 869 if (!buffer_dirty(path[level].bp_sib_bh))
866 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 870 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
867 871
868 unlock_buffer(path[level].bp_sib_bh);
869
870 path[level].bp_bh = path[level].bp_sib_bh; 872 path[level].bp_bh = path[level].bp_sib_bh;
871 path[level].bp_sib_bh = NULL; 873 path[level].bp_sib_bh = NULL;
872 874
@@ -1023,11 +1025,9 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1023 1025
1024 stats->bs_nblocks++; 1026 stats->bs_nblocks++;
1025 1027
1026 lock_buffer(bh);
1027 nilfs_btree_node_init(btree, 1028 nilfs_btree_node_init(btree,
1028 (struct nilfs_btree_node *)bh->b_data, 1029 (struct nilfs_btree_node *)bh->b_data,
1029 0, level, 0, NULL, NULL); 1030 0, level, 0, NULL, NULL);
1030 unlock_buffer(bh);
1031 path[level].bp_sib_bh = bh; 1031 path[level].bp_sib_bh = bh;
1032 path[level].bp_op = nilfs_btree_split; 1032 path[level].bp_op = nilfs_btree_split;
1033 } 1033 }
@@ -1052,10 +1052,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1052 if (ret < 0) 1052 if (ret < 0)
1053 goto err_out_curr_node; 1053 goto err_out_curr_node;
1054 1054
1055 lock_buffer(bh);
1056 nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 1055 nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1057 0, level, 0, NULL, NULL); 1056 0, level, 0, NULL, NULL);
1058 unlock_buffer(bh);
1059 path[level].bp_sib_bh = bh; 1057 path[level].bp_sib_bh = bh;
1060 path[level].bp_op = nilfs_btree_grow; 1058 path[level].bp_op = nilfs_btree_grow;
1061 1059
@@ -1154,13 +1152,11 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1154 struct nilfs_btree_node *node; 1152 struct nilfs_btree_node *node;
1155 1153
1156 if (level < nilfs_btree_height(btree) - 1) { 1154 if (level < nilfs_btree_height(btree) - 1) {
1157 lock_buffer(path[level].bp_bh);
1158 node = nilfs_btree_get_nonroot_node(path, level); 1155 node = nilfs_btree_get_nonroot_node(path, level);
1159 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1156 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1160 path[level].bp_index); 1157 path[level].bp_index);
1161 if (!buffer_dirty(path[level].bp_bh)) 1158 if (!buffer_dirty(path[level].bp_bh))
1162 nilfs_btnode_mark_dirty(path[level].bp_bh); 1159 nilfs_btnode_mark_dirty(path[level].bp_bh);
1163 unlock_buffer(path[level].bp_bh);
1164 if (path[level].bp_index == 0) 1160 if (path[level].bp_index == 0)
1165 nilfs_btree_promote_key(btree, path, level + 1, 1161 nilfs_btree_promote_key(btree, path, level + 1,
1166 nilfs_btree_node_get_key(node, 0)); 1162 nilfs_btree_node_get_key(node, 0));
@@ -1180,9 +1176,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1180 1176
1181 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1177 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1182 1178
1183 lock_buffer(path[level].bp_bh);
1184 lock_buffer(path[level].bp_sib_bh);
1185
1186 node = nilfs_btree_get_nonroot_node(path, level); 1179 node = nilfs_btree_get_nonroot_node(path, level);
1187 left = nilfs_btree_get_sib_node(path, level); 1180 left = nilfs_btree_get_sib_node(path, level);
1188 nchildren = nilfs_btree_node_get_nchildren(node); 1181 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -1197,9 +1190,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1197 if (!buffer_dirty(path[level].bp_sib_bh)) 1190 if (!buffer_dirty(path[level].bp_sib_bh))
1198 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1191 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1199 1192
1200 unlock_buffer(path[level].bp_bh);
1201 unlock_buffer(path[level].bp_sib_bh);
1202
1203 nilfs_btree_promote_key(btree, path, level + 1, 1193 nilfs_btree_promote_key(btree, path, level + 1,
1204 nilfs_btree_node_get_key(node, 0)); 1194 nilfs_btree_node_get_key(node, 0));
1205 1195
@@ -1217,9 +1207,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1217 1207
1218 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1208 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1219 1209
1220 lock_buffer(path[level].bp_bh);
1221 lock_buffer(path[level].bp_sib_bh);
1222
1223 node = nilfs_btree_get_nonroot_node(path, level); 1210 node = nilfs_btree_get_nonroot_node(path, level);
1224 right = nilfs_btree_get_sib_node(path, level); 1211 right = nilfs_btree_get_sib_node(path, level);
1225 nchildren = nilfs_btree_node_get_nchildren(node); 1212 nchildren = nilfs_btree_node_get_nchildren(node);
@@ -1234,9 +1221,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1234 if (!buffer_dirty(path[level].bp_sib_bh)) 1221 if (!buffer_dirty(path[level].bp_sib_bh))
1235 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1222 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1236 1223
1237 unlock_buffer(path[level].bp_bh);
1238 unlock_buffer(path[level].bp_sib_bh);
1239
1240 path[level + 1].bp_index++; 1224 path[level + 1].bp_index++;
1241 nilfs_btree_promote_key(btree, path, level + 1, 1225 nilfs_btree_promote_key(btree, path, level + 1,
1242 nilfs_btree_node_get_key(right, 0)); 1226 nilfs_btree_node_get_key(right, 0));
@@ -1255,9 +1239,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1255 1239
1256 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1240 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1257 1241
1258 lock_buffer(path[level].bp_bh);
1259 lock_buffer(path[level].bp_sib_bh);
1260
1261 node = nilfs_btree_get_nonroot_node(path, level); 1242 node = nilfs_btree_get_nonroot_node(path, level);
1262 left = nilfs_btree_get_sib_node(path, level); 1243 left = nilfs_btree_get_sib_node(path, level);
1263 1244
@@ -1268,9 +1249,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1268 if (!buffer_dirty(path[level].bp_sib_bh)) 1249 if (!buffer_dirty(path[level].bp_sib_bh))
1269 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1250 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1270 1251
1271 unlock_buffer(path[level].bp_bh);
1272 unlock_buffer(path[level].bp_sib_bh);
1273
1274 nilfs_btnode_delete(path[level].bp_bh); 1252 nilfs_btnode_delete(path[level].bp_bh);
1275 path[level].bp_bh = path[level].bp_sib_bh; 1253 path[level].bp_bh = path[level].bp_sib_bh;
1276 path[level].bp_sib_bh = NULL; 1254 path[level].bp_sib_bh = NULL;
@@ -1286,9 +1264,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1286 1264
1287 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1265 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1288 1266
1289 lock_buffer(path[level].bp_bh);
1290 lock_buffer(path[level].bp_sib_bh);
1291
1292 node = nilfs_btree_get_nonroot_node(path, level); 1267 node = nilfs_btree_get_nonroot_node(path, level);
1293 right = nilfs_btree_get_sib_node(path, level); 1268 right = nilfs_btree_get_sib_node(path, level);
1294 1269
@@ -1299,9 +1274,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1299 if (!buffer_dirty(path[level].bp_bh)) 1274 if (!buffer_dirty(path[level].bp_bh))
1300 nilfs_btnode_mark_dirty(path[level].bp_bh); 1275 nilfs_btnode_mark_dirty(path[level].bp_bh);
1301 1276
1302 unlock_buffer(path[level].bp_bh);
1303 unlock_buffer(path[level].bp_sib_bh);
1304
1305 nilfs_btnode_delete(path[level].bp_sib_bh); 1277 nilfs_btnode_delete(path[level].bp_sib_bh);
1306 path[level].bp_sib_bh = NULL; 1278 path[level].bp_sib_bh = NULL;
1307 path[level + 1].bp_index++; 1279 path[level + 1].bp_index++;
@@ -1316,7 +1288,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1316 1288
1317 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1289 nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1318 1290
1319 lock_buffer(path[level].bp_bh);
1320 root = nilfs_btree_get_root(btree); 1291 root = nilfs_btree_get_root(btree);
1321 child = nilfs_btree_get_nonroot_node(path, level); 1292 child = nilfs_btree_get_nonroot_node(path, level);
1322 1293
@@ -1324,7 +1295,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1324 nilfs_btree_node_set_level(root, level); 1295 nilfs_btree_node_set_level(root, level);
1325 n = nilfs_btree_node_get_nchildren(child); 1296 n = nilfs_btree_node_get_nchildren(child);
1326 nilfs_btree_node_move_left(btree, root, child, n); 1297 nilfs_btree_node_move_left(btree, root, child, n);
1327 unlock_buffer(path[level].bp_bh);
1328 1298
1329 nilfs_btnode_delete(path[level].bp_bh); 1299 nilfs_btnode_delete(path[level].bp_bh);
1330 path[level].bp_bh = NULL; 1300 path[level].bp_bh = NULL;
@@ -1699,7 +1669,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1699 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); 1669 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1700 1670
1701 /* create child node at level 1 */ 1671 /* create child node at level 1 */
1702 lock_buffer(bh);
1703 node = (struct nilfs_btree_node *)bh->b_data; 1672 node = (struct nilfs_btree_node *)bh->b_data;
1704 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); 1673 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1705 nilfs_btree_node_insert(btree, node, 1674 nilfs_btree_node_insert(btree, node,
@@ -1709,7 +1678,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1709 if (!nilfs_bmap_dirty(bmap)) 1678 if (!nilfs_bmap_dirty(bmap))
1710 nilfs_bmap_set_dirty(bmap); 1679 nilfs_bmap_set_dirty(bmap);
1711 1680
1712 unlock_buffer(bh);
1713 brelse(bh); 1681 brelse(bh);
1714 1682
1715 /* create root node at level 2 */ 1683 /* create root node at level 2 */
@@ -2050,7 +2018,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
2050 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2018 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2051 level < NILFS_BTREE_LEVEL_MAX; 2019 level < NILFS_BTREE_LEVEL_MAX;
2052 level++) 2020 level++)
2053 list_splice(&lists[level], listp->prev); 2021 list_splice_tail(&lists[level], listp);
2054} 2022}
2055 2023
2056static int nilfs_btree_assign_p(struct nilfs_btree *btree, 2024static int nilfs_btree_assign_p(struct nilfs_btree *btree,
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 0e72bbbc6b64..4b82d84ade75 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -34,28 +34,6 @@ struct nilfs_btree;
34struct nilfs_btree_path; 34struct nilfs_btree_path;
35 35
36/** 36/**
37 * struct nilfs_btree_node - B-tree node
38 * @bn_flags: flags
39 * @bn_level: level
40 * @bn_nchildren: number of children
41 * @bn_pad: padding
42 */
43struct nilfs_btree_node {
44 __u8 bn_flags;
45 __u8 bn_level;
46 __le16 bn_nchildren;
47 __le32 bn_pad;
48};
49
50/* flags */
51#define NILFS_BTREE_NODE_ROOT 0x01
52
53/* level */
54#define NILFS_BTREE_LEVEL_DATA 0
55#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1)
56#define NILFS_BTREE_LEVEL_MAX 14
57
58/**
59 * struct nilfs_btree - B-tree structure 37 * struct nilfs_btree - B-tree structure
60 * @bt_bmap: bmap base structure 38 * @bt_bmap: bmap base structure
61 */ 39 */
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 3f5d5d06f53c..d5ad54e204a5 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -926,3 +926,29 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
926 up_read(&NILFS_MDT(cpfile)->mi_sem); 926 up_read(&NILFS_MDT(cpfile)->mi_sem);
927 return ret; 927 return ret;
928} 928}
929
930/**
931 * nilfs_cpfile_read - read cpfile inode
932 * @cpfile: cpfile inode
933 * @raw_inode: on-disk cpfile inode
934 */
935int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode)
936{
937 return nilfs_read_inode_common(cpfile, raw_inode);
938}
939
940/**
941 * nilfs_cpfile_new - create cpfile
942 * @nilfs: nilfs object
943 * @cpsize: size of a checkpoint entry
944 */
945struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize)
946{
947 struct inode *cpfile;
948
949 cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0);
950 if (cpfile)
951 nilfs_mdt_set_entry_size(cpfile, cpsize,
952 sizeof(struct nilfs_cpfile_header));
953 return cpfile;
954}
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index debea896e701..bc0809e0ab43 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -40,4 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
40ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, 40ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
41 size_t); 41 size_t);
42 42
43int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode);
44struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize);
45
43#endif /* _NILFS_CPFILE_H */ 46#endif /* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 1ff8e15bd36b..187dd07ba86c 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -33,6 +33,16 @@
33#define NILFS_CNO_MIN ((__u64)1) 33#define NILFS_CNO_MIN ((__u64)1)
34#define NILFS_CNO_MAX (~(__u64)0) 34#define NILFS_CNO_MAX (~(__u64)0)
35 35
36struct nilfs_dat_info {
37 struct nilfs_mdt_info mi;
38 struct nilfs_palloc_cache palloc_cache;
39};
40
41static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
42{
43 return (struct nilfs_dat_info *)NILFS_MDT(dat);
44}
45
36static int nilfs_dat_prepare_entry(struct inode *dat, 46static int nilfs_dat_prepare_entry(struct inode *dat,
37 struct nilfs_palloc_req *req, int create) 47 struct nilfs_palloc_req *req, int create)
38{ 48{
@@ -425,3 +435,40 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
425 435
426 return nvi; 436 return nvi;
427} 437}
438
439/**
440 * nilfs_dat_read - read dat inode
441 * @dat: dat inode
442 * @raw_inode: on-disk dat inode
443 */
444int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode)
445{
446 return nilfs_read_inode_common(dat, raw_inode);
447}
448
449/**
450 * nilfs_dat_new - create dat file
451 * @nilfs: nilfs object
452 * @entry_size: size of a dat entry
453 */
454struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size)
455{
456 static struct lock_class_key dat_lock_key;
457 struct inode *dat;
458 struct nilfs_dat_info *di;
459 int err;
460
461 dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di));
462 if (dat) {
463 err = nilfs_palloc_init_blockgroup(dat, entry_size);
464 if (unlikely(err)) {
465 nilfs_mdt_destroy(dat);
466 return NULL;
467 }
468
469 di = NILFS_DAT_I(dat);
470 lockdep_set_class(&di->mi.mi_sem, &dat_lock_key);
471 nilfs_palloc_setup_cache(dat, &di->palloc_cache);
472 }
473 return dat;
474}
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index 406070d3ff49..d31c3aab0efe 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -53,4 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t);
53int nilfs_dat_move(struct inode *, __u64, sector_t); 53int nilfs_dat_move(struct inode *, __u64, sector_t);
54ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); 54ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
55 55
56int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode);
57struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size);
58
56#endif /* _NILFS_DAT_H */ 59#endif /* _NILFS_DAT_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index e097099bfc8f..76d803e060a9 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -99,9 +99,9 @@ static int nilfs_prepare_chunk(struct page *page,
99 NULL, nilfs_get_block); 99 NULL, nilfs_get_block);
100} 100}
101 101
102static int nilfs_commit_chunk(struct page *page, 102static void nilfs_commit_chunk(struct page *page,
103 struct address_space *mapping, 103 struct address_space *mapping,
104 unsigned from, unsigned to) 104 unsigned from, unsigned to)
105{ 105{
106 struct inode *dir = mapping->host; 106 struct inode *dir = mapping->host;
107 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); 107 struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb);
@@ -112,15 +112,13 @@ static int nilfs_commit_chunk(struct page *page,
112 112
113 nr_dirty = nilfs_page_count_clean_buffers(page, from, to); 113 nr_dirty = nilfs_page_count_clean_buffers(page, from, to);
114 copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); 114 copied = block_write_end(NULL, mapping, pos, len, len, page, NULL);
115 if (pos + copied > dir->i_size) { 115 if (pos + copied > dir->i_size)
116 i_size_write(dir, pos + copied); 116 i_size_write(dir, pos + copied);
117 mark_inode_dirty(dir);
118 }
119 if (IS_DIRSYNC(dir)) 117 if (IS_DIRSYNC(dir))
120 nilfs_set_transaction_flag(NILFS_TI_SYNC); 118 nilfs_set_transaction_flag(NILFS_TI_SYNC);
121 err = nilfs_set_file_dirty(sbi, dir, nr_dirty); 119 err = nilfs_set_file_dirty(sbi, dir, nr_dirty);
120 WARN_ON(err); /* do not happen */
122 unlock_page(page); 121 unlock_page(page);
123 return err;
124} 122}
125 123
126static void nilfs_check_page(struct page *page) 124static void nilfs_check_page(struct page *page)
@@ -455,11 +453,10 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
455 BUG_ON(err); 453 BUG_ON(err);
456 de->inode = cpu_to_le64(inode->i_ino); 454 de->inode = cpu_to_le64(inode->i_ino);
457 nilfs_set_de_type(de, inode); 455 nilfs_set_de_type(de, inode);
458 err = nilfs_commit_chunk(page, mapping, from, to); 456 nilfs_commit_chunk(page, mapping, from, to);
459 nilfs_put_page(page); 457 nilfs_put_page(page);
460 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 458 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
461/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ 459/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
462 mark_inode_dirty(dir);
463} 460}
464 461
465/* 462/*
@@ -548,10 +545,10 @@ got_it:
548 memcpy(de->name, name, namelen); 545 memcpy(de->name, name, namelen);
549 de->inode = cpu_to_le64(inode->i_ino); 546 de->inode = cpu_to_le64(inode->i_ino);
550 nilfs_set_de_type(de, inode); 547 nilfs_set_de_type(de, inode);
551 err = nilfs_commit_chunk(page, page->mapping, from, to); 548 nilfs_commit_chunk(page, page->mapping, from, to);
552 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 549 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
553/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ 550/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
554 mark_inode_dirty(dir); 551 nilfs_mark_inode_dirty(dir);
555 /* OFFSET_CACHE */ 552 /* OFFSET_CACHE */
556out_put: 553out_put:
557 nilfs_put_page(page); 554 nilfs_put_page(page);
@@ -595,10 +592,9 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
595 if (pde) 592 if (pde)
596 pde->rec_len = cpu_to_le16(to - from); 593 pde->rec_len = cpu_to_le16(to - from);
597 dir->inode = 0; 594 dir->inode = 0;
598 err = nilfs_commit_chunk(page, mapping, from, to); 595 nilfs_commit_chunk(page, mapping, from, to);
599 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 596 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
600/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ 597/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
601 mark_inode_dirty(inode);
602out: 598out:
603 nilfs_put_page(page); 599 nilfs_put_page(page);
604 return err; 600 return err;
@@ -640,7 +636,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
640 memcpy(de->name, "..\0", 4); 636 memcpy(de->name, "..\0", 4);
641 nilfs_set_de_type(de, inode); 637 nilfs_set_de_type(de, inode);
642 kunmap_atomic(kaddr, KM_USER0); 638 kunmap_atomic(kaddr, KM_USER0);
643 err = nilfs_commit_chunk(page, mapping, 0, chunk_size); 639 nilfs_commit_chunk(page, mapping, 0, chunk_size);
644fail: 640fail:
645 page_cache_release(page); 641 page_cache_release(page);
646 return err; 642 return err;
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index 93383c5cee90..dd5f7e0a95f6 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -61,6 +61,8 @@ void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs)
61 61
62 nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); 62 nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap);
63 63
64 nilfs_palloc_clear_cache(dat);
65 nilfs_palloc_clear_cache(gcdat);
64 nilfs_clear_dirty_pages(mapping); 66 nilfs_clear_dirty_pages(mapping);
65 nilfs_copy_back_pages(mapping, gmapping); 67 nilfs_copy_back_pages(mapping, gmapping);
66 /* note: mdt dirty flags should be cleared by segctor. */ 68 /* note: mdt dirty flags should be cleared by segctor. */
@@ -79,6 +81,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
79 gcdat->i_state = I_CLEAR; 81 gcdat->i_state = I_CLEAR;
80 gii->i_flags = 0; 82 gii->i_flags = 0;
81 83
84 nilfs_palloc_clear_cache(gcdat);
82 truncate_inode_pages(gcdat->i_mapping, 0); 85 truncate_inode_pages(gcdat->i_mapping, 0);
83 truncate_inode_pages(&gii->i_btnode_cache, 0); 86 truncate_inode_pages(&gii->i_btnode_cache, 0);
84} 87}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index e6de0a27ab5d..e16a6664dfa2 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -149,7 +149,7 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
149 __u64 vbn, struct buffer_head **out_bh) 149 __u64 vbn, struct buffer_head **out_bh)
150{ 150{
151 int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, 151 int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
152 vbn ? : pbn, pbn, out_bh, 0); 152 vbn ? : pbn, pbn, out_bh);
153 if (ret == -EEXIST) /* internal code (cache hit) */ 153 if (ret == -EEXIST) /* internal code (cache hit) */
154 ret = 0; 154 ret = 0;
155 return ret; 155 return ret;
@@ -212,9 +212,10 @@ void nilfs_destroy_gccache(struct the_nilfs *nilfs)
212static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, 212static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino,
213 __u64 cno) 213 __u64 cno)
214{ 214{
215 struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); 215 struct inode *inode;
216 struct nilfs_inode_info *ii; 216 struct nilfs_inode_info *ii;
217 217
218 inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0);
218 if (!inode) 219 if (!inode)
219 return NULL; 220 return NULL;
220 221
@@ -265,7 +266,6 @@ struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno)
265 */ 266 */
266void nilfs_clear_gcinode(struct inode *inode) 267void nilfs_clear_gcinode(struct inode *inode)
267{ 268{
268 nilfs_mdt_clear(inode);
269 nilfs_mdt_destroy(inode); 269 nilfs_mdt_destroy(inode);
270} 270}
271 271
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index de86401f209f..922d9dd42c8f 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -29,6 +29,17 @@
29#include "alloc.h" 29#include "alloc.h"
30#include "ifile.h" 30#include "ifile.h"
31 31
32
33struct nilfs_ifile_info {
34 struct nilfs_mdt_info mi;
35 struct nilfs_palloc_cache palloc_cache;
36};
37
38static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile)
39{
40 return (struct nilfs_ifile_info *)NILFS_MDT(ifile);
41}
42
32/** 43/**
33 * nilfs_ifile_create_inode - create a new disk inode 44 * nilfs_ifile_create_inode - create a new disk inode
34 * @ifile: ifile inode 45 * @ifile: ifile inode
@@ -148,3 +159,27 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,
148 } 159 }
149 return err; 160 return err;
150} 161}
162
163/**
164 * nilfs_ifile_new - create inode file
165 * @sbi: nilfs_sb_info struct
166 * @inode_size: size of an inode
167 */
168struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size)
169{
170 struct inode *ifile;
171 int err;
172
173 ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO,
174 sizeof(struct nilfs_ifile_info));
175 if (ifile) {
176 err = nilfs_palloc_init_blockgroup(ifile, inode_size);
177 if (unlikely(err)) {
178 nilfs_mdt_destroy(ifile);
179 return NULL;
180 }
181 nilfs_palloc_setup_cache(ifile,
182 &NILFS_IFILE_I(ifile)->palloc_cache);
183 }
184 return ifile;
185}
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index ecc3ba76db47..cbca32e498f2 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -49,4 +49,6 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
49int nilfs_ifile_delete_inode(struct inode *, ino_t); 49int nilfs_ifile_delete_inode(struct inode *, ino_t);
50int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); 50int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
51 51
52struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size);
53
52#endif /* _NILFS_IFILE_H */ 54#endif /* _NILFS_IFILE_H */
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2a0a5a3ac134..7868cc122ac7 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -97,6 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
97 nilfs_transaction_abort(inode->i_sb); 97 nilfs_transaction_abort(inode->i_sb);
98 goto out; 98 goto out;
99 } 99 }
100 nilfs_mark_inode_dirty(inode);
100 nilfs_transaction_commit(inode->i_sb); /* never fails */ 101 nilfs_transaction_commit(inode->i_sb); /* never fails */
101 /* Error handling should be detailed */ 102 /* Error handling should be detailed */
102 set_buffer_new(bh_result); 103 set_buffer_new(bh_result);
@@ -322,7 +323,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
322 nilfs_init_acl(), proper cancellation of 323 nilfs_init_acl(), proper cancellation of
323 above jobs should be considered */ 324 above jobs should be considered */
324 325
325 mark_inode_dirty(inode);
326 return inode; 326 return inode;
327 327
328 failed_acl: 328 failed_acl:
@@ -525,7 +525,6 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh)
525 525
526 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); 526 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh);
527 527
528 /* The buffer is guarded with lock_buffer() by the caller */
529 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) 528 if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state))
530 memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); 529 memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size);
531 set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); 530 set_bit(NILFS_I_INODE_DIRTY, &ii->i_state);
@@ -599,6 +598,7 @@ void nilfs_truncate(struct inode *inode)
599 if (IS_SYNC(inode)) 598 if (IS_SYNC(inode))
600 nilfs_set_transaction_flag(NILFS_TI_SYNC); 599 nilfs_set_transaction_flag(NILFS_TI_SYNC);
601 600
601 nilfs_mark_inode_dirty(inode);
602 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); 602 nilfs_set_file_dirty(NILFS_SB(sb), inode, 0);
603 nilfs_transaction_commit(sb); 603 nilfs_transaction_commit(sb);
604 /* May construct a logical segment and may fail in sync mode. 604 /* May construct a logical segment and may fail in sync mode.
@@ -623,6 +623,7 @@ void nilfs_delete_inode(struct inode *inode)
623 truncate_inode_pages(&inode->i_data, 0); 623 truncate_inode_pages(&inode->i_data, 0);
624 624
625 nilfs_truncate_bmap(ii, 0); 625 nilfs_truncate_bmap(ii, 0);
626 nilfs_mark_inode_dirty(inode);
626 nilfs_free_inode(inode); 627 nilfs_free_inode(inode);
627 /* nilfs_free_inode() marks inode buffer dirty */ 628 /* nilfs_free_inode() marks inode buffer dirty */
628 if (IS_SYNC(inode)) 629 if (IS_SYNC(inode))
@@ -745,9 +746,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
745 "failed to reget inode block.\n"); 746 "failed to reget inode block.\n");
746 return err; 747 return err;
747 } 748 }
748 lock_buffer(ibh);
749 nilfs_update_inode(inode, ibh); 749 nilfs_update_inode(inode, ibh);
750 unlock_buffer(ibh);
751 nilfs_mdt_mark_buffer_dirty(ibh); 750 nilfs_mdt_mark_buffer_dirty(ibh);
752 nilfs_mdt_mark_dirty(sbi->s_ifile); 751 nilfs_mdt_mark_dirty(sbi->s_ifile);
753 brelse(ibh); 752 brelse(ibh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index f6326112d647..06713ffcc7f2 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -186,7 +186,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
186} 186}
187 187
188static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, 188static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
189 struct buffer_head **out_bh) 189 int readahead, struct buffer_head **out_bh)
190{ 190{
191 struct buffer_head *first_bh, *bh; 191 struct buffer_head *first_bh, *bh;
192 unsigned long blkoff; 192 unsigned long blkoff;
@@ -200,16 +200,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
200 if (unlikely(err)) 200 if (unlikely(err))
201 goto failed; 201 goto failed;
202 202
203 blkoff = block + 1; 203 if (readahead) {
204 for (i = 0; i < nr_ra_blocks; i++, blkoff++) { 204 blkoff = block + 1;
205 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); 205 for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
206 if (likely(!err || err == -EEXIST)) 206 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
207 brelse(bh); 207 if (likely(!err || err == -EEXIST))
208 else if (err != -EBUSY) 208 brelse(bh);
209 break; /* abort readahead if bmap lookup failed */ 209 else if (err != -EBUSY)
210 210 break;
211 if (!buffer_locked(first_bh)) 211 /* abort readahead if bmap lookup failed */
212 goto out_no_wait; 212 if (!buffer_locked(first_bh))
213 goto out_no_wait;
214 }
213 } 215 }
214 216
215 wait_on_buffer(first_bh); 217 wait_on_buffer(first_bh);
@@ -263,7 +265,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
263 265
264 /* Should be rewritten with merging nilfs_mdt_read_block() */ 266 /* Should be rewritten with merging nilfs_mdt_read_block() */
265 retry: 267 retry:
266 ret = nilfs_mdt_read_block(inode, blkoff, out_bh); 268 ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh);
267 if (!create || ret != -ENOENT) 269 if (!create || ret != -ENOENT)
268 return ret; 270 return ret;
269 271
@@ -371,7 +373,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
371 struct buffer_head *bh; 373 struct buffer_head *bh;
372 int err; 374 int err;
373 375
374 err = nilfs_mdt_read_block(inode, block, &bh); 376 err = nilfs_mdt_read_block(inode, block, 0, &bh);
375 if (unlikely(err)) 377 if (unlikely(err))
376 return err; 378 return err;
377 nilfs_mark_buffer_dirty(bh); 379 nilfs_mark_buffer_dirty(bh);
@@ -445,9 +447,17 @@ static const struct file_operations def_mdt_fops;
445 * longer than those of the super block structs; they may continue for 447 * longer than those of the super block structs; they may continue for
446 * several consecutive mounts/umounts. This would need discussions. 448 * several consecutive mounts/umounts. This would need discussions.
447 */ 449 */
450/**
451 * nilfs_mdt_new_common - allocate a pseudo inode for metadata file
452 * @nilfs: nilfs object
453 * @sb: super block instance the metadata file belongs to
454 * @ino: inode number
455 * @gfp_mask: gfp mask for data pages
456 * @objsz: size of the private object attached to inode->i_private
457 */
448struct inode * 458struct inode *
449nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, 459nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
450 ino_t ino, gfp_t gfp_mask) 460 ino_t ino, gfp_t gfp_mask, size_t objsz)
451{ 461{
452 struct inode *inode = nilfs_alloc_inode_common(nilfs); 462 struct inode *inode = nilfs_alloc_inode_common(nilfs);
453 463
@@ -455,8 +465,9 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
455 return NULL; 465 return NULL;
456 else { 466 else {
457 struct address_space * const mapping = &inode->i_data; 467 struct address_space * const mapping = &inode->i_data;
458 struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); 468 struct nilfs_mdt_info *mi;
459 469
470 mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS);
460 if (!mi) { 471 if (!mi) {
461 nilfs_destroy_inode(inode); 472 nilfs_destroy_inode(inode);
462 return NULL; 473 return NULL;
@@ -513,11 +524,11 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
513} 524}
514 525
515struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, 526struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
516 ino_t ino) 527 ino_t ino, size_t objsz)
517{ 528{
518 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, 529 struct inode *inode;
519 NILFS_MDT_GFP);
520 530
531 inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz);
521 if (!inode) 532 if (!inode)
522 return NULL; 533 return NULL;
523 534
@@ -544,14 +555,15 @@ void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow)
544 &NILFS_I(orig)->i_btnode_cache; 555 &NILFS_I(orig)->i_btnode_cache;
545} 556}
546 557
547void nilfs_mdt_clear(struct inode *inode) 558static void nilfs_mdt_clear(struct inode *inode)
548{ 559{
549 struct nilfs_inode_info *ii = NILFS_I(inode); 560 struct nilfs_inode_info *ii = NILFS_I(inode);
550 561
551 invalidate_mapping_pages(inode->i_mapping, 0, -1); 562 invalidate_mapping_pages(inode->i_mapping, 0, -1);
552 truncate_inode_pages(inode->i_mapping, 0); 563 truncate_inode_pages(inode->i_mapping, 0);
553 564
554 nilfs_bmap_clear(ii->i_bmap); 565 if (test_bit(NILFS_I_BMAP, &ii->i_state))
566 nilfs_bmap_clear(ii->i_bmap);
555 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 567 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
556} 568}
557 569
@@ -559,6 +571,10 @@ void nilfs_mdt_destroy(struct inode *inode)
559{ 571{
560 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 572 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
561 573
574 if (mdi->mi_palloc_cache)
575 nilfs_palloc_destroy_cache(inode);
576 nilfs_mdt_clear(inode);
577
562 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 578 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
563 kfree(mdi); 579 kfree(mdi);
564 nilfs_destroy_inode(inode); 580 nilfs_destroy_inode(inode);
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index 431599733c9b..6c4bbb0470fc 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -36,6 +36,7 @@
36 * @mi_entry_size: size of an entry 36 * @mi_entry_size: size of an entry
37 * @mi_first_entry_offset: offset to the first entry 37 * @mi_first_entry_offset: offset to the first entry
38 * @mi_entries_per_block: number of entries in a block 38 * @mi_entries_per_block: number of entries in a block
39 * @mi_palloc_cache: persistent object allocator cache
39 * @mi_blocks_per_group: number of blocks in a group 40 * @mi_blocks_per_group: number of blocks in a group
40 * @mi_blocks_per_desc_block: number of blocks per descriptor block 41 * @mi_blocks_per_desc_block: number of blocks per descriptor block
41 */ 42 */
@@ -46,6 +47,7 @@ struct nilfs_mdt_info {
46 unsigned mi_entry_size; 47 unsigned mi_entry_size;
47 unsigned mi_first_entry_offset; 48 unsigned mi_first_entry_offset;
48 unsigned long mi_entries_per_block; 49 unsigned long mi_entries_per_block;
50 struct nilfs_palloc_cache *mi_palloc_cache;
49 unsigned long mi_blocks_per_group; 51 unsigned long mi_blocks_per_group;
50 unsigned long mi_blocks_per_desc_block; 52 unsigned long mi_blocks_per_desc_block;
51}; 53};
@@ -74,11 +76,11 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long);
74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); 76int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
75int nilfs_mdt_fetch_dirty(struct inode *); 77int nilfs_mdt_fetch_dirty(struct inode *);
76 78
77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t); 79struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t,
80 size_t);
78struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, 81struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
79 ino_t, gfp_t); 82 ino_t, gfp_t, size_t);
80void nilfs_mdt_destroy(struct inode *); 83void nilfs_mdt_destroy(struct inode *);
81void nilfs_mdt_clear(struct inode *);
82void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); 84void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned);
83void nilfs_mdt_set_shadow(struct inode *, struct inode *); 85void nilfs_mdt_set_shadow(struct inode *, struct inode *);
84 86
@@ -104,21 +106,4 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
104#define nilfs_mdt_bgl_lock(inode, bg) \ 106#define nilfs_mdt_bgl_lock(inode, bg) \
105 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) 107 (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
106 108
107
108static inline int
109nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh,
110 unsigned n)
111{
112 return nilfs_read_inode_common(
113 inode, (struct nilfs_inode *)(bh->b_data + n));
114}
115
116static inline void
117nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh,
118 unsigned n)
119{
120 nilfs_write_inode_common(
121 inode, (struct nilfs_inode *)(bh->b_data + n), 1);
122}
123
124#endif /* _NILFS_MDT_H */ 109#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index ed02e886fa79..07ba838ef089 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -120,7 +120,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
120 inode->i_op = &nilfs_file_inode_operations; 120 inode->i_op = &nilfs_file_inode_operations;
121 inode->i_fop = &nilfs_file_operations; 121 inode->i_fop = &nilfs_file_operations;
122 inode->i_mapping->a_ops = &nilfs_aops; 122 inode->i_mapping->a_ops = &nilfs_aops;
123 mark_inode_dirty(inode); 123 nilfs_mark_inode_dirty(inode);
124 err = nilfs_add_nondir(dentry, inode); 124 err = nilfs_add_nondir(dentry, inode);
125 } 125 }
126 if (!err) 126 if (!err)
@@ -148,7 +148,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
148 err = PTR_ERR(inode); 148 err = PTR_ERR(inode);
149 if (!IS_ERR(inode)) { 149 if (!IS_ERR(inode)) {
150 init_special_inode(inode, inode->i_mode, rdev); 150 init_special_inode(inode, inode->i_mode, rdev);
151 mark_inode_dirty(inode); 151 nilfs_mark_inode_dirty(inode);
152 err = nilfs_add_nondir(dentry, inode); 152 err = nilfs_add_nondir(dentry, inode);
153 } 153 }
154 if (!err) 154 if (!err)
@@ -188,7 +188,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
188 goto out_fail; 188 goto out_fail;
189 189
190 /* mark_inode_dirty(inode); */ 190 /* mark_inode_dirty(inode); */
191 /* nilfs_new_inode() and page_symlink() do this */ 191 /* page_symlink() do this */
192 192
193 err = nilfs_add_nondir(dentry, inode); 193 err = nilfs_add_nondir(dentry, inode);
194out: 194out:
@@ -200,7 +200,8 @@ out:
200 return err; 200 return err;
201 201
202out_fail: 202out_fail:
203 inode_dec_link_count(inode); 203 drop_nlink(inode);
204 nilfs_mark_inode_dirty(inode);
204 iput(inode); 205 iput(inode);
205 goto out; 206 goto out;
206} 207}
@@ -245,7 +246,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
245 if (err) 246 if (err)
246 return err; 247 return err;
247 248
248 inode_inc_link_count(dir); 249 inc_nlink(dir);
249 250
250 inode = nilfs_new_inode(dir, S_IFDIR | mode); 251 inode = nilfs_new_inode(dir, S_IFDIR | mode);
251 err = PTR_ERR(inode); 252 err = PTR_ERR(inode);
@@ -256,7 +257,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
256 inode->i_fop = &nilfs_dir_operations; 257 inode->i_fop = &nilfs_dir_operations;
257 inode->i_mapping->a_ops = &nilfs_aops; 258 inode->i_mapping->a_ops = &nilfs_aops;
258 259
259 inode_inc_link_count(inode); 260 inc_nlink(inode);
260 261
261 err = nilfs_make_empty(inode, dir); 262 err = nilfs_make_empty(inode, dir);
262 if (err) 263 if (err)
@@ -266,6 +267,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
266 if (err) 267 if (err)
267 goto out_fail; 268 goto out_fail;
268 269
270 nilfs_mark_inode_dirty(inode);
269 d_instantiate(dentry, inode); 271 d_instantiate(dentry, inode);
270out: 272out:
271 if (!err) 273 if (!err)
@@ -276,26 +278,23 @@ out:
276 return err; 278 return err;
277 279
278out_fail: 280out_fail:
279 inode_dec_link_count(inode); 281 drop_nlink(inode);
280 inode_dec_link_count(inode); 282 drop_nlink(inode);
283 nilfs_mark_inode_dirty(inode);
281 iput(inode); 284 iput(inode);
282out_dir: 285out_dir:
283 inode_dec_link_count(dir); 286 drop_nlink(dir);
287 nilfs_mark_inode_dirty(dir);
284 goto out; 288 goto out;
285} 289}
286 290
287static int nilfs_unlink(struct inode *dir, struct dentry *dentry) 291static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
288{ 292{
289 struct inode *inode; 293 struct inode *inode;
290 struct nilfs_dir_entry *de; 294 struct nilfs_dir_entry *de;
291 struct page *page; 295 struct page *page;
292 struct nilfs_transaction_info ti;
293 int err; 296 int err;
294 297
295 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
296 if (err)
297 return err;
298
299 err = -ENOENT; 298 err = -ENOENT;
300 de = nilfs_find_entry(dir, dentry, &page); 299 de = nilfs_find_entry(dir, dentry, &page);
301 if (!de) 300 if (!de)
@@ -317,12 +316,28 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
317 goto out; 316 goto out;
318 317
319 inode->i_ctime = dir->i_ctime; 318 inode->i_ctime = dir->i_ctime;
320 inode_dec_link_count(inode); 319 drop_nlink(inode);
321 err = 0; 320 err = 0;
322out: 321out:
323 if (!err) 322 return err;
323}
324
325static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
326{
327 struct nilfs_transaction_info ti;
328 int err;
329
330 err = nilfs_transaction_begin(dir->i_sb, &ti, 0);
331 if (err)
332 return err;
333
334 err = nilfs_do_unlink(dir, dentry);
335
336 if (!err) {
337 nilfs_mark_inode_dirty(dir);
338 nilfs_mark_inode_dirty(dentry->d_inode);
324 err = nilfs_transaction_commit(dir->i_sb); 339 err = nilfs_transaction_commit(dir->i_sb);
325 else 340 } else
326 nilfs_transaction_abort(dir->i_sb); 341 nilfs_transaction_abort(dir->i_sb);
327 342
328 return err; 343 return err;
@@ -340,11 +355,13 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
340 355
341 err = -ENOTEMPTY; 356 err = -ENOTEMPTY;
342 if (nilfs_empty_dir(inode)) { 357 if (nilfs_empty_dir(inode)) {
343 err = nilfs_unlink(dir, dentry); 358 err = nilfs_do_unlink(dir, dentry);
344 if (!err) { 359 if (!err) {
345 inode->i_size = 0; 360 inode->i_size = 0;
346 inode_dec_link_count(inode); 361 drop_nlink(inode);
347 inode_dec_link_count(dir); 362 nilfs_mark_inode_dirty(inode);
363 drop_nlink(dir);
364 nilfs_mark_inode_dirty(dir);
348 } 365 }
349 } 366 }
350 if (!err) 367 if (!err)
@@ -395,42 +412,48 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
395 new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); 412 new_de = nilfs_find_entry(new_dir, new_dentry, &new_page);
396 if (!new_de) 413 if (!new_de)
397 goto out_dir; 414 goto out_dir;
398 inode_inc_link_count(old_inode); 415 inc_nlink(old_inode);
399 nilfs_set_link(new_dir, new_de, new_page, old_inode); 416 nilfs_set_link(new_dir, new_de, new_page, old_inode);
417 nilfs_mark_inode_dirty(new_dir);
400 new_inode->i_ctime = CURRENT_TIME; 418 new_inode->i_ctime = CURRENT_TIME;
401 if (dir_de) 419 if (dir_de)
402 drop_nlink(new_inode); 420 drop_nlink(new_inode);
403 inode_dec_link_count(new_inode); 421 drop_nlink(new_inode);
422 nilfs_mark_inode_dirty(new_inode);
404 } else { 423 } else {
405 if (dir_de) { 424 if (dir_de) {
406 err = -EMLINK; 425 err = -EMLINK;
407 if (new_dir->i_nlink >= NILFS_LINK_MAX) 426 if (new_dir->i_nlink >= NILFS_LINK_MAX)
408 goto out_dir; 427 goto out_dir;
409 } 428 }
410 inode_inc_link_count(old_inode); 429 inc_nlink(old_inode);
411 err = nilfs_add_link(new_dentry, old_inode); 430 err = nilfs_add_link(new_dentry, old_inode);
412 if (err) { 431 if (err) {
413 inode_dec_link_count(old_inode); 432 drop_nlink(old_inode);
433 nilfs_mark_inode_dirty(old_inode);
414 goto out_dir; 434 goto out_dir;
415 } 435 }
416 if (dir_de) 436 if (dir_de) {
417 inode_inc_link_count(new_dir); 437 inc_nlink(new_dir);
438 nilfs_mark_inode_dirty(new_dir);
439 }
418 } 440 }
419 441
420 /* 442 /*
421 * Like most other Unix systems, set the ctime for inodes on a 443 * Like most other Unix systems, set the ctime for inodes on a
422 * rename. 444 * rename.
423 * inode_dec_link_count() will mark the inode dirty.
424 */ 445 */
425 old_inode->i_ctime = CURRENT_TIME; 446 old_inode->i_ctime = CURRENT_TIME;
426 447
427 nilfs_delete_entry(old_de, old_page); 448 nilfs_delete_entry(old_de, old_page);
428 inode_dec_link_count(old_inode); 449 drop_nlink(old_inode);
429 450
430 if (dir_de) { 451 if (dir_de) {
431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 452 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
432 inode_dec_link_count(old_dir); 453 drop_nlink(old_dir);
433 } 454 }
455 nilfs_mark_inode_dirty(old_dir);
456 nilfs_mark_inode_dirty(old_inode);
434 457
435 err = nilfs_transaction_commit(old_dir->i_sb); 458 err = nilfs_transaction_commit(old_dir->i_sb);
436 return err; 459 return err;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 6dc83591d118..c9c96c7825dc 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -770,14 +770,8 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
770 nilfs_finish_roll_forward(nilfs, sbi, ri); 770 nilfs_finish_roll_forward(nilfs, sbi, ri);
771 } 771 }
772 772
773 nilfs_detach_checkpoint(sbi);
774 return 0;
775
776 failed: 773 failed:
777 nilfs_detach_checkpoint(sbi); 774 nilfs_detach_checkpoint(sbi);
778 nilfs_mdt_clear(nilfs->ns_cpfile);
779 nilfs_mdt_clear(nilfs->ns_sufile);
780 nilfs_mdt_clear(nilfs->ns_dat);
781 return err; 775 return err;
782} 776}
783 777
@@ -804,6 +798,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
804 struct nilfs_segsum_info ssi; 798 struct nilfs_segsum_info ssi;
805 sector_t pseg_start, pseg_end, sr_pseg_start = 0; 799 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
806 sector_t seg_start, seg_end; /* range of full segment (block number) */ 800 sector_t seg_start, seg_end; /* range of full segment (block number) */
801 sector_t b, end;
807 u64 seg_seq; 802 u64 seg_seq;
808 __u64 segnum, nextnum = 0; 803 __u64 segnum, nextnum = 0;
809 __u64 cno; 804 __u64 cno;
@@ -819,6 +814,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
819 /* Calculate range of segment */ 814 /* Calculate range of segment */
820 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); 815 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
821 816
817 /* Read ahead segment */
818 b = seg_start;
819 while (b <= seg_end)
820 sb_breadahead(sbi->s_super, b++);
821
822 for (;;) { 822 for (;;) {
823 /* Load segment summary */ 823 /* Load segment summary */
824 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); 824 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
@@ -841,14 +841,20 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
841 ri->ri_nextnum = nextnum; 841 ri->ri_nextnum = nextnum;
842 empty_seg = 0; 842 empty_seg = 0;
843 843
844 if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) {
845 /* This will never happen because a superblock
846 (last_segment) always points to a pseg
847 having a super root. */
848 ret = NILFS_SEG_FAIL_CONSISTENCY;
849 goto failed;
850 }
851
852 if (pseg_start == seg_start) {
853 nilfs_get_segment_range(nilfs, nextnum, &b, &end);
854 while (b <= end)
855 sb_breadahead(sbi->s_super, b++);
856 }
844 if (!NILFS_SEG_HAS_SR(&ssi)) { 857 if (!NILFS_SEG_HAS_SR(&ssi)) {
845 if (!scan_newer) {
846 /* This will never happen because a superblock
847 (last_segment) always points to a pseg
848 having a super root. */
849 ret = NILFS_SEG_FAIL_CONSISTENCY;
850 goto failed;
851 }
852 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { 858 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
853 ri->ri_lsegs_start = pseg_start; 859 ri->ri_lsegs_start = pseg_start;
854 ri->ri_lsegs_start_seq = seg_seq; 860 ri->ri_lsegs_start_seq = seg_seq;
@@ -919,7 +925,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
919 925
920 super_root_found: 926 super_root_found:
921 /* Updating pointers relating to the latest checkpoint */ 927 /* Updating pointers relating to the latest checkpoint */
922 list_splice(&segments, ri->ri_used_segments.prev); 928 list_splice_tail(&segments, &ri->ri_used_segments);
923 nilfs->ns_last_pseg = sr_pseg_start; 929 nilfs->ns_last_pseg = sr_pseg_start;
924 nilfs->ns_last_seq = nilfs->ns_seg_seq; 930 nilfs->ns_last_seq = nilfs->ns_seg_seq;
925 nilfs->ns_last_cno = ri->ri_cno; 931 nilfs->ns_last_cno = ri->ri_cno;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index e6d9e37fa241..645c78656aa0 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -24,10 +24,22 @@
24#include <linux/buffer_head.h> 24#include <linux/buffer_head.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/crc32.h> 26#include <linux/crc32.h>
27#include <linux/backing-dev.h>
27#include "page.h" 28#include "page.h"
28#include "segbuf.h" 29#include "segbuf.h"
29 30
30 31
32struct nilfs_write_info {
33 struct the_nilfs *nilfs;
34 struct bio *bio;
35 int start, end; /* The region to be submitted */
36 int rest_blocks;
37 int max_pages;
38 int nr_vecs;
39 sector_t blocknr;
40};
41
42
31static struct kmem_cache *nilfs_segbuf_cachep; 43static struct kmem_cache *nilfs_segbuf_cachep;
32 44
33static void nilfs_segbuf_init_once(void *obj) 45static void nilfs_segbuf_init_once(void *obj)
@@ -63,6 +75,11 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
63 INIT_LIST_HEAD(&segbuf->sb_list); 75 INIT_LIST_HEAD(&segbuf->sb_list);
64 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); 76 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
65 INIT_LIST_HEAD(&segbuf->sb_payload_buffers); 77 INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
78
79 init_completion(&segbuf->sb_bio_event);
80 atomic_set(&segbuf->sb_err, 0);
81 segbuf->sb_nbio = 0;
82
66 return segbuf; 83 return segbuf;
67} 84}
68 85
@@ -83,6 +100,22 @@ void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum,
83 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; 100 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
84} 101}
85 102
103/**
104 * nilfs_segbuf_map_cont - map a new log behind a given log
105 * @segbuf: new segment buffer
106 * @prev: segment buffer containing a log to be continued
107 */
108void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
109 struct nilfs_segment_buffer *prev)
110{
111 segbuf->sb_segnum = prev->sb_segnum;
112 segbuf->sb_fseg_start = prev->sb_fseg_start;
113 segbuf->sb_fseg_end = prev->sb_fseg_end;
114 segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks;
115 segbuf->sb_rest_blocks =
116 segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1;
117}
118
86void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, 119void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf,
87 __u64 nextnum, struct the_nilfs *nilfs) 120 __u64 nextnum, struct the_nilfs *nilfs)
88{ 121{
@@ -132,8 +165,6 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
132 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); 165 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
133 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; 166 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
134 segbuf->sb_sum.ctime = ctime; 167 segbuf->sb_sum.ctime = ctime;
135
136 segbuf->sb_io_error = 0;
137 return 0; 168 return 0;
138} 169}
139 170
@@ -219,7 +250,7 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
219 raw_sum->ss_datasum = cpu_to_le32(crc); 250 raw_sum->ss_datasum = cpu_to_le32(crc);
220} 251}
221 252
222void nilfs_release_buffers(struct list_head *list) 253static void nilfs_release_buffers(struct list_head *list)
223{ 254{
224 struct buffer_head *bh, *n; 255 struct buffer_head *bh, *n;
225 256
@@ -241,13 +272,56 @@ void nilfs_release_buffers(struct list_head *list)
241 } 272 }
242} 273}
243 274
275static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
276{
277 nilfs_release_buffers(&segbuf->sb_segsum_buffers);
278 nilfs_release_buffers(&segbuf->sb_payload_buffers);
279}
280
281/*
282 * Iterators for segment buffers
283 */
284void nilfs_clear_logs(struct list_head *logs)
285{
286 struct nilfs_segment_buffer *segbuf;
287
288 list_for_each_entry(segbuf, logs, sb_list)
289 nilfs_segbuf_clear(segbuf);
290}
291
292void nilfs_truncate_logs(struct list_head *logs,
293 struct nilfs_segment_buffer *last)
294{
295 struct nilfs_segment_buffer *n, *segbuf;
296
297 segbuf = list_prepare_entry(last, logs, sb_list);
298 list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) {
299 list_del_init(&segbuf->sb_list);
300 nilfs_segbuf_clear(segbuf);
301 nilfs_segbuf_free(segbuf);
302 }
303}
304
305int nilfs_wait_on_logs(struct list_head *logs)
306{
307 struct nilfs_segment_buffer *segbuf;
308 int err;
309
310 list_for_each_entry(segbuf, logs, sb_list) {
311 err = nilfs_segbuf_wait(segbuf);
312 if (err)
313 return err;
314 }
315 return 0;
316}
317
244/* 318/*
245 * BIO operations 319 * BIO operations
246 */ 320 */
247static void nilfs_end_bio_write(struct bio *bio, int err) 321static void nilfs_end_bio_write(struct bio *bio, int err)
248{ 322{
249 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 323 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
250 struct nilfs_write_info *wi = bio->bi_private; 324 struct nilfs_segment_buffer *segbuf = bio->bi_private;
251 325
252 if (err == -EOPNOTSUPP) { 326 if (err == -EOPNOTSUPP) {
253 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 327 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
@@ -256,21 +330,22 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
256 } 330 }
257 331
258 if (!uptodate) 332 if (!uptodate)
259 atomic_inc(&wi->err); 333 atomic_inc(&segbuf->sb_err);
260 334
261 bio_put(bio); 335 bio_put(bio);
262 complete(&wi->bio_event); 336 complete(&segbuf->sb_bio_event);
263} 337}
264 338
265static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) 339static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
340 struct nilfs_write_info *wi, int mode)
266{ 341{
267 struct bio *bio = wi->bio; 342 struct bio *bio = wi->bio;
268 int err; 343 int err;
269 344
270 if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { 345 if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) {
271 wait_for_completion(&wi->bio_event); 346 wait_for_completion(&segbuf->sb_bio_event);
272 wi->nbio--; 347 segbuf->sb_nbio--;
273 if (unlikely(atomic_read(&wi->err))) { 348 if (unlikely(atomic_read(&segbuf->sb_err))) {
274 bio_put(bio); 349 bio_put(bio);
275 err = -EIO; 350 err = -EIO;
276 goto failed; 351 goto failed;
@@ -278,7 +353,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
278 } 353 }
279 354
280 bio->bi_end_io = nilfs_end_bio_write; 355 bio->bi_end_io = nilfs_end_bio_write;
281 bio->bi_private = wi; 356 bio->bi_private = segbuf;
282 bio_get(bio); 357 bio_get(bio);
283 submit_bio(mode, bio); 358 submit_bio(mode, bio);
284 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 359 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
@@ -286,7 +361,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
286 err = -EOPNOTSUPP; 361 err = -EOPNOTSUPP;
287 goto failed; 362 goto failed;
288 } 363 }
289 wi->nbio++; 364 segbuf->sb_nbio++;
290 bio_put(bio); 365 bio_put(bio);
291 366
292 wi->bio = NULL; 367 wi->bio = NULL;
@@ -301,17 +376,15 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode)
301} 376}
302 377
303/** 378/**
304 * nilfs_alloc_seg_bio - allocate a bio for writing segment. 379 * nilfs_alloc_seg_bio - allocate a new bio for writing log
305 * @sb: super block 380 * @nilfs: nilfs object
306 * @start: beginning disk block number of this BIO. 381 * @start: start block number of the bio
307 * @nr_vecs: request size of page vector. 382 * @nr_vecs: request size of page vector.
308 * 383 *
309 * alloc_seg_bio() allocates a new BIO structure and initialize it.
310 *
311 * Return Value: On success, pointer to the struct bio is returned. 384 * Return Value: On success, pointer to the struct bio is returned.
312 * On error, NULL is returned. 385 * On error, NULL is returned.
313 */ 386 */
314static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, 387static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start,
315 int nr_vecs) 388 int nr_vecs)
316{ 389{
317 struct bio *bio; 390 struct bio *bio;
@@ -322,36 +395,33 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
322 bio = bio_alloc(GFP_NOIO, nr_vecs); 395 bio = bio_alloc(GFP_NOIO, nr_vecs);
323 } 396 }
324 if (likely(bio)) { 397 if (likely(bio)) {
325 bio->bi_bdev = sb->s_bdev; 398 bio->bi_bdev = nilfs->ns_bdev;
326 bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); 399 bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9);
327 } 400 }
328 return bio; 401 return bio;
329} 402}
330 403
331void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, 404static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
332 struct nilfs_write_info *wi) 405 struct nilfs_write_info *wi)
333{ 406{
334 wi->bio = NULL; 407 wi->bio = NULL;
335 wi->rest_blocks = segbuf->sb_sum.nblocks; 408 wi->rest_blocks = segbuf->sb_sum.nblocks;
336 wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); 409 wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev);
337 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); 410 wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
338 wi->start = wi->end = 0; 411 wi->start = wi->end = 0;
339 wi->nbio = 0;
340 wi->blocknr = segbuf->sb_pseg_start; 412 wi->blocknr = segbuf->sb_pseg_start;
341
342 atomic_set(&wi->err, 0);
343 init_completion(&wi->bio_event);
344} 413}
345 414
346static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, 415static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
347 int mode) 416 struct nilfs_write_info *wi,
417 struct buffer_head *bh, int mode)
348{ 418{
349 int len, err; 419 int len, err;
350 420
351 BUG_ON(wi->nr_vecs <= 0); 421 BUG_ON(wi->nr_vecs <= 0);
352 repeat: 422 repeat:
353 if (!wi->bio) { 423 if (!wi->bio) {
354 wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, 424 wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end,
355 wi->nr_vecs); 425 wi->nr_vecs);
356 if (unlikely(!wi->bio)) 426 if (unlikely(!wi->bio))
357 return -ENOMEM; 427 return -ENOMEM;
@@ -363,76 +433,83 @@ static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh,
363 return 0; 433 return 0;
364 } 434 }
365 /* bio is FULL */ 435 /* bio is FULL */
366 err = nilfs_submit_seg_bio(wi, mode); 436 err = nilfs_segbuf_submit_bio(segbuf, wi, mode);
367 /* never submit current bh */ 437 /* never submit current bh */
368 if (likely(!err)) 438 if (likely(!err))
369 goto repeat; 439 goto repeat;
370 return err; 440 return err;
371} 441}
372 442
443/**
444 * nilfs_segbuf_write - submit write requests of a log
445 * @segbuf: buffer storing a log to be written
446 * @nilfs: nilfs object
447 *
448 * Return Value: On Success, 0 is returned. On Error, one of the following
449 * negative error code is returned.
450 *
451 * %-EIO - I/O error
452 *
453 * %-ENOMEM - Insufficient memory available.
454 */
373int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, 455int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
374 struct nilfs_write_info *wi) 456 struct the_nilfs *nilfs)
375{ 457{
458 struct nilfs_write_info wi;
376 struct buffer_head *bh; 459 struct buffer_head *bh;
377 int res, rw = WRITE; 460 int res = 0, rw = WRITE;
461
462 wi.nilfs = nilfs;
463 nilfs_segbuf_prepare_write(segbuf, &wi);
378 464
379 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { 465 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
380 res = nilfs_submit_bh(wi, bh, rw); 466 res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
381 if (unlikely(res)) 467 if (unlikely(res))
382 goto failed_bio; 468 goto failed_bio;
383 } 469 }
384 470
385 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 471 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
386 res = nilfs_submit_bh(wi, bh, rw); 472 res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
387 if (unlikely(res)) 473 if (unlikely(res))
388 goto failed_bio; 474 goto failed_bio;
389 } 475 }
390 476
391 if (wi->bio) { 477 if (wi.bio) {
392 /* 478 /*
393 * Last BIO is always sent through the following 479 * Last BIO is always sent through the following
394 * submission. 480 * submission.
395 */ 481 */
396 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); 482 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
397 res = nilfs_submit_seg_bio(wi, rw); 483 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
398 if (unlikely(res))
399 goto failed_bio;
400 } 484 }
401 485
402 res = 0;
403 out:
404 return res;
405
406 failed_bio: 486 failed_bio:
407 atomic_inc(&wi->err); 487 return res;
408 goto out;
409} 488}
410 489
411/** 490/**
412 * nilfs_segbuf_wait - wait for completion of requested BIOs 491 * nilfs_segbuf_wait - wait for completion of requested BIOs
413 * @wi: nilfs_write_info 492 * @segbuf: segment buffer
414 * 493 *
415 * Return Value: On Success, 0 is returned. On Error, one of the following 494 * Return Value: On Success, 0 is returned. On Error, one of the following
416 * negative error code is returned. 495 * negative error code is returned.
417 * 496 *
418 * %-EIO - I/O error 497 * %-EIO - I/O error
419 */ 498 */
420int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, 499int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf)
421 struct nilfs_write_info *wi)
422{ 500{
423 int err = 0; 501 int err = 0;
424 502
425 if (!wi->nbio) 503 if (!segbuf->sb_nbio)
426 return 0; 504 return 0;
427 505
428 do { 506 do {
429 wait_for_completion(&wi->bio_event); 507 wait_for_completion(&segbuf->sb_bio_event);
430 } while (--wi->nbio > 0); 508 } while (--segbuf->sb_nbio > 0);
431 509
432 if (unlikely(atomic_read(&wi->err) > 0)) { 510 if (unlikely(atomic_read(&segbuf->sb_err) > 0)) {
433 printk(KERN_ERR "NILFS: IO error writing segment\n"); 511 printk(KERN_ERR "NILFS: IO error writing segment\n");
434 err = -EIO; 512 err = -EIO;
435 segbuf->sb_io_error = 1;
436 } 513 }
437 return err; 514 return err;
438} 515}
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 0c3076f4e592..6af1630fb401 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -27,7 +27,6 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/bio.h> 28#include <linux/bio.h>
29#include <linux/completion.h> 29#include <linux/completion.h>
30#include <linux/backing-dev.h>
31 30
32/** 31/**
33 * struct nilfs_segsum_info - On-memory segment summary 32 * struct nilfs_segsum_info - On-memory segment summary
@@ -77,7 +76,9 @@ struct nilfs_segsum_info {
77 * @sb_rest_blocks: Number of residual blocks in the current segment 76 * @sb_rest_blocks: Number of residual blocks in the current segment
78 * @sb_segsum_buffers: List of buffers for segment summaries 77 * @sb_segsum_buffers: List of buffers for segment summaries
79 * @sb_payload_buffers: List of buffers for segment payload 78 * @sb_payload_buffers: List of buffers for segment payload
80 * @sb_io_error: I/O error status 79 * @sb_nbio: Number of flying bio requests
80 * @sb_err: I/O error status
81 * @sb_bio_event: Completion event of log writing
81 */ 82 */
82struct nilfs_segment_buffer { 83struct nilfs_segment_buffer {
83 struct super_block *sb_super; 84 struct super_block *sb_super;
@@ -96,7 +97,9 @@ struct nilfs_segment_buffer {
96 struct list_head sb_payload_buffers; /* including super root */ 97 struct list_head sb_payload_buffers; /* including super root */
97 98
98 /* io status */ 99 /* io status */
99 int sb_io_error; 100 int sb_nbio;
101 atomic_t sb_err;
102 struct completion sb_bio_event;
100}; 103};
101 104
102#define NILFS_LIST_SEGBUF(head) \ 105#define NILFS_LIST_SEGBUF(head) \
@@ -125,6 +128,8 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
125void nilfs_segbuf_free(struct nilfs_segment_buffer *); 128void nilfs_segbuf_free(struct nilfs_segment_buffer *);
126void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, 129void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
127 struct the_nilfs *); 130 struct the_nilfs *);
131void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
132 struct nilfs_segment_buffer *prev);
128void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, 133void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
129 struct the_nilfs *); 134 struct the_nilfs *);
130int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); 135int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t);
@@ -161,41 +166,18 @@ nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf,
161 segbuf->sb_sum.nfileblk++; 166 segbuf->sb_sum.nfileblk++;
162} 167}
163 168
164void nilfs_release_buffers(struct list_head *); 169int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
170 struct the_nilfs *nilfs);
171int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf);
165 172
166static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) 173void nilfs_clear_logs(struct list_head *logs);
174void nilfs_truncate_logs(struct list_head *logs,
175 struct nilfs_segment_buffer *last);
176int nilfs_wait_on_logs(struct list_head *logs);
177
178static inline void nilfs_destroy_logs(struct list_head *logs)
167{ 179{
168 nilfs_release_buffers(&segbuf->sb_segsum_buffers); 180 nilfs_truncate_logs(logs, NULL);
169 nilfs_release_buffers(&segbuf->sb_payload_buffers);
170} 181}
171 182
172struct nilfs_write_info {
173 struct bio *bio;
174 int start, end; /* The region to be submitted */
175 int rest_blocks;
176 int max_pages;
177 int nr_vecs;
178 sector_t blocknr;
179
180 int nbio;
181 atomic_t err;
182 struct completion bio_event;
183 /* completion event of segment write */
184
185 /*
186 * The following fields must be set explicitly
187 */
188 struct super_block *sb;
189 struct backing_dev_info *bdi; /* backing dev info */
190 struct buffer_head *bh_sr;
191};
192
193
194void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *,
195 struct nilfs_write_info *);
196int nilfs_segbuf_write(struct nilfs_segment_buffer *,
197 struct nilfs_write_info *);
198int nilfs_segbuf_wait(struct nilfs_segment_buffer *,
199 struct nilfs_write_info *);
200
201#endif /* _NILFS_SEGBUF_H */ 183#endif /* _NILFS_SEGBUF_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6eff66a070d5..17584c524486 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -974,12 +974,12 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
974 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 974 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
975 raw_sr->sr_flags = 0; 975 raw_sr->sr_flags = 0;
976 976
977 nilfs_mdt_write_inode_direct( 977 nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr +
978 nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); 978 NILFS_SR_DAT_OFFSET(isz), 1);
979 nilfs_mdt_write_inode_direct( 979 nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
980 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); 980 NILFS_SR_CPFILE_OFFSET(isz), 1);
981 nilfs_mdt_write_inode_direct( 981 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
982 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); 982 NILFS_SR_SUFILE_OFFSET(isz), 1);
983} 983}
984 984
985static void nilfs_redirty_inodes(struct list_head *head) 985static void nilfs_redirty_inodes(struct list_head *head)
@@ -1273,73 +1273,75 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1273 return err; 1273 return err;
1274} 1274}
1275 1275
1276static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) 1276/**
1277{ 1277 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
1278 struct buffer_head *bh_su; 1278 * @sci: nilfs_sc_info
1279 struct nilfs_segment_usage *raw_su; 1279 * @nilfs: nilfs object
1280 int err; 1280 */
1281
1282 err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su);
1283 if (unlikely(err))
1284 return err;
1285 nilfs_mdt_mark_buffer_dirty(bh_su);
1286 nilfs_mdt_mark_dirty(sufile);
1287 nilfs_sufile_put_segment_usage(sufile, segnum, bh_su);
1288 return 0;
1289}
1290
1291static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, 1281static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
1292 struct the_nilfs *nilfs) 1282 struct the_nilfs *nilfs)
1293{ 1283{
1294 struct nilfs_segment_buffer *segbuf, *n; 1284 struct nilfs_segment_buffer *segbuf, *prev;
1295 __u64 nextnum; 1285 __u64 nextnum;
1296 int err; 1286 int err, alloc = 0;
1297 1287
1298 if (list_empty(&sci->sc_segbufs)) { 1288 segbuf = nilfs_segbuf_new(sci->sc_super);
1299 segbuf = nilfs_segbuf_new(sci->sc_super); 1289 if (unlikely(!segbuf))
1300 if (unlikely(!segbuf)) 1290 return -ENOMEM;
1301 return -ENOMEM; 1291
1302 list_add(&segbuf->sb_list, &sci->sc_segbufs); 1292 if (list_empty(&sci->sc_write_logs)) {
1303 } else 1293 nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
1304 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1294 nilfs->ns_pseg_offset, nilfs);
1295 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1296 nilfs_shift_to_next_segment(nilfs);
1297 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
1298 }
1305 1299
1306 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, 1300 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1307 nilfs); 1301 nextnum = nilfs->ns_nextnum;
1308 1302
1309 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { 1303 if (nilfs->ns_segnum == nilfs->ns_nextnum)
1310 nilfs_shift_to_next_segment(nilfs); 1304 /* Start from the head of a new full segment */
1311 nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); 1305 alloc++;
1306 } else {
1307 /* Continue logs */
1308 prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
1309 nilfs_segbuf_map_cont(segbuf, prev);
1310 segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
1311 nextnum = prev->sb_nextnum;
1312
1313 if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
1314 nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
1315 segbuf->sb_sum.seg_seq++;
1316 alloc++;
1317 }
1312 } 1318 }
1313 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1314 1319
1315 err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); 1320 err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
1316 if (unlikely(err)) 1321 if (err)
1317 return err; 1322 goto failed;
1318 1323
1319 if (nilfs->ns_segnum == nilfs->ns_nextnum) { 1324 if (alloc) {
1320 /* Start from the head of a new full segment */
1321 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); 1325 err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
1322 if (unlikely(err)) 1326 if (err)
1323 return err; 1327 goto failed;
1324 } else 1328 }
1325 nextnum = nilfs->ns_nextnum;
1326
1327 segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
1328 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); 1329 nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
1329 1330
1330 /* truncating segment buffers */ 1331 BUG_ON(!list_empty(&sci->sc_segbufs));
1331 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, 1332 list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
1332 sb_list) { 1333 sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
1333 list_del_init(&segbuf->sb_list);
1334 nilfs_segbuf_free(segbuf);
1335 }
1336 return 0; 1334 return 0;
1335
1336 failed:
1337 nilfs_segbuf_free(segbuf);
1338 return err;
1337} 1339}
1338 1340
1339static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, 1341static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1340 struct the_nilfs *nilfs, int nadd) 1342 struct the_nilfs *nilfs, int nadd)
1341{ 1343{
1342 struct nilfs_segment_buffer *segbuf, *prev, *n; 1344 struct nilfs_segment_buffer *segbuf, *prev;
1343 struct inode *sufile = nilfs->ns_sufile; 1345 struct inode *sufile = nilfs->ns_sufile;
1344 __u64 nextnextnum; 1346 __u64 nextnextnum;
1345 LIST_HEAD(list); 1347 LIST_HEAD(list);
@@ -1352,7 +1354,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1352 * not be dirty. The following call ensures that the buffer is dirty 1354 * not be dirty. The following call ensures that the buffer is dirty
1353 * and will pin the buffer on memory until the sufile is written. 1355 * and will pin the buffer on memory until the sufile is written.
1354 */ 1356 */
1355 err = nilfs_touch_segusage(sufile, prev->sb_nextnum); 1357 err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
1356 if (unlikely(err)) 1358 if (unlikely(err))
1357 return err; 1359 return err;
1358 1360
@@ -1378,33 +1380,33 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
1378 list_add_tail(&segbuf->sb_list, &list); 1380 list_add_tail(&segbuf->sb_list, &list);
1379 prev = segbuf; 1381 prev = segbuf;
1380 } 1382 }
1381 list_splice(&list, sci->sc_segbufs.prev); 1383 list_splice_tail(&list, &sci->sc_segbufs);
1382 return 0; 1384 return 0;
1383 1385
1384 failed_segbuf: 1386 failed_segbuf:
1385 nilfs_segbuf_free(segbuf); 1387 nilfs_segbuf_free(segbuf);
1386 failed: 1388 failed:
1387 list_for_each_entry_safe(segbuf, n, &list, sb_list) { 1389 list_for_each_entry(segbuf, &list, sb_list) {
1388 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1390 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1389 WARN_ON(ret); /* never fails */ 1391 WARN_ON(ret); /* never fails */
1390 list_del_init(&segbuf->sb_list);
1391 nilfs_segbuf_free(segbuf);
1392 } 1392 }
1393 nilfs_destroy_logs(&list);
1393 return err; 1394 return err;
1394} 1395}
1395 1396
1396static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, 1397static void nilfs_free_incomplete_logs(struct list_head *logs,
1397 struct the_nilfs *nilfs) 1398 struct the_nilfs *nilfs)
1398{ 1399{
1399 struct nilfs_segment_buffer *segbuf; 1400 struct nilfs_segment_buffer *segbuf, *prev;
1400 int ret, done = 0; 1401 struct inode *sufile = nilfs->ns_sufile;
1402 int ret;
1401 1403
1402 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1404 segbuf = NILFS_FIRST_SEGBUF(logs);
1403 if (nilfs->ns_nextnum != segbuf->sb_nextnum) { 1405 if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
1404 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); 1406 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1405 WARN_ON(ret); /* never fails */ 1407 WARN_ON(ret); /* never fails */
1406 } 1408 }
1407 if (segbuf->sb_io_error) { 1409 if (atomic_read(&segbuf->sb_err)) {
1408 /* Case 1: The first segment failed */ 1410 /* Case 1: The first segment failed */
1409 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) 1411 if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
1410 /* Case 1a: Partial segment appended into an existing 1412 /* Case 1a: Partial segment appended into an existing
@@ -1413,106 +1415,54 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci,
1413 segbuf->sb_fseg_end); 1415 segbuf->sb_fseg_end);
1414 else /* Case 1b: New full segment */ 1416 else /* Case 1b: New full segment */
1415 set_nilfs_discontinued(nilfs); 1417 set_nilfs_discontinued(nilfs);
1416 done++;
1417 } 1418 }
1418 1419
1419 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { 1420 prev = segbuf;
1420 ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); 1421 list_for_each_entry_continue(segbuf, logs, sb_list) {
1421 WARN_ON(ret); /* never fails */ 1422 if (prev->sb_nextnum != segbuf->sb_nextnum) {
1422 if (!done && segbuf->sb_io_error) { 1423 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1423 if (segbuf->sb_segnum != nilfs->ns_nextnum) 1424 WARN_ON(ret); /* never fails */
1424 /* Case 2: extended segment (!= next) failed */
1425 nilfs_sufile_set_error(nilfs->ns_sufile,
1426 segbuf->sb_segnum);
1427 done++;
1428 }
1429 }
1430}
1431
1432static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci)
1433{
1434 struct nilfs_segment_buffer *segbuf;
1435
1436 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list)
1437 nilfs_segbuf_clear(segbuf);
1438 sci->sc_super_root = NULL;
1439}
1440
1441static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci)
1442{
1443 struct nilfs_segment_buffer *segbuf;
1444
1445 while (!list_empty(&sci->sc_segbufs)) {
1446 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1447 list_del_init(&segbuf->sb_list);
1448 nilfs_segbuf_free(segbuf);
1449 }
1450 /* sci->sc_curseg = NULL; */
1451}
1452
1453static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
1454 struct the_nilfs *nilfs, int err)
1455{
1456 if (unlikely(err)) {
1457 nilfs_segctor_free_incomplete_segments(sci, nilfs);
1458 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1459 int ret;
1460
1461 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1462 sci->sc_freesegs,
1463 sci->sc_nfreesegs,
1464 NULL);
1465 WARN_ON(ret); /* do not happen */
1466 } 1425 }
1426 if (atomic_read(&segbuf->sb_err) &&
1427 segbuf->sb_segnum != nilfs->ns_nextnum)
1428 /* Case 2: extended segment (!= next) failed */
1429 nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
1430 prev = segbuf;
1467 } 1431 }
1468 nilfs_segctor_clear_segment_buffers(sci);
1469} 1432}
1470 1433
1471static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, 1434static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
1472 struct inode *sufile) 1435 struct inode *sufile)
1473{ 1436{
1474 struct nilfs_segment_buffer *segbuf; 1437 struct nilfs_segment_buffer *segbuf;
1475 struct buffer_head *bh_su;
1476 struct nilfs_segment_usage *raw_su;
1477 unsigned long live_blocks; 1438 unsigned long live_blocks;
1478 int ret; 1439 int ret;
1479 1440
1480 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1441 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1481 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum,
1482 &raw_su, &bh_su);
1483 WARN_ON(ret); /* always succeed because bh_su is dirty */
1484 live_blocks = segbuf->sb_sum.nblocks + 1442 live_blocks = segbuf->sb_sum.nblocks +
1485 (segbuf->sb_pseg_start - segbuf->sb_fseg_start); 1443 (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
1486 raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); 1444 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1487 raw_su->su_nblocks = cpu_to_le32(live_blocks); 1445 live_blocks,
1488 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, 1446 sci->sc_seg_ctime);
1489 bh_su); 1447 WARN_ON(ret); /* always succeed because the segusage is dirty */
1490 } 1448 }
1491} 1449}
1492 1450
1493static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, 1451static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
1494 struct inode *sufile)
1495{ 1452{
1496 struct nilfs_segment_buffer *segbuf; 1453 struct nilfs_segment_buffer *segbuf;
1497 struct buffer_head *bh_su;
1498 struct nilfs_segment_usage *raw_su;
1499 int ret; 1454 int ret;
1500 1455
1501 segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1456 segbuf = NILFS_FIRST_SEGBUF(logs);
1502 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, 1457 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1503 &raw_su, &bh_su); 1458 segbuf->sb_pseg_start -
1504 WARN_ON(ret); /* always succeed because bh_su is dirty */ 1459 segbuf->sb_fseg_start, 0);
1505 raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - 1460 WARN_ON(ret); /* always succeed because the segusage is dirty */
1506 segbuf->sb_fseg_start);
1507 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su);
1508 1461
1509 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { 1462 list_for_each_entry_continue(segbuf, logs, sb_list) {
1510 ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, 1463 ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
1511 &raw_su, &bh_su); 1464 0, 0);
1512 WARN_ON(ret); /* always succeed */ 1465 WARN_ON(ret); /* always succeed */
1513 raw_su->su_nblocks = 0;
1514 nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum,
1515 bh_su);
1516 } 1466 }
1517} 1467}
1518 1468
@@ -1520,17 +1470,15 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
1520 struct nilfs_segment_buffer *last, 1470 struct nilfs_segment_buffer *last,
1521 struct inode *sufile) 1471 struct inode *sufile)
1522{ 1472{
1523 struct nilfs_segment_buffer *segbuf = last, *n; 1473 struct nilfs_segment_buffer *segbuf = last;
1524 int ret; 1474 int ret;
1525 1475
1526 list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, 1476 list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
1527 sb_list) {
1528 list_del_init(&segbuf->sb_list);
1529 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; 1477 sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
1530 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); 1478 ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
1531 WARN_ON(ret); 1479 WARN_ON(ret);
1532 nilfs_segbuf_free(segbuf);
1533 } 1480 }
1481 nilfs_truncate_logs(&sci->sc_segbufs, last);
1534} 1482}
1535 1483
1536 1484
@@ -1569,7 +1517,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1569 NULL); 1517 NULL);
1570 WARN_ON(err); /* do not happen */ 1518 WARN_ON(err); /* do not happen */
1571 } 1519 }
1572 nilfs_segctor_clear_segment_buffers(sci); 1520 nilfs_clear_logs(&sci->sc_segbufs);
1573 1521
1574 err = nilfs_segctor_extend_segments(sci, nilfs, nadd); 1522 err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
1575 if (unlikely(err)) 1523 if (unlikely(err))
@@ -1814,26 +1762,18 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1814} 1762}
1815 1763
1816static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1764static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1817 struct backing_dev_info *bdi) 1765 struct the_nilfs *nilfs)
1818{ 1766{
1819 struct nilfs_segment_buffer *segbuf; 1767 struct nilfs_segment_buffer *segbuf;
1820 struct nilfs_write_info wi; 1768 int ret = 0;
1821 int err, res;
1822
1823 wi.sb = sci->sc_super;
1824 wi.bh_sr = sci->sc_super_root;
1825 wi.bdi = bdi;
1826 1769
1827 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1770 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1828 nilfs_segbuf_prepare_write(segbuf, &wi); 1771 ret = nilfs_segbuf_write(segbuf, nilfs);
1829 err = nilfs_segbuf_write(segbuf, &wi); 1772 if (ret)
1830 1773 break;
1831 res = nilfs_segbuf_wait(segbuf, &wi);
1832 err = err ? : res;
1833 if (err)
1834 return err;
1835 } 1774 }
1836 return 0; 1775 list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
1776 return ret;
1837} 1777}
1838 1778
1839static void __nilfs_end_page_io(struct page *page, int err) 1779static void __nilfs_end_page_io(struct page *page, int err)
@@ -1911,15 +1851,17 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1911 } 1851 }
1912} 1852}
1913 1853
1914static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, 1854static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1915 struct page *failed_page, int err) 1855 struct buffer_head *bh_sr, int err)
1916{ 1856{
1917 struct nilfs_segment_buffer *segbuf; 1857 struct nilfs_segment_buffer *segbuf;
1918 struct page *bd_page = NULL, *fs_page = NULL; 1858 struct page *bd_page = NULL, *fs_page = NULL;
1859 struct buffer_head *bh;
1919 1860
1920 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1861 if (list_empty(logs))
1921 struct buffer_head *bh; 1862 return;
1922 1863
1864 list_for_each_entry(segbuf, logs, sb_list) {
1923 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1865 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
1924 b_assoc_buffers) { 1866 b_assoc_buffers) {
1925 if (bh->b_page != bd_page) { 1867 if (bh->b_page != bd_page) {
@@ -1931,7 +1873,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1931 1873
1932 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1874 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1933 b_assoc_buffers) { 1875 b_assoc_buffers) {
1934 if (bh == sci->sc_super_root) { 1876 if (bh == bh_sr) {
1935 if (bh->b_page != bd_page) { 1877 if (bh->b_page != bd_page) {
1936 end_page_writeback(bd_page); 1878 end_page_writeback(bd_page);
1937 bd_page = bh->b_page; 1879 bd_page = bh->b_page;
@@ -1941,7 +1883,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1941 if (bh->b_page != fs_page) { 1883 if (bh->b_page != fs_page) {
1942 nilfs_end_page_io(fs_page, err); 1884 nilfs_end_page_io(fs_page, err);
1943 if (fs_page && fs_page == failed_page) 1885 if (fs_page && fs_page == failed_page)
1944 goto done; 1886 return;
1945 fs_page = bh->b_page; 1887 fs_page = bh->b_page;
1946 } 1888 }
1947 } 1889 }
@@ -1950,8 +1892,34 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1950 end_page_writeback(bd_page); 1892 end_page_writeback(bd_page);
1951 1893
1952 nilfs_end_page_io(fs_page, err); 1894 nilfs_end_page_io(fs_page, err);
1953 done: 1895}
1896
1897static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1898 struct the_nilfs *nilfs, int err)
1899{
1900 LIST_HEAD(logs);
1901 int ret;
1902
1903 list_splice_tail_init(&sci->sc_write_logs, &logs);
1904 ret = nilfs_wait_on_logs(&logs);
1905 if (ret)
1906 nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret);
1907
1908 list_splice_tail_init(&sci->sc_segbufs, &logs);
1909 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1910 nilfs_free_incomplete_logs(&logs, nilfs);
1954 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); 1911 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1912
1913 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1914 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
1915 sci->sc_freesegs,
1916 sci->sc_nfreesegs,
1917 NULL);
1918 WARN_ON(ret); /* do not happen */
1919 }
1920
1921 nilfs_destroy_logs(&logs);
1922 sci->sc_super_root = NULL;
1955} 1923}
1956 1924
1957static void nilfs_set_next_segment(struct the_nilfs *nilfs, 1925static void nilfs_set_next_segment(struct the_nilfs *nilfs,
@@ -1973,7 +1941,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1973 struct the_nilfs *nilfs = sbi->s_nilfs; 1941 struct the_nilfs *nilfs = sbi->s_nilfs;
1974 int update_sr = (sci->sc_super_root != NULL); 1942 int update_sr = (sci->sc_super_root != NULL);
1975 1943
1976 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1944 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1977 struct buffer_head *bh; 1945 struct buffer_head *bh;
1978 1946
1979 list_for_each_entry(bh, &segbuf->sb_segsum_buffers, 1947 list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
@@ -2046,7 +2014,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
2046 2014
2047 sci->sc_nblk_inc += sci->sc_nblk_this_inc; 2015 sci->sc_nblk_inc += sci->sc_nblk_this_inc;
2048 2016
2049 segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); 2017 segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
2050 nilfs_set_next_segment(nilfs, segbuf); 2018 nilfs_set_next_segment(nilfs, segbuf);
2051 2019
2052 if (update_sr) { 2020 if (update_sr) {
@@ -2057,10 +2025,23 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
2057 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 2025 clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
2058 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); 2026 clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
2059 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 2027 set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2028 nilfs_segctor_clear_metadata_dirty(sci);
2060 } else 2029 } else
2061 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); 2030 clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
2062} 2031}
2063 2032
2033static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
2034{
2035 int ret;
2036
2037 ret = nilfs_wait_on_logs(&sci->sc_write_logs);
2038 if (!ret) {
2039 nilfs_segctor_complete_write(sci);
2040 nilfs_destroy_logs(&sci->sc_write_logs);
2041 }
2042 return ret;
2043}
2044
2064static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, 2045static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
2065 struct nilfs_sb_info *sbi) 2046 struct nilfs_sb_info *sbi)
2066{ 2047{
@@ -2173,7 +2154,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2173 /* Avoid empty segment */ 2154 /* Avoid empty segment */
2174 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2155 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2175 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { 2156 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
2176 nilfs_segctor_end_construction(sci, nilfs, 1); 2157 nilfs_segctor_abort_construction(sci, nilfs, 1);
2177 goto out; 2158 goto out;
2178 } 2159 }
2179 2160
@@ -2187,7 +2168,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2187 if (has_sr) { 2168 if (has_sr) {
2188 err = nilfs_segctor_fill_in_checkpoint(sci); 2169 err = nilfs_segctor_fill_in_checkpoint(sci);
2189 if (unlikely(err)) 2170 if (unlikely(err))
2190 goto failed_to_make_up; 2171 goto failed_to_write;
2191 2172
2192 nilfs_segctor_fill_in_super_root(sci, nilfs); 2173 nilfs_segctor_fill_in_super_root(sci, nilfs);
2193 } 2174 }
@@ -2195,42 +2176,46 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2195 2176
2196 /* Write partial segments */ 2177 /* Write partial segments */
2197 err = nilfs_segctor_prepare_write(sci, &failed_page); 2178 err = nilfs_segctor_prepare_write(sci, &failed_page);
2198 if (unlikely(err)) 2179 if (err) {
2180 nilfs_abort_logs(&sci->sc_segbufs, failed_page,
2181 sci->sc_super_root, err);
2199 goto failed_to_write; 2182 goto failed_to_write;
2200 2183 }
2201 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); 2184 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed);
2202 2185
2203 err = nilfs_segctor_write(sci, nilfs->ns_bdi); 2186 err = nilfs_segctor_write(sci, nilfs);
2204 if (unlikely(err)) 2187 if (unlikely(err))
2205 goto failed_to_write; 2188 goto failed_to_write;
2206 2189
2207 nilfs_segctor_complete_write(sci); 2190 if (sci->sc_stage.scnt == NILFS_ST_DONE ||
2208 2191 nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
2209 /* Commit segments */ 2192 /*
2210 if (has_sr) 2193 * At this point, we avoid double buffering
2211 nilfs_segctor_clear_metadata_dirty(sci); 2194 * for blocksize < pagesize because page dirty
2212 2195 * flag is turned off during write and dirty
2213 nilfs_segctor_end_construction(sci, nilfs, 0); 2196 * buffers are not properly collected for
2214 2197 * pages crossing over segments.
2198 */
2199 err = nilfs_segctor_wait(sci);
2200 if (err)
2201 goto failed_to_write;
2202 }
2215 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2203 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2216 2204
2205 sci->sc_super_root = NULL;
2206
2217 out: 2207 out:
2218 nilfs_segctor_destroy_segment_buffers(sci);
2219 nilfs_segctor_check_out_files(sci, sbi); 2208 nilfs_segctor_check_out_files(sci, sbi);
2220 return err; 2209 return err;
2221 2210
2222 failed_to_write: 2211 failed_to_write:
2223 nilfs_segctor_abort_write(sci, failed_page, err);
2224 nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile);
2225
2226 failed_to_make_up:
2227 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2212 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2228 nilfs_redirty_inodes(&sci->sc_dirty_files); 2213 nilfs_redirty_inodes(&sci->sc_dirty_files);
2229 2214
2230 failed: 2215 failed:
2231 if (nilfs_doing_gc()) 2216 if (nilfs_doing_gc())
2232 nilfs_redirty_inodes(&sci->sc_gc_inodes); 2217 nilfs_redirty_inodes(&sci->sc_gc_inodes);
2233 nilfs_segctor_end_construction(sci, nilfs, err); 2218 nilfs_segctor_abort_construction(sci, nilfs, err);
2234 goto out; 2219 goto out;
2235} 2220}
2236 2221
@@ -2559,7 +2544,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2559 2544
2560 sci->sc_freesegs = kbufs[4]; 2545 sci->sc_freesegs = kbufs[4];
2561 sci->sc_nfreesegs = argv[4].v_nmembs; 2546 sci->sc_nfreesegs = argv[4].v_nmembs;
2562 list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); 2547 list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
2563 2548
2564 for (;;) { 2549 for (;;) {
2565 nilfs_segctor_accept(sci, &req); 2550 nilfs_segctor_accept(sci, &req);
@@ -2788,6 +2773,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
2788 spin_lock_init(&sci->sc_state_lock); 2773 spin_lock_init(&sci->sc_state_lock);
2789 INIT_LIST_HEAD(&sci->sc_dirty_files); 2774 INIT_LIST_HEAD(&sci->sc_dirty_files);
2790 INIT_LIST_HEAD(&sci->sc_segbufs); 2775 INIT_LIST_HEAD(&sci->sc_segbufs);
2776 INIT_LIST_HEAD(&sci->sc_write_logs);
2791 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2777 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2792 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2778 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2793 2779
@@ -2855,6 +2841,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2855 } 2841 }
2856 2842
2857 WARN_ON(!list_empty(&sci->sc_segbufs)); 2843 WARN_ON(!list_empty(&sci->sc_segbufs));
2844 WARN_ON(!list_empty(&sci->sc_write_logs));
2858 2845
2859 down_write(&sbi->s_nilfs->ns_segctor_sem); 2846 down_write(&sbi->s_nilfs->ns_segctor_sem);
2860 2847
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 0d2a475a741b..3d3ab2f9864c 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -97,6 +97,7 @@ struct nilfs_segsum_pointer {
97 * @sc_dsync_start: start byte offset of data pages 97 * @sc_dsync_start: start byte offset of data pages
98 * @sc_dsync_end: end byte offset of data pages (inclusive) 98 * @sc_dsync_end: end byte offset of data pages (inclusive)
99 * @sc_segbufs: List of segment buffers 99 * @sc_segbufs: List of segment buffers
100 * @sc_write_logs: List of segment buffers to hold logs under writing
100 * @sc_segbuf_nblocks: Number of available blocks in segment buffers. 101 * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
101 * @sc_curseg: Current segment buffer 102 * @sc_curseg: Current segment buffer
102 * @sc_super_root: Pointer to the super root buffer 103 * @sc_super_root: Pointer to the super root buffer
@@ -143,6 +144,7 @@ struct nilfs_sc_info {
143 144
144 /* Segment buffers */ 145 /* Segment buffers */
145 struct list_head sc_segbufs; 146 struct list_head sc_segbufs;
147 struct list_head sc_write_logs;
146 unsigned long sc_segbuf_nblocks; 148 unsigned long sc_segbuf_nblocks;
147 struct nilfs_segment_buffer *sc_curseg; 149 struct nilfs_segment_buffer *sc_curseg;
148 struct buffer_head *sc_super_root; 150 struct buffer_head *sc_super_root;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 37994d4a59cc..b6c36d0cc331 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -31,6 +31,16 @@
31#include "sufile.h" 31#include "sufile.h"
32 32
33 33
34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs;
37};
38
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
40{
41 return (struct nilfs_sufile_info *)NILFS_MDT(sufile);
42}
43
34static inline unsigned long 44static inline unsigned long
35nilfs_sufile_segment_usages_per_block(const struct inode *sufile) 45nilfs_sufile_segment_usages_per_block(const struct inode *sufile)
36{ 46{
@@ -62,14 +72,6 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr,
62 max - curr + 1); 72 max - curr + 1);
63} 73}
64 74
65static inline struct nilfs_sufile_header *
66nilfs_sufile_block_get_header(const struct inode *sufile,
67 struct buffer_head *bh,
68 void *kaddr)
69{
70 return kaddr + bh_offset(bh);
71}
72
73static struct nilfs_segment_usage * 75static struct nilfs_segment_usage *
74nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, 76nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum,
75 struct buffer_head *bh, void *kaddr) 77 struct buffer_head *bh, void *kaddr)
@@ -110,6 +112,15 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
110} 112}
111 113
112/** 114/**
115 * nilfs_sufile_get_ncleansegs - return the number of clean segments
116 * @sufile: inode of segment usage file
117 */
118unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile)
119{
120 return NILFS_SUI(sufile)->ncleansegs;
121}
122
123/**
113 * nilfs_sufile_updatev - modify multiple segment usages at a time 124 * nilfs_sufile_updatev - modify multiple segment usages at a time
114 * @sufile: inode of segment usage file 125 * @sufile: inode of segment usage file
115 * @segnumv: array of segment numbers 126 * @segnumv: array of segment numbers
@@ -270,7 +281,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
270 if (ret < 0) 281 if (ret < 0)
271 goto out_sem; 282 goto out_sem;
272 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 283 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
273 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 284 header = kaddr + bh_offset(header_bh);
274 ncleansegs = le64_to_cpu(header->sh_ncleansegs); 285 ncleansegs = le64_to_cpu(header->sh_ncleansegs);
275 last_alloc = le64_to_cpu(header->sh_last_alloc); 286 last_alloc = le64_to_cpu(header->sh_last_alloc);
276 kunmap_atomic(kaddr, KM_USER0); 287 kunmap_atomic(kaddr, KM_USER0);
@@ -302,13 +313,13 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
302 kunmap_atomic(kaddr, KM_USER0); 313 kunmap_atomic(kaddr, KM_USER0);
303 314
304 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 315 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
305 header = nilfs_sufile_block_get_header( 316 header = kaddr + bh_offset(header_bh);
306 sufile, header_bh, kaddr);
307 le64_add_cpu(&header->sh_ncleansegs, -1); 317 le64_add_cpu(&header->sh_ncleansegs, -1);
308 le64_add_cpu(&header->sh_ndirtysegs, 1); 318 le64_add_cpu(&header->sh_ndirtysegs, 1);
309 header->sh_last_alloc = cpu_to_le64(segnum); 319 header->sh_last_alloc = cpu_to_le64(segnum);
310 kunmap_atomic(kaddr, KM_USER0); 320 kunmap_atomic(kaddr, KM_USER0);
311 321
322 NILFS_SUI(sufile)->ncleansegs--;
312 nilfs_mdt_mark_buffer_dirty(header_bh); 323 nilfs_mdt_mark_buffer_dirty(header_bh);
313 nilfs_mdt_mark_buffer_dirty(su_bh); 324 nilfs_mdt_mark_buffer_dirty(su_bh);
314 nilfs_mdt_mark_dirty(sufile); 325 nilfs_mdt_mark_dirty(sufile);
@@ -351,6 +362,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
351 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
352 363
353 nilfs_sufile_mod_counter(header_bh, -1, 1); 364 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--;
366
354 nilfs_mdt_mark_buffer_dirty(su_bh); 367 nilfs_mdt_mark_buffer_dirty(su_bh);
355 nilfs_mdt_mark_dirty(sufile); 368 nilfs_mdt_mark_dirty(sufile);
356} 369}
@@ -380,6 +393,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
380 kunmap_atomic(kaddr, KM_USER0); 393 kunmap_atomic(kaddr, KM_USER0);
381 394
382 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean;
397
383 nilfs_mdt_mark_buffer_dirty(su_bh); 398 nilfs_mdt_mark_buffer_dirty(su_bh);
384 nilfs_mdt_mark_dirty(sufile); 399 nilfs_mdt_mark_dirty(sufile);
385} 400}
@@ -409,79 +424,65 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
409 nilfs_mdt_mark_buffer_dirty(su_bh); 424 nilfs_mdt_mark_buffer_dirty(su_bh);
410 425
411 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++;
428
412 nilfs_mdt_mark_dirty(sufile); 429 nilfs_mdt_mark_dirty(sufile);
413} 430}
414 431
415/** 432/**
416 * nilfs_sufile_get_segment_usage - get a segment usage 433 * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty
417 * @sufile: inode of segment usage file 434 * @sufile: inode of segment usage file
418 * @segnum: segment number 435 * @segnum: segment number
419 * @sup: pointer to segment usage
420 * @bhp: pointer to buffer head
421 *
422 * Description: nilfs_sufile_get_segment_usage() acquires the segment usage
423 * specified by @segnum.
424 *
425 * Return Value: On success, 0 is returned, and the segment usage and the
426 * buffer head of the buffer on which the segment usage is located are stored
427 * in the place pointed by @sup and @bhp, respectively. On error, one of the
428 * following negative error codes is returned.
429 *
430 * %-EIO - I/O error.
431 *
432 * %-ENOMEM - Insufficient amount of memory available.
433 *
434 * %-EINVAL - Invalid segment usage number.
435 */ 436 */
436int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, 437int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
437 struct nilfs_segment_usage **sup,
438 struct buffer_head **bhp)
439{ 438{
440 struct buffer_head *bh; 439 struct buffer_head *bh;
441 struct nilfs_segment_usage *su;
442 void *kaddr;
443 int ret; 440 int ret;
444 441
445 /* segnum is 0 origin */ 442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
446 if (segnum >= nilfs_sufile_get_nsegments(sufile)) 443 if (!ret) {
447 return -EINVAL; 444 nilfs_mdt_mark_buffer_dirty(bh);
448 down_write(&NILFS_MDT(sufile)->mi_sem); 445 nilfs_mdt_mark_dirty(sufile);
449 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh);
450 if (ret < 0)
451 goto out_sem;
452 kaddr = kmap(bh->b_page);
453 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
454 if (nilfs_segment_usage_error(su)) {
455 kunmap(bh->b_page);
456 brelse(bh); 446 brelse(bh);
457 ret = -EINVAL;
458 goto out_sem;
459 } 447 }
460
461 if (sup != NULL)
462 *sup = su;
463 *bhp = bh;
464
465 out_sem:
466 up_write(&NILFS_MDT(sufile)->mi_sem);
467 return ret; 448 return ret;
468} 449}
469 450
470/** 451/**
471 * nilfs_sufile_put_segment_usage - put a segment usage 452 * nilfs_sufile_set_segment_usage - set usage of a segment
472 * @sufile: inode of segment usage file 453 * @sufile: inode of segment usage file
473 * @segnum: segment number 454 * @segnum: segment number
474 * @bh: buffer head 455 * @nblocks: number of live blocks in the segment
475 * 456 * @modtime: modification time (option)
476 * Description: nilfs_sufile_put_segment_usage() releases the segment usage
477 * specified by @segnum. @bh must be the buffer head which have been returned
478 * by a previous call to nilfs_sufile_get_segment_usage() with @segnum.
479 */ 457 */
480void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, 458int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
481 struct buffer_head *bh) 459 unsigned long nblocks, time_t modtime)
482{ 460{
483 kunmap(bh->b_page); 461 struct buffer_head *bh;
462 struct nilfs_segment_usage *su;
463 void *kaddr;
464 int ret;
465
466 down_write(&NILFS_MDT(sufile)->mi_sem);
467 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
468 if (ret < 0)
469 goto out_sem;
470
471 kaddr = kmap_atomic(bh->b_page, KM_USER0);
472 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
473 WARN_ON(nilfs_segment_usage_error(su));
474 if (modtime)
475 su->su_lastmod = cpu_to_le64(modtime);
476 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0);
478
479 nilfs_mdt_mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile);
484 brelse(bh); 481 brelse(bh);
482
483 out_sem:
484 up_write(&NILFS_MDT(sufile)->mi_sem);
485 return ret;
485} 486}
486 487
487/** 488/**
@@ -515,7 +516,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
515 goto out_sem; 516 goto out_sem;
516 517
517 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 518 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
518 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 519 header = kaddr + bh_offset(header_bh);
519 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); 520 sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
520 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); 521 sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
521 sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); 522 sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs);
@@ -532,33 +533,6 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
532 return ret; 533 return ret;
533} 534}
534 535
535/**
536 * nilfs_sufile_get_ncleansegs - get the number of clean segments
537 * @sufile: inode of segment usage file
538 * @nsegsp: pointer to the number of clean segments
539 *
540 * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean
541 * segments.
542 *
543 * Return Value: On success, 0 is returned and the number of clean segments is
544 * stored in the place pointed by @nsegsp. On error, one of the following
545 * negative error codes is returned.
546 *
547 * %-EIO - I/O error.
548 *
549 * %-ENOMEM - Insufficient amount of memory available.
550 */
551int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
552{
553 struct nilfs_sustat sustat;
554 int ret;
555
556 ret = nilfs_sufile_get_stat(sufile, &sustat);
557 if (ret == 0)
558 *nsegsp = sustat.ss_ncleansegs;
559 return ret;
560}
561
562void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, 536void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
563 struct buffer_head *header_bh, 537 struct buffer_head *header_bh,
564 struct buffer_head *su_bh) 538 struct buffer_head *su_bh)
@@ -577,8 +551,10 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
577 nilfs_segment_usage_set_error(su); 551 nilfs_segment_usage_set_error(su);
578 kunmap_atomic(kaddr, KM_USER0); 552 kunmap_atomic(kaddr, KM_USER0);
579 553
580 if (suclean) 554 if (suclean) {
581 nilfs_sufile_mod_counter(header_bh, -1, 0); 555 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--;
557 }
582 nilfs_mdt_mark_buffer_dirty(su_bh); 558 nilfs_mdt_mark_buffer_dirty(su_bh);
583 nilfs_mdt_mark_dirty(sufile); 559 nilfs_mdt_mark_dirty(sufile);
584} 560}
@@ -657,3 +633,48 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
657 up_read(&NILFS_MDT(sufile)->mi_sem); 633 up_read(&NILFS_MDT(sufile)->mi_sem);
658 return ret; 634 return ret;
659} 635}
636
637/**
638 * nilfs_sufile_read - read sufile inode
639 * @sufile: sufile inode
640 * @raw_inode: on-disk sufile inode
641 */
642int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode)
643{
644 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
645 struct buffer_head *header_bh;
646 struct nilfs_sufile_header *header;
647 void *kaddr;
648 int ret;
649
650 ret = nilfs_read_inode_common(sufile, raw_inode);
651 if (ret < 0)
652 return ret;
653
654 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
655 if (!ret) {
656 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
657 header = kaddr + bh_offset(header_bh);
658 sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
659 kunmap_atomic(kaddr, KM_USER0);
660 brelse(header_bh);
661 }
662 return ret;
663}
664
665/**
666 * nilfs_sufile_new - create sufile
667 * @nilfs: nilfs object
668 * @susize: size of a segment usage entry
669 */
670struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize)
671{
672 struct inode *sufile;
673
674 sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO,
675 sizeof(struct nilfs_sufile_info));
676 if (sufile)
677 nilfs_mdt_set_entry_size(sufile, susize,
678 sizeof(struct nilfs_sufile_header));
679 return sufile;
680}
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 0e99e5c0bd0f..15163b8aff7d 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -34,14 +34,13 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
34 return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; 34 return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38
37int nilfs_sufile_alloc(struct inode *, __u64 *); 39int nilfs_sufile_alloc(struct inode *, __u64 *);
38int nilfs_sufile_get_segment_usage(struct inode *, __u64, 40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
39 struct nilfs_segment_usage **, 41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
40 struct buffer_head **); 42 unsigned long nblocks, time_t modtime);
41void nilfs_sufile_put_segment_usage(struct inode *, __u64,
42 struct buffer_head *);
43int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); 43int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
44int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
45ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, 44ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
46 size_t); 45 size_t);
47 46
@@ -62,6 +61,9 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
63 struct buffer_head *); 62 struct buffer_head *);
64 63
64int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode);
65struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize);
66
65/** 67/**
66 * nilfs_sufile_scrap - make a segment garbage 68 * nilfs_sufile_scrap - make a segment garbage
67 * @sufile: inode of segment usage file 69 * @sufile: inode of segment usage file
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 644e66727dd0..5403b3ef3a42 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -363,14 +363,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
363 list_add(&sbi->s_list, &nilfs->ns_supers); 363 list_add(&sbi->s_list, &nilfs->ns_supers);
364 up_write(&nilfs->ns_super_sem); 364 up_write(&nilfs->ns_super_sem);
365 365
366 sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO); 366 sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size);
367 if (!sbi->s_ifile) 367 if (!sbi->s_ifile)
368 return -ENOMEM; 368 return -ENOMEM;
369 369
370 err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size);
371 if (unlikely(err))
372 goto failed;
373
374 down_read(&nilfs->ns_segctor_sem); 370 down_read(&nilfs->ns_segctor_sem);
375 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, 371 err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
376 &bh_cp); 372 &bh_cp);
@@ -411,7 +407,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
411{ 407{
412 struct the_nilfs *nilfs = sbi->s_nilfs; 408 struct the_nilfs *nilfs = sbi->s_nilfs;
413 409
414 nilfs_mdt_clear(sbi->s_ifile);
415 nilfs_mdt_destroy(sbi->s_ifile); 410 nilfs_mdt_destroy(sbi->s_ifile);
416 sbi->s_ifile = NULL; 411 sbi->s_ifile = NULL;
417 down_write(&nilfs->ns_super_sem); 412 down_write(&nilfs->ns_super_sem);
@@ -419,22 +414,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
419 up_write(&nilfs->ns_super_sem); 414 up_write(&nilfs->ns_super_sem);
420} 415}
421 416
422static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
423{
424 struct the_nilfs *nilfs = sbi->s_nilfs;
425 int err = 0;
426
427 down_write(&nilfs->ns_sem);
428 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) {
429 nilfs->ns_mount_state |= NILFS_VALID_FS;
430 err = nilfs_commit_super(sbi, 1);
431 if (likely(!err))
432 printk(KERN_INFO "NILFS: recovery complete.\n");
433 }
434 up_write(&nilfs->ns_sem);
435 return err;
436}
437
438static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) 417static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
439{ 418{
440 struct super_block *sb = dentry->d_sb; 419 struct super_block *sb = dentry->d_sb;
@@ -490,7 +469,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
490 struct nilfs_sb_info *sbi = NILFS_SB(sb); 469 struct nilfs_sb_info *sbi = NILFS_SB(sb);
491 470
492 if (!nilfs_test_opt(sbi, BARRIER)) 471 if (!nilfs_test_opt(sbi, BARRIER))
493 seq_printf(seq, ",barrier=off"); 472 seq_printf(seq, ",nobarrier");
494 if (nilfs_test_opt(sbi, SNAPSHOT)) 473 if (nilfs_test_opt(sbi, SNAPSHOT))
495 seq_printf(seq, ",cp=%llu", 474 seq_printf(seq, ",cp=%llu",
496 (unsigned long long int)sbi->s_snapshot_cno); 475 (unsigned long long int)sbi->s_snapshot_cno);
@@ -500,6 +479,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
500 seq_printf(seq, ",errors=panic"); 479 seq_printf(seq, ",errors=panic");
501 if (nilfs_test_opt(sbi, STRICT_ORDER)) 480 if (nilfs_test_opt(sbi, STRICT_ORDER))
502 seq_printf(seq, ",order=strict"); 481 seq_printf(seq, ",order=strict");
482 if (nilfs_test_opt(sbi, NORECOVERY))
483 seq_printf(seq, ",norecovery");
503 484
504 return 0; 485 return 0;
505} 486}
@@ -568,7 +549,7 @@ static const struct export_operations nilfs_export_ops = {
568 549
569enum { 550enum {
570 Opt_err_cont, Opt_err_panic, Opt_err_ro, 551 Opt_err_cont, Opt_err_panic, Opt_err_ro,
571 Opt_barrier, Opt_snapshot, Opt_order, 552 Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery,
572 Opt_err, 553 Opt_err,
573}; 554};
574 555
@@ -576,25 +557,13 @@ static match_table_t tokens = {
576 {Opt_err_cont, "errors=continue"}, 557 {Opt_err_cont, "errors=continue"},
577 {Opt_err_panic, "errors=panic"}, 558 {Opt_err_panic, "errors=panic"},
578 {Opt_err_ro, "errors=remount-ro"}, 559 {Opt_err_ro, "errors=remount-ro"},
579 {Opt_barrier, "barrier=%s"}, 560 {Opt_nobarrier, "nobarrier"},
580 {Opt_snapshot, "cp=%u"}, 561 {Opt_snapshot, "cp=%u"},
581 {Opt_order, "order=%s"}, 562 {Opt_order, "order=%s"},
563 {Opt_norecovery, "norecovery"},
582 {Opt_err, NULL} 564 {Opt_err, NULL}
583}; 565};
584 566
585static int match_bool(substring_t *s, int *result)
586{
587 int len = s->to - s->from;
588
589 if (strncmp(s->from, "on", len) == 0)
590 *result = 1;
591 else if (strncmp(s->from, "off", len) == 0)
592 *result = 0;
593 else
594 return 1;
595 return 0;
596}
597
598static int parse_options(char *options, struct super_block *sb) 567static int parse_options(char *options, struct super_block *sb)
599{ 568{
600 struct nilfs_sb_info *sbi = NILFS_SB(sb); 569 struct nilfs_sb_info *sbi = NILFS_SB(sb);
@@ -612,13 +581,8 @@ static int parse_options(char *options, struct super_block *sb)
612 581
613 token = match_token(p, tokens, args); 582 token = match_token(p, tokens, args);
614 switch (token) { 583 switch (token) {
615 case Opt_barrier: 584 case Opt_nobarrier:
616 if (match_bool(&args[0], &option)) 585 nilfs_clear_opt(sbi, BARRIER);
617 return 0;
618 if (option)
619 nilfs_set_opt(sbi, BARRIER);
620 else
621 nilfs_clear_opt(sbi, BARRIER);
622 break; 586 break;
623 case Opt_order: 587 case Opt_order:
624 if (strcmp(args[0].from, "relaxed") == 0) 588 if (strcmp(args[0].from, "relaxed") == 0)
@@ -647,6 +611,9 @@ static int parse_options(char *options, struct super_block *sb)
647 sbi->s_snapshot_cno = option; 611 sbi->s_snapshot_cno = option;
648 nilfs_set_opt(sbi, SNAPSHOT); 612 nilfs_set_opt(sbi, SNAPSHOT);
649 break; 613 break;
614 case Opt_norecovery:
615 nilfs_set_opt(sbi, NORECOVERY);
616 break;
650 default: 617 default:
651 printk(KERN_ERR 618 printk(KERN_ERR
652 "NILFS: Unrecognized mount option \"%s\"\n", p); 619 "NILFS: Unrecognized mount option \"%s\"\n", p);
@@ -672,9 +639,7 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi)
672 int mnt_count = le16_to_cpu(sbp->s_mnt_count); 639 int mnt_count = le16_to_cpu(sbp->s_mnt_count);
673 640
674 /* nilfs->sem must be locked by the caller. */ 641 /* nilfs->sem must be locked by the caller. */
675 if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { 642 if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
676 printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
677 } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) {
678 printk(KERN_WARNING 643 printk(KERN_WARNING
679 "NILFS warning: mounting fs with errors\n"); 644 "NILFS warning: mounting fs with errors\n");
680#if 0 645#if 0
@@ -782,11 +747,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
782 sb->s_root = NULL; 747 sb->s_root = NULL;
783 sb->s_time_gran = 1; 748 sb->s_time_gran = 1;
784 749
785 if (!nilfs_loaded(nilfs)) { 750 err = load_nilfs(nilfs, sbi);
786 err = load_nilfs(nilfs, sbi); 751 if (err)
787 if (err) 752 goto failed_sbi;
788 goto failed_sbi; 753
789 }
790 cno = nilfs_last_cno(nilfs); 754 cno = nilfs_last_cno(nilfs);
791 755
792 if (sb->s_flags & MS_RDONLY) { 756 if (sb->s_flags & MS_RDONLY) {
@@ -854,12 +818,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
854 up_write(&nilfs->ns_sem); 818 up_write(&nilfs->ns_sem);
855 } 819 }
856 820
857 err = nilfs_mark_recovery_complete(sbi);
858 if (unlikely(err)) {
859 printk(KERN_ERR "NILFS: recovery failed.\n");
860 goto failed_root;
861 }
862
863 down_write(&nilfs->ns_super_sem); 821 down_write(&nilfs->ns_super_sem);
864 if (!nilfs_test_opt(sbi, SNAPSHOT)) 822 if (!nilfs_test_opt(sbi, SNAPSHOT))
865 nilfs->ns_current = sbi; 823 nilfs->ns_current = sbi;
@@ -867,10 +825,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
867 825
868 return 0; 826 return 0;
869 827
870 failed_root:
871 dput(sb->s_root);
872 sb->s_root = NULL;
873
874 failed_segctor: 828 failed_segctor:
875 nilfs_detach_segment_constructor(sbi); 829 nilfs_detach_segment_constructor(sbi);
876 830
@@ -915,6 +869,14 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
915 goto restore_opts; 869 goto restore_opts;
916 } 870 }
917 871
872 if (!nilfs_valid_fs(nilfs)) {
873 printk(KERN_WARNING "NILFS (device %s): couldn't "
874 "remount because the filesystem is in an "
875 "incomplete recovery state.\n", sb->s_id);
876 err = -EINVAL;
877 goto restore_opts;
878 }
879
918 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 880 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
919 goto out; 881 goto out;
920 if (*flags & MS_RDONLY) { 882 if (*flags & MS_RDONLY) {
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ad391a8c3e7e..6241e1722efc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -146,13 +146,9 @@ void put_nilfs(struct the_nilfs *nilfs)
146 146
147 might_sleep(); 147 might_sleep();
148 if (nilfs_loaded(nilfs)) { 148 if (nilfs_loaded(nilfs)) {
149 nilfs_mdt_clear(nilfs->ns_sufile);
150 nilfs_mdt_destroy(nilfs->ns_sufile); 149 nilfs_mdt_destroy(nilfs->ns_sufile);
151 nilfs_mdt_clear(nilfs->ns_cpfile);
152 nilfs_mdt_destroy(nilfs->ns_cpfile); 150 nilfs_mdt_destroy(nilfs->ns_cpfile);
153 nilfs_mdt_clear(nilfs->ns_dat);
154 nilfs_mdt_destroy(nilfs->ns_dat); 151 nilfs_mdt_destroy(nilfs->ns_dat);
155 /* XXX: how and when to clear nilfs->ns_gc_dat? */
156 nilfs_mdt_destroy(nilfs->ns_gc_dat); 152 nilfs_mdt_destroy(nilfs->ns_gc_dat);
157 } 153 }
158 if (nilfs_init(nilfs)) { 154 if (nilfs_init(nilfs)) {
@@ -166,7 +162,6 @@ void put_nilfs(struct the_nilfs *nilfs)
166static int nilfs_load_super_root(struct the_nilfs *nilfs, 162static int nilfs_load_super_root(struct the_nilfs *nilfs,
167 struct nilfs_sb_info *sbi, sector_t sr_block) 163 struct nilfs_sb_info *sbi, sector_t sr_block)
168{ 164{
169 static struct lock_class_key dat_lock_key;
170 struct buffer_head *bh_sr; 165 struct buffer_head *bh_sr;
171 struct nilfs_super_root *raw_sr; 166 struct nilfs_super_root *raw_sr;
172 struct nilfs_super_block **sbp = nilfs->ns_sbp; 167 struct nilfs_super_block **sbp = nilfs->ns_sbp;
@@ -187,51 +182,36 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
187 inode_size = nilfs->ns_inode_size; 182 inode_size = nilfs->ns_inode_size;
188 183
189 err = -ENOMEM; 184 err = -ENOMEM;
190 nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); 185 nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size);
191 if (unlikely(!nilfs->ns_dat)) 186 if (unlikely(!nilfs->ns_dat))
192 goto failed; 187 goto failed;
193 188
194 nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); 189 nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size);
195 if (unlikely(!nilfs->ns_gc_dat)) 190 if (unlikely(!nilfs->ns_gc_dat))
196 goto failed_dat; 191 goto failed_dat;
197 192
198 nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO); 193 nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size);
199 if (unlikely(!nilfs->ns_cpfile)) 194 if (unlikely(!nilfs->ns_cpfile))
200 goto failed_gc_dat; 195 goto failed_gc_dat;
201 196
202 nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO); 197 nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size);
203 if (unlikely(!nilfs->ns_sufile)) 198 if (unlikely(!nilfs->ns_sufile))
204 goto failed_cpfile; 199 goto failed_cpfile;
205 200
206 err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size);
207 if (unlikely(err))
208 goto failed_sufile;
209
210 err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size);
211 if (unlikely(err))
212 goto failed_sufile;
213
214 lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key);
215 lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key);
216
217 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); 201 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
218 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
219 sizeof(struct nilfs_cpfile_header));
220 nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size,
221 sizeof(struct nilfs_sufile_header));
222 202
223 err = nilfs_mdt_read_inode_direct( 203 err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data +
224 nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); 204 NILFS_SR_DAT_OFFSET(inode_size));
225 if (unlikely(err)) 205 if (unlikely(err))
226 goto failed_sufile; 206 goto failed_sufile;
227 207
228 err = nilfs_mdt_read_inode_direct( 208 err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data +
229 nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); 209 NILFS_SR_CPFILE_OFFSET(inode_size));
230 if (unlikely(err)) 210 if (unlikely(err))
231 goto failed_sufile; 211 goto failed_sufile;
232 212
233 err = nilfs_mdt_read_inode_direct( 213 err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data +
234 nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); 214 NILFS_SR_SUFILE_OFFSET(inode_size));
235 if (unlikely(err)) 215 if (unlikely(err))
236 goto failed_sufile; 216 goto failed_sufile;
237 217
@@ -281,29 +261,30 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
281 struct nilfs_recovery_info ri; 261 struct nilfs_recovery_info ri;
282 unsigned int s_flags = sbi->s_super->s_flags; 262 unsigned int s_flags = sbi->s_super->s_flags;
283 int really_read_only = bdev_read_only(nilfs->ns_bdev); 263 int really_read_only = bdev_read_only(nilfs->ns_bdev);
284 unsigned valid_fs; 264 int valid_fs = nilfs_valid_fs(nilfs);
285 int err = 0; 265 int err;
286
287 nilfs_init_recovery_info(&ri);
288 266
289 down_write(&nilfs->ns_sem); 267 if (nilfs_loaded(nilfs)) {
290 valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); 268 if (valid_fs ||
291 up_write(&nilfs->ns_sem); 269 ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY)))
270 return 0;
271 printk(KERN_ERR "NILFS: the filesystem is in an incomplete "
272 "recovery state.\n");
273 return -EINVAL;
274 }
292 275
293 if (!valid_fs && (s_flags & MS_RDONLY)) { 276 if (!valid_fs) {
294 printk(KERN_INFO "NILFS: INFO: recovery " 277 printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n");
295 "required for readonly filesystem.\n"); 278 if (s_flags & MS_RDONLY) {
296 if (really_read_only) { 279 printk(KERN_INFO "NILFS: INFO: recovery "
297 printk(KERN_ERR "NILFS: write access " 280 "required for readonly filesystem.\n");
298 "unavailable, cannot proceed.\n"); 281 printk(KERN_INFO "NILFS: write access will "
299 err = -EROFS; 282 "be enabled during recovery.\n");
300 goto failed;
301 } 283 }
302 printk(KERN_INFO "NILFS: write access will "
303 "be enabled during recovery.\n");
304 sbi->s_super->s_flags &= ~MS_RDONLY;
305 } 284 }
306 285
286 nilfs_init_recovery_info(&ri);
287
307 err = nilfs_search_super_root(nilfs, sbi, &ri); 288 err = nilfs_search_super_root(nilfs, sbi, &ri);
308 if (unlikely(err)) { 289 if (unlikely(err)) {
309 printk(KERN_ERR "NILFS: error searching super root.\n"); 290 printk(KERN_ERR "NILFS: error searching super root.\n");
@@ -316,19 +297,56 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
316 goto failed; 297 goto failed;
317 } 298 }
318 299
319 if (!valid_fs) { 300 if (valid_fs)
320 err = nilfs_recover_logical_segments(nilfs, sbi, &ri); 301 goto skip_recovery;
321 if (unlikely(err)) { 302
322 nilfs_mdt_destroy(nilfs->ns_cpfile); 303 if (s_flags & MS_RDONLY) {
323 nilfs_mdt_destroy(nilfs->ns_sufile); 304 if (nilfs_test_opt(sbi, NORECOVERY)) {
324 nilfs_mdt_destroy(nilfs->ns_dat); 305 printk(KERN_INFO "NILFS: norecovery option specified. "
325 goto failed; 306 "skipping roll-forward recovery\n");
307 goto skip_recovery;
326 } 308 }
327 if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) 309 if (really_read_only) {
328 sbi->s_super->s_dirt = 1; 310 printk(KERN_ERR "NILFS: write access "
311 "unavailable, cannot proceed.\n");
312 err = -EROFS;
313 goto failed_unload;
314 }
315 sbi->s_super->s_flags &= ~MS_RDONLY;
316 } else if (nilfs_test_opt(sbi, NORECOVERY)) {
317 printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
318 "option was specified for a read/write mount\n");
319 err = -EINVAL;
320 goto failed_unload;
329 } 321 }
330 322
323 err = nilfs_recover_logical_segments(nilfs, sbi, &ri);
324 if (err)
325 goto failed_unload;
326
327 down_write(&nilfs->ns_sem);
328 nilfs->ns_mount_state |= NILFS_VALID_FS;
329 nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
330 err = nilfs_commit_super(sbi, 1);
331 up_write(&nilfs->ns_sem);
332
333 if (err) {
334 printk(KERN_ERR "NILFS: failed to update super block. "
335 "recovery unfinished.\n");
336 goto failed_unload;
337 }
338 printk(KERN_INFO "NILFS: recovery complete.\n");
339
340 skip_recovery:
331 set_nilfs_loaded(nilfs); 341 set_nilfs_loaded(nilfs);
342 nilfs_clear_recovery_info(&ri);
343 sbi->s_super->s_flags = s_flags;
344 return 0;
345
346 failed_unload:
347 nilfs_mdt_destroy(nilfs->ns_cpfile);
348 nilfs_mdt_destroy(nilfs->ns_sufile);
349 nilfs_mdt_destroy(nilfs->ns_dat);
332 350
333 failed: 351 failed:
334 nilfs_clear_recovery_info(&ri); 352 nilfs_clear_recovery_info(&ri);
@@ -632,30 +650,23 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks)
632{ 650{
633 struct inode *dat = nilfs_dat_inode(nilfs); 651 struct inode *dat = nilfs_dat_inode(nilfs);
634 unsigned long ncleansegs; 652 unsigned long ncleansegs;
635 int err;
636 653
637 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 654 down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
638 err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); 655 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
639 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ 656 up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
640 if (likely(!err)) 657 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment;
641 *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; 658 return 0;
642 return err;
643} 659}
644 660
645int nilfs_near_disk_full(struct the_nilfs *nilfs) 661int nilfs_near_disk_full(struct the_nilfs *nilfs)
646{ 662{
647 struct inode *sufile = nilfs->ns_sufile;
648 unsigned long ncleansegs, nincsegs; 663 unsigned long ncleansegs, nincsegs;
649 int ret;
650 664
651 ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); 665 ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile);
652 if (likely(!ret)) { 666 nincsegs = atomic_read(&nilfs->ns_ndirtyblks) /
653 nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / 667 nilfs->ns_blocks_per_segment + 1;
654 nilfs->ns_blocks_per_segment + 1; 668
655 if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) 669 return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs;
656 ret++;
657 }
658 return ret;
659} 670}
660 671
661/** 672/**
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 20abd55881e0..589786e33464 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -258,6 +258,16 @@ static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
258 kfree(sbi); 258 kfree(sbi);
259} 259}
260 260
261static inline int nilfs_valid_fs(struct the_nilfs *nilfs)
262{
263 unsigned valid_fs;
264
265 down_read(&nilfs->ns_sem);
266 valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS);
267 up_read(&nilfs->ns_sem);
268 return valid_fs;
269}
270
261static inline void 271static inline void
262nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, 272nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
263 sector_t *seg_start, sector_t *seg_end) 273 sector_t *seg_start, sector_t *seg_end)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index dcd2040d330c..5ef5f365a5c8 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -69,36 +69,30 @@ static int zero;
69 69
70ctl_table inotify_table[] = { 70ctl_table inotify_table[] = {
71 { 71 {
72 .ctl_name = INOTIFY_MAX_USER_INSTANCES,
73 .procname = "max_user_instances", 72 .procname = "max_user_instances",
74 .data = &inotify_max_user_instances, 73 .data = &inotify_max_user_instances,
75 .maxlen = sizeof(int), 74 .maxlen = sizeof(int),
76 .mode = 0644, 75 .mode = 0644,
77 .proc_handler = &proc_dointvec_minmax, 76 .proc_handler = proc_dointvec_minmax,
78 .strategy = &sysctl_intvec,
79 .extra1 = &zero, 77 .extra1 = &zero,
80 }, 78 },
81 { 79 {
82 .ctl_name = INOTIFY_MAX_USER_WATCHES,
83 .procname = "max_user_watches", 80 .procname = "max_user_watches",
84 .data = &inotify_max_user_watches, 81 .data = &inotify_max_user_watches,
85 .maxlen = sizeof(int), 82 .maxlen = sizeof(int),
86 .mode = 0644, 83 .mode = 0644,
87 .proc_handler = &proc_dointvec_minmax, 84 .proc_handler = proc_dointvec_minmax,
88 .strategy = &sysctl_intvec,
89 .extra1 = &zero, 85 .extra1 = &zero,
90 }, 86 },
91 { 87 {
92 .ctl_name = INOTIFY_MAX_QUEUED_EVENTS,
93 .procname = "max_queued_events", 88 .procname = "max_queued_events",
94 .data = &inotify_max_queued_events, 89 .data = &inotify_max_queued_events,
95 .maxlen = sizeof(int), 90 .maxlen = sizeof(int),
96 .mode = 0644, 91 .mode = 0644,
97 .proc_handler = &proc_dointvec_minmax, 92 .proc_handler = proc_dointvec_minmax,
98 .strategy = &sysctl_intvec,
99 .extra1 = &zero 93 .extra1 = &zero
100 }, 94 },
101 { .ctl_name = 0 } 95 { }
102}; 96};
103#endif /* CONFIG_SYSCTL */ 97#endif /* CONFIG_SYSCTL */
104 98
@@ -747,10 +741,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
747 741
748 /* create/update an inode mark */ 742 /* create/update an inode mark */
749 ret = inotify_update_watch(group, inode, mask); 743 ret = inotify_update_watch(group, inode, mask);
750 if (unlikely(ret))
751 goto path_put_and_out;
752
753path_put_and_out:
754 path_put(&path); 744 path_put(&path);
755fput_and_out: 745fput_and_out:
756 fput_light(filp, fput_needed); 746 fput_light(filp, fput_needed);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 9669541d0119..08f7530e9341 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -927,7 +927,7 @@ lock_retry_remap:
927 return 0; 927 return 0;
928 928
929 ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? 929 ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
930 "EOVERFLOW" : (!err ? "EIO" : "unkown error")); 930 "EOVERFLOW" : (!err ? "EIO" : "unknown error"));
931 return err < 0 ? err : -EIO; 931 return err < 0 ? err : -EIO;
932 932
933read_err: 933read_err:
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 663c0e341f8b..43179ddd336f 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -399,7 +399,7 @@ static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
399 * @cached_page: allocated but as yet unused page 399 * @cached_page: allocated but as yet unused page
400 * @lru_pvec: lru-buffering pagevec of caller 400 * @lru_pvec: lru-buffering pagevec of caller
401 * 401 *
402 * Obtain @nr_pages locked page cache pages from the mapping @maping and 402 * Obtain @nr_pages locked page cache pages from the mapping @mapping and
403 * starting at index @index. 403 * starting at index @index.
404 * 404 *
405 * If a page is newly created, increment its refcount and add it to the 405 * If a page is newly created, increment its refcount and add it to the
@@ -1281,7 +1281,7 @@ rl_not_mapped_enoent:
1281 1281
1282/* 1282/*
1283 * Copy as much as we can into the pages and return the number of bytes which 1283 * Copy as much as we can into the pages and return the number of bytes which
1284 * were sucessfully copied. If a fault is encountered then clear the pages 1284 * were successfully copied. If a fault is encountered then clear the pages
1285 * out to (ofs + bytes) and return the number of bytes which were copied. 1285 * out to (ofs + bytes) and return the number of bytes which were copied.
1286 */ 1286 */
1287static inline size_t ntfs_copy_from_user(struct page **pages, 1287static inline size_t ntfs_copy_from_user(struct page **pages,
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 89b02985c054..4dadcdf3d451 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -338,7 +338,7 @@ err_out:
338 * copy of the complete multi sector transfer deprotected page. On failure, 338 * copy of the complete multi sector transfer deprotected page. On failure,
339 * *@wrp is undefined. 339 * *@wrp is undefined.
340 * 340 *
341 * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current 341 * Simillarly, if @lsn is not NULL, on success *@lsn will be set to the current
342 * logfile lsn according to this restart page. On failure, *@lsn is undefined. 342 * logfile lsn according to this restart page. On failure, *@lsn is undefined.
343 * 343 *
344 * The following error codes are defined: 344 * The following error codes are defined:
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 9ef85e628fe1..79a89184cb5e 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -36,12 +36,11 @@
36/* Definition of the ntfs sysctl. */ 36/* Definition of the ntfs sysctl. */
37static ctl_table ntfs_sysctls[] = { 37static ctl_table ntfs_sysctls[] = {
38 { 38 {
39 .ctl_name = CTL_UNNUMBERED, /* Binary and text IDs. */
40 .procname = "ntfs-debug", 39 .procname = "ntfs-debug",
41 .data = &debug_msgs, /* Data pointer and size. */ 40 .data = &debug_msgs, /* Data pointer and size. */
42 .maxlen = sizeof(debug_msgs), 41 .maxlen = sizeof(debug_msgs),
43 .mode = 0644, /* Mode, proc handler. */ 42 .mode = 0644, /* Mode, proc handler. */
44 .proc_handler = &proc_dointvec 43 .proc_handler = proc_dointvec
45 }, 44 },
46 {} 45 {}
47}; 46};
@@ -49,7 +48,6 @@ static ctl_table ntfs_sysctls[] = {
49/* Define the parent directory /proc/sys/fs. */ 48/* Define the parent directory /proc/sys/fs. */
50static ctl_table sysctls_root[] = { 49static ctl_table sysctls_root[] = {
51 { 50 {
52 .ctl_name = CTL_FS,
53 .procname = "fs", 51 .procname = "fs",
54 .mode = 0555, 52 .mode = 0555,
55 .child = ntfs_sysctls 53 .child = ntfs_sysctls
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 38a42f5d59ff..7c7198a5bc90 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2398,7 +2398,7 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
2398 * 2398 *
2399 * The array is assumed to be large enough to hold an entire path (tree depth). 2399 * The array is assumed to be large enough to hold an entire path (tree depth).
2400 * 2400 *
2401 * Upon succesful return from this function: 2401 * Upon successful return from this function:
2402 * 2402 *
2403 * - The 'right_path' array will contain a path to the leaf block 2403 * - The 'right_path' array will contain a path to the leaf block
2404 * whose range contains e_cpos. 2404 * whose range contains e_cpos.
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index a1163b8b417c..b7428c5d0d3b 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -47,7 +47,7 @@
47 * Calculate the bit offset in the hamming code buffer based on the bit's 47 * Calculate the bit offset in the hamming code buffer based on the bit's
48 * offset in the data buffer. Since the hamming code reserves all 48 * offset in the data buffer. Since the hamming code reserves all
49 * power-of-two bits for parity, the data bit number and the code bit 49 * power-of-two bits for parity, the data bit number and the code bit
50 * number are offest by all the parity bits beforehand. 50 * number are offset by all the parity bits beforehand.
51 * 51 *
52 * Recall that bit numbers in hamming code are 1-based. This function 52 * Recall that bit numbers in hamming code are 1-based. This function
53 * takes the 0-based data bit from the caller. 53 * takes the 0-based data bit from the caller.
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index da794bc07a6c..a3f150e52b02 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -294,10 +294,10 @@ static int sc_seq_show(struct seq_file *seq, void *v)
294 if (sc->sc_sock) { 294 if (sc->sc_sock) {
295 inet = inet_sk(sc->sc_sock->sk); 295 inet = inet_sk(sc->sc_sock->sk);
296 /* the stack's structs aren't sparse endian clean */ 296 /* the stack's structs aren't sparse endian clean */
297 saddr = (__force __be32)inet->saddr; 297 saddr = (__force __be32)inet->inet_saddr;
298 daddr = (__force __be32)inet->daddr; 298 daddr = (__force __be32)inet->inet_daddr;
299 sport = (__force __be16)inet->sport; 299 sport = (__force __be16)inet->inet_sport;
300 dport = (__force __be16)inet->dport; 300 dport = (__force __be16)inet->inet_dport;
301 } 301 }
302 302
303 /* XXX sigh, inet-> doesn't have sparse annotation so any 303 /* XXX sigh, inet-> doesn't have sparse annotation so any
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 83bcaf266b35..03ccf9a7b1f4 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2586,7 +2586,7 @@ fail:
2586 * is complete everywhere. if the target dies while this is 2586 * is complete everywhere. if the target dies while this is
2587 * going on, some nodes could potentially see the target as the 2587 * going on, some nodes could potentially see the target as the
2588 * master, so it is important that my recovery finds the migration 2588 * master, so it is important that my recovery finds the migration
2589 * mle and sets the master to UNKNONWN. */ 2589 * mle and sets the master to UNKNOWN. */
2590 2590
2591 2591
2592 /* wait for new node to assert master */ 2592 /* wait for new node to assert master */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 0d38d67194cb..c5e4a49e3a12 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1855,7 +1855,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
1855 * outstanding lock request, so a cancel convert is 1855 * outstanding lock request, so a cancel convert is
1856 * required. We intentionally overwrite 'ret' - if the 1856 * required. We intentionally overwrite 'ret' - if the
1857 * cancel fails and the lock was granted, it's easier 1857 * cancel fails and the lock was granted, it's easier
1858 * to just bubble sucess back up to the user. 1858 * to just bubble success back up to the user.
1859 */ 1859 */
1860 ret = ocfs2_flock_handle_signal(lockres, level); 1860 ret = ocfs2_flock_handle_signal(lockres, level);
1861 } else if (!ret && (level > lockres->l_level)) { 1861 } else if (!ret && (level > lockres->l_level)) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index de059f490586..3d30a1c974a8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2006,7 +2006,7 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2006 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2008
2009 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 2009 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2010 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2011 pos + count - 1);
2012 if (ret < 0) 2012 if (ret < 0)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 54c16b66327e..bf34c491ae96 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -659,7 +659,7 @@ static int __ocfs2_journal_access(handle_t *handle,
659 659
660 default: 660 default:
661 status = -EINVAL; 661 status = -EINVAL;
662 mlog(ML_ERROR, "Uknown access type!\n"); 662 mlog(ML_ERROR, "Unknown access type!\n");
663 } 663 }
664 if (!status && ocfs2_meta_ecc(osb) && triggers) 664 if (!status && ocfs2_meta_ecc(osb) && triggers)
665 jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 665 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index e5df9d170b0c..123bc520a2c0 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -17,10 +17,6 @@
17 17
18#include "ocfs2.h" 18#include "ocfs2.h"
19 19
20/* Common stuff */
21/* id number of quota format */
22#define QFMT_OCFS2 3
23
24/* 20/*
25 * In-memory structures 21 * In-memory structures
26 */ 22 */
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 1a2c50a759fa..21f9e71223ca 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1325,7 +1325,7 @@ out:
1325 return status; 1325 return status;
1326} 1326}
1327 1327
1328static struct quota_format_ops ocfs2_format_ops = { 1328static const struct quota_format_ops ocfs2_format_ops = {
1329 .check_quota_file = ocfs2_local_check_quota_file, 1329 .check_quota_file = ocfs2_local_check_quota_file,
1330 .read_file_info = ocfs2_local_read_info, 1330 .read_file_info = ocfs2_local_read_info,
1331 .write_file_info = ocfs2_global_write_info, 1331 .write_file_info = ocfs2_global_write_info,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3a0df7a1b810..30967e3f5e43 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2431,7 +2431,7 @@ out:
2431 * we gonna touch and whether we need to create new blocks. 2431 * we gonna touch and whether we need to create new blocks.
2432 * 2432 *
2433 * Normally the refcount blocks store these refcount should be 2433 * Normally the refcount blocks store these refcount should be
2434 * continguous also, so that we can get the number easily. 2434 * contiguous also, so that we can get the number easily.
2435 * As for meta_ac, we will at most add split 2 refcount record and 2435 * As for meta_ac, we will at most add split 2 refcount record and
2436 * 2 more refcount block, so just check it in a rough way. 2436 * 2 more refcount block, so just check it in a rough way.
2437 * 2437 *
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 3f2f1c45b7b6..f3df0baa9a48 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -620,51 +620,46 @@ error:
620 620
621static ctl_table ocfs2_nm_table[] = { 621static ctl_table ocfs2_nm_table[] = {
622 { 622 {
623 .ctl_name = 1,
624 .procname = "hb_ctl_path", 623 .procname = "hb_ctl_path",
625 .data = ocfs2_hb_ctl_path, 624 .data = ocfs2_hb_ctl_path,
626 .maxlen = OCFS2_MAX_HB_CTL_PATH, 625 .maxlen = OCFS2_MAX_HB_CTL_PATH,
627 .mode = 0644, 626 .mode = 0644,
628 .proc_handler = &proc_dostring, 627 .proc_handler = proc_dostring,
629 .strategy = &sysctl_string,
630 }, 628 },
631 { .ctl_name = 0 } 629 { }
632}; 630};
633 631
634static ctl_table ocfs2_mod_table[] = { 632static ctl_table ocfs2_mod_table[] = {
635 { 633 {
636 .ctl_name = FS_OCFS2_NM,
637 .procname = "nm", 634 .procname = "nm",
638 .data = NULL, 635 .data = NULL,
639 .maxlen = 0, 636 .maxlen = 0,
640 .mode = 0555, 637 .mode = 0555,
641 .child = ocfs2_nm_table 638 .child = ocfs2_nm_table
642 }, 639 },
643 { .ctl_name = 0} 640 { }
644}; 641};
645 642
646static ctl_table ocfs2_kern_table[] = { 643static ctl_table ocfs2_kern_table[] = {
647 { 644 {
648 .ctl_name = FS_OCFS2,
649 .procname = "ocfs2", 645 .procname = "ocfs2",
650 .data = NULL, 646 .data = NULL,
651 .maxlen = 0, 647 .maxlen = 0,
652 .mode = 0555, 648 .mode = 0555,
653 .child = ocfs2_mod_table 649 .child = ocfs2_mod_table
654 }, 650 },
655 { .ctl_name = 0} 651 { }
656}; 652};
657 653
658static ctl_table ocfs2_root_table[] = { 654static ctl_table ocfs2_root_table[] = {
659 { 655 {
660 .ctl_name = CTL_FS,
661 .procname = "fs", 656 .procname = "fs",
662 .data = NULL, 657 .data = NULL,
663 .maxlen = 0, 658 .maxlen = 0,
664 .mode = 0555, 659 .mode = 0555,
665 .child = ocfs2_kern_table 660 .child = ocfs2_kern_table
666 }, 661 },
667 { .ctl_name = 0 } 662 { }
668}; 663};
669 664
670static struct ctl_table_header *ocfs2_table_header = NULL; 665static struct ctl_table_header *ocfs2_table_header = NULL;
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
index e1c0ec0ae989..082234581d05 100644
--- a/fs/omfs/bitmap.c
+++ b/fs/omfs/bitmap.c
@@ -85,7 +85,7 @@ out:
85} 85}
86 86
87/* 87/*
88 * Tries to allocate exactly one block. Returns true if sucessful. 88 * Tries to allocate exactly one block. Returns true if successful.
89 */ 89 */
90int omfs_allocate_block(struct super_block *sb, u64 block) 90int omfs_allocate_block(struct super_block *sb, u64 block)
91{ 91{
diff --git a/fs/open.c b/fs/open.c
index 4f01e06227c6..b4b31d277f3a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -587,6 +587,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename)
587 error = -EPERM; 587 error = -EPERM;
588 if (!capable(CAP_SYS_CHROOT)) 588 if (!capable(CAP_SYS_CHROOT))
589 goto dput_and_out; 589 goto dput_and_out;
590 error = security_path_chroot(&path);
591 if (error)
592 goto dput_and_out;
590 593
591 set_fs_root(current->fs, &path); 594 set_fs_root(current->fs, &path);
592 error = 0; 595 error = 0;
@@ -617,11 +620,15 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
617 if (err) 620 if (err)
618 goto out_putf; 621 goto out_putf;
619 mutex_lock(&inode->i_mutex); 622 mutex_lock(&inode->i_mutex);
623 err = security_path_chmod(dentry, file->f_vfsmnt, mode);
624 if (err)
625 goto out_unlock;
620 if (mode == (mode_t) -1) 626 if (mode == (mode_t) -1)
621 mode = inode->i_mode; 627 mode = inode->i_mode;
622 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 628 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
623 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 629 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
624 err = notify_change(dentry, &newattrs); 630 err = notify_change(dentry, &newattrs);
631out_unlock:
625 mutex_unlock(&inode->i_mutex); 632 mutex_unlock(&inode->i_mutex);
626 mnt_drop_write(file->f_path.mnt); 633 mnt_drop_write(file->f_path.mnt);
627out_putf: 634out_putf:
@@ -646,11 +653,15 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
646 if (error) 653 if (error)
647 goto dput_and_out; 654 goto dput_and_out;
648 mutex_lock(&inode->i_mutex); 655 mutex_lock(&inode->i_mutex);
656 error = security_path_chmod(path.dentry, path.mnt, mode);
657 if (error)
658 goto out_unlock;
649 if (mode == (mode_t) -1) 659 if (mode == (mode_t) -1)
650 mode = inode->i_mode; 660 mode = inode->i_mode;
651 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 661 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
652 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 662 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
653 error = notify_change(path.dentry, &newattrs); 663 error = notify_change(path.dentry, &newattrs);
664out_unlock:
654 mutex_unlock(&inode->i_mutex); 665 mutex_unlock(&inode->i_mutex);
655 mnt_drop_write(path.mnt); 666 mnt_drop_write(path.mnt);
656dput_and_out: 667dput_and_out:
@@ -664,9 +675,9 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
664 return sys_fchmodat(AT_FDCWD, filename, mode); 675 return sys_fchmodat(AT_FDCWD, filename, mode);
665} 676}
666 677
667static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 678static int chown_common(struct path *path, uid_t user, gid_t group)
668{ 679{
669 struct inode *inode = dentry->d_inode; 680 struct inode *inode = path->dentry->d_inode;
670 int error; 681 int error;
671 struct iattr newattrs; 682 struct iattr newattrs;
672 683
@@ -683,7 +694,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
683 newattrs.ia_valid |= 694 newattrs.ia_valid |=
684 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; 695 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
685 mutex_lock(&inode->i_mutex); 696 mutex_lock(&inode->i_mutex);
686 error = notify_change(dentry, &newattrs); 697 error = security_path_chown(path, user, group);
698 if (!error)
699 error = notify_change(path->dentry, &newattrs);
687 mutex_unlock(&inode->i_mutex); 700 mutex_unlock(&inode->i_mutex);
688 701
689 return error; 702 return error;
@@ -700,7 +713,7 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
700 error = mnt_want_write(path.mnt); 713 error = mnt_want_write(path.mnt);
701 if (error) 714 if (error)
702 goto out_release; 715 goto out_release;
703 error = chown_common(path.dentry, user, group); 716 error = chown_common(&path, user, group);
704 mnt_drop_write(path.mnt); 717 mnt_drop_write(path.mnt);
705out_release: 718out_release:
706 path_put(&path); 719 path_put(&path);
@@ -725,7 +738,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
725 error = mnt_want_write(path.mnt); 738 error = mnt_want_write(path.mnt);
726 if (error) 739 if (error)
727 goto out_release; 740 goto out_release;
728 error = chown_common(path.dentry, user, group); 741 error = chown_common(&path, user, group);
729 mnt_drop_write(path.mnt); 742 mnt_drop_write(path.mnt);
730out_release: 743out_release:
731 path_put(&path); 744 path_put(&path);
@@ -744,7 +757,7 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group
744 error = mnt_want_write(path.mnt); 757 error = mnt_want_write(path.mnt);
745 if (error) 758 if (error)
746 goto out_release; 759 goto out_release;
747 error = chown_common(path.dentry, user, group); 760 error = chown_common(&path, user, group);
748 mnt_drop_write(path.mnt); 761 mnt_drop_write(path.mnt);
749out_release: 762out_release:
750 path_put(&path); 763 path_put(&path);
@@ -767,7 +780,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
767 goto out_fput; 780 goto out_fput;
768 dentry = file->f_path.dentry; 781 dentry = file->f_path.dentry;
769 audit_inode(NULL, dentry); 782 audit_inode(NULL, dentry);
770 error = chown_common(dentry, user, group); 783 error = chown_common(&file->f_path, user, group);
771 mnt_drop_write(file->f_path.mnt); 784 mnt_drop_write(file->f_path.mnt);
772out_fput: 785out_fput:
773 fput(file); 786 fput(file);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e10cbad..64bc8998ac9a 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -226,6 +226,13 @@ ssize_t part_alignment_offset_show(struct device *dev,
226 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 226 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
227} 227}
228 228
229ssize_t part_discard_alignment_show(struct device *dev,
230 struct device_attribute *attr, char *buf)
231{
232 struct hd_struct *p = dev_to_part(dev);
233 return sprintf(buf, "%u\n", p->discard_alignment);
234}
235
229ssize_t part_stat_show(struct device *dev, 236ssize_t part_stat_show(struct device *dev,
230 struct device_attribute *attr, char *buf) 237 struct device_attribute *attr, char *buf)
231{ 238{
@@ -288,6 +295,8 @@ static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
288static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 295static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
289static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 296static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
290static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 297static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
298static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
299 NULL);
291static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 300static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
292static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 301static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
293#ifdef CONFIG_FAIL_MAKE_REQUEST 302#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -300,6 +309,7 @@ static struct attribute *part_attrs[] = {
300 &dev_attr_start.attr, 309 &dev_attr_start.attr,
301 &dev_attr_size.attr, 310 &dev_attr_size.attr,
302 &dev_attr_alignment_offset.attr, 311 &dev_attr_alignment_offset.attr,
312 &dev_attr_discard_alignment.attr,
303 &dev_attr_stat.attr, 313 &dev_attr_stat.attr,
304 &dev_attr_inflight.attr, 314 &dev_attr_inflight.attr,
305#ifdef CONFIG_FAIL_MAKE_REQUEST 315#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -403,6 +413,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
403 413
404 p->start_sect = start; 414 p->start_sect = start;
405 p->alignment_offset = queue_sector_alignment_offset(disk->queue, start); 415 p->alignment_offset = queue_sector_alignment_offset(disk->queue, start);
416 p->discard_alignment = queue_sector_discard_alignment(disk->queue,
417 start);
406 p->nr_sects = len; 418 p->nr_sects = len;
407 p->partno = partno; 419 p->partno = partno;
408 p->policy = get_disk_ro(disk); 420 p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index 038a6022152f..49cfd5f54238 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -1,7 +1,9 @@
1/************************************************************ 1/************************************************************
2 * EFI GUID Partition Table handling 2 * EFI GUID Partition Table handling
3 * Per Intel EFI Specification v1.02 3 *
4 * http://developer.intel.com/technology/efi/efi.htm 4 * http://www.uefi.org/specs/
5 * http://www.intel.com/technology/efi/
6 *
5 * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com> 7 * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com>
6 * Copyright 2000,2001,2002,2004 Dell Inc. 8 * Copyright 2000,2001,2002,2004 Dell Inc.
7 * 9 *
@@ -92,6 +94,7 @@
92 * 94 *
93 ************************************************************/ 95 ************************************************************/
94#include <linux/crc32.h> 96#include <linux/crc32.h>
97#include <linux/math64.h>
95#include "check.h" 98#include "check.h"
96#include "efi.h" 99#include "efi.h"
97 100
@@ -141,7 +144,8 @@ last_lba(struct block_device *bdev)
141{ 144{
142 if (!bdev || !bdev->bd_inode) 145 if (!bdev || !bdev->bd_inode)
143 return 0; 146 return 0;
144 return (bdev->bd_inode->i_size >> 9) - 1ULL; 147 return div_u64(bdev->bd_inode->i_size,
148 bdev_logical_block_size(bdev)) - 1ULL;
145} 149}
146 150
147static inline int 151static inline int
@@ -188,6 +192,7 @@ static size_t
188read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count) 192read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
189{ 193{
190 size_t totalreadcount = 0; 194 size_t totalreadcount = 0;
195 sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
191 196
192 if (!bdev || !buffer || lba > last_lba(bdev)) 197 if (!bdev || !buffer || lba > last_lba(bdev))
193 return 0; 198 return 0;
@@ -195,7 +200,7 @@ read_lba(struct block_device *bdev, u64 lba, u8 * buffer, size_t count)
195 while (count) { 200 while (count) {
196 int copied = 512; 201 int copied = 512;
197 Sector sect; 202 Sector sect;
198 unsigned char *data = read_dev_sector(bdev, lba++, &sect); 203 unsigned char *data = read_dev_sector(bdev, n++, &sect);
199 if (!data) 204 if (!data)
200 break; 205 break;
201 if (copied > count) 206 if (copied > count)
@@ -257,15 +262,16 @@ static gpt_header *
257alloc_read_gpt_header(struct block_device *bdev, u64 lba) 262alloc_read_gpt_header(struct block_device *bdev, u64 lba)
258{ 263{
259 gpt_header *gpt; 264 gpt_header *gpt;
265 unsigned ssz = bdev_logical_block_size(bdev);
266
260 if (!bdev) 267 if (!bdev)
261 return NULL; 268 return NULL;
262 269
263 gpt = kzalloc(sizeof (gpt_header), GFP_KERNEL); 270 gpt = kzalloc(ssz, GFP_KERNEL);
264 if (!gpt) 271 if (!gpt)
265 return NULL; 272 return NULL;
266 273
267 if (read_lba(bdev, lba, (u8 *) gpt, 274 if (read_lba(bdev, lba, (u8 *) gpt, ssz) < ssz) {
268 sizeof (gpt_header)) < sizeof (gpt_header)) {
269 kfree(gpt); 275 kfree(gpt);
270 gpt=NULL; 276 gpt=NULL;
271 return NULL; 277 return NULL;
@@ -601,6 +607,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
601 gpt_header *gpt = NULL; 607 gpt_header *gpt = NULL;
602 gpt_entry *ptes = NULL; 608 gpt_entry *ptes = NULL;
603 u32 i; 609 u32 i;
610 unsigned ssz = bdev_logical_block_size(bdev) / 512;
604 611
605 if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) { 612 if (!find_valid_gpt(bdev, &gpt, &ptes) || !gpt || !ptes) {
606 kfree(gpt); 613 kfree(gpt);
@@ -611,13 +618,14 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
611 pr_debug("GUID Partition Table is valid! Yea!\n"); 618 pr_debug("GUID Partition Table is valid! Yea!\n");
612 619
613 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { 620 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
621 u64 start = le64_to_cpu(ptes[i].starting_lba);
622 u64 size = le64_to_cpu(ptes[i].ending_lba) -
623 le64_to_cpu(ptes[i].starting_lba) + 1ULL;
624
614 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 625 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
615 continue; 626 continue;
616 627
617 put_partition(state, i+1, le64_to_cpu(ptes[i].starting_lba), 628 put_partition(state, i+1, start * ssz, size * ssz);
618 (le64_to_cpu(ptes[i].ending_lba) -
619 le64_to_cpu(ptes[i].starting_lba) +
620 1ULL));
621 629
622 /* If this is a RAID volume, tell md */ 630 /* If this is a RAID volume, tell md */
623 if (!efi_guidcmp(ptes[i].partition_type_guid, 631 if (!efi_guidcmp(ptes[i].partition_type_guid,
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
index 2cc89d0475bf..6998b589abf9 100644
--- a/fs/partitions/efi.h
+++ b/fs/partitions/efi.h
@@ -37,7 +37,6 @@
37#define EFI_PMBR_OSTYPE_EFI 0xEF 37#define EFI_PMBR_OSTYPE_EFI 0xEF
38#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE 38#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
39 39
40#define GPT_BLOCK_SIZE 512
41#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL 40#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
42#define GPT_HEADER_REVISION_V1 0x00010000 41#define GPT_HEADER_REVISION_V1 0x00010000
43#define GPT_PRIMARY_PARTITION_TABLE_LBA 1 42#define GPT_PRIMARY_PARTITION_TABLE_LBA 1
@@ -79,7 +78,12 @@ typedef struct _gpt_header {
79 __le32 num_partition_entries; 78 __le32 num_partition_entries;
80 __le32 sizeof_partition_entry; 79 __le32 sizeof_partition_entry;
81 __le32 partition_entry_array_crc32; 80 __le32 partition_entry_array_crc32;
82 u8 reserved2[GPT_BLOCK_SIZE - 92]; 81
82 /* The rest of the logical block is reserved by UEFI and must be zero.
83 * EFI standard handles this by:
84 *
85 * uint8_t reserved2[ BlockSize - 92 ];
86 */
83} __attribute__ ((packed)) gpt_header; 87} __attribute__ ((packed)) gpt_header;
84 88
85typedef struct _gpt_entry_attributes { 89typedef struct _gpt_entry_attributes {
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 822c2d506518..4badde179b18 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -410,6 +410,16 @@ static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
410} 410}
411#endif /* CONFIG_MMU */ 411#endif /* CONFIG_MMU */
412 412
413static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
414{
415 seq_printf(m, "Cpus_allowed:\t");
416 seq_cpumask(m, &task->cpus_allowed);
417 seq_printf(m, "\n");
418 seq_printf(m, "Cpus_allowed_list:\t");
419 seq_cpumask_list(m, &task->cpus_allowed);
420 seq_printf(m, "\n");
421}
422
413int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 423int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
414 struct pid *pid, struct task_struct *task) 424 struct pid *pid, struct task_struct *task)
415{ 425{
@@ -424,6 +434,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
424 } 434 }
425 task_sig(m, task); 435 task_sig(m, task);
426 task_cap(m, task); 436 task_cap(m, task);
437 task_cpus_allowed(m, task);
427 cpuset_task_status_allowed(m, task); 438 cpuset_task_status_allowed(m, task);
428#if defined(CONFIG_S390) 439#if defined(CONFIG_S390)
429 task_show_regs(m, task); 440 task_show_regs(m, task);
@@ -495,20 +506,17 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
495 506
496 /* add up live thread stats at the group level */ 507 /* add up live thread stats at the group level */
497 if (whole) { 508 if (whole) {
498 struct task_cputime cputime;
499 struct task_struct *t = task; 509 struct task_struct *t = task;
500 do { 510 do {
501 min_flt += t->min_flt; 511 min_flt += t->min_flt;
502 maj_flt += t->maj_flt; 512 maj_flt += t->maj_flt;
503 gtime = cputime_add(gtime, task_gtime(t)); 513 gtime = cputime_add(gtime, t->gtime);
504 t = next_thread(t); 514 t = next_thread(t);
505 } while (t != task); 515 } while (t != task);
506 516
507 min_flt += sig->min_flt; 517 min_flt += sig->min_flt;
508 maj_flt += sig->maj_flt; 518 maj_flt += sig->maj_flt;
509 thread_group_cputime(task, &cputime); 519 thread_group_times(task, &utime, &stime);
510 utime = cputime.utime;
511 stime = cputime.stime;
512 gtime = cputime_add(gtime, sig->gtime); 520 gtime = cputime_add(gtime, sig->gtime);
513 } 521 }
514 522
@@ -524,9 +532,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
524 if (!whole) { 532 if (!whole) {
525 min_flt = task->min_flt; 533 min_flt = task->min_flt;
526 maj_flt = task->maj_flt; 534 maj_flt = task->maj_flt;
527 utime = task_utime(task); 535 task_times(task, &utime, &stime);
528 stime = task_stime(task); 536 gtime = task->gtime;
529 gtime = task_gtime(task);
530 } 537 }
531 538
532 /* scale priority and nice values from timeslices to -20..20 */ 539 /* scale priority and nice values from timeslices to -20..20 */
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 7ba79a54948c..123257bb356b 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/time.h> 8#include <linux/time.h>
9#include <linux/proc_fs.h> 9#include <linux/proc_fs.h>
10#include <linux/seq_file.h>
10#include <linux/stat.h> 11#include <linux/stat.h>
11#include <linux/string.h> 12#include <linux/string.h>
12#include <asm/prom.h> 13#include <asm/prom.h>
@@ -25,26 +26,27 @@ static struct proc_dir_entry *proc_device_tree;
25/* 26/*
26 * Supply data on a read from /proc/device-tree/node/property. 27 * Supply data on a read from /proc/device-tree/node/property.
27 */ 28 */
28static int property_read_proc(char *page, char **start, off_t off, 29static int property_proc_show(struct seq_file *m, void *v)
29 int count, int *eof, void *data)
30{ 30{
31 struct property *pp = data; 31 struct property *pp = m->private;
32 int n;
33 32
34 if (off >= pp->length) { 33 seq_write(m, pp->value, pp->length);
35 *eof = 1; 34 return 0;
36 return 0; 35}
37 } 36
38 n = pp->length - off; 37static int property_proc_open(struct inode *inode, struct file *file)
39 if (n > count) 38{
40 n = count; 39 return single_open(file, property_proc_show, PDE(inode)->data);
41 else
42 *eof = 1;
43 memcpy(page, (char *)pp->value + off, n);
44 *start = page;
45 return n;
46} 40}
47 41
42static const struct file_operations property_proc_fops = {
43 .owner = THIS_MODULE,
44 .open = property_proc_open,
45 .read = seq_read,
46 .llseek = seq_lseek,
47 .release = single_release,
48};
49
48/* 50/*
49 * For a node with a name like "gc@10", we make symlinks called "gc" 51 * For a node with a name like "gc@10", we make symlinks called "gc"
50 * and "@10" to it. 52 * and "@10" to it.
@@ -63,10 +65,9 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
63 * Unfortunately proc_register puts each new entry 65 * Unfortunately proc_register puts each new entry
64 * at the beginning of the list. So we rearrange them. 66 * at the beginning of the list. So we rearrange them.
65 */ 67 */
66 ent = create_proc_read_entry(name, 68 ent = proc_create_data(name,
67 strncmp(name, "security-", 9) 69 strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR,
68 ? S_IRUGO : S_IRUSR, de, 70 de, &property_proc_fops, pp);
69 property_read_proc, pp);
70 if (ent == NULL) 71 if (ent == NULL)
71 return NULL; 72 return NULL;
72 73
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index f667e8aeabdf..6ff9981f0a18 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -48,7 +48,7 @@ out:
48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
49{ 49{
50 int len; 50 int len;
51 for ( ; p->ctl_name || p->procname; p++) { 51 for ( ; p->procname; p++) {
52 52
53 if (!p->procname) 53 if (!p->procname)
54 continue; 54 continue;
@@ -218,7 +218,7 @@ static int scan(struct ctl_table_header *head, ctl_table *table,
218 void *dirent, filldir_t filldir) 218 void *dirent, filldir_t filldir)
219{ 219{
220 220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) { 221 for (; table->procname; table++, (*pos)++) {
222 int res; 222 int res;
223 223
224 /* Can't do anything without a proc name */ 224 /* Can't do anything without a proc name */
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7cc726c6d70a..b9b7aad2003d 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v)
27 int i, j; 27 int i, j;
28 unsigned long jif; 28 unsigned long jif;
29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 29 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
30 cputime64_t guest; 30 cputime64_t guest, guest_nice;
31 u64 sum = 0; 31 u64 sum = 0;
32 u64 sum_softirq = 0; 32 u64 sum_softirq = 0;
33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 33 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v)
36 36
37 user = nice = system = idle = iowait = 37 user = nice = system = idle = iowait =
38 irq = softirq = steal = cputime64_zero; 38 irq = softirq = steal = cputime64_zero;
39 guest = cputime64_zero; 39 guest = guest_nice = cputime64_zero;
40 getboottime(&boottime); 40 getboottime(&boottime);
41 jif = boottime.tv_sec; 41 jif = boottime.tv_sec;
42 42
@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v)
51 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 51 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
52 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 52 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
53 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 53 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
54 guest_nice = cputime64_add(guest_nice,
55 kstat_cpu(i).cpustat.guest_nice);
54 for_each_irq_nr(j) { 56 for_each_irq_nr(j) {
55 sum += kstat_irqs_cpu(j, i); 57 sum += kstat_irqs_cpu(j, i);
56 } 58 }
@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v)
65 } 67 }
66 sum += arch_irq_stat(); 68 sum += arch_irq_stat();
67 69
68 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 70 seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu "
71 "%llu\n",
69 (unsigned long long)cputime64_to_clock_t(user), 72 (unsigned long long)cputime64_to_clock_t(user),
70 (unsigned long long)cputime64_to_clock_t(nice), 73 (unsigned long long)cputime64_to_clock_t(nice),
71 (unsigned long long)cputime64_to_clock_t(system), 74 (unsigned long long)cputime64_to_clock_t(system),
@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v)
74 (unsigned long long)cputime64_to_clock_t(irq), 77 (unsigned long long)cputime64_to_clock_t(irq),
75 (unsigned long long)cputime64_to_clock_t(softirq), 78 (unsigned long long)cputime64_to_clock_t(softirq),
76 (unsigned long long)cputime64_to_clock_t(steal), 79 (unsigned long long)cputime64_to_clock_t(steal),
77 (unsigned long long)cputime64_to_clock_t(guest)); 80 (unsigned long long)cputime64_to_clock_t(guest),
81 (unsigned long long)cputime64_to_clock_t(guest_nice));
78 for_each_online_cpu(i) { 82 for_each_online_cpu(i) {
79 83
80 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 84 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v)
88 softirq = kstat_cpu(i).cpustat.softirq; 92 softirq = kstat_cpu(i).cpustat.softirq;
89 steal = kstat_cpu(i).cpustat.steal; 93 steal = kstat_cpu(i).cpustat.steal;
90 guest = kstat_cpu(i).cpustat.guest; 94 guest = kstat_cpu(i).cpustat.guest;
95 guest_nice = kstat_cpu(i).cpustat.guest_nice;
91 seq_printf(p, 96 seq_printf(p,
92 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", 97 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
98 "%llu\n",
93 i, 99 i,
94 (unsigned long long)cputime64_to_clock_t(user), 100 (unsigned long long)cputime64_to_clock_t(user),
95 (unsigned long long)cputime64_to_clock_t(nice), 101 (unsigned long long)cputime64_to_clock_t(nice),
@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v)
99 (unsigned long long)cputime64_to_clock_t(irq), 105 (unsigned long long)cputime64_to_clock_t(irq),
100 (unsigned long long)cputime64_to_clock_t(softirq), 106 (unsigned long long)cputime64_to_clock_t(softirq),
101 (unsigned long long)cputime64_to_clock_t(steal), 107 (unsigned long long)cputime64_to_clock_t(steal),
102 (unsigned long long)cputime64_to_clock_t(guest)); 108 (unsigned long long)cputime64_to_clock_t(guest),
109 (unsigned long long)cputime64_to_clock_t(guest_nice));
103 } 110 }
104 seq_printf(p, "intr %llu", (unsigned long long)sum); 111 seq_printf(p, "intr %llu", (unsigned long long)sum);
105 112
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 0afba069d567..32f5d131a644 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -67,7 +67,7 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb)
67 67
68 while (total < size) { 68 while (total < size) {
69 if ((bh = sb_bread(sb, start + offset)) == NULL) { 69 if ((bh = sb_bread(sb, start + offset)) == NULL) {
70 printk("qnx4: I/O error in counting free blocks\n"); 70 printk(KERN_ERR "qnx4: I/O error in counting free blocks\n");
71 break; 71 break;
72 } 72 }
73 count_bits(bh->b_data, size - total, &total_free); 73 count_bits(bh->b_data, size - total, &total_free);
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 86cc39cb1398..6f30c3d5bcbf 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -26,8 +26,8 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
26 int ix, ino; 26 int ix, ino;
27 int size; 27 int size;
28 28
29 QNX4DEBUG(("qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); 29 QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size));
30 QNX4DEBUG(("filp->f_pos = %ld\n", (long) filp->f_pos)); 30 QNX4DEBUG((KERN_INFO "filp->f_pos = %ld\n", (long) filp->f_pos));
31 31
32 lock_kernel(); 32 lock_kernel();
33 33
@@ -50,7 +50,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
50 size = QNX4_NAME_MAX; 50 size = QNX4_NAME_MAX;
51 51
52 if ( ( de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK) ) != 0 ) { 52 if ( ( de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK) ) != 0 ) {
53 QNX4DEBUG(("qnx4_readdir:%.*s\n", size, de->di_fname)); 53 QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname));
54 if ( ( de->di_status & QNX4_FILE_LINK ) == 0 ) 54 if ( ( de->di_status & QNX4_FILE_LINK ) == 0 )
55 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; 55 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
56 else { 56 else {
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index d2cd1798d8c4..449f5a66dd34 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -107,7 +107,7 @@ static int qnx4_get_block( struct inode *inode, sector_t iblock, struct buffer_h
107{ 107{
108 unsigned long phys; 108 unsigned long phys;
109 109
110 QNX4DEBUG(("qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock)); 110 QNX4DEBUG((KERN_INFO "qnx4: qnx4_get_block inode=[%ld] iblock=[%ld]\n",inode->i_ino,iblock));
111 111
112 phys = qnx4_block_map( inode, iblock ); 112 phys = qnx4_block_map( inode, iblock );
113 if ( phys ) { 113 if ( phys ) {
@@ -142,12 +142,12 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
142 // read next xtnt block. 142 // read next xtnt block.
143 bh = sb_bread(inode->i_sb, i_xblk - 1); 143 bh = sb_bread(inode->i_sb, i_xblk - 1);
144 if ( !bh ) { 144 if ( !bh ) {
145 QNX4DEBUG(("qnx4: I/O error reading xtnt block [%ld])\n", i_xblk - 1)); 145 QNX4DEBUG((KERN_ERR "qnx4: I/O error reading xtnt block [%ld])\n", i_xblk - 1));
146 return -EIO; 146 return -EIO;
147 } 147 }
148 xblk = (struct qnx4_xblk*)bh->b_data; 148 xblk = (struct qnx4_xblk*)bh->b_data;
149 if ( memcmp( xblk->xblk_signature, "IamXblk", 7 ) ) { 149 if ( memcmp( xblk->xblk_signature, "IamXblk", 7 ) ) {
150 QNX4DEBUG(("qnx4: block at %ld is not a valid xtnt\n", qnx4_inode->i_xblk)); 150 QNX4DEBUG((KERN_ERR "qnx4: block at %ld is not a valid xtnt\n", qnx4_inode->i_xblk));
151 return -EIO; 151 return -EIO;
152 } 152 }
153 } 153 }
@@ -168,7 +168,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
168 brelse( bh ); 168 brelse( bh );
169 } 169 }
170 170
171 QNX4DEBUG(("qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block)); 171 QNX4DEBUG((KERN_INFO "qnx4: mapping block %ld of inode %ld = %ld\n",iblock,inode->i_ino,block));
172 return block; 172 return block;
173} 173}
174 174
@@ -209,7 +209,7 @@ static const char *qnx4_checkroot(struct super_block *sb)
209 if (*(qnx4_sb(sb)->sb->RootDir.di_fname) != '/') { 209 if (*(qnx4_sb(sb)->sb->RootDir.di_fname) != '/') {
210 return "no qnx4 filesystem (no root dir)."; 210 return "no qnx4 filesystem (no root dir).";
211 } else { 211 } else {
212 QNX4DEBUG(("QNX4 filesystem found on dev %s.\n", sb->s_id)); 212 QNX4DEBUG((KERN_NOTICE "QNX4 filesystem found on dev %s.\n", sb->s_id));
213 rd = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_blk) - 1; 213 rd = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_blk) - 1;
214 rl = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_size); 214 rl = le32_to_cpu(qnx4_sb(sb)->sb->RootDir.di_first_xtnt.xtnt_size);
215 for (j = 0; j < rl; j++) { 215 for (j = 0; j < rl; j++) {
@@ -220,7 +220,7 @@ static const char *qnx4_checkroot(struct super_block *sb)
220 for (i = 0; i < QNX4_INODES_PER_BLOCK; i++) { 220 for (i = 0; i < QNX4_INODES_PER_BLOCK; i++) {
221 rootdir = (struct qnx4_inode_entry *) (bh->b_data + i * QNX4_DIR_ENTRY_SIZE); 221 rootdir = (struct qnx4_inode_entry *) (bh->b_data + i * QNX4_DIR_ENTRY_SIZE);
222 if (rootdir->di_fname != NULL) { 222 if (rootdir->di_fname != NULL) {
223 QNX4DEBUG(("Rootdir entry found : [%s]\n", rootdir->di_fname)); 223 QNX4DEBUG((KERN_INFO "rootdir entry found : [%s]\n", rootdir->di_fname));
224 if (!strncmp(rootdir->di_fname, QNX4_BMNAME, sizeof QNX4_BMNAME)) { 224 if (!strncmp(rootdir->di_fname, QNX4_BMNAME, sizeof QNX4_BMNAME)) {
225 found = 1; 225 found = 1;
226 qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL ); 226 qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL );
@@ -265,12 +265,12 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
265 if we don't belong here... */ 265 if we don't belong here... */
266 bh = sb_bread(s, 1); 266 bh = sb_bread(s, 1);
267 if (!bh) { 267 if (!bh) {
268 printk("qnx4: unable to read the superblock\n"); 268 printk(KERN_ERR "qnx4: unable to read the superblock\n");
269 goto outnobh; 269 goto outnobh;
270 } 270 }
271 if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) { 271 if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
272 if (!silent) 272 if (!silent)
273 printk("qnx4: wrong fsid in superblock.\n"); 273 printk(KERN_ERR "qnx4: wrong fsid in superblock.\n");
274 goto out; 274 goto out;
275 } 275 }
276 s->s_op = &qnx4_sops; 276 s->s_op = &qnx4_sops;
@@ -284,14 +284,14 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
284 errmsg = qnx4_checkroot(s); 284 errmsg = qnx4_checkroot(s);
285 if (errmsg != NULL) { 285 if (errmsg != NULL) {
286 if (!silent) 286 if (!silent)
287 printk("qnx4: %s\n", errmsg); 287 printk(KERN_ERR "qnx4: %s\n", errmsg);
288 goto out; 288 goto out;
289 } 289 }
290 290
291 /* does root not have inode number QNX4_ROOT_INO ?? */ 291 /* does root not have inode number QNX4_ROOT_INO ?? */
292 root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); 292 root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK);
293 if (IS_ERR(root)) { 293 if (IS_ERR(root)) {
294 printk("qnx4: get inode failed\n"); 294 printk(KERN_ERR "qnx4: get inode failed\n");
295 ret = PTR_ERR(root); 295 ret = PTR_ERR(root);
296 goto out; 296 goto out;
297 } 297 }
@@ -374,7 +374,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
374 qnx4_inode = qnx4_raw_inode(inode); 374 qnx4_inode = qnx4_raw_inode(inode);
375 inode->i_mode = 0; 375 inode->i_mode = 0;
376 376
377 QNX4DEBUG(("Reading inode : [%d]\n", ino)); 377 QNX4DEBUG((KERN_INFO "reading inode : [%d]\n", ino));
378 if (!ino) { 378 if (!ino) {
379 printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is " 379 printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is "
380 "out of range\n", 380 "out of range\n",
@@ -385,7 +385,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
385 block = ino / QNX4_INODES_PER_BLOCK; 385 block = ino / QNX4_INODES_PER_BLOCK;
386 386
387 if (!(bh = sb_bread(sb, block))) { 387 if (!(bh = sb_bread(sb, block))) {
388 printk("qnx4: major problem: unable to read inode from dev " 388 printk(KERN_ERR "qnx4: major problem: unable to read inode from dev "
389 "%s\n", sb->s_id); 389 "%s\n", sb->s_id);
390 iget_failed(inode); 390 iget_failed(inode);
391 return ERR_PTR(-EIO); 391 return ERR_PTR(-EIO);
@@ -499,7 +499,7 @@ static int __init init_qnx4_fs(void)
499 return err; 499 return err;
500 } 500 }
501 501
502 printk("QNX4 filesystem 0.2.3 registered.\n"); 502 printk(KERN_INFO "QNX4 filesystem 0.2.3 registered.\n");
503 return 0; 503 return 0;
504} 504}
505 505
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index ae1e7edbacd6..58703ebba879 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -30,7 +30,7 @@ static int qnx4_match(int len, const char *name,
30 int namelen, thislen; 30 int namelen, thislen;
31 31
32 if (bh == NULL) { 32 if (bh == NULL) {
33 printk("qnx4: matching unassigned buffer !\n"); 33 printk(KERN_WARNING "qnx4: matching unassigned buffer !\n");
34 return 0; 34 return 0;
35 } 35 }
36 de = (struct qnx4_inode_entry *) (bh->b_data + *offset); 36 de = (struct qnx4_inode_entry *) (bh->b_data + *offset);
@@ -66,7 +66,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
66 66
67 *res_dir = NULL; 67 *res_dir = NULL;
68 if (!dir->i_sb) { 68 if (!dir->i_sb) {
69 printk("qnx4: no superblock on dir.\n"); 69 printk(KERN_WARNING "qnx4: no superblock on dir.\n");
70 return NULL; 70 return NULL;
71 } 71 }
72 bh = NULL; 72 bh = NULL;
@@ -124,7 +124,7 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam
124 foundinode = qnx4_iget(dir->i_sb, ino); 124 foundinode = qnx4_iget(dir->i_sb, ino);
125 if (IS_ERR(foundinode)) { 125 if (IS_ERR(foundinode)) {
126 unlock_kernel(); 126 unlock_kernel();
127 QNX4DEBUG(("qnx4: lookup->iget -> error %ld\n", 127 QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n",
128 PTR_ERR(foundinode))); 128 PTR_ERR(foundinode)));
129 return ERR_CAST(foundinode); 129 return ERR_CAST(foundinode);
130 } 130 }
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 8047e01ef46b..efc02ebb8c70 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -17,7 +17,7 @@ config QUOTA
17 17
18config QUOTA_NETLINK_INTERFACE 18config QUOTA_NETLINK_INTERFACE
19 bool "Report quota messages through netlink interface" 19 bool "Report quota messages through netlink interface"
20 depends on QUOTA && NET 20 depends on QUOTACTL && NET
21 help 21 help
22 If you say Y here, quota warnings (about exceeding softlimit, reaching 22 If you say Y here, quota warnings (about exceeding softlimit, reaching
23 hardlimit, etc.) will be reported through netlink interface. If unsure, 23 hardlimit, etc.) will be reported through netlink interface. If unsure,
@@ -46,12 +46,14 @@ config QFMT_V1
46 format say Y here. 46 format say Y here.
47 47
48config QFMT_V2 48config QFMT_V2
49 tristate "Quota format v2 support" 49 tristate "Quota format vfsv0 and vfsv1 support"
50 depends on QUOTA 50 depends on QUOTA
51 select QUOTA_TREE 51 select QUOTA_TREE
52 help 52 help
53 This quota format allows using quotas with 32-bit UIDs/GIDs. If you 53 This config option enables kernel support for vfsv0 and vfsv1 quota
54 need this functionality say Y here. 54 formats. Both these formats support 32-bit UIDs/GIDs and vfsv1 format
55 also supports 64-bit inode and block quota limits. If you need this
56 functionality say Y here.
55 57
56config QUOTACTL 58config QUOTACTL
57 bool 59 bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 39b49c42a7ed..cd6bb9a33c13 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -77,10 +77,6 @@
77#include <linux/capability.h> 77#include <linux/capability.h>
78#include <linux/quotaops.h> 78#include <linux/quotaops.h>
79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */ 79#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
80#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
81#include <net/netlink.h>
82#include <net/genetlink.h>
83#endif
84 80
85#include <asm/uaccess.h> 81#include <asm/uaccess.h>
86 82
@@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype)
1071} 1067}
1072#endif 1068#endif
1073 1069
1074#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
1075
1076/* Netlink family structure for quota */
1077static struct genl_family quota_genl_family = {
1078 .id = GENL_ID_GENERATE,
1079 .hdrsize = 0,
1080 .name = "VFS_DQUOT",
1081 .version = 1,
1082 .maxattr = QUOTA_NL_A_MAX,
1083};
1084
1085/* Send warning to userspace about user which exceeded quota */
1086static void send_warning(const struct dquot *dquot, const char warntype)
1087{
1088 static atomic_t seq;
1089 struct sk_buff *skb;
1090 void *msg_head;
1091 int ret;
1092 int msg_size = 4 * nla_total_size(sizeof(u32)) +
1093 2 * nla_total_size(sizeof(u64));
1094
1095 /* We have to allocate using GFP_NOFS as we are called from a
1096 * filesystem performing write and thus further recursion into
1097 * the fs to free some data could cause deadlocks. */
1098 skb = genlmsg_new(msg_size, GFP_NOFS);
1099 if (!skb) {
1100 printk(KERN_ERR
1101 "VFS: Not enough memory to send quota warning.\n");
1102 return;
1103 }
1104 msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
1105 &quota_genl_family, 0, QUOTA_NL_C_WARNING);
1106 if (!msg_head) {
1107 printk(KERN_ERR
1108 "VFS: Cannot store netlink header in quota warning.\n");
1109 goto err_out;
1110 }
1111 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
1112 if (ret)
1113 goto attr_err_out;
1114 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
1115 if (ret)
1116 goto attr_err_out;
1117 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
1118 if (ret)
1119 goto attr_err_out;
1120 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
1121 MAJOR(dquot->dq_sb->s_dev));
1122 if (ret)
1123 goto attr_err_out;
1124 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
1125 MINOR(dquot->dq_sb->s_dev));
1126 if (ret)
1127 goto attr_err_out;
1128 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
1129 if (ret)
1130 goto attr_err_out;
1131 genlmsg_end(skb, msg_head);
1132
1133 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
1134 return;
1135attr_err_out:
1136 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
1137err_out:
1138 kfree_skb(skb);
1139}
1140#endif
1141/* 1070/*
1142 * Write warnings to the console and send warning messages over netlink. 1071 * Write warnings to the console and send warning messages over netlink.
1143 * 1072 *
@@ -1145,18 +1074,20 @@ err_out:
1145 */ 1074 */
1146static void flush_warnings(struct dquot *const *dquots, char *warntype) 1075static void flush_warnings(struct dquot *const *dquots, char *warntype)
1147{ 1076{
1077 struct dquot *dq;
1148 int i; 1078 int i;
1149 1079
1150 for (i = 0; i < MAXQUOTAS; i++) 1080 for (i = 0; i < MAXQUOTAS; i++) {
1151 if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN && 1081 dq = dquots[i];
1152 !warning_issued(dquots[i], warntype[i])) { 1082 if (dq && warntype[i] != QUOTA_NL_NOWARN &&
1083 !warning_issued(dq, warntype[i])) {
1153#ifdef CONFIG_PRINT_QUOTA_WARNING 1084#ifdef CONFIG_PRINT_QUOTA_WARNING
1154 print_warning(dquots[i], warntype[i]); 1085 print_warning(dq, warntype[i]);
1155#endif
1156#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
1157 send_warning(dquots[i], warntype[i]);
1158#endif 1086#endif
1087 quota_send_warning(dq->dq_type, dq->dq_id,
1088 dq->dq_sb->s_dev, warntype[i]);
1159 } 1089 }
1090 }
1160} 1091}
1161 1092
1162static int ignore_hardlimit(struct dquot *dquot) 1093static int ignore_hardlimit(struct dquot *dquot)
@@ -2233,7 +2164,9 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
2233 struct dentry *dentry; 2164 struct dentry *dentry;
2234 int error; 2165 int error;
2235 2166
2167 mutex_lock(&sb->s_root->d_inode->i_mutex);
2236 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); 2168 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
2169 mutex_unlock(&sb->s_root->d_inode->i_mutex);
2237 if (IS_ERR(dentry)) 2170 if (IS_ERR(dentry))
2238 return PTR_ERR(dentry); 2171 return PTR_ERR(dentry);
2239 2172
@@ -2473,100 +2406,89 @@ const struct quotactl_ops vfs_quotactl_ops = {
2473 2406
2474static ctl_table fs_dqstats_table[] = { 2407static ctl_table fs_dqstats_table[] = {
2475 { 2408 {
2476 .ctl_name = FS_DQ_LOOKUPS,
2477 .procname = "lookups", 2409 .procname = "lookups",
2478 .data = &dqstats.lookups, 2410 .data = &dqstats.lookups,
2479 .maxlen = sizeof(int), 2411 .maxlen = sizeof(int),
2480 .mode = 0444, 2412 .mode = 0444,
2481 .proc_handler = &proc_dointvec, 2413 .proc_handler = proc_dointvec,
2482 }, 2414 },
2483 { 2415 {
2484 .ctl_name = FS_DQ_DROPS,
2485 .procname = "drops", 2416 .procname = "drops",
2486 .data = &dqstats.drops, 2417 .data = &dqstats.drops,
2487 .maxlen = sizeof(int), 2418 .maxlen = sizeof(int),
2488 .mode = 0444, 2419 .mode = 0444,
2489 .proc_handler = &proc_dointvec, 2420 .proc_handler = proc_dointvec,
2490 }, 2421 },
2491 { 2422 {
2492 .ctl_name = FS_DQ_READS,
2493 .procname = "reads", 2423 .procname = "reads",
2494 .data = &dqstats.reads, 2424 .data = &dqstats.reads,
2495 .maxlen = sizeof(int), 2425 .maxlen = sizeof(int),
2496 .mode = 0444, 2426 .mode = 0444,
2497 .proc_handler = &proc_dointvec, 2427 .proc_handler = proc_dointvec,
2498 }, 2428 },
2499 { 2429 {
2500 .ctl_name = FS_DQ_WRITES,
2501 .procname = "writes", 2430 .procname = "writes",
2502 .data = &dqstats.writes, 2431 .data = &dqstats.writes,
2503 .maxlen = sizeof(int), 2432 .maxlen = sizeof(int),
2504 .mode = 0444, 2433 .mode = 0444,
2505 .proc_handler = &proc_dointvec, 2434 .proc_handler = proc_dointvec,
2506 }, 2435 },
2507 { 2436 {
2508 .ctl_name = FS_DQ_CACHE_HITS,
2509 .procname = "cache_hits", 2437 .procname = "cache_hits",
2510 .data = &dqstats.cache_hits, 2438 .data = &dqstats.cache_hits,
2511 .maxlen = sizeof(int), 2439 .maxlen = sizeof(int),
2512 .mode = 0444, 2440 .mode = 0444,
2513 .proc_handler = &proc_dointvec, 2441 .proc_handler = proc_dointvec,
2514 }, 2442 },
2515 { 2443 {
2516 .ctl_name = FS_DQ_ALLOCATED,
2517 .procname = "allocated_dquots", 2444 .procname = "allocated_dquots",
2518 .data = &dqstats.allocated_dquots, 2445 .data = &dqstats.allocated_dquots,
2519 .maxlen = sizeof(int), 2446 .maxlen = sizeof(int),
2520 .mode = 0444, 2447 .mode = 0444,
2521 .proc_handler = &proc_dointvec, 2448 .proc_handler = proc_dointvec,
2522 }, 2449 },
2523 { 2450 {
2524 .ctl_name = FS_DQ_FREE,
2525 .procname = "free_dquots", 2451 .procname = "free_dquots",
2526 .data = &dqstats.free_dquots, 2452 .data = &dqstats.free_dquots,
2527 .maxlen = sizeof(int), 2453 .maxlen = sizeof(int),
2528 .mode = 0444, 2454 .mode = 0444,
2529 .proc_handler = &proc_dointvec, 2455 .proc_handler = proc_dointvec,
2530 }, 2456 },
2531 { 2457 {
2532 .ctl_name = FS_DQ_SYNCS,
2533 .procname = "syncs", 2458 .procname = "syncs",
2534 .data = &dqstats.syncs, 2459 .data = &dqstats.syncs,
2535 .maxlen = sizeof(int), 2460 .maxlen = sizeof(int),
2536 .mode = 0444, 2461 .mode = 0444,
2537 .proc_handler = &proc_dointvec, 2462 .proc_handler = proc_dointvec,
2538 }, 2463 },
2539#ifdef CONFIG_PRINT_QUOTA_WARNING 2464#ifdef CONFIG_PRINT_QUOTA_WARNING
2540 { 2465 {
2541 .ctl_name = FS_DQ_WARNINGS,
2542 .procname = "warnings", 2466 .procname = "warnings",
2543 .data = &flag_print_warnings, 2467 .data = &flag_print_warnings,
2544 .maxlen = sizeof(int), 2468 .maxlen = sizeof(int),
2545 .mode = 0644, 2469 .mode = 0644,
2546 .proc_handler = &proc_dointvec, 2470 .proc_handler = proc_dointvec,
2547 }, 2471 },
2548#endif 2472#endif
2549 { .ctl_name = 0 }, 2473 { },
2550}; 2474};
2551 2475
2552static ctl_table fs_table[] = { 2476static ctl_table fs_table[] = {
2553 { 2477 {
2554 .ctl_name = FS_DQSTATS,
2555 .procname = "quota", 2478 .procname = "quota",
2556 .mode = 0555, 2479 .mode = 0555,
2557 .child = fs_dqstats_table, 2480 .child = fs_dqstats_table,
2558 }, 2481 },
2559 { .ctl_name = 0 }, 2482 { },
2560}; 2483};
2561 2484
2562static ctl_table sys_table[] = { 2485static ctl_table sys_table[] = {
2563 { 2486 {
2564 .ctl_name = CTL_FS,
2565 .procname = "fs", 2487 .procname = "fs",
2566 .mode = 0555, 2488 .mode = 0555,
2567 .child = fs_table, 2489 .child = fs_table,
2568 }, 2490 },
2569 { .ctl_name = 0 }, 2491 { },
2570}; 2492};
2571 2493
2572static int __init dquot_init(void) 2494static int __init dquot_init(void)
@@ -2607,12 +2529,6 @@ static int __init dquot_init(void)
2607 2529
2608 register_shrinker(&dqcache_shrinker); 2530 register_shrinker(&dqcache_shrinker);
2609 2531
2610#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
2611 if (genl_register_family(&quota_genl_family) != 0)
2612 printk(KERN_ERR
2613 "VFS: Failed to create quota netlink interface.\n");
2614#endif
2615
2616 return 0; 2532 return 0;
2617} 2533}
2618module_init(dquot_init); 2534module_init(dquot_init);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95c5b42384b2..ee91e2756950 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,8 @@
18#include <linux/capability.h> 18#include <linux/capability.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20#include <linux/types.h> 20#include <linux/types.h>
21#include <net/netlink.h>
22#include <net/genetlink.h>
21 23
22/* Check validity of generic quotactl commands */ 24/* Check validity of generic quotactl commands */
23static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, 25static int generic_quotactl_valid(struct super_block *sb, int type, int cmd,
@@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
525 return ret; 527 return ret;
526} 528}
527#endif 529#endif
530
531
532#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
533
534/* Netlink family structure for quota */
535static struct genl_family quota_genl_family = {
536 .id = GENL_ID_GENERATE,
537 .hdrsize = 0,
538 .name = "VFS_DQUOT",
539 .version = 1,
540 .maxattr = QUOTA_NL_A_MAX,
541};
542
543/**
544 * quota_send_warning - Send warning to userspace about exceeded quota
 546 * @type: The quota type: USRQUOTA, GRPQUOTA,...
546 * @id: The user or group id of the quota that was exceeded
547 * @dev: The device on which the fs is mounted (sb->s_dev)
548 * @warntype: The type of the warning: QUOTA_NL_...
549 *
550 * This can be used by filesystems (including those which don't use
551 * dquot) to send a message to userspace relating to quota limits.
552 *
553 */
554
555void quota_send_warning(short type, unsigned int id, dev_t dev,
556 const char warntype)
557{
558 static atomic_t seq;
559 struct sk_buff *skb;
560 void *msg_head;
561 int ret;
562 int msg_size = 4 * nla_total_size(sizeof(u32)) +
563 2 * nla_total_size(sizeof(u64));
564
565 /* We have to allocate using GFP_NOFS as we are called from a
566 * filesystem performing write and thus further recursion into
567 * the fs to free some data could cause deadlocks. */
568 skb = genlmsg_new(msg_size, GFP_NOFS);
569 if (!skb) {
570 printk(KERN_ERR
571 "VFS: Not enough memory to send quota warning.\n");
572 return;
573 }
574 msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
575 &quota_genl_family, 0, QUOTA_NL_C_WARNING);
576 if (!msg_head) {
577 printk(KERN_ERR
578 "VFS: Cannot store netlink header in quota warning.\n");
579 goto err_out;
580 }
581 ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type);
582 if (ret)
583 goto attr_err_out;
584 ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id);
585 if (ret)
586 goto attr_err_out;
587 ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
588 if (ret)
589 goto attr_err_out;
590 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev));
591 if (ret)
592 goto attr_err_out;
593 ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev));
594 if (ret)
595 goto attr_err_out;
596 ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid());
597 if (ret)
598 goto attr_err_out;
599 genlmsg_end(skb, msg_head);
600
601 genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
602 return;
603attr_err_out:
604 printk(KERN_ERR "VFS: Not enough space to compose quota message!\n");
605err_out:
606 kfree_skb(skb);
607}
608EXPORT_SYMBOL(quota_send_warning);
609
610static int __init quota_init(void)
611{
612 if (genl_register_family(&quota_genl_family) != 0)
613 printk(KERN_ERR
614 "VFS: Failed to create quota netlink interface.\n");
615 return 0;
616};
617
618module_init(quota_init);
619#endif
620
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 0edcf42b1778..2ae757e9c008 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -204,7 +204,7 @@ out:
204 return ret; 204 return ret;
205} 205}
206 206
207static struct quota_format_ops v1_format_ops = { 207static const struct quota_format_ops v1_format_ops = {
208 .check_quota_file = v1_check_quota_file, 208 .check_quota_file = v1_check_quota_file,
209 .read_file_info = v1_read_file_info, 209 .read_file_info = v1_read_file_info,
210 .write_file_info = v1_write_file_info, 210 .write_file_info = v1_write_file_info,
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index a5475fb1ae44..3dfc23e02135 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -23,14 +23,23 @@ MODULE_LICENSE("GPL");
23 23
24#define __QUOTA_V2_PARANOIA 24#define __QUOTA_V2_PARANOIA
25 25
26static void v2_mem2diskdqb(void *dp, struct dquot *dquot); 26static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot);
27static void v2_disk2memdqb(struct dquot *dquot, void *dp); 27static void v2r0_disk2memdqb(struct dquot *dquot, void *dp);
28static int v2_is_id(void *dp, struct dquot *dquot); 28static int v2r0_is_id(void *dp, struct dquot *dquot);
29 29static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot);
30static struct qtree_fmt_operations v2_qtree_ops = { 30static void v2r1_disk2memdqb(struct dquot *dquot, void *dp);
31 .mem2disk_dqblk = v2_mem2diskdqb, 31static int v2r1_is_id(void *dp, struct dquot *dquot);
32 .disk2mem_dqblk = v2_disk2memdqb, 32
33 .is_id = v2_is_id, 33static struct qtree_fmt_operations v2r0_qtree_ops = {
34 .mem2disk_dqblk = v2r0_mem2diskdqb,
35 .disk2mem_dqblk = v2r0_disk2memdqb,
36 .is_id = v2r0_is_id,
37};
38
39static struct qtree_fmt_operations v2r1_qtree_ops = {
40 .mem2disk_dqblk = v2r1_mem2diskdqb,
41 .disk2mem_dqblk = v2r1_disk2memdqb,
42 .is_id = v2r1_is_id,
34}; 43};
35 44
36#define QUOTABLOCK_BITS 10 45#define QUOTABLOCK_BITS 10
@@ -46,23 +55,33 @@ static inline qsize_t v2_qbtos(qsize_t blocks)
46 return blocks << QUOTABLOCK_BITS; 55 return blocks << QUOTABLOCK_BITS;
47} 56}
48 57
58static int v2_read_header(struct super_block *sb, int type,
59 struct v2_disk_dqheader *dqhead)
60{
61 ssize_t size;
62
63 size = sb->s_op->quota_read(sb, type, (char *)dqhead,
64 sizeof(struct v2_disk_dqheader), 0);
65 if (size != sizeof(struct v2_disk_dqheader)) {
66 printk(KERN_WARNING "quota_v2: Failed header read:"
67 " expected=%zd got=%zd\n",
68 sizeof(struct v2_disk_dqheader), size);
69 return 0;
70 }
71 return 1;
72}
73
49/* Check whether given file is really vfsv0 quotafile */ 74/* Check whether given file is really vfsv0 quotafile */
50static int v2_check_quota_file(struct super_block *sb, int type) 75static int v2_check_quota_file(struct super_block *sb, int type)
51{ 76{
52 struct v2_disk_dqheader dqhead; 77 struct v2_disk_dqheader dqhead;
53 ssize_t size;
54 static const uint quota_magics[] = V2_INITQMAGICS; 78 static const uint quota_magics[] = V2_INITQMAGICS;
55 static const uint quota_versions[] = V2_INITQVERSIONS; 79 static const uint quota_versions[] = V2_INITQVERSIONS;
56 80
57 size = sb->s_op->quota_read(sb, type, (char *)&dqhead, 81 if (!v2_read_header(sb, type, &dqhead))
58 sizeof(struct v2_disk_dqheader), 0);
59 if (size != sizeof(struct v2_disk_dqheader)) {
60 printk("quota_v2: failed read expected=%zd got=%zd\n",
61 sizeof(struct v2_disk_dqheader), size);
62 return 0; 82 return 0;
63 }
64 if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] || 83 if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
65 le32_to_cpu(dqhead.dqh_version) != quota_versions[type]) 84 le32_to_cpu(dqhead.dqh_version) > quota_versions[type])
66 return 0; 85 return 0;
67 return 1; 86 return 1;
68} 87}
@@ -71,14 +90,20 @@ static int v2_check_quota_file(struct super_block *sb, int type)
71static int v2_read_file_info(struct super_block *sb, int type) 90static int v2_read_file_info(struct super_block *sb, int type)
72{ 91{
73 struct v2_disk_dqinfo dinfo; 92 struct v2_disk_dqinfo dinfo;
93 struct v2_disk_dqheader dqhead;
74 struct mem_dqinfo *info = sb_dqinfo(sb, type); 94 struct mem_dqinfo *info = sb_dqinfo(sb, type);
75 struct qtree_mem_dqinfo *qinfo; 95 struct qtree_mem_dqinfo *qinfo;
76 ssize_t size; 96 ssize_t size;
97 unsigned int version;
98
99 if (!v2_read_header(sb, type, &dqhead))
100 return 0;
101 version = le32_to_cpu(dqhead.dqh_version);
77 102
78 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 103 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
79 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 104 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
80 if (size != sizeof(struct v2_disk_dqinfo)) { 105 if (size != sizeof(struct v2_disk_dqinfo)) {
81 printk(KERN_WARNING "Can't read info structure on device %s.\n", 106 printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
82 sb->s_id); 107 sb->s_id);
83 return -1; 108 return -1;
84 } 109 }
@@ -89,9 +114,15 @@ static int v2_read_file_info(struct super_block *sb, int type)
89 return -1; 114 return -1;
90 } 115 }
91 qinfo = info->dqi_priv; 116 qinfo = info->dqi_priv;
92 /* limits are stored as unsigned 32-bit data */ 117 if (version == 0) {
93 info->dqi_maxblimit = 0xffffffff; 118 /* limits are stored as unsigned 32-bit data */
94 info->dqi_maxilimit = 0xffffffff; 119 info->dqi_maxblimit = 0xffffffff;
120 info->dqi_maxilimit = 0xffffffff;
121 } else {
122 /* used space is stored as unsigned 64-bit value */
123 info->dqi_maxblimit = 0xffffffffffffffff; /* 2^64-1 */
124 info->dqi_maxilimit = 0xffffffffffffffff;
125 }
95 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 126 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
96 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 127 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
97 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); 128 info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
@@ -103,8 +134,13 @@ static int v2_read_file_info(struct super_block *sb, int type)
103 qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS; 134 qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
104 qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS; 135 qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
105 qinfo->dqi_qtree_depth = qtree_depth(qinfo); 136 qinfo->dqi_qtree_depth = qtree_depth(qinfo);
106 qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk); 137 if (version == 0) {
107 qinfo->dqi_ops = &v2_qtree_ops; 138 qinfo->dqi_entry_size = sizeof(struct v2r0_disk_dqblk);
139 qinfo->dqi_ops = &v2r0_qtree_ops;
140 } else {
141 qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
142 qinfo->dqi_ops = &v2r1_qtree_ops;
143 }
108 return 0; 144 return 0;
109} 145}
110 146
@@ -135,9 +171,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
135 return 0; 171 return 0;
136} 172}
137 173
138static void v2_disk2memdqb(struct dquot *dquot, void *dp) 174static void v2r0_disk2memdqb(struct dquot *dquot, void *dp)
139{ 175{
140 struct v2_disk_dqblk *d = dp, empty; 176 struct v2r0_disk_dqblk *d = dp, empty;
141 struct mem_dqblk *m = &dquot->dq_dqb; 177 struct mem_dqblk *m = &dquot->dq_dqb;
142 178
143 m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); 179 m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
@@ -149,15 +185,15 @@ static void v2_disk2memdqb(struct dquot *dquot, void *dp)
149 m->dqb_curspace = le64_to_cpu(d->dqb_curspace); 185 m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
150 m->dqb_btime = le64_to_cpu(d->dqb_btime); 186 m->dqb_btime = le64_to_cpu(d->dqb_btime);
151 /* We need to escape back all-zero structure */ 187 /* We need to escape back all-zero structure */
152 memset(&empty, 0, sizeof(struct v2_disk_dqblk)); 188 memset(&empty, 0, sizeof(struct v2r0_disk_dqblk));
153 empty.dqb_itime = cpu_to_le64(1); 189 empty.dqb_itime = cpu_to_le64(1);
154 if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk))) 190 if (!memcmp(&empty, dp, sizeof(struct v2r0_disk_dqblk)))
155 m->dqb_itime = 0; 191 m->dqb_itime = 0;
156} 192}
157 193
158static void v2_mem2diskdqb(void *dp, struct dquot *dquot) 194static void v2r0_mem2diskdqb(void *dp, struct dquot *dquot)
159{ 195{
160 struct v2_disk_dqblk *d = dp; 196 struct v2r0_disk_dqblk *d = dp;
161 struct mem_dqblk *m = &dquot->dq_dqb; 197 struct mem_dqblk *m = &dquot->dq_dqb;
162 struct qtree_mem_dqinfo *info = 198 struct qtree_mem_dqinfo *info =
163 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 199 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
@@ -175,9 +211,60 @@ static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
175 d->dqb_itime = cpu_to_le64(1); 211 d->dqb_itime = cpu_to_le64(1);
176} 212}
177 213
178static int v2_is_id(void *dp, struct dquot *dquot) 214static int v2r0_is_id(void *dp, struct dquot *dquot)
215{
216 struct v2r0_disk_dqblk *d = dp;
217 struct qtree_mem_dqinfo *info =
218 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
219
220 if (qtree_entry_unused(info, dp))
221 return 0;
222 return le32_to_cpu(d->dqb_id) == dquot->dq_id;
223}
224
225static void v2r1_disk2memdqb(struct dquot *dquot, void *dp)
226{
227 struct v2r1_disk_dqblk *d = dp, empty;
228 struct mem_dqblk *m = &dquot->dq_dqb;
229
230 m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
231 m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
232 m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
233 m->dqb_itime = le64_to_cpu(d->dqb_itime);
234 m->dqb_bhardlimit = v2_qbtos(le64_to_cpu(d->dqb_bhardlimit));
235 m->dqb_bsoftlimit = v2_qbtos(le64_to_cpu(d->dqb_bsoftlimit));
236 m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
237 m->dqb_btime = le64_to_cpu(d->dqb_btime);
238 /* We need to escape back all-zero structure */
239 memset(&empty, 0, sizeof(struct v2r1_disk_dqblk));
240 empty.dqb_itime = cpu_to_le64(1);
241 if (!memcmp(&empty, dp, sizeof(struct v2r1_disk_dqblk)))
242 m->dqb_itime = 0;
243}
244
245static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
246{
247 struct v2r1_disk_dqblk *d = dp;
248 struct mem_dqblk *m = &dquot->dq_dqb;
249 struct qtree_mem_dqinfo *info =
250 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
251
252 d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
253 d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
254 d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
255 d->dqb_itime = cpu_to_le64(m->dqb_itime);
256 d->dqb_bhardlimit = cpu_to_le64(v2_stoqb(m->dqb_bhardlimit));
257 d->dqb_bsoftlimit = cpu_to_le64(v2_stoqb(m->dqb_bsoftlimit));
258 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
259 d->dqb_btime = cpu_to_le64(m->dqb_btime);
260 d->dqb_id = cpu_to_le32(dquot->dq_id);
261 if (qtree_entry_unused(info, dp))
262 d->dqb_itime = cpu_to_le64(1);
263}
264
265static int v2r1_is_id(void *dp, struct dquot *dquot)
179{ 266{
180 struct v2_disk_dqblk *d = dp; 267 struct v2r1_disk_dqblk *d = dp;
181 struct qtree_mem_dqinfo *info = 268 struct qtree_mem_dqinfo *info =
182 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 269 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
183 270
@@ -207,7 +294,7 @@ static int v2_free_file_info(struct super_block *sb, int type)
207 return 0; 294 return 0;
208} 295}
209 296
210static struct quota_format_ops v2_format_ops = { 297static const struct quota_format_ops v2_format_ops = {
211 .check_quota_file = v2_check_quota_file, 298 .check_quota_file = v2_check_quota_file,
212 .read_file_info = v2_read_file_info, 299 .read_file_info = v2_read_file_info,
213 .write_file_info = v2_write_file_info, 300 .write_file_info = v2_write_file_info,
@@ -217,20 +304,32 @@ static struct quota_format_ops v2_format_ops = {
217 .release_dqblk = v2_release_dquot, 304 .release_dqblk = v2_release_dquot,
218}; 305};
219 306
220static struct quota_format_type v2_quota_format = { 307static struct quota_format_type v2r0_quota_format = {
221 .qf_fmt_id = QFMT_VFS_V0, 308 .qf_fmt_id = QFMT_VFS_V0,
222 .qf_ops = &v2_format_ops, 309 .qf_ops = &v2_format_ops,
223 .qf_owner = THIS_MODULE 310 .qf_owner = THIS_MODULE
224}; 311};
225 312
313static struct quota_format_type v2r1_quota_format = {
314 .qf_fmt_id = QFMT_VFS_V1,
315 .qf_ops = &v2_format_ops,
316 .qf_owner = THIS_MODULE
317};
318
226static int __init init_v2_quota_format(void) 319static int __init init_v2_quota_format(void)
227{ 320{
228 return register_quota_format(&v2_quota_format); 321 int ret;
322
323 ret = register_quota_format(&v2r0_quota_format);
324 if (ret)
325 return ret;
326 return register_quota_format(&v2r1_quota_format);
229} 327}
230 328
231static void __exit exit_v2_quota_format(void) 329static void __exit exit_v2_quota_format(void)
232{ 330{
233 unregister_quota_format(&v2_quota_format); 331 unregister_quota_format(&v2r0_quota_format);
332 unregister_quota_format(&v2r1_quota_format);
234} 333}
235 334
236module_init(init_v2_quota_format); 335module_init(init_v2_quota_format);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index 530fe580685c..f1966b42c2fd 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -17,8 +17,8 @@
17} 17}
18 18
19#define V2_INITQVERSIONS {\ 19#define V2_INITQVERSIONS {\
20 0, /* USRQUOTA */\ 20 1, /* USRQUOTA */\
21 0 /* GRPQUOTA */\ 21 1 /* GRPQUOTA */\
22} 22}
23 23
24/* First generic header */ 24/* First generic header */
@@ -32,7 +32,7 @@ struct v2_disk_dqheader {
32 * (as it appears on disk) - the file is a radix tree whose leaves point 32 * (as it appears on disk) - the file is a radix tree whose leaves point
33 * to blocks of these structures. 33 * to blocks of these structures.
34 */ 34 */
35struct v2_disk_dqblk { 35struct v2r0_disk_dqblk {
36 __le32 dqb_id; /* id this quota applies to */ 36 __le32 dqb_id; /* id this quota applies to */
37 __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */ 37 __le32 dqb_ihardlimit; /* absolute limit on allocated inodes */
38 __le32 dqb_isoftlimit; /* preferred inode limit */ 38 __le32 dqb_isoftlimit; /* preferred inode limit */
@@ -44,6 +44,19 @@ struct v2_disk_dqblk {
44 __le64 dqb_itime; /* time limit for excessive inode use */ 44 __le64 dqb_itime; /* time limit for excessive inode use */
45}; 45};
46 46
47struct v2r1_disk_dqblk {
48 __le32 dqb_id; /* id this quota applies to */
49 __le32 dqb_pad;
50 __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
51 __le64 dqb_isoftlimit; /* preferred inode limit */
52 __le64 dqb_curinodes; /* current # allocated inodes */
53 __le64 dqb_bhardlimit; /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
54 __le64 dqb_bsoftlimit; /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
55 __le64 dqb_curspace; /* current space occupied (in bytes) */
56 __le64 dqb_btime; /* time limit for excessive disk use */
57 __le64 dqb_itime; /* time limit for excessive inode use */
58};
59
47/* Header with type and version specific information */ 60/* Header with type and version specific information */
48struct v2_disk_dqinfo { 61struct v2_disk_dqinfo {
49 __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */ 62 __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
diff --git a/fs/read_write.c b/fs/read_write.c
index 3ac28987f22a..b7f4a1f94d48 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -826,8 +826,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
826 if (!(out_file->f_mode & FMODE_WRITE)) 826 if (!(out_file->f_mode & FMODE_WRITE))
827 goto fput_out; 827 goto fput_out;
828 retval = -EINVAL; 828 retval = -EINVAL;
829 if (!out_file->f_op || !out_file->f_op->sendpage)
830 goto fput_out;
831 in_inode = in_file->f_path.dentry->d_inode; 829 in_inode = in_file->f_path.dentry->d_inode;
832 out_inode = out_file->f_path.dentry->d_inode; 830 out_inode = out_file->f_path.dentry->d_inode;
833 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 831 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 7c5ab6330dd6..6a9e30c041dd 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_REISERFS_FS) += reiserfs.o
7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \ 7reiserfs-objs := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o \
8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \ 8 super.o prints.o objectid.o lbalance.o ibalance.o stree.o \
9 hashes.o tail_conversion.o journal.o resize.o \ 9 hashes.o tail_conversion.o journal.o resize.o \
10 item_ops.o ioctl.o procfs.o xattr.o 10 item_ops.o ioctl.o procfs.o xattr.o lock.o
11 11
12ifeq ($(CONFIG_REISERFS_FS_XATTR),y) 12ifeq ($(CONFIG_REISERFS_FS_XATTR),y)
13reiserfs-objs += xattr_user.o xattr_trusted.o 13reiserfs-objs += xattr_user.o xattr_trusted.o
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index e716161ab325..685495707181 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1249,14 +1249,18 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1249 else if (bitmap == 0) 1249 else if (bitmap == 0)
1250 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; 1250 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
1251 1251
1252 reiserfs_write_unlock(sb);
1252 bh = sb_bread(sb, block); 1253 bh = sb_bread(sb, block);
1254 reiserfs_write_lock(sb);
1253 if (bh == NULL) 1255 if (bh == NULL)
1254 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " 1256 reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) "
1255 "reading failed", __func__, block); 1257 "reading failed", __func__, block);
1256 else { 1258 else {
1257 if (buffer_locked(bh)) { 1259 if (buffer_locked(bh)) {
1258 PROC_INFO_INC(sb, scan_bitmap.wait); 1260 PROC_INFO_INC(sb, scan_bitmap.wait);
1261 reiserfs_write_unlock(sb);
1259 __wait_on_buffer(bh); 1262 __wait_on_buffer(bh);
1263 reiserfs_write_lock(sb);
1260 } 1264 }
1261 BUG_ON(!buffer_uptodate(bh)); 1265 BUG_ON(!buffer_uptodate(bh));
1262 BUG_ON(atomic_read(&bh->b_count) == 0); 1266 BUG_ON(atomic_read(&bh->b_count) == 0);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6d2668fdc384..c094f58c7448 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -20,7 +20,7 @@ const struct file_operations reiserfs_dir_operations = {
20 .read = generic_read_dir, 20 .read = generic_read_dir,
21 .readdir = reiserfs_readdir, 21 .readdir = reiserfs_readdir,
22 .fsync = reiserfs_dir_fsync, 22 .fsync = reiserfs_dir_fsync,
23 .ioctl = reiserfs_ioctl, 23 .unlocked_ioctl = reiserfs_ioctl,
24#ifdef CONFIG_COMPAT 24#ifdef CONFIG_COMPAT
25 .compat_ioctl = reiserfs_compat_ioctl, 25 .compat_ioctl = reiserfs_compat_ioctl,
26#endif 26#endif
@@ -174,14 +174,22 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
174 // user space buffer is swapped out. At that time 174 // user space buffer is swapped out. At that time
175 // entry can move to somewhere else 175 // entry can move to somewhere else
176 memcpy(local_buf, d_name, d_reclen); 176 memcpy(local_buf, d_name, d_reclen);
177
178 /*
179 * Since filldir might sleep, we can release
180 * the write lock here for other waiters
181 */
182 reiserfs_write_unlock(inode->i_sb);
177 if (filldir 183 if (filldir
178 (dirent, local_buf, d_reclen, d_off, d_ino, 184 (dirent, local_buf, d_reclen, d_off, d_ino,
179 DT_UNKNOWN) < 0) { 185 DT_UNKNOWN) < 0) {
186 reiserfs_write_lock(inode->i_sb);
180 if (local_buf != small_buf) { 187 if (local_buf != small_buf) {
181 kfree(local_buf); 188 kfree(local_buf);
182 } 189 }
183 goto end; 190 goto end;
184 } 191 }
192 reiserfs_write_lock(inode->i_sb);
185 if (local_buf != small_buf) { 193 if (local_buf != small_buf) {
186 kfree(local_buf); 194 kfree(local_buf);
187 } 195 }
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 128d3f7c8aa5..60c080440661 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -21,14 +21,6 @@
21#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
22#include <linux/kernel.h> 22#include <linux/kernel.h>
23 23
24#ifdef CONFIG_REISERFS_CHECK
25
26struct tree_balance *cur_tb = NULL; /* detects whether more than one
27 copy of tb exists as a means
28 of checking whether schedule
29 is interrupting do_balance */
30#endif
31
32static inline void buffer_info_init_left(struct tree_balance *tb, 24static inline void buffer_info_init_left(struct tree_balance *tb,
33 struct buffer_info *bi) 25 struct buffer_info *bi)
34{ 26{
@@ -1840,11 +1832,12 @@ static int check_before_balancing(struct tree_balance *tb)
1840{ 1832{
1841 int retval = 0; 1833 int retval = 0;
1842 1834
1843 if (cur_tb) { 1835 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1844 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " 1836 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1845 "occurred based on cur_tb not being null at " 1837 "occurred based on cur_tb not being null at "
1846 "this point in code. do_balance cannot properly " 1838 "this point in code. do_balance cannot properly "
1847 "handle schedule occurring while it runs."); 1839 "handle concurrent tree accesses on a same "
1840 "mount point.");
1848 } 1841 }
1849 1842
1850 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have 1843 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
@@ -1986,7 +1979,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
1986 "check");*/ 1979 "check");*/
1987 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); 1980 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1988#ifdef CONFIG_REISERFS_CHECK 1981#ifdef CONFIG_REISERFS_CHECK
1989 cur_tb = tb; 1982 REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1990#endif 1983#endif
1991} 1984}
1992 1985
@@ -1996,7 +1989,7 @@ static inline void do_balance_completed(struct tree_balance *tb)
1996#ifdef CONFIG_REISERFS_CHECK 1989#ifdef CONFIG_REISERFS_CHECK
1997 check_leaf_level(tb); 1990 check_leaf_level(tb);
1998 check_internal_levels(tb); 1991 check_internal_levels(tb);
1999 cur_tb = NULL; 1992 REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
2000#endif 1993#endif
2001 1994
2002 /* reiserfs_free_block is no longer schedule safe. So, we need to 1995 /* reiserfs_free_block is no longer schedule safe. So, we need to
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9f436668b7f8..da2dba082e2d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -284,7 +284,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t
284const struct file_operations reiserfs_file_operations = { 284const struct file_operations reiserfs_file_operations = {
285 .read = do_sync_read, 285 .read = do_sync_read,
286 .write = reiserfs_file_write, 286 .write = reiserfs_file_write,
287 .ioctl = reiserfs_ioctl, 287 .unlocked_ioctl = reiserfs_ioctl,
288#ifdef CONFIG_COMPAT 288#ifdef CONFIG_COMPAT
289 .compat_ioctl = reiserfs_compat_ioctl, 289 .compat_ioctl = reiserfs_compat_ioctl,
290#endif 290#endif
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 5e5a4e6fbaf8..6591cb21edf6 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -563,9 +563,6 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
563 return needed_nodes; 563 return needed_nodes;
564} 564}
565 565
566#ifdef CONFIG_REISERFS_CHECK
567extern struct tree_balance *cur_tb;
568#endif
569 566
570/* Set parameters for balancing. 567/* Set parameters for balancing.
571 * Performs write of results of analysis of balancing into structure tb, 568 * Performs write of results of analysis of balancing into structure tb,
@@ -834,7 +831,7 @@ static int get_empty_nodes(struct tree_balance *tb, int h)
834 RFALSE(buffer_dirty(new_bh) || 831 RFALSE(buffer_dirty(new_bh) ||
835 buffer_journaled(new_bh) || 832 buffer_journaled(new_bh) ||
836 buffer_journal_dirty(new_bh), 833 buffer_journal_dirty(new_bh),
837 "PAP-8140: journlaled or dirty buffer %b for the new block", 834 "PAP-8140: journaled or dirty buffer %b for the new block",
838 new_bh); 835 new_bh);
839 836
840 /* Put empty buffers into the array. */ 837 /* Put empty buffers into the array. */
@@ -1022,7 +1019,11 @@ static int get_far_parent(struct tree_balance *tb,
1022 /* Check whether the common parent is locked. */ 1019 /* Check whether the common parent is locked. */
1023 1020
1024 if (buffer_locked(*pcom_father)) { 1021 if (buffer_locked(*pcom_father)) {
1022
1023 /* Release the write lock while the buffer is busy */
1024 reiserfs_write_unlock(tb->tb_sb);
1025 __wait_on_buffer(*pcom_father); 1025 __wait_on_buffer(*pcom_father);
1026 reiserfs_write_lock(tb->tb_sb);
1026 if (FILESYSTEM_CHANGED_TB(tb)) { 1027 if (FILESYSTEM_CHANGED_TB(tb)) {
1027 brelse(*pcom_father); 1028 brelse(*pcom_father);
1028 return REPEAT_SEARCH; 1029 return REPEAT_SEARCH;
@@ -1927,7 +1928,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1927 return REPEAT_SEARCH; 1928 return REPEAT_SEARCH;
1928 1929
1929 if (buffer_locked(bh)) { 1930 if (buffer_locked(bh)) {
1931 reiserfs_write_unlock(tb->tb_sb);
1930 __wait_on_buffer(bh); 1932 __wait_on_buffer(bh);
1933 reiserfs_write_lock(tb->tb_sb);
1931 if (FILESYSTEM_CHANGED_TB(tb)) 1934 if (FILESYSTEM_CHANGED_TB(tb))
1932 return REPEAT_SEARCH; 1935 return REPEAT_SEARCH;
1933 } 1936 }
@@ -1965,7 +1968,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
1965 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> 1968 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
1966 FL[h]); 1969 FL[h]);
1967 son_number = B_N_CHILD_NUM(tb->FL[h], child_position); 1970 son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
1971 reiserfs_write_unlock(sb);
1968 bh = sb_bread(sb, son_number); 1972 bh = sb_bread(sb, son_number);
1973 reiserfs_write_lock(sb);
1969 if (!bh) 1974 if (!bh)
1970 return IO_ERROR; 1975 return IO_ERROR;
1971 if (FILESYSTEM_CHANGED_TB(tb)) { 1976 if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2003,7 +2008,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
2003 child_position = 2008 child_position =
2004 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; 2009 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
2005 son_number = B_N_CHILD_NUM(tb->FR[h], child_position); 2010 son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
2011 reiserfs_write_unlock(sb);
2006 bh = sb_bread(sb, son_number); 2012 bh = sb_bread(sb, son_number);
2013 reiserfs_write_lock(sb);
2007 if (!bh) 2014 if (!bh)
2008 return IO_ERROR; 2015 return IO_ERROR;
2009 if (FILESYSTEM_CHANGED_TB(tb)) { 2016 if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2278,7 +2285,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2278 REPEAT_SEARCH : CARRY_ON; 2285 REPEAT_SEARCH : CARRY_ON;
2279 } 2286 }
2280#endif 2287#endif
2288 reiserfs_write_unlock(tb->tb_sb);
2281 __wait_on_buffer(locked); 2289 __wait_on_buffer(locked);
2290 reiserfs_write_lock(tb->tb_sb);
2282 if (FILESYSTEM_CHANGED_TB(tb)) 2291 if (FILESYSTEM_CHANGED_TB(tb))
2283 return REPEAT_SEARCH; 2292 return REPEAT_SEARCH;
2284 } 2293 }
@@ -2349,12 +2358,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2349 2358
2350 /* if it possible in indirect_to_direct conversion */ 2359 /* if it possible in indirect_to_direct conversion */
2351 if (buffer_locked(tbS0)) { 2360 if (buffer_locked(tbS0)) {
2361 reiserfs_write_unlock(tb->tb_sb);
2352 __wait_on_buffer(tbS0); 2362 __wait_on_buffer(tbS0);
2363 reiserfs_write_lock(tb->tb_sb);
2353 if (FILESYSTEM_CHANGED_TB(tb)) 2364 if (FILESYSTEM_CHANGED_TB(tb))
2354 return REPEAT_SEARCH; 2365 return REPEAT_SEARCH;
2355 } 2366 }
2356#ifdef CONFIG_REISERFS_CHECK 2367#ifdef CONFIG_REISERFS_CHECK
2357 if (cur_tb) { 2368 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
2358 print_cur_tb("fix_nodes"); 2369 print_cur_tb("fix_nodes");
2359 reiserfs_panic(tb->tb_sb, "PAP-8305", 2370 reiserfs_panic(tb->tb_sb, "PAP-8305",
2360 "there is pending do_balance"); 2371 "there is pending do_balance");
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a14d6cd9eeda..3a28e7751b3c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -251,7 +251,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
251 struct cpu_key key; 251 struct cpu_key key;
252 struct buffer_head *bh; 252 struct buffer_head *bh;
253 struct item_head *ih, tmp_ih; 253 struct item_head *ih, tmp_ih;
254 int fs_gen;
255 b_blocknr_t blocknr; 254 b_blocknr_t blocknr;
256 char *p = NULL; 255 char *p = NULL;
257 int chars; 256 int chars;
@@ -265,7 +264,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
265 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 264 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
266 3); 265 3);
267 266
268 research:
269 result = search_for_position_by_key(inode->i_sb, &key, &path); 267 result = search_for_position_by_key(inode->i_sb, &key, &path);
270 if (result != POSITION_FOUND) { 268 if (result != POSITION_FOUND) {
271 pathrelse(&path); 269 pathrelse(&path);
@@ -340,7 +338,6 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
340 } 338 }
341 // read file tail into part of page 339 // read file tail into part of page
342 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); 340 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
343 fs_gen = get_generation(inode->i_sb);
344 copy_item_head(&tmp_ih, ih); 341 copy_item_head(&tmp_ih, ih);
345 342
346 /* we only want to kmap if we are reading the tail into the page. 343 /* we only want to kmap if we are reading the tail into the page.
@@ -348,13 +345,9 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
348 ** sure we need to. But, this means the item might move if 345 ** sure we need to. But, this means the item might move if
349 ** kmap schedules 346 ** kmap schedules
350 */ 347 */
351 if (!p) { 348 if (!p)
352 p = (char *)kmap(bh_result->b_page); 349 p = (char *)kmap(bh_result->b_page);
353 if (fs_changed(fs_gen, inode->i_sb) 350
354 && item_moved(&tmp_ih, &path)) {
355 goto research;
356 }
357 }
358 p += offset; 351 p += offset;
359 memset(p, 0, inode->i_sb->s_blocksize); 352 memset(p, 0, inode->i_sb->s_blocksize);
360 do { 353 do {
@@ -489,10 +482,14 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
489 disappeared */ 482 disappeared */
490 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { 483 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
491 int err; 484 int err;
492 lock_kernel(); 485
486 reiserfs_write_lock(inode->i_sb);
487
493 err = reiserfs_commit_for_inode(inode); 488 err = reiserfs_commit_for_inode(inode);
494 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 489 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
495 unlock_kernel(); 490
491 reiserfs_write_unlock(inode->i_sb);
492
496 if (err < 0) 493 if (err < 0)
497 ret = err; 494 ret = err;
498 } 495 }
@@ -601,6 +598,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
601 __le32 *item; 598 __le32 *item;
602 int done; 599 int done;
603 int fs_gen; 600 int fs_gen;
601 int lock_depth;
604 struct reiserfs_transaction_handle *th = NULL; 602 struct reiserfs_transaction_handle *th = NULL;
605 /* space reserved in transaction batch: 603 /* space reserved in transaction batch:
606 . 3 balancings in direct->indirect conversion 604 . 3 balancings in direct->indirect conversion
@@ -616,12 +614,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
616 loff_t new_offset = 614 loff_t new_offset =
617 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; 615 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
618 616
619 /* bad.... */ 617 lock_depth = reiserfs_write_lock_once(inode->i_sb);
620 reiserfs_write_lock(inode->i_sb);
621 version = get_inode_item_key_version(inode); 618 version = get_inode_item_key_version(inode);
622 619
623 if (!file_capable(inode, block)) { 620 if (!file_capable(inode, block)) {
624 reiserfs_write_unlock(inode->i_sb); 621 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
625 return -EFBIG; 622 return -EFBIG;
626 } 623 }
627 624
@@ -633,7 +630,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
633 /* find number of block-th logical block of the file */ 630 /* find number of block-th logical block of the file */
634 ret = _get_block_create_0(inode, block, bh_result, 631 ret = _get_block_create_0(inode, block, bh_result,
635 create | GET_BLOCK_READ_DIRECT); 632 create | GET_BLOCK_READ_DIRECT);
636 reiserfs_write_unlock(inode->i_sb); 633 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
637 return ret; 634 return ret;
638 } 635 }
639 /* 636 /*
@@ -751,7 +748,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
751 if (!dangle && th) 748 if (!dangle && th)
752 retval = reiserfs_end_persistent_transaction(th); 749 retval = reiserfs_end_persistent_transaction(th);
753 750
754 reiserfs_write_unlock(inode->i_sb); 751 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
755 752
756 /* the item was found, so new blocks were not added to the file 753 /* the item was found, so new blocks were not added to the file
757 ** there is no need to make sure the inode is updated with this 754 ** there is no need to make sure the inode is updated with this
@@ -935,7 +932,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
935 if (blocks_needed == 1) { 932 if (blocks_needed == 1) {
936 un = &unf_single; 933 un = &unf_single;
937 } else { 934 } else {
938 un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling. 935 un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
939 if (!un) { 936 if (!un) {
940 un = &unf_single; 937 un = &unf_single;
941 blocks_needed = 1; 938 blocks_needed = 1;
@@ -997,10 +994,16 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
997 if (retval) 994 if (retval)
998 goto failure; 995 goto failure;
999 } 996 }
1000 /* inserting indirect pointers for a hole can take a 997 /*
1001 ** long time. reschedule if needed 998 * inserting indirect pointers for a hole can take a
999 * long time. reschedule if needed and also release the write
1000 * lock for others.
1002 */ 1001 */
1003 cond_resched(); 1002 if (need_resched()) {
1003 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
1004 schedule();
1005 lock_depth = reiserfs_write_lock_once(inode->i_sb);
1006 }
1004 1007
1005 retval = search_for_position_by_key(inode->i_sb, &key, &path); 1008 retval = search_for_position_by_key(inode->i_sb, &key, &path);
1006 if (retval == IO_ERROR) { 1009 if (retval == IO_ERROR) {
@@ -1035,7 +1038,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
1035 retval = err; 1038 retval = err;
1036 } 1039 }
1037 1040
1038 reiserfs_write_unlock(inode->i_sb); 1041 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
1039 reiserfs_check_path(&path); 1042 reiserfs_check_path(&path);
1040 return retval; 1043 return retval;
1041} 1044}
@@ -2072,8 +2075,9 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2072 int error; 2075 int error;
2073 struct buffer_head *bh = NULL; 2076 struct buffer_head *bh = NULL;
2074 int err2; 2077 int err2;
2078 int lock_depth;
2075 2079
2076 reiserfs_write_lock(inode->i_sb); 2080 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2077 2081
2078 if (inode->i_size > 0) { 2082 if (inode->i_size > 0) {
2079 error = grab_tail_page(inode, &page, &bh); 2083 error = grab_tail_page(inode, &page, &bh);
@@ -2142,14 +2146,17 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2142 page_cache_release(page); 2146 page_cache_release(page);
2143 } 2147 }
2144 2148
2145 reiserfs_write_unlock(inode->i_sb); 2149 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2150
2146 return 0; 2151 return 0;
2147 out: 2152 out:
2148 if (page) { 2153 if (page) {
2149 unlock_page(page); 2154 unlock_page(page);
2150 page_cache_release(page); 2155 page_cache_release(page);
2151 } 2156 }
2152 reiserfs_write_unlock(inode->i_sb); 2157
2158 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2159
2153 return error; 2160 return error;
2154} 2161}
2155 2162
@@ -2608,7 +2615,10 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
2608 int ret; 2615 int ret;
2609 int old_ref = 0; 2616 int old_ref = 0;
2610 2617
2618 reiserfs_write_unlock(inode->i_sb);
2611 reiserfs_wait_on_write_block(inode->i_sb); 2619 reiserfs_wait_on_write_block(inode->i_sb);
2620 reiserfs_write_lock(inode->i_sb);
2621
2612 fix_tail_page_for_writing(page); 2622 fix_tail_page_for_writing(page);
2613 if (reiserfs_transaction_running(inode->i_sb)) { 2623 if (reiserfs_transaction_running(inode->i_sb)) {
2614 struct reiserfs_transaction_handle *th; 2624 struct reiserfs_transaction_handle *th;
@@ -2664,6 +2674,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2664 int update_sd = 0; 2674 int update_sd = 0;
2665 struct reiserfs_transaction_handle *th; 2675 struct reiserfs_transaction_handle *th;
2666 unsigned start; 2676 unsigned start;
2677 int lock_depth = 0;
2678 bool locked = false;
2667 2679
2668 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) 2680 if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2669 pos ++; 2681 pos ++;
@@ -2690,9 +2702,11 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2690 ** to do the i_size updates here. 2702 ** to do the i_size updates here.
2691 */ 2703 */
2692 pos += copied; 2704 pos += copied;
2705
2693 if (pos > inode->i_size) { 2706 if (pos > inode->i_size) {
2694 struct reiserfs_transaction_handle myth; 2707 struct reiserfs_transaction_handle myth;
2695 reiserfs_write_lock(inode->i_sb); 2708 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2709 locked = true;
2696 /* If the file have grown beyond the border where it 2710 /* If the file have grown beyond the border where it
2697 can have a tail, unmark it as needing a tail 2711 can have a tail, unmark it as needing a tail
2698 packing */ 2712 packing */
@@ -2703,10 +2717,9 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2703 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 2717 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2704 2718
2705 ret = journal_begin(&myth, inode->i_sb, 1); 2719 ret = journal_begin(&myth, inode->i_sb, 1);
2706 if (ret) { 2720 if (ret)
2707 reiserfs_write_unlock(inode->i_sb);
2708 goto journal_error; 2721 goto journal_error;
2709 } 2722
2710 reiserfs_update_inode_transaction(inode); 2723 reiserfs_update_inode_transaction(inode);
2711 inode->i_size = pos; 2724 inode->i_size = pos;
2712 /* 2725 /*
@@ -2718,34 +2731,36 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2718 reiserfs_update_sd(&myth, inode); 2731 reiserfs_update_sd(&myth, inode);
2719 update_sd = 1; 2732 update_sd = 1;
2720 ret = journal_end(&myth, inode->i_sb, 1); 2733 ret = journal_end(&myth, inode->i_sb, 1);
2721 reiserfs_write_unlock(inode->i_sb);
2722 if (ret) 2734 if (ret)
2723 goto journal_error; 2735 goto journal_error;
2724 } 2736 }
2725 if (th) { 2737 if (th) {
2726 reiserfs_write_lock(inode->i_sb); 2738 if (!locked) {
2739 lock_depth = reiserfs_write_lock_once(inode->i_sb);
2740 locked = true;
2741 }
2727 if (!update_sd) 2742 if (!update_sd)
2728 mark_inode_dirty(inode); 2743 mark_inode_dirty(inode);
2729 ret = reiserfs_end_persistent_transaction(th); 2744 ret = reiserfs_end_persistent_transaction(th);
2730 reiserfs_write_unlock(inode->i_sb);
2731 if (ret) 2745 if (ret)
2732 goto out; 2746 goto out;
2733 } 2747 }
2734 2748
2735 out: 2749 out:
2750 if (locked)
2751 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2736 unlock_page(page); 2752 unlock_page(page);
2737 page_cache_release(page); 2753 page_cache_release(page);
2738 return ret == 0 ? copied : ret; 2754 return ret == 0 ? copied : ret;
2739 2755
2740 journal_error: 2756 journal_error:
2757 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2758 locked = false;
2741 if (th) { 2759 if (th) {
2742 reiserfs_write_lock(inode->i_sb);
2743 if (!update_sd) 2760 if (!update_sd)
2744 reiserfs_update_sd(th, inode); 2761 reiserfs_update_sd(th, inode);
2745 ret = reiserfs_end_persistent_transaction(th); 2762 ret = reiserfs_end_persistent_transaction(th);
2746 reiserfs_write_unlock(inode->i_sb);
2747 } 2763 }
2748
2749 goto out; 2764 goto out;
2750} 2765}
2751 2766
@@ -2758,7 +2773,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2758 int update_sd = 0; 2773 int update_sd = 0;
2759 struct reiserfs_transaction_handle *th = NULL; 2774 struct reiserfs_transaction_handle *th = NULL;
2760 2775
2776 reiserfs_write_unlock(inode->i_sb);
2761 reiserfs_wait_on_write_block(inode->i_sb); 2777 reiserfs_wait_on_write_block(inode->i_sb);
2778 reiserfs_write_lock(inode->i_sb);
2779
2762 if (reiserfs_transaction_running(inode->i_sb)) { 2780 if (reiserfs_transaction_running(inode->i_sb)) {
2763 th = current->journal_info; 2781 th = current->journal_info;
2764 } 2782 }
@@ -2770,7 +2788,6 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2770 */ 2788 */
2771 if (pos > inode->i_size) { 2789 if (pos > inode->i_size) {
2772 struct reiserfs_transaction_handle myth; 2790 struct reiserfs_transaction_handle myth;
2773 reiserfs_write_lock(inode->i_sb);
2774 /* If the file have grown beyond the border where it 2791 /* If the file have grown beyond the border where it
2775 can have a tail, unmark it as needing a tail 2792 can have a tail, unmark it as needing a tail
2776 packing */ 2793 packing */
@@ -2781,10 +2798,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2781 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 2798 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2782 2799
2783 ret = journal_begin(&myth, inode->i_sb, 1); 2800 ret = journal_begin(&myth, inode->i_sb, 1);
2784 if (ret) { 2801 if (ret)
2785 reiserfs_write_unlock(inode->i_sb);
2786 goto journal_error; 2802 goto journal_error;
2787 } 2803
2788 reiserfs_update_inode_transaction(inode); 2804 reiserfs_update_inode_transaction(inode);
2789 inode->i_size = pos; 2805 inode->i_size = pos;
2790 /* 2806 /*
@@ -2796,16 +2812,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2796 reiserfs_update_sd(&myth, inode); 2812 reiserfs_update_sd(&myth, inode);
2797 update_sd = 1; 2813 update_sd = 1;
2798 ret = journal_end(&myth, inode->i_sb, 1); 2814 ret = journal_end(&myth, inode->i_sb, 1);
2799 reiserfs_write_unlock(inode->i_sb);
2800 if (ret) 2815 if (ret)
2801 goto journal_error; 2816 goto journal_error;
2802 } 2817 }
2803 if (th) { 2818 if (th) {
2804 reiserfs_write_lock(inode->i_sb);
2805 if (!update_sd) 2819 if (!update_sd)
2806 mark_inode_dirty(inode); 2820 mark_inode_dirty(inode);
2807 ret = reiserfs_end_persistent_transaction(th); 2821 ret = reiserfs_end_persistent_transaction(th);
2808 reiserfs_write_unlock(inode->i_sb);
2809 if (ret) 2822 if (ret)
2810 goto out; 2823 goto out;
2811 } 2824 }
@@ -2815,11 +2828,9 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2815 2828
2816 journal_error: 2829 journal_error:
2817 if (th) { 2830 if (th) {
2818 reiserfs_write_lock(inode->i_sb);
2819 if (!update_sd) 2831 if (!update_sd)
2820 reiserfs_update_sd(th, inode); 2832 reiserfs_update_sd(th, inode);
2821 ret = reiserfs_end_persistent_transaction(th); 2833 ret = reiserfs_end_persistent_transaction(th);
2822 reiserfs_write_unlock(inode->i_sb);
2823 } 2834 }
2824 2835
2825 return ret; 2836 return ret;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 0ccc3fdda7bf..ace77451ceb1 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -13,44 +13,52 @@
13#include <linux/compat.h> 13#include <linux/compat.h>
14 14
15/* 15/*
16** reiserfs_ioctl - handler for ioctl for inode 16 * reiserfs_ioctl - handler for ioctl for inode
17** supported commands: 17 * supported commands:
18** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect 18 * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
19** and prevent packing file (argument arg has to be non-zero) 19 * and prevent packing file (argument arg has to be non-zero)
20** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION 20 * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
21** 3) That's all for a while ... 21 * 3) That's all for a while ...
22*/ 22 */
23int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 23long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
24 unsigned long arg)
25{ 24{
25 struct inode *inode = filp->f_path.dentry->d_inode;
26 unsigned int flags; 26 unsigned int flags;
27 int err = 0; 27 int err = 0;
28 28
29 reiserfs_write_lock(inode->i_sb);
30
29 switch (cmd) { 31 switch (cmd) {
30 case REISERFS_IOC_UNPACK: 32 case REISERFS_IOC_UNPACK:
31 if (S_ISREG(inode->i_mode)) { 33 if (S_ISREG(inode->i_mode)) {
32 if (arg) 34 if (arg)
33 return reiserfs_unpack(inode, filp); 35 err = reiserfs_unpack(inode, filp);
34 else
35 return 0;
36 } else 36 } else
37 return -ENOTTY; 37 err = -ENOTTY;
38 /* following two cases are taken from fs/ext2/ioctl.c by Remy 38 break;
39 Card (card@masi.ibp.fr) */ 39 /*
40 * following two cases are taken from fs/ext2/ioctl.c by Remy
41 * Card (card@masi.ibp.fr)
42 */
40 case REISERFS_IOC_GETFLAGS: 43 case REISERFS_IOC_GETFLAGS:
41 if (!reiserfs_attrs(inode->i_sb)) 44 if (!reiserfs_attrs(inode->i_sb)) {
42 return -ENOTTY; 45 err = -ENOTTY;
46 break;
47 }
43 48
44 flags = REISERFS_I(inode)->i_attrs; 49 flags = REISERFS_I(inode)->i_attrs;
45 i_attrs_to_sd_attrs(inode, (__u16 *) & flags); 50 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
46 return put_user(flags, (int __user *)arg); 51 err = put_user(flags, (int __user *)arg);
52 break;
47 case REISERFS_IOC_SETFLAGS:{ 53 case REISERFS_IOC_SETFLAGS:{
48 if (!reiserfs_attrs(inode->i_sb)) 54 if (!reiserfs_attrs(inode->i_sb)) {
49 return -ENOTTY; 55 err = -ENOTTY;
56 break;
57 }
50 58
51 err = mnt_want_write(filp->f_path.mnt); 59 err = mnt_want_write(filp->f_path.mnt);
52 if (err) 60 if (err)
53 return err; 61 break;
54 62
55 if (!is_owner_or_cap(inode)) { 63 if (!is_owner_or_cap(inode)) {
56 err = -EPERM; 64 err = -EPERM;
@@ -90,16 +98,18 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
90 mark_inode_dirty(inode); 98 mark_inode_dirty(inode);
91setflags_out: 99setflags_out:
92 mnt_drop_write(filp->f_path.mnt); 100 mnt_drop_write(filp->f_path.mnt);
93 return err; 101 break;
94 } 102 }
95 case REISERFS_IOC_GETVERSION: 103 case REISERFS_IOC_GETVERSION:
96 return put_user(inode->i_generation, (int __user *)arg); 104 err = put_user(inode->i_generation, (int __user *)arg);
105 break;
97 case REISERFS_IOC_SETVERSION: 106 case REISERFS_IOC_SETVERSION:
98 if (!is_owner_or_cap(inode)) 107 if (!is_owner_or_cap(inode))
99 return -EPERM; 108 err = -EPERM;
109 break;
100 err = mnt_want_write(filp->f_path.mnt); 110 err = mnt_want_write(filp->f_path.mnt);
101 if (err) 111 if (err)
102 return err; 112 break;
103 if (get_user(inode->i_generation, (int __user *)arg)) { 113 if (get_user(inode->i_generation, (int __user *)arg)) {
104 err = -EFAULT; 114 err = -EFAULT;
105 goto setversion_out; 115 goto setversion_out;
@@ -108,19 +118,20 @@ setflags_out:
108 mark_inode_dirty(inode); 118 mark_inode_dirty(inode);
109setversion_out: 119setversion_out:
110 mnt_drop_write(filp->f_path.mnt); 120 mnt_drop_write(filp->f_path.mnt);
111 return err; 121 break;
112 default: 122 default:
113 return -ENOTTY; 123 err = -ENOTTY;
114 } 124 }
125
126 reiserfs_write_unlock(inode->i_sb);
127
128 return err;
115} 129}
116 130
117#ifdef CONFIG_COMPAT 131#ifdef CONFIG_COMPAT
118long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, 132long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
119 unsigned long arg) 133 unsigned long arg)
120{ 134{
121 struct inode *inode = file->f_path.dentry->d_inode;
122 int ret;
123
124 /* These are just misnamed, they actually get/put from/to user an int */ 135 /* These are just misnamed, they actually get/put from/to user an int */
125 switch (cmd) { 136 switch (cmd) {
126 case REISERFS_IOC32_UNPACK: 137 case REISERFS_IOC32_UNPACK:
@@ -141,10 +152,8 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
141 default: 152 default:
142 return -ENOIOCTLCMD; 153 return -ENOIOCTLCMD;
143 } 154 }
144 lock_kernel(); 155
145 ret = reiserfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); 156 return reiserfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
146 unlock_kernel();
147 return ret;
148} 157}
149#endif 158#endif
150 159
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 90622200b39c..2f8a7e7b8dab 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -429,21 +429,6 @@ static void clear_prepared_bits(struct buffer_head *bh)
429 clear_buffer_journal_restore_dirty(bh); 429 clear_buffer_journal_restore_dirty(bh);
430} 430}
431 431
432/* utility function to force a BUG if it is called without the big
433** kernel lock held. caller is the string printed just before calling BUG()
434*/
435void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
436{
437#ifdef CONFIG_SMP
438 if (current->lock_depth < 0) {
439 reiserfs_panic(sb, "journal-1", "%s called without kernel "
440 "lock held", caller);
441 }
442#else
443 ;
444#endif
445}
446
447/* return a cnode with same dev, block number and size in table, or null if not found */ 432/* return a cnode with same dev, block number and size in table, or null if not found */
448static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 433static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
449 super_block 434 super_block
@@ -556,7 +541,8 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
556static inline void lock_journal(struct super_block *sb) 541static inline void lock_journal(struct super_block *sb)
557{ 542{
558 PROC_INFO_INC(sb, journal.lock_journal); 543 PROC_INFO_INC(sb, journal.lock_journal);
559 mutex_lock(&SB_JOURNAL(sb)->j_mutex); 544
545 reiserfs_mutex_lock_safe(&SB_JOURNAL(sb)->j_mutex, sb);
560} 546}
561 547
562/* unlock the current transaction */ 548/* unlock the current transaction */
@@ -708,7 +694,9 @@ static void check_barrier_completion(struct super_block *s,
708 disable_barrier(s); 694 disable_barrier(s);
709 set_buffer_uptodate(bh); 695 set_buffer_uptodate(bh);
710 set_buffer_dirty(bh); 696 set_buffer_dirty(bh);
697 reiserfs_write_unlock(s);
711 sync_dirty_buffer(bh); 698 sync_dirty_buffer(bh);
699 reiserfs_write_lock(s);
712 } 700 }
713} 701}
714 702
@@ -996,8 +984,13 @@ static int reiserfs_async_progress_wait(struct super_block *s)
996{ 984{
997 DEFINE_WAIT(wait); 985 DEFINE_WAIT(wait);
998 struct reiserfs_journal *j = SB_JOURNAL(s); 986 struct reiserfs_journal *j = SB_JOURNAL(s);
999 if (atomic_read(&j->j_async_throttle)) 987
988 if (atomic_read(&j->j_async_throttle)) {
989 reiserfs_write_unlock(s);
1000 congestion_wait(BLK_RW_ASYNC, HZ / 10); 990 congestion_wait(BLK_RW_ASYNC, HZ / 10);
991 reiserfs_write_lock(s);
992 }
993
1001 return 0; 994 return 0;
1002} 995}
1003 996
@@ -1043,7 +1036,8 @@ static int flush_commit_list(struct super_block *s,
1043 } 1036 }
1044 1037
1045 /* make sure nobody is trying to flush this one at the same time */ 1038 /* make sure nobody is trying to flush this one at the same time */
1046 mutex_lock(&jl->j_commit_mutex); 1039 reiserfs_mutex_lock_safe(&jl->j_commit_mutex, s);
1040
1047 if (!journal_list_still_alive(s, trans_id)) { 1041 if (!journal_list_still_alive(s, trans_id)) {
1048 mutex_unlock(&jl->j_commit_mutex); 1042 mutex_unlock(&jl->j_commit_mutex);
1049 goto put_jl; 1043 goto put_jl;
@@ -1061,12 +1055,17 @@ static int flush_commit_list(struct super_block *s,
1061 1055
1062 if (!list_empty(&jl->j_bh_list)) { 1056 if (!list_empty(&jl->j_bh_list)) {
1063 int ret; 1057 int ret;
1064 unlock_kernel(); 1058
1059 /*
1060 * We might sleep in numerous places inside
1061 * write_ordered_buffers. Relax the write lock.
1062 */
1063 reiserfs_write_unlock(s);
1065 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, 1064 ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
1066 journal, jl, &jl->j_bh_list); 1065 journal, jl, &jl->j_bh_list);
1067 if (ret < 0 && retval == 0) 1066 if (ret < 0 && retval == 0)
1068 retval = ret; 1067 retval = ret;
1069 lock_kernel(); 1068 reiserfs_write_lock(s);
1070 } 1069 }
1071 BUG_ON(!list_empty(&jl->j_bh_list)); 1070 BUG_ON(!list_empty(&jl->j_bh_list));
1072 /* 1071 /*
@@ -1085,8 +1084,11 @@ static int flush_commit_list(struct super_block *s,
1085 SB_ONDISK_JOURNAL_SIZE(s); 1084 SB_ONDISK_JOURNAL_SIZE(s);
1086 tbh = journal_find_get_block(s, bn); 1085 tbh = journal_find_get_block(s, bn);
1087 if (tbh) { 1086 if (tbh) {
1088 if (buffer_dirty(tbh)) 1087 if (buffer_dirty(tbh)) {
1089 ll_rw_block(WRITE, 1, &tbh) ; 1088 reiserfs_write_unlock(s);
1089 ll_rw_block(WRITE, 1, &tbh);
1090 reiserfs_write_lock(s);
1091 }
1090 put_bh(tbh) ; 1092 put_bh(tbh) ;
1091 } 1093 }
1092 } 1094 }
@@ -1114,12 +1116,19 @@ static int flush_commit_list(struct super_block *s,
1114 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 1116 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
1115 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); 1117 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
1116 tbh = journal_find_get_block(s, bn); 1118 tbh = journal_find_get_block(s, bn);
1119
1120 reiserfs_write_unlock(s);
1117 wait_on_buffer(tbh); 1121 wait_on_buffer(tbh);
1122 reiserfs_write_lock(s);
1118 // since we're using ll_rw_blk above, it might have skipped over 1123 // since we're using ll_rw_blk above, it might have skipped over
1119 // a locked buffer. Double check here 1124 // a locked buffer. Double check here
1120 // 1125 //
1121 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 1126 /* redundant, sync_dirty_buffer() checks */
1127 if (buffer_dirty(tbh)) {
1128 reiserfs_write_unlock(s);
1122 sync_dirty_buffer(tbh); 1129 sync_dirty_buffer(tbh);
1130 reiserfs_write_lock(s);
1131 }
1123 if (unlikely(!buffer_uptodate(tbh))) { 1132 if (unlikely(!buffer_uptodate(tbh))) {
1124#ifdef CONFIG_REISERFS_CHECK 1133#ifdef CONFIG_REISERFS_CHECK
1125 reiserfs_warning(s, "journal-601", 1134 reiserfs_warning(s, "journal-601",
@@ -1143,10 +1152,15 @@ static int flush_commit_list(struct super_block *s,
1143 if (buffer_dirty(jl->j_commit_bh)) 1152 if (buffer_dirty(jl->j_commit_bh))
1144 BUG(); 1153 BUG();
1145 mark_buffer_dirty(jl->j_commit_bh) ; 1154 mark_buffer_dirty(jl->j_commit_bh) ;
1155 reiserfs_write_unlock(s);
1146 sync_dirty_buffer(jl->j_commit_bh) ; 1156 sync_dirty_buffer(jl->j_commit_bh) ;
1157 reiserfs_write_lock(s);
1147 } 1158 }
1148 } else 1159 } else {
1160 reiserfs_write_unlock(s);
1149 wait_on_buffer(jl->j_commit_bh); 1161 wait_on_buffer(jl->j_commit_bh);
1162 reiserfs_write_lock(s);
1163 }
1150 1164
1151 check_barrier_completion(s, jl->j_commit_bh); 1165 check_barrier_completion(s, jl->j_commit_bh);
1152 1166
@@ -1286,7 +1300,9 @@ static int _update_journal_header_block(struct super_block *sb,
1286 1300
1287 if (trans_id >= journal->j_last_flush_trans_id) { 1301 if (trans_id >= journal->j_last_flush_trans_id) {
1288 if (buffer_locked((journal->j_header_bh))) { 1302 if (buffer_locked((journal->j_header_bh))) {
1303 reiserfs_write_unlock(sb);
1289 wait_on_buffer((journal->j_header_bh)); 1304 wait_on_buffer((journal->j_header_bh));
1305 reiserfs_write_lock(sb);
1290 if (unlikely(!buffer_uptodate(journal->j_header_bh))) { 1306 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1291#ifdef CONFIG_REISERFS_CHECK 1307#ifdef CONFIG_REISERFS_CHECK
1292 reiserfs_warning(sb, "journal-699", 1308 reiserfs_warning(sb, "journal-699",
@@ -1312,12 +1328,16 @@ static int _update_journal_header_block(struct super_block *sb,
1312 disable_barrier(sb); 1328 disable_barrier(sb);
1313 goto sync; 1329 goto sync;
1314 } 1330 }
1331 reiserfs_write_unlock(sb);
1315 wait_on_buffer(journal->j_header_bh); 1332 wait_on_buffer(journal->j_header_bh);
1333 reiserfs_write_lock(sb);
1316 check_barrier_completion(sb, journal->j_header_bh); 1334 check_barrier_completion(sb, journal->j_header_bh);
1317 } else { 1335 } else {
1318 sync: 1336 sync:
1319 set_buffer_dirty(journal->j_header_bh); 1337 set_buffer_dirty(journal->j_header_bh);
1338 reiserfs_write_unlock(sb);
1320 sync_dirty_buffer(journal->j_header_bh); 1339 sync_dirty_buffer(journal->j_header_bh);
1340 reiserfs_write_lock(sb);
1321 } 1341 }
1322 if (!buffer_uptodate(journal->j_header_bh)) { 1342 if (!buffer_uptodate(journal->j_header_bh)) {
1323 reiserfs_warning(sb, "journal-837", 1343 reiserfs_warning(sb, "journal-837",
@@ -1409,7 +1429,7 @@ static int flush_journal_list(struct super_block *s,
1409 1429
1410 /* if flushall == 0, the lock is already held */ 1430 /* if flushall == 0, the lock is already held */
1411 if (flushall) { 1431 if (flushall) {
1412 mutex_lock(&journal->j_flush_mutex); 1432 reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1413 } else if (mutex_trylock(&journal->j_flush_mutex)) { 1433 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1414 BUG(); 1434 BUG();
1415 } 1435 }
@@ -1553,7 +1573,11 @@ static int flush_journal_list(struct super_block *s,
1553 reiserfs_panic(s, "journal-1011", 1573 reiserfs_panic(s, "journal-1011",
1554 "cn->bh is NULL"); 1574 "cn->bh is NULL");
1555 } 1575 }
1576
1577 reiserfs_write_unlock(s);
1556 wait_on_buffer(cn->bh); 1578 wait_on_buffer(cn->bh);
1579 reiserfs_write_lock(s);
1580
1557 if (!cn->bh) { 1581 if (!cn->bh) {
1558 reiserfs_panic(s, "journal-1012", 1582 reiserfs_panic(s, "journal-1012",
1559 "cn->bh is NULL"); 1583 "cn->bh is NULL");
@@ -1769,7 +1793,7 @@ static int kupdate_transactions(struct super_block *s,
1769 struct reiserfs_journal *journal = SB_JOURNAL(s); 1793 struct reiserfs_journal *journal = SB_JOURNAL(s);
1770 chunk.nr = 0; 1794 chunk.nr = 0;
1771 1795
1772 mutex_lock(&journal->j_flush_mutex); 1796 reiserfs_mutex_lock_safe(&journal->j_flush_mutex, s);
1773 if (!journal_list_still_alive(s, orig_trans_id)) { 1797 if (!journal_list_still_alive(s, orig_trans_id)) {
1774 goto done; 1798 goto done;
1775 } 1799 }
@@ -1973,11 +1997,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1973 reiserfs_mounted_fs_count--; 1997 reiserfs_mounted_fs_count--;
1974 /* wait for all commits to finish */ 1998 /* wait for all commits to finish */
1975 cancel_delayed_work(&SB_JOURNAL(sb)->j_work); 1999 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
2000
2001 /*
2002 * We must release the write lock here because
2003 * the workqueue job (flush_async_commit) needs this lock
2004 */
2005 reiserfs_write_unlock(sb);
1976 flush_workqueue(commit_wq); 2006 flush_workqueue(commit_wq);
2007
1977 if (!reiserfs_mounted_fs_count) { 2008 if (!reiserfs_mounted_fs_count) {
1978 destroy_workqueue(commit_wq); 2009 destroy_workqueue(commit_wq);
1979 commit_wq = NULL; 2010 commit_wq = NULL;
1980 } 2011 }
2012 reiserfs_write_lock(sb);
1981 2013
1982 free_journal_ram(sb); 2014 free_journal_ram(sb);
1983 2015
@@ -2243,7 +2275,11 @@ static int journal_read_transaction(struct super_block *sb,
2243 /* read in the log blocks, memcpy to the corresponding real block */ 2275 /* read in the log blocks, memcpy to the corresponding real block */
2244 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); 2276 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
2245 for (i = 0; i < get_desc_trans_len(desc); i++) { 2277 for (i = 0; i < get_desc_trans_len(desc); i++) {
2278
2279 reiserfs_write_unlock(sb);
2246 wait_on_buffer(log_blocks[i]); 2280 wait_on_buffer(log_blocks[i]);
2281 reiserfs_write_lock(sb);
2282
2247 if (!buffer_uptodate(log_blocks[i])) { 2283 if (!buffer_uptodate(log_blocks[i])) {
2248 reiserfs_warning(sb, "journal-1212", 2284 reiserfs_warning(sb, "journal-1212",
2249 "REPLAY FAILURE fsck required! " 2285 "REPLAY FAILURE fsck required! "
@@ -2765,11 +2801,27 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2765 goto free_and_return; 2801 goto free_and_return;
2766 } 2802 }
2767 2803
2804 /*
2805 * We need to unlock here to avoid creating the following
2806 * dependency:
2807 * reiserfs_lock -> sysfs_mutex
2808 * Because the reiserfs mmap path creates the following dependency:
2809 * mm->mmap -> reiserfs_lock, hence we have
2810 * mm->mmap -> reiserfs_lock ->sysfs_mutex
 2811 * This would end up in a circular dependency with sysfs readdir path
2812 * which does sysfs_mutex -> mm->mmap_sem
2813 * This is fine because the reiserfs lock is useless in mount path,
2814 * at least until we call journal_begin. We keep it for paranoid
2815 * reasons.
2816 */
2817 reiserfs_write_unlock(sb);
2768 if (journal_init_dev(sb, journal, j_dev_name) != 0) { 2818 if (journal_init_dev(sb, journal, j_dev_name) != 0) {
2819 reiserfs_write_lock(sb);
2769 reiserfs_warning(sb, "sh-462", 2820 reiserfs_warning(sb, "sh-462",
2770 "unable to initialize jornal device"); 2821 "unable to initialize jornal device");
2771 goto free_and_return; 2822 goto free_and_return;
2772 } 2823 }
2824 reiserfs_write_lock(sb);
2773 2825
2774 rs = SB_DISK_SUPER_BLOCK(sb); 2826 rs = SB_DISK_SUPER_BLOCK(sb);
2775 2827
@@ -2881,8 +2933,11 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2881 } 2933 }
2882 2934
2883 reiserfs_mounted_fs_count++; 2935 reiserfs_mounted_fs_count++;
2884 if (reiserfs_mounted_fs_count <= 1) 2936 if (reiserfs_mounted_fs_count <= 1) {
2937 reiserfs_write_unlock(sb);
2885 commit_wq = create_workqueue("reiserfs"); 2938 commit_wq = create_workqueue("reiserfs");
2939 reiserfs_write_lock(sb);
2940 }
2886 2941
2887 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2942 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2888 journal->j_work_sb = sb; 2943 journal->j_work_sb = sb;
@@ -2964,8 +3019,11 @@ static void queue_log_writer(struct super_block *s)
2964 init_waitqueue_entry(&wait, current); 3019 init_waitqueue_entry(&wait, current);
2965 add_wait_queue(&journal->j_join_wait, &wait); 3020 add_wait_queue(&journal->j_join_wait, &wait);
2966 set_current_state(TASK_UNINTERRUPTIBLE); 3021 set_current_state(TASK_UNINTERRUPTIBLE);
2967 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 3022 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
3023 reiserfs_write_unlock(s);
2968 schedule(); 3024 schedule();
3025 reiserfs_write_lock(s);
3026 }
2969 __set_current_state(TASK_RUNNING); 3027 __set_current_state(TASK_RUNNING);
2970 remove_wait_queue(&journal->j_join_wait, &wait); 3028 remove_wait_queue(&journal->j_join_wait, &wait);
2971} 3029}
@@ -2982,7 +3040,9 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2982 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3040 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2983 unsigned long bcount = journal->j_bcount; 3041 unsigned long bcount = journal->j_bcount;
2984 while (1) { 3042 while (1) {
3043 reiserfs_write_unlock(sb);
2985 schedule_timeout_uninterruptible(1); 3044 schedule_timeout_uninterruptible(1);
3045 reiserfs_write_lock(sb);
2986 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 3046 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2987 while ((atomic_read(&journal->j_wcount) > 0 || 3047 while ((atomic_read(&journal->j_wcount) > 0 ||
2988 atomic_read(&journal->j_jlock)) && 3048 atomic_read(&journal->j_jlock)) &&
@@ -3033,7 +3093,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3033 3093
3034 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 3094 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
3035 unlock_journal(sb); 3095 unlock_journal(sb);
3096 reiserfs_write_unlock(sb);
3036 reiserfs_wait_on_write_block(sb); 3097 reiserfs_wait_on_write_block(sb);
3098 reiserfs_write_lock(sb);
3037 PROC_INFO_INC(sb, journal.journal_relock_writers); 3099 PROC_INFO_INC(sb, journal.journal_relock_writers);
3038 goto relock; 3100 goto relock;
3039 } 3101 }
@@ -3506,14 +3568,14 @@ static void flush_async_commits(struct work_struct *work)
3506 struct reiserfs_journal_list *jl; 3568 struct reiserfs_journal_list *jl;
3507 struct list_head *entry; 3569 struct list_head *entry;
3508 3570
3509 lock_kernel(); 3571 reiserfs_write_lock(sb);
3510 if (!list_empty(&journal->j_journal_list)) { 3572 if (!list_empty(&journal->j_journal_list)) {
3511 /* last entry is the youngest, commit it and you get everything */ 3573 /* last entry is the youngest, commit it and you get everything */
3512 entry = journal->j_journal_list.prev; 3574 entry = journal->j_journal_list.prev;
3513 jl = JOURNAL_LIST_ENTRY(entry); 3575 jl = JOURNAL_LIST_ENTRY(entry);
3514 flush_commit_list(sb, jl, 1); 3576 flush_commit_list(sb, jl, 1);
3515 } 3577 }
3516 unlock_kernel(); 3578 reiserfs_write_unlock(sb);
3517} 3579}
3518 3580
3519/* 3581/*
@@ -4041,7 +4103,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4041 * the new transaction is fully setup, and we've already flushed the 4103 * the new transaction is fully setup, and we've already flushed the
4042 * ordered bh list 4104 * ordered bh list
4043 */ 4105 */
4044 mutex_lock(&jl->j_commit_mutex); 4106 reiserfs_mutex_lock_safe(&jl->j_commit_mutex, sb);
4045 4107
4046 /* save the transaction id in case we need to commit it later */ 4108 /* save the transaction id in case we need to commit it later */
4047 commit_trans_id = jl->j_trans_id; 4109 commit_trans_id = jl->j_trans_id;
@@ -4156,7 +4218,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4156 next = cn->next; 4218 next = cn->next;
4157 free_cnode(sb, cn); 4219 free_cnode(sb, cn);
4158 cn = next; 4220 cn = next;
4221 reiserfs_write_unlock(sb);
4159 cond_resched(); 4222 cond_resched();
4223 reiserfs_write_lock(sb);
4160 } 4224 }
4161 4225
4162 /* we are done with both the c_bh and d_bh, but 4226 /* we are done with both the c_bh and d_bh, but
@@ -4203,10 +4267,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4203 * is lost. 4267 * is lost.
4204 */ 4268 */
4205 if (!list_empty(&jl->j_tail_bh_list)) { 4269 if (!list_empty(&jl->j_tail_bh_list)) {
4206 unlock_kernel(); 4270 reiserfs_write_unlock(sb);
4207 write_ordered_buffers(&journal->j_dirty_buffers_lock, 4271 write_ordered_buffers(&journal->j_dirty_buffers_lock,
4208 journal, jl, &jl->j_tail_bh_list); 4272 journal, jl, &jl->j_tail_bh_list);
4209 lock_kernel(); 4273 reiserfs_write_lock(sb);
4210 } 4274 }
4211 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4275 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4212 mutex_unlock(&jl->j_commit_mutex); 4276 mutex_unlock(&jl->j_commit_mutex);
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
new file mode 100644
index 000000000000..ee2cfc0fd8a7
--- /dev/null
+++ b/fs/reiserfs/lock.c
@@ -0,0 +1,88 @@
1#include <linux/reiserfs_fs.h>
2#include <linux/mutex.h>
3
4/*
5 * The previous reiserfs locking scheme was heavily based on
6 * the tricky properties of the Bkl:
7 *
8 * - it was acquired recursively by a same task
9 * - the performances relied on the release-while-schedule() property
10 *
11 * Now that we replace it by a mutex, we still want to keep the same
12 * recursive property to avoid big changes in the code structure.
13 * We use our own lock_owner here because the owner field on a mutex
14 * is only available in SMP or mutex debugging, also we only need this field
15 * for this mutex, no need for a system wide mutex facility.
16 *
17 * Also this lock is often released before a call that could block because
 18 * reiserfs performances were partially based on the release while schedule()
19 * property of the Bkl.
20 */
21void reiserfs_write_lock(struct super_block *s)
22{
23 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
24
25 if (sb_i->lock_owner != current) {
26 mutex_lock(&sb_i->lock);
27 sb_i->lock_owner = current;
28 }
29
30 /* No need to protect it, only the current task touches it */
31 sb_i->lock_depth++;
32}
33
34void reiserfs_write_unlock(struct super_block *s)
35{
36 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
37
38 /*
39 * Are we unlocking without even holding the lock?
40 * Such a situation must raise a BUG() if we don't want
41 * to corrupt the data.
42 */
43 BUG_ON(sb_i->lock_owner != current);
44
45 if (--sb_i->lock_depth == -1) {
46 sb_i->lock_owner = NULL;
47 mutex_unlock(&sb_i->lock);
48 }
49}
50
51/*
52 * If we already own the lock, just exit and don't increase the depth.
53 * Useful when we don't want to lock more than once.
54 *
55 * We always return the lock_depth we had before calling
56 * this function.
57 */
58int reiserfs_write_lock_once(struct super_block *s)
59{
60 struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
61
62 if (sb_i->lock_owner != current) {
63 mutex_lock(&sb_i->lock);
64 sb_i->lock_owner = current;
65 return sb_i->lock_depth++;
66 }
67
68 return sb_i->lock_depth;
69}
70
71void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
72{
73 if (lock_depth == -1)
74 reiserfs_write_unlock(s);
75}
76
77/*
78 * Utility function to force a BUG if it is called without the superblock
79 * write lock held. caller is the string printed just before calling BUG()
80 */
81void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
82{
83 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
84
85 if (sb_i->lock_depth < 0)
86 reiserfs_panic(sb, "%s called without kernel lock held %d",
87 caller);
88}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 271579128634..e296ff72a6cc 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -324,6 +324,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
324 struct nameidata *nd) 324 struct nameidata *nd)
325{ 325{
326 int retval; 326 int retval;
327 int lock_depth;
327 struct inode *inode = NULL; 328 struct inode *inode = NULL;
328 struct reiserfs_dir_entry de; 329 struct reiserfs_dir_entry de;
329 INITIALIZE_PATH(path_to_entry); 330 INITIALIZE_PATH(path_to_entry);
@@ -331,7 +332,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
331 if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) 332 if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
332 return ERR_PTR(-ENAMETOOLONG); 333 return ERR_PTR(-ENAMETOOLONG);
333 334
334 reiserfs_write_lock(dir->i_sb); 335 /*
336 * Might be called with or without the write lock, must be careful
337 * to not recursively hold it in case we want to release the lock
338 * before rescheduling.
339 */
340 lock_depth = reiserfs_write_lock_once(dir->i_sb);
341
335 de.de_gen_number_bit_string = NULL; 342 de.de_gen_number_bit_string = NULL;
336 retval = 343 retval =
337 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, 344 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
@@ -341,7 +348,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
341 inode = reiserfs_iget(dir->i_sb, 348 inode = reiserfs_iget(dir->i_sb,
342 (struct cpu_key *)&(de.de_dir_id)); 349 (struct cpu_key *)&(de.de_dir_id));
343 if (!inode || IS_ERR(inode)) { 350 if (!inode || IS_ERR(inode)) {
344 reiserfs_write_unlock(dir->i_sb); 351 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
345 return ERR_PTR(-EACCES); 352 return ERR_PTR(-EACCES);
346 } 353 }
347 354
@@ -350,7 +357,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
350 if (IS_PRIVATE(dir)) 357 if (IS_PRIVATE(dir))
351 inode->i_flags |= S_PRIVATE; 358 inode->i_flags |= S_PRIVATE;
352 } 359 }
353 reiserfs_write_unlock(dir->i_sb); 360 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
354 if (retval == IO_ERROR) { 361 if (retval == IO_ERROR) {
355 return ERR_PTR(-EIO); 362 return ERR_PTR(-EIO);
356 } 363 }
@@ -725,6 +732,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725 struct inode *inode; 732 struct inode *inode;
726 struct reiserfs_transaction_handle th; 733 struct reiserfs_transaction_handle th;
727 struct reiserfs_security_handle security; 734 struct reiserfs_security_handle security;
735 int lock_depth;
728 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 736 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
729 int jbegin_count = 737 int jbegin_count =
730 JOURNAL_PER_BALANCE_CNT * 3 + 738 JOURNAL_PER_BALANCE_CNT * 3 +
@@ -748,7 +756,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
748 return retval; 756 return retval;
749 } 757 }
750 jbegin_count += retval; 758 jbegin_count += retval;
751 reiserfs_write_lock(dir->i_sb); 759 lock_depth = reiserfs_write_lock_once(dir->i_sb);
752 760
753 retval = journal_begin(&th, dir->i_sb, jbegin_count); 761 retval = journal_begin(&th, dir->i_sb, jbegin_count);
754 if (retval) { 762 if (retval) {
@@ -798,8 +806,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
798 d_instantiate(dentry, inode); 806 d_instantiate(dentry, inode);
799 unlock_new_inode(inode); 807 unlock_new_inode(inode);
800 retval = journal_end(&th, dir->i_sb, jbegin_count); 808 retval = journal_end(&th, dir->i_sb, jbegin_count);
801 out_failed: 809out_failed:
802 reiserfs_write_unlock(dir->i_sb); 810 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
803 return retval; 811 return retval;
804} 812}
805 813
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 536eacaeb710..adbc6f538515 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -349,10 +349,6 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
349 349
350 . */ 350 . */
351 351
352#ifdef CONFIG_REISERFS_CHECK
353extern struct tree_balance *cur_tb;
354#endif
355
356void __reiserfs_panic(struct super_block *sb, const char *id, 352void __reiserfs_panic(struct super_block *sb, const char *id,
357 const char *function, const char *fmt, ...) 353 const char *function, const char *fmt, ...)
358{ 354{
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 18b315d3d104..b3a94d20f0fc 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -141,7 +141,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
141 141
142 set_buffer_uptodate(bh); 142 set_buffer_uptodate(bh);
143 mark_buffer_dirty(bh); 143 mark_buffer_dirty(bh);
144 reiserfs_write_unlock(s);
144 sync_dirty_buffer(bh); 145 sync_dirty_buffer(bh);
146 reiserfs_write_lock(s);
145 // update bitmap_info stuff 147 // update bitmap_info stuff
146 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 148 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
147 brelse(bh); 149 brelse(bh);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index d036ee5b1c81..5fa7118f04e1 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -222,9 +222,6 @@ static inline int bin_search(const void *key, /* Key to search for. */
222 return ITEM_NOT_FOUND; 222 return ITEM_NOT_FOUND;
223} 223}
224 224
225#ifdef CONFIG_REISERFS_CHECK
226extern struct tree_balance *cur_tb;
227#endif
228 225
229/* Minimal possible key. It is never in the tree. */ 226/* Minimal possible key. It is never in the tree. */
230const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} }; 227const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
@@ -519,25 +516,48 @@ static int is_tree_node(struct buffer_head *bh, int level)
519 516
520#define SEARCH_BY_KEY_READA 16 517#define SEARCH_BY_KEY_READA 16
521 518
522/* The function is NOT SCHEDULE-SAFE! */ 519/*
523static void search_by_key_reada(struct super_block *s, 520 * The function is NOT SCHEDULE-SAFE!
521 * It might unlock the write lock if we needed to wait for a block
522 * to be read. Note that in this case it won't recover the lock to avoid
 523 * high contention resulting from too many lock requests, especially
524 * the caller (search_by_key) will perform other schedule-unsafe
525 * operations just after calling this function.
526 *
527 * @return true if we have unlocked
528 */
529static bool search_by_key_reada(struct super_block *s,
524 struct buffer_head **bh, 530 struct buffer_head **bh,
525 b_blocknr_t *b, int num) 531 b_blocknr_t *b, int num)
526{ 532{
527 int i, j; 533 int i, j;
534 bool unlocked = false;
528 535
529 for (i = 0; i < num; i++) { 536 for (i = 0; i < num; i++) {
530 bh[i] = sb_getblk(s, b[i]); 537 bh[i] = sb_getblk(s, b[i]);
531 } 538 }
539 /*
540 * We are going to read some blocks on which we
541 * have a reference. It's safe, though we might be
542 * reading blocks concurrently changed if we release
543 * the lock. But it's still fine because we check later
544 * if the tree changed
545 */
532 for (j = 0; j < i; j++) { 546 for (j = 0; j < i; j++) {
533 /* 547 /*
534 * note, this needs attention if we are getting rid of the BKL 548 * note, this needs attention if we are getting rid of the BKL
535 * you have to make sure the prepared bit isn't set on this buffer 549 * you have to make sure the prepared bit isn't set on this buffer
536 */ 550 */
537 if (!buffer_uptodate(bh[j])) 551 if (!buffer_uptodate(bh[j])) {
552 if (!unlocked) {
553 reiserfs_write_unlock(s);
554 unlocked = true;
555 }
538 ll_rw_block(READA, 1, bh + j); 556 ll_rw_block(READA, 1, bh + j);
557 }
539 brelse(bh[j]); 558 brelse(bh[j]);
540 } 559 }
560 return unlocked;
541} 561}
542 562
543/************************************************************************** 563/**************************************************************************
@@ -625,11 +645,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
625 have a pointer to it. */ 645 have a pointer to it. */
626 if ((bh = last_element->pe_buffer = 646 if ((bh = last_element->pe_buffer =
627 sb_getblk(sb, block_number))) { 647 sb_getblk(sb, block_number))) {
648 bool unlocked = false;
649
628 if (!buffer_uptodate(bh) && reada_count > 1) 650 if (!buffer_uptodate(bh) && reada_count > 1)
629 search_by_key_reada(sb, reada_bh, 651 /* may unlock the write lock */
652 unlocked = search_by_key_reada(sb, reada_bh,
630 reada_blocks, reada_count); 653 reada_blocks, reada_count);
654 /*
655 * If we haven't already unlocked the write lock,
656 * then we need to do that here before reading
657 * the current block
658 */
659 if (!buffer_uptodate(bh) && !unlocked) {
660 reiserfs_write_unlock(sb);
661 unlocked = true;
662 }
631 ll_rw_block(READ, 1, &bh); 663 ll_rw_block(READ, 1, &bh);
632 wait_on_buffer(bh); 664 wait_on_buffer(bh);
665
666 if (unlocked)
667 reiserfs_write_lock(sb);
633 if (!buffer_uptodate(bh)) 668 if (!buffer_uptodate(bh))
634 goto io_error; 669 goto io_error;
635 } else { 670 } else {
@@ -673,7 +708,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
673 !key_in_buffer(search_path, key, sb), 708 !key_in_buffer(search_path, key, sb),
674 "PAP-5130: key is not in the buffer"); 709 "PAP-5130: key is not in the buffer");
675#ifdef CONFIG_REISERFS_CHECK 710#ifdef CONFIG_REISERFS_CHECK
676 if (cur_tb) { 711 if (REISERFS_SB(sb)->cur_tb) {
677 print_cur_tb("5140"); 712 print_cur_tb("5140");
678 reiserfs_panic(sb, "PAP-5140", 713 reiserfs_panic(sb, "PAP-5140",
679 "schedule occurred in do_balance!"); 714 "schedule occurred in do_balance!");
@@ -1024,7 +1059,9 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1024 reiserfs_free_block(th, inode, block, 1); 1059 reiserfs_free_block(th, inode, block, 1);
1025 } 1060 }
1026 1061
1062 reiserfs_write_unlock(sb);
1027 cond_resched(); 1063 cond_resched();
1064 reiserfs_write_lock(sb);
1028 1065
1029 if (item_moved (&s_ih, path)) { 1066 if (item_moved (&s_ih, path)) {
1030 need_re_search = 1; 1067 need_re_search = 1;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f0ad05f38022..339b0baf2af6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -465,7 +465,7 @@ static void reiserfs_put_super(struct super_block *s)
465 struct reiserfs_transaction_handle th; 465 struct reiserfs_transaction_handle th;
466 th.t_trans_id = 0; 466 th.t_trans_id = 0;
467 467
468 lock_kernel(); 468 reiserfs_write_lock(s);
469 469
470 if (s->s_dirt) 470 if (s->s_dirt)
471 reiserfs_write_super(s); 471 reiserfs_write_super(s);
@@ -499,10 +499,10 @@ static void reiserfs_put_super(struct super_block *s)
499 499
500 reiserfs_proc_info_done(s); 500 reiserfs_proc_info_done(s);
501 501
502 reiserfs_write_unlock(s);
503 mutex_destroy(&REISERFS_SB(s)->lock);
502 kfree(s->s_fs_info); 504 kfree(s->s_fs_info);
503 s->s_fs_info = NULL; 505 s->s_fs_info = NULL;
504
505 unlock_kernel();
506} 506}
507 507
508static struct kmem_cache *reiserfs_inode_cachep; 508static struct kmem_cache *reiserfs_inode_cachep;
@@ -554,25 +554,28 @@ static void reiserfs_dirty_inode(struct inode *inode)
554 struct reiserfs_transaction_handle th; 554 struct reiserfs_transaction_handle th;
555 555
556 int err = 0; 556 int err = 0;
557 int lock_depth;
558
557 if (inode->i_sb->s_flags & MS_RDONLY) { 559 if (inode->i_sb->s_flags & MS_RDONLY) {
558 reiserfs_warning(inode->i_sb, "clm-6006", 560 reiserfs_warning(inode->i_sb, "clm-6006",
559 "writing inode %lu on readonly FS", 561 "writing inode %lu on readonly FS",
560 inode->i_ino); 562 inode->i_ino);
561 return; 563 return;
562 } 564 }
563 reiserfs_write_lock(inode->i_sb); 565 lock_depth = reiserfs_write_lock_once(inode->i_sb);
564 566
565 /* this is really only used for atime updates, so they don't have 567 /* this is really only used for atime updates, so they don't have
566 ** to be included in O_SYNC or fsync 568 ** to be included in O_SYNC or fsync
567 */ 569 */
568 err = journal_begin(&th, inode->i_sb, 1); 570 err = journal_begin(&th, inode->i_sb, 1);
569 if (err) { 571 if (err)
570 reiserfs_write_unlock(inode->i_sb); 572 goto out;
571 return; 573
572 }
573 reiserfs_update_sd(&th, inode); 574 reiserfs_update_sd(&th, inode);
574 journal_end(&th, inode->i_sb, 1); 575 journal_end(&th, inode->i_sb, 1);
575 reiserfs_write_unlock(inode->i_sb); 576
577out:
578 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
576} 579}
577 580
578#ifdef CONFIG_QUOTA 581#ifdef CONFIG_QUOTA
@@ -1168,11 +1171,14 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1168 unsigned int qfmt = 0; 1171 unsigned int qfmt = 0;
1169#ifdef CONFIG_QUOTA 1172#ifdef CONFIG_QUOTA
1170 int i; 1173 int i;
1174#endif
1175
1176 reiserfs_write_lock(s);
1171 1177
1178#ifdef CONFIG_QUOTA
1172 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names)); 1179 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1173#endif 1180#endif
1174 1181
1175 lock_kernel();
1176 rs = SB_DISK_SUPER_BLOCK(s); 1182 rs = SB_DISK_SUPER_BLOCK(s);
1177 1183
1178 if (!reiserfs_parse_options 1184 if (!reiserfs_parse_options
@@ -1295,12 +1301,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1295 1301
1296out_ok: 1302out_ok:
1297 replace_mount_options(s, new_opts); 1303 replace_mount_options(s, new_opts);
1298 unlock_kernel(); 1304 reiserfs_write_unlock(s);
1299 return 0; 1305 return 0;
1300 1306
1301out_err: 1307out_err:
1302 kfree(new_opts); 1308 kfree(new_opts);
1303 unlock_kernel(); 1309 reiserfs_write_unlock(s);
1304 return err; 1310 return err;
1305} 1311}
1306 1312
@@ -1404,7 +1410,9 @@ static int read_super_block(struct super_block *s, int offset)
1404static int reread_meta_blocks(struct super_block *s) 1410static int reread_meta_blocks(struct super_block *s)
1405{ 1411{
1406 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1412 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1413 reiserfs_write_unlock(s);
1407 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1414 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1415 reiserfs_write_lock(s);
1408 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1416 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1409 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1417 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
1410 return 1; 1418 return 1;
@@ -1613,7 +1621,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1613 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1621 sbi = kzalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1614 if (!sbi) { 1622 if (!sbi) {
1615 errval = -ENOMEM; 1623 errval = -ENOMEM;
1616 goto error; 1624 goto error_alloc;
1617 } 1625 }
1618 s->s_fs_info = sbi; 1626 s->s_fs_info = sbi;
1619 /* Set default values for options: non-aggressive tails, RO on errors */ 1627 /* Set default values for options: non-aggressive tails, RO on errors */
@@ -1627,6 +1635,20 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1627 /* setup default block allocator options */ 1635 /* setup default block allocator options */
1628 reiserfs_init_alloc_options(s); 1636 reiserfs_init_alloc_options(s);
1629 1637
1638 mutex_init(&REISERFS_SB(s)->lock);
1639 REISERFS_SB(s)->lock_depth = -1;
1640
1641 /*
1642 * This function is called with the bkl, which also was the old
1643 * locking used here.
1644 * do_journal_begin() will soon check if we hold the lock (ie: was the
 1645 * bkl). This is likely because do_journal_begin() has several other
1646 * callers because at this time, it doesn't seem to be necessary to
1647 * protect against anything.
1648 * Anyway, let's be conservative and lock for now.
1649 */
1650 reiserfs_write_lock(s);
1651
1630 jdev_name = NULL; 1652 jdev_name = NULL;
1631 if (reiserfs_parse_options 1653 if (reiserfs_parse_options
1632 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1654 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
@@ -1852,9 +1874,13 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1852 init_waitqueue_head(&(sbi->s_wait)); 1874 init_waitqueue_head(&(sbi->s_wait));
1853 spin_lock_init(&sbi->bitmap_lock); 1875 spin_lock_init(&sbi->bitmap_lock);
1854 1876
1877 reiserfs_write_unlock(s);
1878
1855 return (0); 1879 return (0);
1856 1880
1857error: 1881error:
1882 reiserfs_write_unlock(s);
1883error_alloc:
1858 if (jinit_done) { /* kill the commit thread, free journal ram */ 1884 if (jinit_done) { /* kill the commit thread, free journal ram */
1859 journal_release_error(NULL, s); 1885 journal_release_error(NULL, s);
1860 } 1886 }
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6925b835a43b..58aa8e75f7f5 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -975,7 +975,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
975 int err = 0; 975 int err = 0;
976 976
977 /* If we don't have the privroot located yet - go find it */ 977 /* If we don't have the privroot located yet - go find it */
978 mutex_lock(&s->s_root->d_inode->i_mutex); 978 reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
979 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, 979 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
980 strlen(PRIVROOT_NAME)); 980 strlen(PRIVROOT_NAME));
981 if (!IS_ERR(dentry)) { 981 if (!IS_ERR(dentry)) {
@@ -1004,14 +1004,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1004 goto error; 1004 goto error;
1005 1005
1006 if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { 1006 if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
1007 mutex_lock(&s->s_root->d_inode->i_mutex); 1007 reiserfs_mutex_lock_safe(&s->s_root->d_inode->i_mutex, s);
1008 err = create_privroot(REISERFS_SB(s)->priv_root); 1008 err = create_privroot(REISERFS_SB(s)->priv_root);
1009 mutex_unlock(&s->s_root->d_inode->i_mutex); 1009 mutex_unlock(&s->s_root->d_inode->i_mutex);
1010 } 1010 }
1011 1011
1012 if (privroot->d_inode) { 1012 if (privroot->d_inode) {
1013 s->s_xattr = reiserfs_xattr_handlers; 1013 s->s_xattr = reiserfs_xattr_handlers;
1014 mutex_lock(&privroot->d_inode->i_mutex); 1014 reiserfs_mutex_lock_safe(&privroot->d_inode->i_mutex, s);
1015 if (!REISERFS_SB(s)->xattr_root) { 1015 if (!REISERFS_SB(s)->xattr_root) {
1016 struct dentry *dentry; 1016 struct dentry *dentry;
1017 dentry = lookup_one_len(XAROOT_NAME, privroot, 1017 dentry = lookup_one_len(XAROOT_NAME, privroot,
diff --git a/fs/splice.c b/fs/splice.c
index 7394e9e17534..39208663aaf1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -648,9 +648,11 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
648 ret = buf->ops->confirm(pipe, buf); 648 ret = buf->ops->confirm(pipe, buf);
649 if (!ret) { 649 if (!ret) {
650 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; 650 more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
651 651 if (file->f_op && file->f_op->sendpage)
652 ret = file->f_op->sendpage(file, buf->page, buf->offset, 652 ret = file->f_op->sendpage(file, buf->page, buf->offset,
653 sd->len, &pos, more); 653 sd->len, &pos, more);
654 else
655 ret = -EINVAL;
654 } 656 }
655 657
656 return ret; 658 return ret;
@@ -1068,8 +1070,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
1068 if (unlikely(ret < 0)) 1070 if (unlikely(ret < 0))
1069 return ret; 1071 return ret;
1070 1072
1071 splice_write = out->f_op->splice_write; 1073 if (out->f_op && out->f_op->splice_write)
1072 if (!splice_write) 1074 splice_write = out->f_op->splice_write;
1075 else
1073 splice_write = default_file_splice_write; 1076 splice_write = default_file_splice_write;
1074 1077
1075 return splice_write(pipe, out, ppos, len, flags); 1078 return splice_write(pipe, out, ppos, len, flags);
@@ -1093,8 +1096,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
1093 if (unlikely(ret < 0)) 1096 if (unlikely(ret < 0))
1094 return ret; 1097 return ret;
1095 1098
1096 splice_read = in->f_op->splice_read; 1099 if (in->f_op && in->f_op->splice_read)
1097 if (!splice_read) 1100 splice_read = in->f_op->splice_read;
1101 else
1098 splice_read = default_file_splice_read; 1102 splice_read = default_file_splice_read;
1099 1103
1100 return splice_read(in, ppos, pipe, len, flags); 1104 return splice_read(in, ppos, pipe, len, flags);
@@ -1316,7 +1320,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1316 if (off_in) 1320 if (off_in)
1317 return -ESPIPE; 1321 return -ESPIPE;
1318 if (off_out) { 1322 if (off_out) {
1319 if (out->f_op->llseek == no_llseek) 1323 if (!out->f_op || !out->f_op->llseek ||
1324 out->f_op->llseek == no_llseek)
1320 return -EINVAL; 1325 return -EINVAL;
1321 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1326 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1322 return -EFAULT; 1327 return -EFAULT;
@@ -1336,7 +1341,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1336 if (off_out) 1341 if (off_out)
1337 return -ESPIPE; 1342 return -ESPIPE;
1338 if (off_in) { 1343 if (off_in) {
1339 if (in->f_op->llseek == no_llseek) 1344 if (!in->f_op || !in->f_op->llseek ||
1345 in->f_op->llseek == no_llseek)
1340 return -EINVAL; 1346 return -EINVAL;
1341 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1347 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1342 return -EFAULT; 1348 return -EFAULT;
diff --git a/fs/sync.c b/fs/sync.c
index d104591b066b..36752a683481 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -295,10 +295,11 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
295 */ 295 */
296int generic_write_sync(struct file *file, loff_t pos, loff_t count) 296int generic_write_sync(struct file *file, loff_t pos, loff_t count)
297{ 297{
298 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host)) 298 if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
299 return 0; 299 return 0;
300 return vfs_fsync_range(file, file->f_path.dentry, pos, 300 return vfs_fsync_range(file, file->f_path.dentry, pos,
301 pos + count - 1, 1); 301 pos + count - 1,
302 (file->f_flags & __O_SYNC) ? 0 : 1);
302} 303}
303EXPORT_SYMBOL(generic_write_sync); 304EXPORT_SYMBOL(generic_write_sync);
304 305
@@ -452,9 +453,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
452 453
453 ret = 0; 454 ret = 0;
454 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { 455 if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
455 ret = wait_on_page_writeback_range(mapping, 456 ret = filemap_fdatawait_range(mapping, offset, endbyte);
456 offset >> PAGE_CACHE_SHIFT,
457 endbyte >> PAGE_CACHE_SHIFT);
458 if (ret < 0) 457 if (ret < 0)
459 goto out; 458 goto out;
460 } 459 }
@@ -467,9 +466,7 @@ int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
467 } 466 }
468 467
469 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) { 468 if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
470 ret = wait_on_page_writeback_range(mapping, 469 ret = filemap_fdatawait_range(mapping, offset, endbyte);
471 offset >> PAGE_CACHE_SHIFT,
472 endbyte >> PAGE_CACHE_SHIFT);
473 } 470 }
474out: 471out:
475 return ret; 472 return ret;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e0201837d244..f05f2303a8b8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -25,7 +25,6 @@
25#include "sysfs.h" 25#include "sysfs.h"
26 26
27DEFINE_MUTEX(sysfs_mutex); 27DEFINE_MUTEX(sysfs_mutex);
28DEFINE_MUTEX(sysfs_rename_mutex);
29DEFINE_SPINLOCK(sysfs_assoc_lock); 28DEFINE_SPINLOCK(sysfs_assoc_lock);
30 29
31static DEFINE_SPINLOCK(sysfs_ino_lock); 30static DEFINE_SPINLOCK(sysfs_ino_lock);
@@ -85,46 +84,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
85} 84}
86 85
87/** 86/**
88 * sysfs_get_dentry - get dentry for the given sysfs_dirent
89 * @sd: sysfs_dirent of interest
90 *
91 * Get dentry for @sd. Dentry is looked up if currently not
92 * present. This function descends from the root looking up
93 * dentry for each step.
94 *
95 * LOCKING:
96 * mutex_lock(sysfs_rename_mutex)
97 *
98 * RETURNS:
99 * Pointer to found dentry on success, ERR_PTR() value on error.
100 */
101struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
102{
103 struct dentry *dentry = dget(sysfs_sb->s_root);
104
105 while (dentry->d_fsdata != sd) {
106 struct sysfs_dirent *cur;
107 struct dentry *parent;
108
109 /* find the first ancestor which hasn't been looked up */
110 cur = sd;
111 while (cur->s_parent != dentry->d_fsdata)
112 cur = cur->s_parent;
113
114 /* look it up */
115 parent = dentry;
116 mutex_lock(&parent->d_inode->i_mutex);
117 dentry = lookup_one_noperm(cur->s_name, parent);
118 mutex_unlock(&parent->d_inode->i_mutex);
119 dput(parent);
120
121 if (IS_ERR(dentry))
122 break;
123 }
124 return dentry;
125}
126
127/**
128 * sysfs_get_active - get an active reference to sysfs_dirent 87 * sysfs_get_active - get an active reference to sysfs_dirent
129 * @sd: sysfs_dirent to get an active reference to 88 * @sd: sysfs_dirent to get an active reference to
130 * 89 *
@@ -298,7 +257,61 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
298 goto repeat; 257 goto repeat;
299} 258}
300 259
301static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) 260static int sysfs_dentry_delete(struct dentry *dentry)
261{
262 struct sysfs_dirent *sd = dentry->d_fsdata;
263 return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
264}
265
266static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
267{
268 struct sysfs_dirent *sd = dentry->d_fsdata;
269 int is_dir;
270
271 mutex_lock(&sysfs_mutex);
272
273 /* The sysfs dirent has been deleted */
274 if (sd->s_flags & SYSFS_FLAG_REMOVED)
275 goto out_bad;
276
277 /* The sysfs dirent has been moved? */
278 if (dentry->d_parent->d_fsdata != sd->s_parent)
279 goto out_bad;
280
281 /* The sysfs dirent has been renamed */
282 if (strcmp(dentry->d_name.name, sd->s_name) != 0)
283 goto out_bad;
284
285 mutex_unlock(&sysfs_mutex);
286out_valid:
287 return 1;
288out_bad:
289 /* Remove the dentry from the dcache hashes.
290 * If this is a deleted dentry we use d_drop instead of d_delete
291 * so sysfs doesn't need to cope with negative dentries.
292 *
293 * If this is a dentry that has simply been renamed we
294 * use d_drop to remove it from the dcache lookup on its
295 * old parent. If this dentry persists later when a lookup
296 * is performed at its new name the dentry will be readded
297 * to the dcache hashes.
298 */
299 is_dir = (sysfs_type(sd) == SYSFS_DIR);
300 mutex_unlock(&sysfs_mutex);
301 if (is_dir) {
302 /* If we have submounts we must allow the vfs caches
303 * to lie about the state of the filesystem to prevent
304 * leaks and other nasty things.
305 */
306 if (have_submounts(dentry))
307 goto out_valid;
308 shrink_dcache_parent(dentry);
309 }
310 d_drop(dentry);
311 return 0;
312}
313
314static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
302{ 315{
303 struct sysfs_dirent * sd = dentry->d_fsdata; 316 struct sysfs_dirent * sd = dentry->d_fsdata;
304 317
@@ -307,7 +320,9 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
307} 320}
308 321
309static const struct dentry_operations sysfs_dentry_ops = { 322static const struct dentry_operations sysfs_dentry_ops = {
310 .d_iput = sysfs_d_iput, 323 .d_revalidate = sysfs_dentry_revalidate,
324 .d_delete = sysfs_dentry_delete,
325 .d_iput = sysfs_dentry_iput,
311}; 326};
312 327
313struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) 328struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
@@ -344,12 +359,6 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
344 return NULL; 359 return NULL;
345} 360}
346 361
347static int sysfs_ilookup_test(struct inode *inode, void *arg)
348{
349 struct sysfs_dirent *sd = arg;
350 return inode->i_ino == sd->s_ino;
351}
352
353/** 362/**
354 * sysfs_addrm_start - prepare for sysfs_dirent add/remove 363 * sysfs_addrm_start - prepare for sysfs_dirent add/remove
355 * @acxt: pointer to sysfs_addrm_cxt to be used 364 * @acxt: pointer to sysfs_addrm_cxt to be used
@@ -357,47 +366,20 @@ static int sysfs_ilookup_test(struct inode *inode, void *arg)
357 * 366 *
358 * This function is called when the caller is about to add or 367 * This function is called when the caller is about to add or
359 * remove sysfs_dirent under @parent_sd. This function acquires 368 * remove sysfs_dirent under @parent_sd. This function acquires
360 * sysfs_mutex, grabs inode for @parent_sd if available and lock 369 * sysfs_mutex. @acxt is used to keep and pass context to
361 * i_mutex of it. @acxt is used to keep and pass context to
362 * other addrm functions. 370 * other addrm functions.
363 * 371 *
364 * LOCKING: 372 * LOCKING:
365 * Kernel thread context (may sleep). sysfs_mutex is locked on 373 * Kernel thread context (may sleep). sysfs_mutex is locked on
366 * return. i_mutex of parent inode is locked on return if 374 * return.
367 * available.
368 */ 375 */
369void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, 376void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
370 struct sysfs_dirent *parent_sd) 377 struct sysfs_dirent *parent_sd)
371{ 378{
372 struct inode *inode;
373
374 memset(acxt, 0, sizeof(*acxt)); 379 memset(acxt, 0, sizeof(*acxt));
375 acxt->parent_sd = parent_sd; 380 acxt->parent_sd = parent_sd;
376 381
377 /* Lookup parent inode. inode initialization is protected by
378 * sysfs_mutex, so inode existence can be determined by
379 * looking up inode while holding sysfs_mutex.
380 */
381 mutex_lock(&sysfs_mutex); 382 mutex_lock(&sysfs_mutex);
382
383 inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
384 parent_sd);
385 if (inode) {
386 WARN_ON(inode->i_state & I_NEW);
387
388 /* parent inode available */
389 acxt->parent_inode = inode;
390
391 /* sysfs_mutex is below i_mutex in lock hierarchy.
392 * First, trylock i_mutex. If fails, unlock
393 * sysfs_mutex and lock them in order.
394 */
395 if (!mutex_trylock(&inode->i_mutex)) {
396 mutex_unlock(&sysfs_mutex);
397 mutex_lock(&inode->i_mutex);
398 mutex_lock(&sysfs_mutex);
399 }
400 }
401} 383}
402 384
403/** 385/**
@@ -422,18 +404,22 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
422 */ 404 */
423int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 405int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
424{ 406{
407 struct sysfs_inode_attrs *ps_iattr;
408
425 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 409 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
426 return -EEXIST; 410 return -EEXIST;
427 411
428 sd->s_parent = sysfs_get(acxt->parent_sd); 412 sd->s_parent = sysfs_get(acxt->parent_sd);
429 413
430 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
431 inc_nlink(acxt->parent_inode);
432
433 acxt->cnt++;
434
435 sysfs_link_sibling(sd); 414 sysfs_link_sibling(sd);
436 415
416 /* Update timestamps on the parent */
417 ps_iattr = acxt->parent_sd->s_iattr;
418 if (ps_iattr) {
419 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
420 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
421 }
422
437 return 0; 423 return 0;
438} 424}
439 425
@@ -512,70 +498,22 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
512 */ 498 */
513void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 499void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
514{ 500{
501 struct sysfs_inode_attrs *ps_iattr;
502
515 BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED); 503 BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);
516 504
517 sysfs_unlink_sibling(sd); 505 sysfs_unlink_sibling(sd);
518 506
507 /* Update timestamps on the parent */
508 ps_iattr = acxt->parent_sd->s_iattr;
509 if (ps_iattr) {
510 struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
511 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
512 }
513
519 sd->s_flags |= SYSFS_FLAG_REMOVED; 514 sd->s_flags |= SYSFS_FLAG_REMOVED;
520 sd->s_sibling = acxt->removed; 515 sd->s_sibling = acxt->removed;
521 acxt->removed = sd; 516 acxt->removed = sd;
522
523 if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
524 drop_nlink(acxt->parent_inode);
525
526 acxt->cnt++;
527}
528
529/**
530 * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
531 * @sd: target sysfs_dirent
532 *
533 * Drop dentry for @sd. @sd must have been unlinked from its
534 * parent on entry to this function such that it can't be looked
535 * up anymore.
536 */
537static void sysfs_drop_dentry(struct sysfs_dirent *sd)
538{
539 struct inode *inode;
540 struct dentry *dentry;
541
542 inode = ilookup(sysfs_sb, sd->s_ino);
543 if (!inode)
544 return;
545
546 /* Drop any existing dentries associated with sd.
547 *
548 * For the dentry to be properly freed we need to grab a
549 * reference to the dentry under the dcache lock, unhash it,
550 * and then put it. The playing with the dentry count allows
551 * dput to immediately free the dentry if it is not in use.
552 */
553repeat:
554 spin_lock(&dcache_lock);
555 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
556 if (d_unhashed(dentry))
557 continue;
558 dget_locked(dentry);
559 spin_lock(&dentry->d_lock);
560 __d_drop(dentry);
561 spin_unlock(&dentry->d_lock);
562 spin_unlock(&dcache_lock);
563 dput(dentry);
564 goto repeat;
565 }
566 spin_unlock(&dcache_lock);
567
568 /* adjust nlink and update timestamp */
569 mutex_lock(&inode->i_mutex);
570
571 inode->i_ctime = CURRENT_TIME;
572 drop_nlink(inode);
573 if (sysfs_type(sd) == SYSFS_DIR)
574 drop_nlink(inode);
575
576 mutex_unlock(&inode->i_mutex);
577
578 iput(inode);
579} 517}
580 518
581/** 519/**
@@ -584,25 +522,15 @@ repeat:
584 * 522 *
585 * Finish up sysfs_dirent add/remove. Resources acquired by 523 * Finish up sysfs_dirent add/remove. Resources acquired by
586 * sysfs_addrm_start() are released and removed sysfs_dirents are 524 * sysfs_addrm_start() are released and removed sysfs_dirents are
587 * cleaned up. Timestamps on the parent inode are updated. 525 * cleaned up.
588 * 526 *
589 * LOCKING: 527 * LOCKING:
590 * All mutexes acquired by sysfs_addrm_start() are released. 528 * sysfs_mutex is released.
591 */ 529 */
592void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) 530void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
593{ 531{
594 /* release resources acquired by sysfs_addrm_start() */ 532 /* release resources acquired by sysfs_addrm_start() */
595 mutex_unlock(&sysfs_mutex); 533 mutex_unlock(&sysfs_mutex);
596 if (acxt->parent_inode) {
597 struct inode *inode = acxt->parent_inode;
598
599 /* if added/removed, update timestamps on the parent */
600 if (acxt->cnt)
601 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
602
603 mutex_unlock(&inode->i_mutex);
604 iput(inode);
605 }
606 534
607 /* kill removed sysfs_dirents */ 535 /* kill removed sysfs_dirents */
608 while (acxt->removed) { 536 while (acxt->removed) {
@@ -611,7 +539,6 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
611 acxt->removed = sd->s_sibling; 539 acxt->removed = sd->s_sibling;
612 sd->s_sibling = NULL; 540 sd->s_sibling = NULL;
613 541
614 sysfs_drop_dentry(sd);
615 sysfs_deactivate(sd); 542 sysfs_deactivate(sd);
616 unmap_bin_file(sd); 543 unmap_bin_file(sd);
617 sysfs_put(sd); 544 sysfs_put(sd);
@@ -751,10 +678,15 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
751 } 678 }
752 679
753 /* instantiate and hash dentry */ 680 /* instantiate and hash dentry */
754 dentry->d_op = &sysfs_dentry_ops; 681 ret = d_find_alias(inode);
755 dentry->d_fsdata = sysfs_get(sd); 682 if (!ret) {
756 d_instantiate(dentry, inode); 683 dentry->d_op = &sysfs_dentry_ops;
757 d_rehash(dentry); 684 dentry->d_fsdata = sysfs_get(sd);
685 d_add(dentry, inode);
686 } else {
687 d_move(ret, dentry);
688 iput(inode);
689 }
758 690
759 out_unlock: 691 out_unlock:
760 mutex_unlock(&sysfs_mutex); 692 mutex_unlock(&sysfs_mutex);
@@ -763,7 +695,9 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
763 695
764const struct inode_operations sysfs_dir_inode_operations = { 696const struct inode_operations sysfs_dir_inode_operations = {
765 .lookup = sysfs_lookup, 697 .lookup = sysfs_lookup,
698 .permission = sysfs_permission,
766 .setattr = sysfs_setattr, 699 .setattr = sysfs_setattr,
700 .getattr = sysfs_getattr,
767 .setxattr = sysfs_setxattr, 701 .setxattr = sysfs_setxattr,
768}; 702};
769 703
@@ -826,141 +760,65 @@ void sysfs_remove_dir(struct kobject * kobj)
826 __sysfs_remove_dir(sd); 760 __sysfs_remove_dir(sd);
827} 761}
828 762
829int sysfs_rename_dir(struct kobject * kobj, const char *new_name) 763int sysfs_rename(struct sysfs_dirent *sd,
764 struct sysfs_dirent *new_parent_sd, const char *new_name)
830{ 765{
831 struct sysfs_dirent *sd = kobj->sd;
832 struct dentry *parent = NULL;
833 struct dentry *old_dentry = NULL, *new_dentry = NULL;
834 const char *dup_name = NULL; 766 const char *dup_name = NULL;
835 int error; 767 int error;
836 768
837 mutex_lock(&sysfs_rename_mutex); 769 mutex_lock(&sysfs_mutex);
838 770
839 error = 0; 771 error = 0;
840 if (strcmp(sd->s_name, new_name) == 0) 772 if ((sd->s_parent == new_parent_sd) &&
773 (strcmp(sd->s_name, new_name) == 0))
841 goto out; /* nothing to rename */ 774 goto out; /* nothing to rename */
842 775
843 /* get the original dentry */
844 old_dentry = sysfs_get_dentry(sd);
845 if (IS_ERR(old_dentry)) {
846 error = PTR_ERR(old_dentry);
847 old_dentry = NULL;
848 goto out;
849 }
850
851 parent = old_dentry->d_parent;
852
853 /* lock parent and get dentry for new name */
854 mutex_lock(&parent->d_inode->i_mutex);
855 mutex_lock(&sysfs_mutex);
856
857 error = -EEXIST; 776 error = -EEXIST;
858 if (sysfs_find_dirent(sd->s_parent, new_name)) 777 if (sysfs_find_dirent(new_parent_sd, new_name))
859 goto out_unlock; 778 goto out;
860
861 error = -ENOMEM;
862 new_dentry = d_alloc_name(parent, new_name);
863 if (!new_dentry)
864 goto out_unlock;
865 779
866 /* rename sysfs_dirent */ 780 /* rename sysfs_dirent */
867 error = -ENOMEM; 781 if (strcmp(sd->s_name, new_name) != 0) {
868 new_name = dup_name = kstrdup(new_name, GFP_KERNEL); 782 error = -ENOMEM;
869 if (!new_name) 783 new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
870 goto out_unlock; 784 if (!new_name)
871 785 goto out;
872 dup_name = sd->s_name; 786
873 sd->s_name = new_name; 787 dup_name = sd->s_name;
788 sd->s_name = new_name;
789 }
874 790
875 /* rename */ 791 /* Remove from old parent's list and insert into new parent's list. */
876 d_add(new_dentry, NULL); 792 if (sd->s_parent != new_parent_sd) {
877 d_move(old_dentry, new_dentry); 793 sysfs_unlink_sibling(sd);
794 sysfs_get(new_parent_sd);
795 sysfs_put(sd->s_parent);
796 sd->s_parent = new_parent_sd;
797 sysfs_link_sibling(sd);
798 }
878 799
879 error = 0; 800 error = 0;
880 out_unlock: 801 out:
881 mutex_unlock(&sysfs_mutex); 802 mutex_unlock(&sysfs_mutex);
882 mutex_unlock(&parent->d_inode->i_mutex);
883 kfree(dup_name); 803 kfree(dup_name);
884 dput(old_dentry);
885 dput(new_dentry);
886 out:
887 mutex_unlock(&sysfs_rename_mutex);
888 return error; 804 return error;
889} 805}
890 806
807int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
808{
809 return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name);
810}
811
891int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) 812int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
892{ 813{
893 struct sysfs_dirent *sd = kobj->sd; 814 struct sysfs_dirent *sd = kobj->sd;
894 struct sysfs_dirent *new_parent_sd; 815 struct sysfs_dirent *new_parent_sd;
895 struct dentry *old_parent, *new_parent = NULL;
896 struct dentry *old_dentry = NULL, *new_dentry = NULL;
897 int error;
898 816
899 mutex_lock(&sysfs_rename_mutex);
900 BUG_ON(!sd->s_parent); 817 BUG_ON(!sd->s_parent);
901 new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ? 818 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
902 new_parent_kobj->sd : &sysfs_root; 819 new_parent_kobj->sd : &sysfs_root;
903 820
904 error = 0; 821 return sysfs_rename(sd, new_parent_sd, sd->s_name);
905 if (sd->s_parent == new_parent_sd)
906 goto out; /* nothing to move */
907
908 /* get dentries */
909 old_dentry = sysfs_get_dentry(sd);
910 if (IS_ERR(old_dentry)) {
911 error = PTR_ERR(old_dentry);
912 old_dentry = NULL;
913 goto out;
914 }
915 old_parent = old_dentry->d_parent;
916
917 new_parent = sysfs_get_dentry(new_parent_sd);
918 if (IS_ERR(new_parent)) {
919 error = PTR_ERR(new_parent);
920 new_parent = NULL;
921 goto out;
922 }
923
924again:
925 mutex_lock(&old_parent->d_inode->i_mutex);
926 if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
927 mutex_unlock(&old_parent->d_inode->i_mutex);
928 goto again;
929 }
930 mutex_lock(&sysfs_mutex);
931
932 error = -EEXIST;
933 if (sysfs_find_dirent(new_parent_sd, sd->s_name))
934 goto out_unlock;
935
936 error = -ENOMEM;
937 new_dentry = d_alloc_name(new_parent, sd->s_name);
938 if (!new_dentry)
939 goto out_unlock;
940
941 error = 0;
942 d_add(new_dentry, NULL);
943 d_move(old_dentry, new_dentry);
944
945 /* Remove from old parent's list and insert into new parent's list. */
946 sysfs_unlink_sibling(sd);
947 sysfs_get(new_parent_sd);
948 drop_nlink(old_parent->d_inode);
949 sysfs_put(sd->s_parent);
950 sd->s_parent = new_parent_sd;
951 inc_nlink(new_parent->d_inode);
952 sysfs_link_sibling(sd);
953
954 out_unlock:
955 mutex_unlock(&sysfs_mutex);
956 mutex_unlock(&new_parent->d_inode->i_mutex);
957 mutex_unlock(&old_parent->d_inode->i_mutex);
958 out:
959 dput(new_parent);
960 dput(old_dentry);
961 dput(new_dentry);
962 mutex_unlock(&sysfs_rename_mutex);
963 return error;
964} 822}
965 823
966/* Relationship between s_mode and the DT_xxx types */ 824/* Relationship between s_mode and the DT_xxx types */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f5ea4680f15f..dc30d9e31683 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -579,46 +579,23 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
579 */ 579 */
580int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) 580int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
581{ 581{
582 struct sysfs_dirent *victim_sd = NULL; 582 struct sysfs_dirent *sd;
583 struct dentry *victim = NULL;
584 struct inode * inode;
585 struct iattr newattrs; 583 struct iattr newattrs;
586 int rc; 584 int rc;
587 585
588 rc = -ENOENT; 586 mutex_lock(&sysfs_mutex);
589 victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
590 if (!victim_sd)
591 goto out;
592 587
593 mutex_lock(&sysfs_rename_mutex); 588 rc = -ENOENT;
594 victim = sysfs_get_dentry(victim_sd); 589 sd = sysfs_find_dirent(kobj->sd, attr->name);
595 mutex_unlock(&sysfs_rename_mutex); 590 if (!sd)
596 if (IS_ERR(victim)) {
597 rc = PTR_ERR(victim);
598 victim = NULL;
599 goto out; 591 goto out;
600 }
601
602 inode = victim->d_inode;
603
604 mutex_lock(&inode->i_mutex);
605 592
606 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 593 newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
607 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 594 newattrs.ia_valid = ATTR_MODE;
608 newattrs.ia_ctime = current_fs_time(inode->i_sb); 595 rc = sysfs_sd_setattr(sd, &newattrs);
609 rc = sysfs_setattr(victim, &newattrs);
610 596
611 if (rc == 0) {
612 fsnotify_change(victim, newattrs.ia_valid);
613 mutex_lock(&sysfs_mutex);
614 victim_sd->s_mode = newattrs.ia_mode;
615 mutex_unlock(&sysfs_mutex);
616 }
617
618 mutex_unlock(&inode->i_mutex);
619 out: 597 out:
620 dput(victim); 598 mutex_unlock(&sysfs_mutex);
621 sysfs_put(victim_sd);
622 return rc; 599 return rc;
623} 600}
624EXPORT_SYMBOL_GPL(sysfs_chmod_file); 601EXPORT_SYMBOL_GPL(sysfs_chmod_file);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e28cecf179f5..220b758523ae 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -37,7 +37,9 @@ static struct backing_dev_info sysfs_backing_dev_info = {
37}; 37};
38 38
39static const struct inode_operations sysfs_inode_operations ={ 39static const struct inode_operations sysfs_inode_operations ={
40 .permission = sysfs_permission,
40 .setattr = sysfs_setattr, 41 .setattr = sysfs_setattr,
42 .getattr = sysfs_getattr,
41 .setxattr = sysfs_setxattr, 43 .setxattr = sysfs_setxattr,
42}; 44};
43 45
@@ -46,7 +48,7 @@ int __init sysfs_inode_init(void)
46 return bdi_init(&sysfs_backing_dev_info); 48 return bdi_init(&sysfs_backing_dev_info);
47} 49}
48 50
49struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) 51static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
50{ 52{
51 struct sysfs_inode_attrs *attrs; 53 struct sysfs_inode_attrs *attrs;
52 struct iattr *iattrs; 54 struct iattr *iattrs;
@@ -64,30 +66,15 @@ struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
64 66
65 return attrs; 67 return attrs;
66} 68}
67int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 69
70int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
68{ 71{
69 struct inode * inode = dentry->d_inode;
70 struct sysfs_dirent * sd = dentry->d_fsdata;
71 struct sysfs_inode_attrs *sd_attrs; 72 struct sysfs_inode_attrs *sd_attrs;
72 struct iattr *iattrs; 73 struct iattr *iattrs;
73 unsigned int ia_valid = iattr->ia_valid; 74 unsigned int ia_valid = iattr->ia_valid;
74 int error;
75
76 if (!sd)
77 return -EINVAL;
78 75
79 sd_attrs = sd->s_iattr; 76 sd_attrs = sd->s_iattr;
80 77
81 error = inode_change_ok(inode, iattr);
82 if (error)
83 return error;
84
85 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
86
87 error = inode_setattr(inode, iattr);
88 if (error)
89 return error;
90
91 if (!sd_attrs) { 78 if (!sd_attrs) {
92 /* setting attributes for the first time, allocate now */ 79 /* setting attributes for the first time, allocate now */
93 sd_attrs = sysfs_init_inode_attrs(sd); 80 sd_attrs = sysfs_init_inode_attrs(sd);
@@ -103,42 +90,78 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
103 if (ia_valid & ATTR_GID) 90 if (ia_valid & ATTR_GID)
104 iattrs->ia_gid = iattr->ia_gid; 91 iattrs->ia_gid = iattr->ia_gid;
105 if (ia_valid & ATTR_ATIME) 92 if (ia_valid & ATTR_ATIME)
106 iattrs->ia_atime = timespec_trunc(iattr->ia_atime, 93 iattrs->ia_atime = iattr->ia_atime;
107 inode->i_sb->s_time_gran);
108 if (ia_valid & ATTR_MTIME) 94 if (ia_valid & ATTR_MTIME)
109 iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, 95 iattrs->ia_mtime = iattr->ia_mtime;
110 inode->i_sb->s_time_gran);
111 if (ia_valid & ATTR_CTIME) 96 if (ia_valid & ATTR_CTIME)
112 iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, 97 iattrs->ia_ctime = iattr->ia_ctime;
113 inode->i_sb->s_time_gran);
114 if (ia_valid & ATTR_MODE) { 98 if (ia_valid & ATTR_MODE) {
115 umode_t mode = iattr->ia_mode; 99 umode_t mode = iattr->ia_mode;
116
117 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
118 mode &= ~S_ISGID;
119 iattrs->ia_mode = sd->s_mode = mode; 100 iattrs->ia_mode = sd->s_mode = mode;
120 } 101 }
121 } 102 }
103 return 0;
104}
105
106int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
107{
108 struct inode *inode = dentry->d_inode;
109 struct sysfs_dirent *sd = dentry->d_fsdata;
110 int error;
111
112 if (!sd)
113 return -EINVAL;
114
115 error = inode_change_ok(inode, iattr);
116 if (error)
117 return error;
118
119 iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
120
121 error = inode_setattr(inode, iattr);
122 if (error)
123 return error;
124
125 mutex_lock(&sysfs_mutex);
126 error = sysfs_sd_setattr(sd, iattr);
127 mutex_unlock(&sysfs_mutex);
128
122 return error; 129 return error;
123} 130}
124 131
132static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *secdata_len)
133{
134 struct sysfs_inode_attrs *iattrs;
135 void *old_secdata;
136 size_t old_secdata_len;
137
138 iattrs = sd->s_iattr;
139 if (!iattrs)
140 iattrs = sysfs_init_inode_attrs(sd);
141 if (!iattrs)
142 return -ENOMEM;
143
144 old_secdata = iattrs->ia_secdata;
145 old_secdata_len = iattrs->ia_secdata_len;
146
147 iattrs->ia_secdata = *secdata;
148 iattrs->ia_secdata_len = *secdata_len;
149
150 *secdata = old_secdata;
151 *secdata_len = old_secdata_len;
152 return 0;
153}
154
125int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 155int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
126 size_t size, int flags) 156 size_t size, int flags)
127{ 157{
128 struct sysfs_dirent *sd = dentry->d_fsdata; 158 struct sysfs_dirent *sd = dentry->d_fsdata;
129 struct sysfs_inode_attrs *iattrs;
130 void *secdata; 159 void *secdata;
131 int error; 160 int error;
132 u32 secdata_len = 0; 161 u32 secdata_len = 0;
133 162
134 if (!sd) 163 if (!sd)
135 return -EINVAL; 164 return -EINVAL;
136 if (!sd->s_iattr)
137 sd->s_iattr = sysfs_init_inode_attrs(sd);
138 if (!sd->s_iattr)
139 return -ENOMEM;
140
141 iattrs = sd->s_iattr;
142 165
143 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { 166 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
144 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; 167 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
@@ -150,12 +173,13 @@ int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
150 &secdata, &secdata_len); 173 &secdata, &secdata_len);
151 if (error) 174 if (error)
152 goto out; 175 goto out;
153 if (iattrs->ia_secdata)
154 security_release_secctx(iattrs->ia_secdata,
155 iattrs->ia_secdata_len);
156 iattrs->ia_secdata = secdata;
157 iattrs->ia_secdata_len = secdata_len;
158 176
177 mutex_lock(&sysfs_mutex);
178 error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
179 mutex_unlock(&sysfs_mutex);
180
181 if (secdata)
182 security_release_secctx(secdata, secdata_len);
159 } else 183 } else
160 return -EINVAL; 184 return -EINVAL;
161out: 185out:
@@ -170,7 +194,6 @@ static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
170 194
171static inline void set_inode_attr(struct inode * inode, struct iattr * iattr) 195static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
172{ 196{
173 inode->i_mode = iattr->ia_mode;
174 inode->i_uid = iattr->ia_uid; 197 inode->i_uid = iattr->ia_uid;
175 inode->i_gid = iattr->ia_gid; 198 inode->i_gid = iattr->ia_gid;
176 inode->i_atime = iattr->ia_atime; 199 inode->i_atime = iattr->ia_atime;
@@ -178,17 +201,6 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
178 inode->i_ctime = iattr->ia_ctime; 201 inode->i_ctime = iattr->ia_ctime;
179} 202}
180 203
181
182/*
183 * sysfs has a different i_mutex lock order behavior for i_mutex than other
184 * filesystems; sysfs i_mutex is called in many places with subsystem locks
185 * held. At the same time, many of the VFS locking rules do not apply to
186 * sysfs at all (cross directory rename for example). To untangle this mess
187 * (which gives false positives in lockdep), we're giving sysfs inodes their
188 * own class for i_mutex.
189 */
190static struct lock_class_key sysfs_inode_imutex_key;
191
192static int sysfs_count_nlink(struct sysfs_dirent *sd) 204static int sysfs_count_nlink(struct sysfs_dirent *sd)
193{ 205{
194 struct sysfs_dirent *child; 206 struct sysfs_dirent *child;
@@ -201,38 +213,55 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
201 return nr + 2; 213 return nr + 2;
202} 214}
203 215
216static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
217{
218 struct sysfs_inode_attrs *iattrs = sd->s_iattr;
219
220 inode->i_mode = sd->s_mode;
221 if (iattrs) {
222 /* sysfs_dirent has non-default attributes
223 * get them from persistent copy in sysfs_dirent
224 */
225 set_inode_attr(inode, &iattrs->ia_iattr);
226 security_inode_notifysecctx(inode,
227 iattrs->ia_secdata,
228 iattrs->ia_secdata_len);
229 }
230
231 if (sysfs_type(sd) == SYSFS_DIR)
232 inode->i_nlink = sysfs_count_nlink(sd);
233}
234
235int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
236{
237 struct sysfs_dirent *sd = dentry->d_fsdata;
238 struct inode *inode = dentry->d_inode;
239
240 mutex_lock(&sysfs_mutex);
241 sysfs_refresh_inode(sd, inode);
242 mutex_unlock(&sysfs_mutex);
243
244 generic_fillattr(inode, stat);
245 return 0;
246}
247
204static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 248static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
205{ 249{
206 struct bin_attribute *bin_attr; 250 struct bin_attribute *bin_attr;
207 struct sysfs_inode_attrs *iattrs;
208 251
209 inode->i_private = sysfs_get(sd); 252 inode->i_private = sysfs_get(sd);
210 inode->i_mapping->a_ops = &sysfs_aops; 253 inode->i_mapping->a_ops = &sysfs_aops;
211 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; 254 inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
212 inode->i_op = &sysfs_inode_operations; 255 inode->i_op = &sysfs_inode_operations;
213 inode->i_ino = sd->s_ino;
214 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
215 256
216 iattrs = sd->s_iattr; 257 set_default_inode_attr(inode, sd->s_mode);
217 if (iattrs) { 258 sysfs_refresh_inode(sd, inode);
218 /* sysfs_dirent has non-default attributes
219 * get them for the new inode from persistent copy
220 * in sysfs_dirent
221 */
222 set_inode_attr(inode, &iattrs->ia_iattr);
223 if (iattrs->ia_secdata)
224 security_inode_notifysecctx(inode,
225 iattrs->ia_secdata,
226 iattrs->ia_secdata_len);
227 } else
228 set_default_inode_attr(inode, sd->s_mode);
229 259
230 /* initialize inode according to type */ 260 /* initialize inode according to type */
231 switch (sysfs_type(sd)) { 261 switch (sysfs_type(sd)) {
232 case SYSFS_DIR: 262 case SYSFS_DIR:
233 inode->i_op = &sysfs_dir_inode_operations; 263 inode->i_op = &sysfs_dir_inode_operations;
234 inode->i_fop = &sysfs_dir_operations; 264 inode->i_fop = &sysfs_dir_operations;
235 inode->i_nlink = sysfs_count_nlink(sd);
236 break; 265 break;
237 case SYSFS_KOBJ_ATTR: 266 case SYSFS_KOBJ_ATTR:
238 inode->i_size = PAGE_SIZE; 267 inode->i_size = PAGE_SIZE;
@@ -315,3 +344,14 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
315 else 344 else
316 return -ENOENT; 345 return -ENOENT;
317} 346}
347
348int sysfs_permission(struct inode *inode, int mask)
349{
350 struct sysfs_dirent *sd = inode->i_private;
351
352 mutex_lock(&sysfs_mutex);
353 sysfs_refresh_inode(sd, inode);
354 mutex_unlock(&sysfs_mutex);
355
356 return generic_permission(inode, mask, NULL);
357}
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index c5081ad77026..c5eff49fa41b 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -210,10 +210,13 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
210} 210}
211 211
212const struct inode_operations sysfs_symlink_inode_operations = { 212const struct inode_operations sysfs_symlink_inode_operations = {
213 .setxattr = sysfs_setxattr, 213 .setxattr = sysfs_setxattr,
214 .readlink = generic_readlink, 214 .readlink = generic_readlink,
215 .follow_link = sysfs_follow_link, 215 .follow_link = sysfs_follow_link,
216 .put_link = sysfs_put_link, 216 .put_link = sysfs_put_link,
217 .setattr = sysfs_setattr,
218 .getattr = sysfs_getattr,
219 .permission = sysfs_permission,
217}; 220};
218 221
219 222
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index af4c4e7482ac..ca52e7b9d8f8 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -89,9 +89,7 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
89 */ 89 */
90struct sysfs_addrm_cxt { 90struct sysfs_addrm_cxt {
91 struct sysfs_dirent *parent_sd; 91 struct sysfs_dirent *parent_sd;
92 struct inode *parent_inode;
93 struct sysfs_dirent *removed; 92 struct sysfs_dirent *removed;
94 int cnt;
95}; 93};
96 94
97/* 95/*
@@ -105,7 +103,6 @@ extern struct kmem_cache *sysfs_dir_cachep;
105 * dir.c 103 * dir.c
106 */ 104 */
107extern struct mutex sysfs_mutex; 105extern struct mutex sysfs_mutex;
108extern struct mutex sysfs_rename_mutex;
109extern spinlock_t sysfs_assoc_lock; 106extern spinlock_t sysfs_assoc_lock;
110 107
111extern const struct file_operations sysfs_dir_operations; 108extern const struct file_operations sysfs_dir_operations;
@@ -133,6 +130,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
133 struct sysfs_dirent **p_sd); 130 struct sysfs_dirent **p_sd);
134void sysfs_remove_subdir(struct sysfs_dirent *sd); 131void sysfs_remove_subdir(struct sysfs_dirent *sd);
135 132
133int sysfs_rename(struct sysfs_dirent *sd,
134 struct sysfs_dirent *new_parent_sd, const char *new_name);
135
136static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) 136static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
137{ 137{
138 if (sd) { 138 if (sd) {
@@ -155,7 +155,10 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
155 */ 155 */
156struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 156struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
157void sysfs_delete_inode(struct inode *inode); 157void sysfs_delete_inode(struct inode *inode);
158int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
159int sysfs_permission(struct inode *inode, int mask);
158int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 160int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
161int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
159int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 162int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
160 size_t size, int flags); 163 size_t size, int flags);
161int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 164int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index dbc093afd946..8a771c59ac3e 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2014,7 +2014,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
2014 inum = key_inum_flash(c, &dent->key); 2014 inum = key_inum_flash(c, &dent->key);
2015 fscki1 = read_add_inode(c, priv, inum); 2015 fscki1 = read_add_inode(c, priv, inum);
2016 if (IS_ERR(fscki1)) { 2016 if (IS_ERR(fscki1)) {
2017 err = PTR_ERR(fscki); 2017 err = PTR_ERR(fscki1);
2018 ubifs_err("error %d while processing entry node and " 2018 ubifs_err("error %d while processing entry node and "
2019 "trying to find parent inode node %lu", 2019 "trying to find parent inode node %lu",
2020 err, (unsigned long)inum); 2020 err, (unsigned long)inum);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 1009adc8d602..39849f887e72 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1389,7 +1389,6 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1389 unsigned long nr_segs, loff_t pos) 1389 unsigned long nr_segs, loff_t pos)
1390{ 1390{
1391 int err; 1391 int err;
1392 ssize_t ret;
1393 struct inode *inode = iocb->ki_filp->f_mapping->host; 1392 struct inode *inode = iocb->ki_filp->f_mapping->host;
1394 struct ubifs_info *c = inode->i_sb->s_fs_info; 1393 struct ubifs_info *c = inode->i_sb->s_fs_info;
1395 1394
@@ -1397,17 +1396,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1397 if (err) 1396 if (err)
1398 return err; 1397 return err;
1399 1398
1400 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 1399 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1401 if (ret < 0)
1402 return ret;
1403
1404 if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
1405 err = ubifs_sync_wbufs_by_inode(c, inode);
1406 if (err)
1407 return err;
1408 }
1409
1410 return ret;
1411} 1400}
1412 1401
1413static int ubifs_set_page_dirty(struct page *page) 1402static int ubifs_set_page_dirty(struct page *page)
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index f94ddf7efba0..868a55ee080f 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -23,7 +23,7 @@
23/* 23/*
24 * This file implements functions needed to recover from unclean un-mounts. 24 * This file implements functions needed to recover from unclean un-mounts.
25 * When UBIFS is mounted, it checks a flag on the master node to determine if 25 * When UBIFS is mounted, it checks a flag on the master node to determine if
26 * an un-mount was completed sucessfully. If not, the process of mounting 26 * an un-mount was completed successfully. If not, the process of mounting
27 * incorparates additional checking and fixing of on-flash data structures. 27 * incorparates additional checking and fixing of on-flash data structures.
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that 28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted 29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 333e181ee987..943ad5624530 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1842,22 +1842,32 @@ const struct super_operations ubifs_super_operations = {
1842 * @name: UBI volume name 1842 * @name: UBI volume name
1843 * @mode: UBI volume open mode 1843 * @mode: UBI volume open mode
1844 * 1844 *
1845 * There are several ways to specify UBI volumes when mounting UBIFS: 1845 * The primary method of mounting UBIFS is by specifying the UBI volume
1846 * o ubiX_Y - UBI device number X, volume Y; 1846 * character device node path. However, UBIFS may also be mounted withoug any
1847 * o ubiY - UBI device number 0, volume Y; 1847 * character device node using one of the following methods:
1848 *
1849 * o ubiX_Y - mount UBI device number X, volume Y;
1850 * o ubiY - mount UBI device number 0, volume Y;
1848 * o ubiX:NAME - mount UBI device X, volume with name NAME; 1851 * o ubiX:NAME - mount UBI device X, volume with name NAME;
1849 * o ubi:NAME - mount UBI device 0, volume with name NAME. 1852 * o ubi:NAME - mount UBI device 0, volume with name NAME.
1850 * 1853 *
1851 * Alternative '!' separator may be used instead of ':' (because some shells 1854 * Alternative '!' separator may be used instead of ':' (because some shells
1852 * like busybox may interpret ':' as an NFS host name separator). This function 1855 * like busybox may interpret ':' as an NFS host name separator). This function
1853 * returns ubi volume object in case of success and a negative error code in 1856 * returns UBI volume description object in case of success and a negative
1854 * case of failure. 1857 * error code in case of failure.
1855 */ 1858 */
1856static struct ubi_volume_desc *open_ubi(const char *name, int mode) 1859static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1857{ 1860{
1861 struct ubi_volume_desc *ubi;
1858 int dev, vol; 1862 int dev, vol;
1859 char *endptr; 1863 char *endptr;
1860 1864
1865 /* First, try to open using the device node path method */
1866 ubi = ubi_open_volume_path(name, mode);
1867 if (!IS_ERR(ubi))
1868 return ubi;
1869
1870 /* Try the "nodev" method */
1861 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') 1871 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
1862 return ERR_PTR(-EINVAL); 1872 return ERR_PTR(-EINVAL);
1863 1873
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index c6ad7c7e3ee9..05ac0fe9c4d3 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -36,7 +36,7 @@ posix_acl_from_xattr(const void *value, size_t size)
36 if (count == 0) 36 if (count == 0)
37 return NULL; 37 return NULL;
38 38
39 acl = posix_acl_alloc(count, GFP_KERNEL); 39 acl = posix_acl_alloc(count, GFP_NOFS);
40 if (!acl) 40 if (!acl)
41 return ERR_PTR(-ENOMEM); 41 return ERR_PTR(-ENOMEM);
42 acl_e = acl->a_entries; 42 acl_e = acl->a_entries;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c2e30eea74dc..87813e405cef 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -235,71 +235,36 @@ xfs_setfilesize(
235} 235}
236 236
237/* 237/*
238 * Buffered IO write completion for delayed allocate extents. 238 * IO write completion.
239 */ 239 */
240STATIC void 240STATIC void
241xfs_end_bio_delalloc( 241xfs_end_io(
242 struct work_struct *work)
243{
244 xfs_ioend_t *ioend =
245 container_of(work, xfs_ioend_t, io_work);
246
247 xfs_setfilesize(ioend);
248 xfs_destroy_ioend(ioend);
249}
250
251/*
252 * Buffered IO write completion for regular, written extents.
253 */
254STATIC void
255xfs_end_bio_written(
256 struct work_struct *work)
257{
258 xfs_ioend_t *ioend =
259 container_of(work, xfs_ioend_t, io_work);
260
261 xfs_setfilesize(ioend);
262 xfs_destroy_ioend(ioend);
263}
264
265/*
266 * IO write completion for unwritten extents.
267 *
268 * Issue transactions to convert a buffer range from unwritten
269 * to written extents.
270 */
271STATIC void
272xfs_end_bio_unwritten(
273 struct work_struct *work) 242 struct work_struct *work)
274{ 243{
275 xfs_ioend_t *ioend = 244 xfs_ioend_t *ioend =
276 container_of(work, xfs_ioend_t, io_work); 245 container_of(work, xfs_ioend_t, io_work);
277 struct xfs_inode *ip = XFS_I(ioend->io_inode); 246 struct xfs_inode *ip = XFS_I(ioend->io_inode);
278 xfs_off_t offset = ioend->io_offset;
279 size_t size = ioend->io_size;
280
281 if (likely(!ioend->io_error)) {
282 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
283 int error;
284 error = xfs_iomap_write_unwritten(ip, offset, size);
285 if (error)
286 ioend->io_error = error;
287 }
288 xfs_setfilesize(ioend);
289 }
290 xfs_destroy_ioend(ioend);
291}
292 247
293/* 248 /*
294 * IO read completion for regular, written extents. 249 * For unwritten extents we need to issue transactions to convert a
295 */ 250 * range to normal written extens after the data I/O has finished.
296STATIC void 251 */
297xfs_end_bio_read( 252 if (ioend->io_type == IOMAP_UNWRITTEN &&
298 struct work_struct *work) 253 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
299{ 254 int error;
300 xfs_ioend_t *ioend = 255
301 container_of(work, xfs_ioend_t, io_work); 256 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
257 ioend->io_size);
258 if (error)
259 ioend->io_error = error;
260 }
302 261
262 /*
263 * We might have to update the on-disk file size after extending
264 * writes.
265 */
266 if (ioend->io_type != IOMAP_READ)
267 xfs_setfilesize(ioend);
303 xfs_destroy_ioend(ioend); 268 xfs_destroy_ioend(ioend);
304} 269}
305 270
@@ -314,10 +279,10 @@ xfs_finish_ioend(
314 int wait) 279 int wait)
315{ 280{
316 if (atomic_dec_and_test(&ioend->io_remaining)) { 281 if (atomic_dec_and_test(&ioend->io_remaining)) {
317 struct workqueue_struct *wq = xfsdatad_workqueue; 282 struct workqueue_struct *wq;
318 if (ioend->io_work.func == xfs_end_bio_unwritten)
319 wq = xfsconvertd_workqueue;
320 283
284 wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
285 xfsconvertd_workqueue : xfsdatad_workqueue;
321 queue_work(wq, &ioend->io_work); 286 queue_work(wq, &ioend->io_work);
322 if (wait) 287 if (wait)
323 flush_workqueue(wq); 288 flush_workqueue(wq);
@@ -355,15 +320,7 @@ xfs_alloc_ioend(
355 ioend->io_offset = 0; 320 ioend->io_offset = 0;
356 ioend->io_size = 0; 321 ioend->io_size = 0;
357 322
358 if (type == IOMAP_UNWRITTEN) 323 INIT_WORK(&ioend->io_work, xfs_end_io);
359 INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
360 else if (type == IOMAP_DELAY)
361 INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
362 else if (type == IOMAP_READ)
363 INIT_WORK(&ioend->io_work, xfs_end_bio_read);
364 else
365 INIT_WORK(&ioend->io_work, xfs_end_bio_written);
366
367 return ioend; 324 return ioend;
368} 325}
369 326
@@ -380,7 +337,7 @@ xfs_map_blocks(
380 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); 337 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
381} 338}
382 339
383STATIC_INLINE int 340STATIC int
384xfs_iomap_valid( 341xfs_iomap_valid(
385 xfs_iomap_t *iomapp, 342 xfs_iomap_t *iomapp,
386 loff_t offset) 343 loff_t offset)
@@ -412,8 +369,9 @@ xfs_end_bio(
412 369
413STATIC void 370STATIC void
414xfs_submit_ioend_bio( 371xfs_submit_ioend_bio(
415 xfs_ioend_t *ioend, 372 struct writeback_control *wbc,
416 struct bio *bio) 373 xfs_ioend_t *ioend,
374 struct bio *bio)
417{ 375{
418 atomic_inc(&ioend->io_remaining); 376 atomic_inc(&ioend->io_remaining);
419 bio->bi_private = ioend; 377 bio->bi_private = ioend;
@@ -426,7 +384,8 @@ xfs_submit_ioend_bio(
426 if (xfs_ioend_new_eof(ioend)) 384 if (xfs_ioend_new_eof(ioend))
427 xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); 385 xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
428 386
429 submit_bio(WRITE, bio); 387 submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
388 WRITE_SYNC_PLUG : WRITE, bio);
430 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); 389 ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
431 bio_put(bio); 390 bio_put(bio);
432} 391}
@@ -505,6 +464,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
505 */ 464 */
506STATIC void 465STATIC void
507xfs_submit_ioend( 466xfs_submit_ioend(
467 struct writeback_control *wbc,
508 xfs_ioend_t *ioend) 468 xfs_ioend_t *ioend)
509{ 469{
510 xfs_ioend_t *head = ioend; 470 xfs_ioend_t *head = ioend;
@@ -533,19 +493,19 @@ xfs_submit_ioend(
533 retry: 493 retry:
534 bio = xfs_alloc_ioend_bio(bh); 494 bio = xfs_alloc_ioend_bio(bh);
535 } else if (bh->b_blocknr != lastblock + 1) { 495 } else if (bh->b_blocknr != lastblock + 1) {
536 xfs_submit_ioend_bio(ioend, bio); 496 xfs_submit_ioend_bio(wbc, ioend, bio);
537 goto retry; 497 goto retry;
538 } 498 }
539 499
540 if (bio_add_buffer(bio, bh) != bh->b_size) { 500 if (bio_add_buffer(bio, bh) != bh->b_size) {
541 xfs_submit_ioend_bio(ioend, bio); 501 xfs_submit_ioend_bio(wbc, ioend, bio);
542 goto retry; 502 goto retry;
543 } 503 }
544 504
545 lastblock = bh->b_blocknr; 505 lastblock = bh->b_blocknr;
546 } 506 }
547 if (bio) 507 if (bio)
548 xfs_submit_ioend_bio(ioend, bio); 508 xfs_submit_ioend_bio(wbc, ioend, bio);
549 xfs_finish_ioend(ioend, 0); 509 xfs_finish_ioend(ioend, 0);
550 } while ((ioend = next) != NULL); 510 } while ((ioend = next) != NULL);
551} 511}
@@ -904,16 +864,9 @@ xfs_convert_page(
904 864
905 if (startio) { 865 if (startio) {
906 if (count) { 866 if (count) {
907 struct backing_dev_info *bdi;
908
909 bdi = inode->i_mapping->backing_dev_info;
910 wbc->nr_to_write--; 867 wbc->nr_to_write--;
911 if (bdi_write_congested(bdi)) { 868 if (wbc->nr_to_write <= 0)
912 wbc->encountered_congestion = 1;
913 done = 1;
914 } else if (wbc->nr_to_write <= 0) {
915 done = 1; 869 done = 1;
916 }
917 } 870 }
918 xfs_start_page_writeback(page, !page_dirty, count); 871 xfs_start_page_writeback(page, !page_dirty, count);
919 } 872 }
@@ -1198,7 +1151,7 @@ xfs_page_state_convert(
1198 } 1151 }
1199 1152
1200 if (iohead) 1153 if (iohead)
1201 xfs_submit_ioend(iohead); 1154 xfs_submit_ioend(wbc, iohead);
1202 1155
1203 return page_dirty; 1156 return page_dirty;
1204 1157
@@ -1535,7 +1488,7 @@ xfs_end_io_direct(
1535 * didn't map an unwritten extent so switch it's completion 1488 * didn't map an unwritten extent so switch it's completion
1536 * handler. 1489 * handler.
1537 */ 1490 */
1538 INIT_WORK(&ioend->io_work, xfs_end_bio_written); 1491 ioend->io_type = IOMAP_NEW;
1539 xfs_finish_ioend(ioend, 0); 1492 xfs_finish_ioend(ioend, 0);
1540 } 1493 }
1541 1494
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 965df1227d64..4ddc973aea7a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -149,7 +149,7 @@ page_region_mask(
149 return mask; 149 return mask;
150} 150}
151 151
152STATIC_INLINE void 152STATIC void
153set_page_region( 153set_page_region(
154 struct page *page, 154 struct page *page,
155 size_t offset, 155 size_t offset,
@@ -161,7 +161,7 @@ set_page_region(
161 SetPageUptodate(page); 161 SetPageUptodate(page);
162} 162}
163 163
164STATIC_INLINE int 164STATIC int
165test_page_region( 165test_page_region(
166 struct page *page, 166 struct page *page,
167 size_t offset, 167 size_t offset,
@@ -582,7 +582,7 @@ found:
582 * although backing storage may not be. 582 * although backing storage may not be.
583 */ 583 */
584xfs_buf_t * 584xfs_buf_t *
585xfs_buf_get_flags( 585xfs_buf_get(
586 xfs_buftarg_t *target,/* target for buffer */ 586 xfs_buftarg_t *target,/* target for buffer */
587 xfs_off_t ioff, /* starting offset of range */ 587 xfs_off_t ioff, /* starting offset of range */
588 size_t isize, /* length of range */ 588 size_t isize, /* length of range */
@@ -661,7 +661,7 @@ _xfs_buf_read(
661} 661}
662 662
663xfs_buf_t * 663xfs_buf_t *
664xfs_buf_read_flags( 664xfs_buf_read(
665 xfs_buftarg_t *target, 665 xfs_buftarg_t *target,
666 xfs_off_t ioff, 666 xfs_off_t ioff,
667 size_t isize, 667 size_t isize,
@@ -671,7 +671,7 @@ xfs_buf_read_flags(
671 671
672 flags |= XBF_READ; 672 flags |= XBF_READ;
673 673
674 bp = xfs_buf_get_flags(target, ioff, isize, flags); 674 bp = xfs_buf_get(target, ioff, isize, flags);
675 if (bp) { 675 if (bp) {
676 if (!XFS_BUF_ISDONE(bp)) { 676 if (!XFS_BUF_ISDONE(bp)) {
677 XB_TRACE(bp, "read", (unsigned long)flags); 677 XB_TRACE(bp, "read", (unsigned long)flags);
@@ -718,7 +718,7 @@ xfs_buf_readahead(
718 return; 718 return;
719 719
720 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); 720 flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
721 xfs_buf_read_flags(target, ioff, isize, flags); 721 xfs_buf_read(target, ioff, isize, flags);
722} 722}
723 723
724xfs_buf_t * 724xfs_buf_t *
@@ -1113,7 +1113,7 @@ xfs_bdwrite(
1113 xfs_buf_delwri_queue(bp, 1); 1113 xfs_buf_delwri_queue(bp, 1);
1114} 1114}
1115 1115
1116STATIC_INLINE void 1116STATIC void
1117_xfs_buf_ioend( 1117_xfs_buf_ioend(
1118 xfs_buf_t *bp, 1118 xfs_buf_t *bp,
1119 int schedule) 1119 int schedule)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9b4d666ad31f..5f07dd91c5fa 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -186,15 +186,10 @@ extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
186#define xfs_incore(buftarg,blkno,len,lockit) \ 186#define xfs_incore(buftarg,blkno,len,lockit) \
187 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) 187 _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
188 188
189extern xfs_buf_t *xfs_buf_get_flags(xfs_buftarg_t *, xfs_off_t, size_t, 189extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
190 xfs_buf_flags_t); 190 xfs_buf_flags_t);
191#define xfs_buf_get(target, blkno, len, flags) \ 191extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
192 xfs_buf_get_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
193
194extern xfs_buf_t *xfs_buf_read_flags(xfs_buftarg_t *, xfs_off_t, size_t,
195 xfs_buf_flags_t); 192 xfs_buf_flags_t);
196#define xfs_buf_read(target, blkno, len, flags) \
197 xfs_buf_read_flags((target), (blkno), (len), XBF_LOCK | XBF_MAPPED)
198 193
199extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 194extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
200extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); 195extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index eff61e2732af..e4caeb28ce2e 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -52,7 +52,7 @@ xfs_file_aio_read(
52 loff_t pos) 52 loff_t pos)
53{ 53{
54 struct file *file = iocb->ki_filp; 54 struct file *file = iocb->ki_filp;
55 int ioflags = IO_ISAIO; 55 int ioflags = 0;
56 56
57 BUG_ON(iocb->ki_pos != pos); 57 BUG_ON(iocb->ki_pos != pos);
58 if (unlikely(file->f_flags & O_DIRECT)) 58 if (unlikely(file->f_flags & O_DIRECT))
@@ -71,7 +71,7 @@ xfs_file_aio_write(
71 loff_t pos) 71 loff_t pos)
72{ 72{
73 struct file *file = iocb->ki_filp; 73 struct file *file = iocb->ki_filp;
74 int ioflags = IO_ISAIO; 74 int ioflags = 0;
75 75
76 BUG_ON(iocb->ki_pos != pos); 76 BUG_ON(iocb->ki_pos != pos);
77 if (unlikely(file->f_flags & O_DIRECT)) 77 if (unlikely(file->f_flags & O_DIRECT))
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cd42ef78f6b5..1f3b4b8f7dd4 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -573,8 +573,8 @@ xfs_vn_fallocate(
573 bf.l_len = len; 573 bf.l_len = len;
574 574
575 xfs_ilock(ip, XFS_IOLOCK_EXCL); 575 xfs_ilock(ip, XFS_IOLOCK_EXCL);
576 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 576 error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
577 0, XFS_ATTR_NOLOCK); 577 0, XFS_ATTR_NOLOCK);
578 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 578 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
579 offset + len > i_size_read(inode)) 579 offset + len > i_size_read(inode))
580 new_size = offset + len; 580 new_size = offset + len;
@@ -585,7 +585,7 @@ xfs_vn_fallocate(
585 585
586 iattr.ia_valid = ATTR_SIZE; 586 iattr.ia_valid = ATTR_SIZE;
587 iattr.ia_size = new_size; 587 iattr.ia_size = new_size;
588 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 588 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
589 } 589 }
590 590
591 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 591 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 072050f8d346..1bf47f219c97 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -255,8 +255,6 @@ xfs_read(
255 255
256 iocb->ki_pos = *offset; 256 iocb->ki_pos = *offset;
257 ret = generic_file_aio_read(iocb, iovp, segs, *offset); 257 ret = generic_file_aio_read(iocb, iovp, segs, *offset);
258 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
259 ret = wait_on_sync_kiocb(iocb);
260 if (ret > 0) 258 if (ret > 0)
261 XFS_STATS_ADD(xs_read_bytes, ret); 259 XFS_STATS_ADD(xs_read_bytes, ret);
262 260
@@ -774,9 +772,6 @@ write_retry:
774 772
775 current->backing_dev_info = NULL; 773 current->backing_dev_info = NULL;
776 774
777 if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
778 ret = wait_on_sync_kiocb(iocb);
779
780 isize = i_size_read(inode); 775 isize = i_size_read(inode);
781 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) 776 if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
782 *offset = isize; 777 *offset = isize;
@@ -811,7 +806,7 @@ write_retry:
811 XFS_STATS_ADD(xs_write_bytes, ret); 806 XFS_STATS_ADD(xs_write_bytes, ret);
812 807
813 /* Handle various SYNC-type writes */ 808 /* Handle various SYNC-type writes */
814 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { 809 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
815 loff_t end = pos + ret - 1; 810 loff_t end = pos + ret - 1;
816 int error2; 811 int error2;
817 812
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 18a4b8e11df2..1bfb0e980193 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -930,13 +930,39 @@ xfs_fs_alloc_inode(
930 */ 930 */
931STATIC void 931STATIC void
932xfs_fs_destroy_inode( 932xfs_fs_destroy_inode(
933 struct inode *inode) 933 struct inode *inode)
934{ 934{
935 xfs_inode_t *ip = XFS_I(inode); 935 struct xfs_inode *ip = XFS_I(inode);
936
937 xfs_itrace_entry(ip);
936 938
937 XFS_STATS_INC(vn_reclaim); 939 XFS_STATS_INC(vn_reclaim);
938 if (xfs_reclaim(ip)) 940
939 panic("%s: cannot reclaim 0x%p\n", __func__, inode); 941 /* bad inode, get out here ASAP */
942 if (is_bad_inode(inode))
943 goto out_reclaim;
944
945 xfs_ioend_wait(ip);
946
947 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
948
949 /*
950 * We should never get here with one of the reclaim flags already set.
951 */
952 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
953 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
954
955 /*
956 * If we have nothing to flush with this inode then complete the
957 * teardown now, otherwise delay the flush operation.
958 */
959 if (!xfs_inode_clean(ip)) {
960 xfs_inode_set_reclaim_tag(ip);
961 return;
962 }
963
964out_reclaim:
965 xfs_ireclaim(ip);
940} 966}
941 967
942/* 968/*
@@ -973,7 +999,6 @@ xfs_fs_inode_init_once(
973 999
974 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 1000 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
975 "xfsino", ip->i_ino); 1001 "xfsino", ip->i_ino);
976 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
977} 1002}
978 1003
979/* 1004/*
@@ -1075,6 +1100,20 @@ xfs_fs_clear_inode(
1075 XFS_STATS_INC(vn_remove); 1100 XFS_STATS_INC(vn_remove);
1076 XFS_STATS_DEC(vn_active); 1101 XFS_STATS_DEC(vn_active);
1077 1102
1103 /*
1104 * The iolock is used by the file system to coordinate reads,
1105 * writes, and block truncates. Up to this point the lock
1106 * protected concurrent accesses by users of the inode. But
1107 * from here forward we're doing some final processing of the
1108 * inode because we're done with it, and although we reuse the
1109 * iolock for protection it is really a distinct lock class
1110 * (in the lockdep sense) from before. To keep lockdep happy
1111 * (and basically indicate what we are doing), we explicitly
1112 * re-init the iolock here.
1113 */
1114 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
1115 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
1116
1078 xfs_inactive(ip); 1117 xfs_inactive(ip);
1079} 1118}
1080 1119
@@ -1092,8 +1131,6 @@ xfs_fs_put_super(
1092 struct super_block *sb) 1131 struct super_block *sb)
1093{ 1132{
1094 struct xfs_mount *mp = XFS_M(sb); 1133 struct xfs_mount *mp = XFS_M(sb);
1095 struct xfs_inode *rip = mp->m_rootip;
1096 int unmount_event_flags = 0;
1097 1134
1098 xfs_syncd_stop(mp); 1135 xfs_syncd_stop(mp);
1099 1136
@@ -1109,20 +1146,7 @@ xfs_fs_put_super(
1109 xfs_sync_attr(mp, 0); 1146 xfs_sync_attr(mp, 0);
1110 } 1147 }
1111 1148
1112#ifdef HAVE_DMAPI 1149 XFS_SEND_PREUNMOUNT(mp);
1113 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1114 unmount_event_flags =
1115 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1116 0 : DM_FLAGS_UNWANTED;
1117 /*
1118 * Ignore error from dmapi here, first unmount is not allowed
1119 * to fail anyway, and second we wouldn't want to fail a
1120 * unmount because of dmapi.
1121 */
1122 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1123 NULL, NULL, 0, 0, unmount_event_flags);
1124 }
1125#endif
1126 1150
1127 /* 1151 /*
1128 * Blow away any referenced inode in the filestreams cache. 1152 * Blow away any referenced inode in the filestreams cache.
@@ -1133,10 +1157,7 @@ xfs_fs_put_super(
1133 1157
1134 XFS_bflush(mp->m_ddev_targp); 1158 XFS_bflush(mp->m_ddev_targp);
1135 1159
1136 if (mp->m_flags & XFS_MOUNT_DMAPI) { 1160 XFS_SEND_UNMOUNT(mp);
1137 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1138 unmount_event_flags);
1139 }
1140 1161
1141 xfs_unmountfs(mp); 1162 xfs_unmountfs(mp);
1142 xfs_freesb(mp); 1163 xfs_freesb(mp);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 961df0a22c78..d895a3a960f5 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -663,10 +663,9 @@ xfs_syncd_stop(
663 kthread_stop(mp->m_sync_task); 663 kthread_stop(mp->m_sync_task);
664} 664}
665 665
666int 666STATIC int
667xfs_reclaim_inode( 667xfs_reclaim_inode(
668 xfs_inode_t *ip, 668 xfs_inode_t *ip,
669 int locked,
670 int sync_mode) 669 int sync_mode)
671{ 670{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 671 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
@@ -682,10 +681,6 @@ xfs_reclaim_inode(
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { 681 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock); 682 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock); 683 write_unlock(&pag->pag_ici_lock);
685 if (locked) {
686 xfs_ifunlock(ip);
687 xfs_iunlock(ip, XFS_ILOCK_EXCL);
688 }
689 return -EAGAIN; 684 return -EAGAIN;
690 } 685 }
691 __xfs_iflags_set(ip, XFS_IRECLAIM); 686 __xfs_iflags_set(ip, XFS_IRECLAIM);
@@ -704,10 +699,8 @@ xfs_reclaim_inode(
704 * We get the flush lock regardless, though, just to make sure 699 * We get the flush lock regardless, though, just to make sure
705 * we don't free it while it is being flushed. 700 * we don't free it while it is being flushed.
706 */ 701 */
707 if (!locked) { 702 xfs_ilock(ip, XFS_ILOCK_EXCL);
708 xfs_ilock(ip, XFS_ILOCK_EXCL); 703 xfs_iflock(ip);
709 xfs_iflock(ip);
710 }
711 704
712 /* 705 /*
713 * In the case of a forced shutdown we rely on xfs_iflush() to 706 * In the case of a forced shutdown we rely on xfs_iflush() to
@@ -778,7 +771,7 @@ xfs_reclaim_inode_now(
778 } 771 }
779 read_unlock(&pag->pag_ici_lock); 772 read_unlock(&pag->pag_ici_lock);
780 773
781 return xfs_reclaim_inode(ip, 0, flags); 774 return xfs_reclaim_inode(ip, flags);
782} 775}
783 776
784int 777int
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 27920eb7a820..a500b4d91835 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -44,7 +44,6 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
44 44
45void xfs_flush_inodes(struct xfs_inode *ip); 45void xfs_flush_inodes(struct xfs_inode *ip);
46 46
47int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
48int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 47int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
49 48
50void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 49void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index c5bc67c4e3bb..7bb5092d6ae4 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -55,170 +55,140 @@ xfs_stats_clear_proc_handler(
55 55
56static ctl_table xfs_table[] = { 56static ctl_table xfs_table[] = {
57 { 57 {
58 .ctl_name = XFS_SGID_INHERIT,
59 .procname = "irix_sgid_inherit", 58 .procname = "irix_sgid_inherit",
60 .data = &xfs_params.sgid_inherit.val, 59 .data = &xfs_params.sgid_inherit.val,
61 .maxlen = sizeof(int), 60 .maxlen = sizeof(int),
62 .mode = 0644, 61 .mode = 0644,
63 .proc_handler = &proc_dointvec_minmax, 62 .proc_handler = proc_dointvec_minmax,
64 .strategy = &sysctl_intvec,
65 .extra1 = &xfs_params.sgid_inherit.min, 63 .extra1 = &xfs_params.sgid_inherit.min,
66 .extra2 = &xfs_params.sgid_inherit.max 64 .extra2 = &xfs_params.sgid_inherit.max
67 }, 65 },
68 { 66 {
69 .ctl_name = XFS_SYMLINK_MODE,
70 .procname = "irix_symlink_mode", 67 .procname = "irix_symlink_mode",
71 .data = &xfs_params.symlink_mode.val, 68 .data = &xfs_params.symlink_mode.val,
72 .maxlen = sizeof(int), 69 .maxlen = sizeof(int),
73 .mode = 0644, 70 .mode = 0644,
74 .proc_handler = &proc_dointvec_minmax, 71 .proc_handler = proc_dointvec_minmax,
75 .strategy = &sysctl_intvec,
76 .extra1 = &xfs_params.symlink_mode.min, 72 .extra1 = &xfs_params.symlink_mode.min,
77 .extra2 = &xfs_params.symlink_mode.max 73 .extra2 = &xfs_params.symlink_mode.max
78 }, 74 },
79 { 75 {
80 .ctl_name = XFS_PANIC_MASK,
81 .procname = "panic_mask", 76 .procname = "panic_mask",
82 .data = &xfs_params.panic_mask.val, 77 .data = &xfs_params.panic_mask.val,
83 .maxlen = sizeof(int), 78 .maxlen = sizeof(int),
84 .mode = 0644, 79 .mode = 0644,
85 .proc_handler = &proc_dointvec_minmax, 80 .proc_handler = proc_dointvec_minmax,
86 .strategy = &sysctl_intvec,
87 .extra1 = &xfs_params.panic_mask.min, 81 .extra1 = &xfs_params.panic_mask.min,
88 .extra2 = &xfs_params.panic_mask.max 82 .extra2 = &xfs_params.panic_mask.max
89 }, 83 },
90 84
91 { 85 {
92 .ctl_name = XFS_ERRLEVEL,
93 .procname = "error_level", 86 .procname = "error_level",
94 .data = &xfs_params.error_level.val, 87 .data = &xfs_params.error_level.val,
95 .maxlen = sizeof(int), 88 .maxlen = sizeof(int),
96 .mode = 0644, 89 .mode = 0644,
97 .proc_handler = &proc_dointvec_minmax, 90 .proc_handler = proc_dointvec_minmax,
98 .strategy = &sysctl_intvec,
99 .extra1 = &xfs_params.error_level.min, 91 .extra1 = &xfs_params.error_level.min,
100 .extra2 = &xfs_params.error_level.max 92 .extra2 = &xfs_params.error_level.max
101 }, 93 },
102 { 94 {
103 .ctl_name = XFS_SYNCD_TIMER,
104 .procname = "xfssyncd_centisecs", 95 .procname = "xfssyncd_centisecs",
105 .data = &xfs_params.syncd_timer.val, 96 .data = &xfs_params.syncd_timer.val,
106 .maxlen = sizeof(int), 97 .maxlen = sizeof(int),
107 .mode = 0644, 98 .mode = 0644,
108 .proc_handler = &proc_dointvec_minmax, 99 .proc_handler = proc_dointvec_minmax,
109 .strategy = &sysctl_intvec,
110 .extra1 = &xfs_params.syncd_timer.min, 100 .extra1 = &xfs_params.syncd_timer.min,
111 .extra2 = &xfs_params.syncd_timer.max 101 .extra2 = &xfs_params.syncd_timer.max
112 }, 102 },
113 { 103 {
114 .ctl_name = XFS_INHERIT_SYNC,
115 .procname = "inherit_sync", 104 .procname = "inherit_sync",
116 .data = &xfs_params.inherit_sync.val, 105 .data = &xfs_params.inherit_sync.val,
117 .maxlen = sizeof(int), 106 .maxlen = sizeof(int),
118 .mode = 0644, 107 .mode = 0644,
119 .proc_handler = &proc_dointvec_minmax, 108 .proc_handler = proc_dointvec_minmax,
120 .strategy = &sysctl_intvec,
121 .extra1 = &xfs_params.inherit_sync.min, 109 .extra1 = &xfs_params.inherit_sync.min,
122 .extra2 = &xfs_params.inherit_sync.max 110 .extra2 = &xfs_params.inherit_sync.max
123 }, 111 },
124 { 112 {
125 .ctl_name = XFS_INHERIT_NODUMP,
126 .procname = "inherit_nodump", 113 .procname = "inherit_nodump",
127 .data = &xfs_params.inherit_nodump.val, 114 .data = &xfs_params.inherit_nodump.val,
128 .maxlen = sizeof(int), 115 .maxlen = sizeof(int),
129 .mode = 0644, 116 .mode = 0644,
130 .proc_handler = &proc_dointvec_minmax, 117 .proc_handler = proc_dointvec_minmax,
131 .strategy = &sysctl_intvec,
132 .extra1 = &xfs_params.inherit_nodump.min, 118 .extra1 = &xfs_params.inherit_nodump.min,
133 .extra2 = &xfs_params.inherit_nodump.max 119 .extra2 = &xfs_params.inherit_nodump.max
134 }, 120 },
135 { 121 {
136 .ctl_name = XFS_INHERIT_NOATIME,
137 .procname = "inherit_noatime", 122 .procname = "inherit_noatime",
138 .data = &xfs_params.inherit_noatim.val, 123 .data = &xfs_params.inherit_noatim.val,
139 .maxlen = sizeof(int), 124 .maxlen = sizeof(int),
140 .mode = 0644, 125 .mode = 0644,
141 .proc_handler = &proc_dointvec_minmax, 126 .proc_handler = proc_dointvec_minmax,
142 .strategy = &sysctl_intvec,
143 .extra1 = &xfs_params.inherit_noatim.min, 127 .extra1 = &xfs_params.inherit_noatim.min,
144 .extra2 = &xfs_params.inherit_noatim.max 128 .extra2 = &xfs_params.inherit_noatim.max
145 }, 129 },
146 { 130 {
147 .ctl_name = XFS_BUF_TIMER,
148 .procname = "xfsbufd_centisecs", 131 .procname = "xfsbufd_centisecs",
149 .data = &xfs_params.xfs_buf_timer.val, 132 .data = &xfs_params.xfs_buf_timer.val,
150 .maxlen = sizeof(int), 133 .maxlen = sizeof(int),
151 .mode = 0644, 134 .mode = 0644,
152 .proc_handler = &proc_dointvec_minmax, 135 .proc_handler = proc_dointvec_minmax,
153 .strategy = &sysctl_intvec,
154 .extra1 = &xfs_params.xfs_buf_timer.min, 136 .extra1 = &xfs_params.xfs_buf_timer.min,
155 .extra2 = &xfs_params.xfs_buf_timer.max 137 .extra2 = &xfs_params.xfs_buf_timer.max
156 }, 138 },
157 { 139 {
158 .ctl_name = XFS_BUF_AGE,
159 .procname = "age_buffer_centisecs", 140 .procname = "age_buffer_centisecs",
160 .data = &xfs_params.xfs_buf_age.val, 141 .data = &xfs_params.xfs_buf_age.val,
161 .maxlen = sizeof(int), 142 .maxlen = sizeof(int),
162 .mode = 0644, 143 .mode = 0644,
163 .proc_handler = &proc_dointvec_minmax, 144 .proc_handler = proc_dointvec_minmax,
164 .strategy = &sysctl_intvec,
165 .extra1 = &xfs_params.xfs_buf_age.min, 145 .extra1 = &xfs_params.xfs_buf_age.min,
166 .extra2 = &xfs_params.xfs_buf_age.max 146 .extra2 = &xfs_params.xfs_buf_age.max
167 }, 147 },
168 { 148 {
169 .ctl_name = XFS_INHERIT_NOSYM,
170 .procname = "inherit_nosymlinks", 149 .procname = "inherit_nosymlinks",
171 .data = &xfs_params.inherit_nosym.val, 150 .data = &xfs_params.inherit_nosym.val,
172 .maxlen = sizeof(int), 151 .maxlen = sizeof(int),
173 .mode = 0644, 152 .mode = 0644,
174 .proc_handler = &proc_dointvec_minmax, 153 .proc_handler = proc_dointvec_minmax,
175 .strategy = &sysctl_intvec,
176 .extra1 = &xfs_params.inherit_nosym.min, 154 .extra1 = &xfs_params.inherit_nosym.min,
177 .extra2 = &xfs_params.inherit_nosym.max 155 .extra2 = &xfs_params.inherit_nosym.max
178 }, 156 },
179 { 157 {
180 .ctl_name = XFS_ROTORSTEP,
181 .procname = "rotorstep", 158 .procname = "rotorstep",
182 .data = &xfs_params.rotorstep.val, 159 .data = &xfs_params.rotorstep.val,
183 .maxlen = sizeof(int), 160 .maxlen = sizeof(int),
184 .mode = 0644, 161 .mode = 0644,
185 .proc_handler = &proc_dointvec_minmax, 162 .proc_handler = proc_dointvec_minmax,
186 .strategy = &sysctl_intvec,
187 .extra1 = &xfs_params.rotorstep.min, 163 .extra1 = &xfs_params.rotorstep.min,
188 .extra2 = &xfs_params.rotorstep.max 164 .extra2 = &xfs_params.rotorstep.max
189 }, 165 },
190 { 166 {
191 .ctl_name = XFS_INHERIT_NODFRG,
192 .procname = "inherit_nodefrag", 167 .procname = "inherit_nodefrag",
193 .data = &xfs_params.inherit_nodfrg.val, 168 .data = &xfs_params.inherit_nodfrg.val,
194 .maxlen = sizeof(int), 169 .maxlen = sizeof(int),
195 .mode = 0644, 170 .mode = 0644,
196 .proc_handler = &proc_dointvec_minmax, 171 .proc_handler = proc_dointvec_minmax,
197 .strategy = &sysctl_intvec,
198 .extra1 = &xfs_params.inherit_nodfrg.min, 172 .extra1 = &xfs_params.inherit_nodfrg.min,
199 .extra2 = &xfs_params.inherit_nodfrg.max 173 .extra2 = &xfs_params.inherit_nodfrg.max
200 }, 174 },
201 { 175 {
202 .ctl_name = XFS_FILESTREAM_TIMER,
203 .procname = "filestream_centisecs", 176 .procname = "filestream_centisecs",
204 .data = &xfs_params.fstrm_timer.val, 177 .data = &xfs_params.fstrm_timer.val,
205 .maxlen = sizeof(int), 178 .maxlen = sizeof(int),
206 .mode = 0644, 179 .mode = 0644,
207 .proc_handler = &proc_dointvec_minmax, 180 .proc_handler = proc_dointvec_minmax,
208 .strategy = &sysctl_intvec,
209 .extra1 = &xfs_params.fstrm_timer.min, 181 .extra1 = &xfs_params.fstrm_timer.min,
210 .extra2 = &xfs_params.fstrm_timer.max, 182 .extra2 = &xfs_params.fstrm_timer.max,
211 }, 183 },
212 /* please keep this the last entry */ 184 /* please keep this the last entry */
213#ifdef CONFIG_PROC_FS 185#ifdef CONFIG_PROC_FS
214 { 186 {
215 .ctl_name = XFS_STATS_CLEAR,
216 .procname = "stats_clear", 187 .procname = "stats_clear",
217 .data = &xfs_params.stats_clear.val, 188 .data = &xfs_params.stats_clear.val,
218 .maxlen = sizeof(int), 189 .maxlen = sizeof(int),
219 .mode = 0644, 190 .mode = 0644,
220 .proc_handler = &xfs_stats_clear_proc_handler, 191 .proc_handler = xfs_stats_clear_proc_handler,
221 .strategy = &sysctl_intvec,
222 .extra1 = &xfs_params.stats_clear.min, 192 .extra1 = &xfs_params.stats_clear.min,
223 .extra2 = &xfs_params.stats_clear.max 193 .extra2 = &xfs_params.stats_clear.max
224 }, 194 },
@@ -229,7 +199,6 @@ static ctl_table xfs_table[] = {
229 199
230static ctl_table xfs_dir_table[] = { 200static ctl_table xfs_dir_table[] = {
231 { 201 {
232 .ctl_name = FS_XFS,
233 .procname = "xfs", 202 .procname = "xfs",
234 .mode = 0555, 203 .mode = 0555,
235 .child = xfs_table 204 .child = xfs_table
@@ -239,7 +208,6 @@ static ctl_table xfs_dir_table[] = {
239 208
240static ctl_table xfs_root_table[] = { 209static ctl_table xfs_root_table[] = {
241 { 210 {
242 .ctl_name = CTL_FS,
243 .procname = "fs", 211 .procname = "fs",
244 .mode = 0555, 212 .mode = 0555,
245 .child = xfs_dir_table 213 .child = xfs_dir_table
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index ad7fbead4c97..00cabf5354d2 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -36,7 +36,6 @@ struct attrlist_cursor_kern;
36/* 36/*
37 * Flags for read/write calls - same values as IRIX 37 * Flags for read/write calls - same values as IRIX
38 */ 38 */
39#define IO_ISAIO 0x00001 /* don't wait for completion */
40#define IO_ISDIRECT 0x00004 /* bypass page cache */ 39#define IO_ISDIRECT 0x00004 /* bypass page cache */
41#define IO_INVIS 0x00020 /* don't update inode timestamps */ 40#define IO_INVIS 0x00020 /* don't update inode timestamps */
42 41
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 6533ead9b889..a2c16bcee90b 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -98,7 +98,7 @@ typedef struct xfs_dquot {
98#define dq_flags q_lists.dqm_flags 98#define dq_flags q_lists.dqm_flags
99 99
100/* 100/*
101 * Lock hierachy for q_qlock: 101 * Lock hierarchy for q_qlock:
102 * XFS_QLOCK_NORMAL is the implicit default, 102 * XFS_QLOCK_NORMAL is the implicit default,
103 * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 103 * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
104 */ 104 */
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 6f4fd37c67af..d2d20462fd4f 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -41,10 +41,6 @@ extern void assfail(char *expr, char *f, int l);
41# define STATIC static noinline 41# define STATIC static noinline
42#endif 42#endif
43 43
44#ifndef STATIC_INLINE
45# define STATIC_INLINE static inline
46#endif
47
48#else /* DEBUG */ 44#else /* DEBUG */
49 45
50#define ASSERT(expr) \ 46#define ASSERT(expr) \
@@ -54,19 +50,5 @@ extern void assfail(char *expr, char *f, int l);
54# define STATIC noinline 50# define STATIC noinline
55#endif 51#endif
56 52
57/*
58 * We stop inlining of inline functions in debug mode.
59 * Unfortunately, this means static inline in header files
60 * get multiple definitions, so they need to remain static.
61 * This then gives tonnes of warnings about unused but defined
62 * functions, so we need to add the unused attribute to prevent
63 * these spurious warnings.
64 */
65#ifndef STATIC_INLINE
66# define STATIC_INLINE static __attribute__ ((unused)) noinline
67#endif
68
69#endif /* DEBUG */ 53#endif /* DEBUG */
70
71
72#endif /* __XFS_SUPPORT_DEBUG_H__ */ 54#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 4ece1906bd41..8fe6f6b78a4a 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -123,9 +123,13 @@ xfs_inode_hasattr(
123 * Overall external interface routines. 123 * Overall external interface routines.
124 *========================================================================*/ 124 *========================================================================*/
125 125
126int 126STATIC int
127xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, 127xfs_attr_get_int(
128 char *value, int *valuelenp, int flags) 128 struct xfs_inode *ip,
129 struct xfs_name *name,
130 char *value,
131 int *valuelenp,
132 int flags)
129{ 133{
130 xfs_da_args_t args; 134 xfs_da_args_t args;
131 int error; 135 int error;
@@ -188,7 +192,7 @@ xfs_attr_get(
188 return error; 192 return error;
189 193
190 xfs_ilock(ip, XFS_ILOCK_SHARED); 194 xfs_ilock(ip, XFS_ILOCK_SHARED);
191 error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags); 195 error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags);
192 xfs_iunlock(ip, XFS_ILOCK_SHARED); 196 xfs_iunlock(ip, XFS_ILOCK_SHARED);
193 return(error); 197 return(error);
194} 198}
@@ -2143,8 +2147,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2147 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2148 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2145 2149
2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt, 2150 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
2147 XFS_BUF_LOCK | XBF_DONT_BLOCK); 2151 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2148 ASSERT(bp); 2152 ASSERT(bp);
2149 ASSERT(!XFS_BUF_GETERROR(bp)); 2153 ASSERT(!XFS_BUF_GETERROR(bp));
2150 2154
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index fb3b2a68b9b9..12f0be3a73d4 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -131,7 +131,6 @@ typedef struct xfs_attr_list_context {
131 */ 131 */
132int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); 132int xfs_attr_calc_size(struct xfs_inode *, int, int, int *);
133int xfs_attr_inactive(struct xfs_inode *dp); 133int xfs_attr_inactive(struct xfs_inode *dp);
134int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
135int xfs_attr_rmtval_get(struct xfs_da_args *args); 134int xfs_attr_rmtval_get(struct xfs_da_args *args);
136int xfs_attr_list_int(struct xfs_attr_list_context *); 135int xfs_attr_list_int(struct xfs_attr_list_context *);
137 136
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index afdc8911637d..0b687351293f 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -98,7 +98,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
98 * If namespace bits don't match return 0. 98 * If namespace bits don't match return 0.
99 * If all match then return 1. 99 * If all match then return 1.
100 */ 100 */
101STATIC_INLINE int 101STATIC int
102xfs_attr_namesp_match(int arg_flags, int ondisk_flags) 102xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
103{ 103{
104 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); 104 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index eb7b702d0690..6f5ccede63f9 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -98,8 +98,7 @@ xfs_bmdr_to_bmbt(
98 * This code must be in sync with the routines xfs_bmbt_get_startoff, 98 * This code must be in sync with the routines xfs_bmbt_get_startoff,
99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. 99 * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
100 */ 100 */
101 101STATIC void
102STATIC_INLINE void
103__xfs_bmbt_get_all( 102__xfs_bmbt_get_all(
104 __uint64_t l0, 103 __uint64_t l0,
105 __uint64_t l1, 104 __uint64_t l1,
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index f655f7dc334c..4aba67c5f64f 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -79,7 +79,7 @@ extern ktrace_t *xfs_filestreams_trace_buf;
79 * the cache that reference per-ag array elements that have since been 79 * the cache that reference per-ag array elements that have since been
80 * reallocated. 80 * reallocated.
81 */ 81 */
82STATIC_INLINE int 82static inline int
83xfs_filestream_peek_ag( 83xfs_filestream_peek_ag(
84 xfs_mount_t *mp, 84 xfs_mount_t *mp,
85 xfs_agnumber_t agno) 85 xfs_agnumber_t agno)
@@ -87,7 +87,7 @@ xfs_filestream_peek_ag(
87 return atomic_read(&mp->m_perag[agno].pagf_fstrms); 87 return atomic_read(&mp->m_perag[agno].pagf_fstrms);
88} 88}
89 89
90STATIC_INLINE int 90static inline int
91xfs_filestream_get_ag( 91xfs_filestream_get_ag(
92 xfs_mount_t *mp, 92 xfs_mount_t *mp,
93 xfs_agnumber_t agno) 93 xfs_agnumber_t agno)
@@ -95,7 +95,7 @@ xfs_filestream_get_ag(
95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms); 95 return atomic_inc_return(&mp->m_perag[agno].pagf_fstrms);
96} 96}
97 97
98STATIC_INLINE int 98static inline int
99xfs_filestream_put_ag( 99xfs_filestream_put_ag(
100 xfs_mount_t *mp, 100 xfs_mount_t *mp,
101 xfs_agnumber_t agno) 101 xfs_agnumber_t agno)
@@ -122,7 +122,7 @@ int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp);
122 122
123 123
124/* filestreams for the inode? */ 124/* filestreams for the inode? */
125STATIC_INLINE int 125static inline int
126xfs_inode_is_filestream( 126xfs_inode_is_filestream(
127 struct xfs_inode *ip) 127 struct xfs_inode *ip)
128{ 128{
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 2d0b3e1da9e6..36079aa91344 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -201,8 +201,8 @@ xfs_growfs_data_private(
201 * AG freelist header block 201 * AG freelist header block
202 */ 202 */
203 bp = xfs_buf_get(mp->m_ddev_targp, 203 bp = xfs_buf_get(mp->m_ddev_targp,
204 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 204 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
205 XFS_FSS_TO_BB(mp, 1), 0); 205 XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
206 agf = XFS_BUF_TO_AGF(bp); 206 agf = XFS_BUF_TO_AGF(bp);
207 memset(agf, 0, mp->m_sb.sb_sectsize); 207 memset(agf, 0, mp->m_sb.sb_sectsize);
208 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); 208 agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
@@ -233,8 +233,8 @@ xfs_growfs_data_private(
233 * AG inode header block 233 * AG inode header block
234 */ 234 */
235 bp = xfs_buf_get(mp->m_ddev_targp, 235 bp = xfs_buf_get(mp->m_ddev_targp,
236 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), 236 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
237 XFS_FSS_TO_BB(mp, 1), 0); 237 XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
238 agi = XFS_BUF_TO_AGI(bp); 238 agi = XFS_BUF_TO_AGI(bp);
239 memset(agi, 0, mp->m_sb.sb_sectsize); 239 memset(agi, 0, mp->m_sb.sb_sectsize);
240 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); 240 agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
@@ -257,8 +257,9 @@ xfs_growfs_data_private(
257 * BNO btree root block 257 * BNO btree root block
258 */ 258 */
259 bp = xfs_buf_get(mp->m_ddev_targp, 259 bp = xfs_buf_get(mp->m_ddev_targp,
260 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), 260 XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
261 BTOBB(mp->m_sb.sb_blocksize), 0); 261 BTOBB(mp->m_sb.sb_blocksize),
262 XBF_LOCK | XBF_MAPPED);
262 block = XFS_BUF_TO_BLOCK(bp); 263 block = XFS_BUF_TO_BLOCK(bp);
263 memset(block, 0, mp->m_sb.sb_blocksize); 264 memset(block, 0, mp->m_sb.sb_blocksize);
264 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); 265 block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
@@ -278,8 +279,9 @@ xfs_growfs_data_private(
278 * CNT btree root block 279 * CNT btree root block
279 */ 280 */
280 bp = xfs_buf_get(mp->m_ddev_targp, 281 bp = xfs_buf_get(mp->m_ddev_targp,
281 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), 282 XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
282 BTOBB(mp->m_sb.sb_blocksize), 0); 283 BTOBB(mp->m_sb.sb_blocksize),
284 XBF_LOCK | XBF_MAPPED);
283 block = XFS_BUF_TO_BLOCK(bp); 285 block = XFS_BUF_TO_BLOCK(bp);
284 memset(block, 0, mp->m_sb.sb_blocksize); 286 memset(block, 0, mp->m_sb.sb_blocksize);
285 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); 287 block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
@@ -300,8 +302,9 @@ xfs_growfs_data_private(
300 * INO btree root block 302 * INO btree root block
301 */ 303 */
302 bp = xfs_buf_get(mp->m_ddev_targp, 304 bp = xfs_buf_get(mp->m_ddev_targp,
303 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), 305 XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
304 BTOBB(mp->m_sb.sb_blocksize), 0); 306 BTOBB(mp->m_sb.sb_blocksize),
307 XBF_LOCK | XBF_MAPPED);
305 block = XFS_BUF_TO_BLOCK(bp); 308 block = XFS_BUF_TO_BLOCK(bp);
306 memset(block, 0, mp->m_sb.sb_blocksize); 309 memset(block, 0, mp->m_sb.sb_blocksize);
307 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); 310 block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
@@ -611,7 +614,7 @@ xfs_fs_log_dummy(
611 xfs_inode_t *ip; 614 xfs_inode_t *ip;
612 int error; 615 int error;
613 616
614 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); 617 tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
615 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); 618 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
616 if (error) { 619 if (error) {
617 xfs_trans_cancel(tp, 0); 620 xfs_trans_cancel(tp, 0);
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0785797db828..cb907ba69c4c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -425,7 +425,7 @@ xfs_ialloc_ag_alloc(
425 return 0; 425 return 0;
426} 426}
427 427
428STATIC_INLINE xfs_agnumber_t 428STATIC xfs_agnumber_t
429xfs_ialloc_next_ag( 429xfs_ialloc_next_ag(
430 xfs_mount_t *mp) 430 xfs_mount_t *mp)
431{ 431{
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 80e526489be5..073bb4a26b19 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,6 +73,9 @@ xfs_inode_alloc(
73 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
77
78 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
76 79
77 /* initialise the xfs inode */ 80 /* initialise the xfs inode */
78 ip->i_ino = ino; 81 ip->i_ino = ino;
@@ -290,7 +293,7 @@ xfs_iget_cache_miss(
290 struct xfs_inode **ipp, 293 struct xfs_inode **ipp,
291 xfs_daddr_t bno, 294 xfs_daddr_t bno,
292 int flags, 295 int flags,
293 int lock_flags) __releases(pag->pag_ici_lock) 296 int lock_flags)
294{ 297{
295 struct xfs_inode *ip; 298 struct xfs_inode *ip;
296 int error; 299 int error;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67ae5555a30a..7294abce6ef2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -860,8 +860,15 @@ xfs_iomap_write_unwritten(
860 * set up a transaction to convert the range of extents 860 * set up a transaction to convert the range of extents
861 * from unwritten to real. Do allocations in a loop until 861 * from unwritten to real. Do allocations in a loop until
862 * we have covered the range passed in. 862 * we have covered the range passed in.
863 *
864 * Note that we open code the transaction allocation here
865 * to pass KM_NOFS--we can't risk to recursing back into
866 * the filesystem here as we might be asked to write out
867 * the same inode that we complete here and might deadlock
868 * on the iolock.
863 */ 869 */
864 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 870 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
871 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
865 tp->t_flags |= XFS_TRANS_RESERVE; 872 tp->t_flags |= XFS_TRANS_RESERVE;
866 error = xfs_trans_reserve(tp, resblks, 873 error = xfs_trans_reserve(tp, resblks,
867 XFS_WRITE_LOG_RES(mp), 0, 874 XFS_WRITE_LOG_RES(mp), 0,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fb17f8226b09..1ec98ed914d4 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2206,6 +2206,7 @@ xlog_recover_do_buffer_trans(
2206 xfs_daddr_t blkno; 2206 xfs_daddr_t blkno;
2207 int len; 2207 int len;
2208 ushort flags; 2208 ushort flags;
2209 uint buf_flags;
2209 2210
2210 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr; 2211 buf_f = (xfs_buf_log_format_t *)item->ri_buf[0].i_addr;
2211 2212
@@ -2246,12 +2247,11 @@ xlog_recover_do_buffer_trans(
2246 } 2247 }
2247 2248
2248 mp = log->l_mp; 2249 mp = log->l_mp;
2249 if (flags & XFS_BLI_INODE_BUF) { 2250 buf_flags = XFS_BUF_LOCK;
2250 bp = xfs_buf_read_flags(mp->m_ddev_targp, blkno, len, 2251 if (!(flags & XFS_BLI_INODE_BUF))
2251 XFS_BUF_LOCK); 2252 buf_flags |= XFS_BUF_MAPPED;
2252 } else { 2253
2253 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, 0); 2254 bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
2254 }
2255 if (XFS_BUF_ISERROR(bp)) { 2255 if (XFS_BUF_ISERROR(bp)) {
2256 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp, 2256 xfs_ioerror_alert("xlog_recover_do..(read#1)", log->l_mp,
2257 bp, blkno); 2257 bp, blkno);
@@ -2350,8 +2350,8 @@ xlog_recover_do_inode_trans(
2350 goto error; 2350 goto error;
2351 } 2351 }
2352 2352
2353 bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, 2353 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2354 in_f->ilf_len, XFS_BUF_LOCK); 2354 XFS_BUF_LOCK);
2355 if (XFS_BUF_ISERROR(bp)) { 2355 if (XFS_BUF_ISERROR(bp)) {
2356 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, 2356 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
2357 bp, in_f->ilf_blkno); 2357 bp, in_f->ilf_blkno);
@@ -3517,7 +3517,7 @@ xlog_do_recovery_pass(
3517{ 3517{
3518 xlog_rec_header_t *rhead; 3518 xlog_rec_header_t *rhead;
3519 xfs_daddr_t blk_no; 3519 xfs_daddr_t blk_no;
3520 xfs_caddr_t bufaddr, offset; 3520 xfs_caddr_t offset;
3521 xfs_buf_t *hbp, *dbp; 3521 xfs_buf_t *hbp, *dbp;
3522 int error = 0, h_size; 3522 int error = 0, h_size;
3523 int bblks, split_bblks; 3523 int bblks, split_bblks;
@@ -3610,7 +3610,7 @@ xlog_do_recovery_pass(
3610 /* 3610 /*
3611 * Check for header wrapping around physical end-of-log 3611 * Check for header wrapping around physical end-of-log
3612 */ 3612 */
3613 offset = NULL; 3613 offset = XFS_BUF_PTR(hbp);
3614 split_hblks = 0; 3614 split_hblks = 0;
3615 wrapped_hblks = 0; 3615 wrapped_hblks = 0;
3616 if (blk_no + hblks <= log->l_logBBsize) { 3616 if (blk_no + hblks <= log->l_logBBsize) {
@@ -3646,9 +3646,8 @@ xlog_do_recovery_pass(
3646 * - order is important. 3646 * - order is important.
3647 */ 3647 */
3648 wrapped_hblks = hblks - split_hblks; 3648 wrapped_hblks = hblks - split_hblks;
3649 bufaddr = XFS_BUF_PTR(hbp);
3650 error = XFS_BUF_SET_PTR(hbp, 3649 error = XFS_BUF_SET_PTR(hbp,
3651 bufaddr + BBTOB(split_hblks), 3650 offset + BBTOB(split_hblks),
3652 BBTOB(hblks - split_hblks)); 3651 BBTOB(hblks - split_hblks));
3653 if (error) 3652 if (error)
3654 goto bread_err2; 3653 goto bread_err2;
@@ -3658,14 +3657,10 @@ xlog_do_recovery_pass(
3658 if (error) 3657 if (error)
3659 goto bread_err2; 3658 goto bread_err2;
3660 3659
3661 error = XFS_BUF_SET_PTR(hbp, bufaddr, 3660 error = XFS_BUF_SET_PTR(hbp, offset,
3662 BBTOB(hblks)); 3661 BBTOB(hblks));
3663 if (error) 3662 if (error)
3664 goto bread_err2; 3663 goto bread_err2;
3665
3666 if (!offset)
3667 offset = xlog_align(log, 0,
3668 wrapped_hblks, hbp);
3669 } 3664 }
3670 rhead = (xlog_rec_header_t *)offset; 3665 rhead = (xlog_rec_header_t *)offset;
3671 error = xlog_valid_rec_header(log, rhead, 3666 error = xlog_valid_rec_header(log, rhead,
@@ -3685,7 +3680,7 @@ xlog_do_recovery_pass(
3685 } else { 3680 } else {
3686 /* This log record is split across the 3681 /* This log record is split across the
3687 * physical end of log */ 3682 * physical end of log */
3688 offset = NULL; 3683 offset = XFS_BUF_PTR(dbp);
3689 split_bblks = 0; 3684 split_bblks = 0;
3690 if (blk_no != log->l_logBBsize) { 3685 if (blk_no != log->l_logBBsize) {
3691 /* some data is before the physical 3686 /* some data is before the physical
@@ -3714,9 +3709,8 @@ xlog_do_recovery_pass(
3714 * _first_, then the log start (LR header end) 3709 * _first_, then the log start (LR header end)
3715 * - order is important. 3710 * - order is important.
3716 */ 3711 */
3717 bufaddr = XFS_BUF_PTR(dbp);
3718 error = XFS_BUF_SET_PTR(dbp, 3712 error = XFS_BUF_SET_PTR(dbp,
3719 bufaddr + BBTOB(split_bblks), 3713 offset + BBTOB(split_bblks),
3720 BBTOB(bblks - split_bblks)); 3714 BBTOB(bblks - split_bblks));
3721 if (error) 3715 if (error)
3722 goto bread_err2; 3716 goto bread_err2;
@@ -3727,13 +3721,9 @@ xlog_do_recovery_pass(
3727 if (error) 3721 if (error)
3728 goto bread_err2; 3722 goto bread_err2;
3729 3723
3730 error = XFS_BUF_SET_PTR(dbp, bufaddr, h_size); 3724 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3731 if (error) 3725 if (error)
3732 goto bread_err2; 3726 goto bread_err2;
3733
3734 if (!offset)
3735 offset = xlog_align(log, wrapped_hblks,
3736 bblks - split_bblks, dbp);
3737 } 3727 }
3738 xlog_unpack_data(rhead, offset, log); 3728 xlog_unpack_data(rhead, offset, log);
3739 if ((error = xlog_recover_process_data(log, rhash, 3729 if ((error = xlog_recover_process_data(log, rhash,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 8b6c9e807efb..66a888a9ad6f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -583,8 +583,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
583 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 583 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
584 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 584 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
585 585
586 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 586 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
587 BTOBB(sector_size), extra_flags); 587 extra_flags);
588 if (!bp || XFS_BUF_ISERROR(bp)) { 588 if (!bp || XFS_BUF_ISERROR(bp)) {
589 xfs_fs_mount_cmn_err(flags, "SB read failed"); 589 xfs_fs_mount_cmn_err(flags, "SB read failed");
590 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 590 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -624,8 +624,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
624 XFS_BUF_UNMANAGE(bp); 624 XFS_BUF_UNMANAGE(bp);
625 xfs_buf_relse(bp); 625 xfs_buf_relse(bp);
626 sector_size = mp->m_sb.sb_sectsize; 626 sector_size = mp->m_sb.sb_sectsize;
627 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 627 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
628 BTOBB(sector_size), extra_flags); 628 BTOBB(sector_size), extra_flags);
629 if (!bp || XFS_BUF_ISERROR(bp)) { 629 if (!bp || XFS_BUF_ISERROR(bp)) {
630 xfs_fs_mount_cmn_err(flags, "SB re-read failed"); 630 xfs_fs_mount_cmn_err(flags, "SB re-read failed");
631 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 631 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
@@ -1471,7 +1471,7 @@ xfs_log_sbcount(
1471 if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) 1471 if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
1472 return 0; 1472 return 0;
1473 1473
1474 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT); 1474 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
1475 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1475 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1476 XFS_DEFAULT_LOG_COUNT); 1476 XFS_DEFAULT_LOG_COUNT);
1477 if (error) { 1477 if (error) {
@@ -2123,7 +2123,7 @@ xfs_icsb_destroy_counters(
2123 mutex_destroy(&mp->m_icsb_mutex); 2123 mutex_destroy(&mp->m_icsb_mutex);
2124} 2124}
2125 2125
2126STATIC_INLINE void 2126STATIC void
2127xfs_icsb_lock_cntr( 2127xfs_icsb_lock_cntr(
2128 xfs_icsb_cnts_t *icsbp) 2128 xfs_icsb_cnts_t *icsbp)
2129{ 2129{
@@ -2132,7 +2132,7 @@ xfs_icsb_lock_cntr(
2132 } 2132 }
2133} 2133}
2134 2134
2135STATIC_INLINE void 2135STATIC void
2136xfs_icsb_unlock_cntr( 2136xfs_icsb_unlock_cntr(
2137 xfs_icsb_cnts_t *icsbp) 2137 xfs_icsb_cnts_t *icsbp)
2138{ 2138{
@@ -2140,7 +2140,7 @@ xfs_icsb_unlock_cntr(
2140} 2140}
2141 2141
2142 2142
2143STATIC_INLINE void 2143STATIC void
2144xfs_icsb_lock_all_counters( 2144xfs_icsb_lock_all_counters(
2145 xfs_mount_t *mp) 2145 xfs_mount_t *mp)
2146{ 2146{
@@ -2153,7 +2153,7 @@ xfs_icsb_lock_all_counters(
2153 } 2153 }
2154} 2154}
2155 2155
2156STATIC_INLINE void 2156STATIC void
2157xfs_icsb_unlock_all_counters( 2157xfs_icsb_unlock_all_counters(
2158 xfs_mount_t *mp) 2158 xfs_mount_t *mp)
2159{ 2159{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a6c023bc0fb2..1df7e4502967 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -93,6 +93,9 @@ typedef struct xfs_dmops {
93 xfs_send_unmount_t xfs_send_unmount; 93 xfs_send_unmount_t xfs_send_unmount;
94} xfs_dmops_t; 94} xfs_dmops_t;
95 95
96#define XFS_DMAPI_UNMOUNT_FLAGS(mp) \
97 (((mp)->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? 0 : DM_FLAGS_UNWANTED)
98
96#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ 99#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
97 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) 100 (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
98#define XFS_SEND_MMAP(mp, vma,fl) \ 101#define XFS_SEND_MMAP(mp, vma,fl) \
@@ -101,12 +104,24 @@ typedef struct xfs_dmops {
101 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) 104 (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
102#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ 105#define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
103 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) 106 (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
104#define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
105 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
106#define XFS_SEND_MOUNT(mp,right,path,name) \ 107#define XFS_SEND_MOUNT(mp,right,path,name) \
107 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) 108 (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
108#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ 109#define XFS_SEND_PREUNMOUNT(mp) \
109 (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) 110do { \
111 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
112 (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT, mp, \
113 (mp)->m_rootip, DM_RIGHT_NULL, \
114 (mp)->m_rootip, DM_RIGHT_NULL, \
115 NULL, NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
116 } \
117} while (0)
118#define XFS_SEND_UNMOUNT(mp) \
119do { \
120 if (mp->m_flags & XFS_MOUNT_DMAPI) { \
121 (*(mp)->m_dm_ops->xfs_send_unmount)(mp, (mp)->m_rootip, \
122 DM_RIGHT_NULL, 0, 0, XFS_DMAPI_UNMOUNT_FLAGS(mp)); \
123 } \
124} while (0)
110 125
111 126
112#ifdef HAVE_PERCPU_SB 127#ifdef HAVE_PERCPU_SB
@@ -387,13 +402,13 @@ xfs_put_perag(struct xfs_mount *mp, xfs_perag_t *pag)
387 * Per-cpu superblock locking functions 402 * Per-cpu superblock locking functions
388 */ 403 */
389#ifdef HAVE_PERCPU_SB 404#ifdef HAVE_PERCPU_SB
390STATIC_INLINE void 405static inline void
391xfs_icsb_lock(xfs_mount_t *mp) 406xfs_icsb_lock(xfs_mount_t *mp)
392{ 407{
393 mutex_lock(&mp->m_icsb_mutex); 408 mutex_lock(&mp->m_icsb_mutex);
394} 409}
395 410
396STATIC_INLINE void 411static inline void
397xfs_icsb_unlock(xfs_mount_t *mp) 412xfs_icsb_unlock(xfs_mount_t *mp)
398{ 413{
399 mutex_unlock(&mp->m_icsb_mutex); 414 mutex_unlock(&mp->m_icsb_mutex);
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 3f816ad7ff19..4c199d18f850 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -277,10 +277,10 @@ xfs_read_buf(
277 xfs_buf_t *bp; 277 xfs_buf_t *bp;
278 int error; 278 int error;
279 279
280 if (flags) 280 if (!flags)
281 bp = xfs_buf_read_flags(target, blkno, len, flags); 281 flags = XBF_LOCK | XBF_MAPPED;
282 else 282
283 bp = xfs_buf_read(target, blkno, len, flags); 283 bp = xfs_buf_read(target, blkno, len, flags);
284 if (!bp) 284 if (!bp)
285 return XFS_ERROR(EIO); 285 return XFS_ERROR(EIO);
286 error = XFS_BUF_GETERROR(bp); 286 error = XFS_BUF_GETERROR(bp);
@@ -336,3 +336,25 @@ xfs_bwrite(
336 } 336 }
337 return (error); 337 return (error);
338} 338}
339
340/*
341 * helper function to extract extent size hint from inode
342 */
343xfs_extlen_t
344xfs_get_extsz_hint(
345 struct xfs_inode *ip)
346{
347 xfs_extlen_t extsz;
348
349 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
350 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
351 ? ip->i_d.di_extsize
352 : ip->i_mount->m_sb.sb_rextsize;
353 ASSERT(extsz);
354 } else {
355 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
356 ? ip->i_d.di_extsize : 0;
357 }
358
359 return extsz;
360}
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index f5e4874c37d8..571f2174435c 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -37,34 +37,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
37} 37}
38 38
39/* 39/*
40 * Flags for xfs_free_eofblocks
41 */
42#define XFS_FREE_EOF_LOCK (1<<0)
43#define XFS_FREE_EOF_NOLOCK (1<<1)
44
45
46/*
47 * helper function to extract extent size hint from inode
48 */
49STATIC_INLINE xfs_extlen_t
50xfs_get_extsz_hint(
51 xfs_inode_t *ip)
52{
53 xfs_extlen_t extsz;
54
55 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
56 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
57 ? ip->i_d.di_extsize
58 : ip->i_mount->m_sb.sb_rextsize;
59 ASSERT(extsz);
60 } else {
61 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
62 ? ip->i_d.di_extsize : 0;
63 }
64 return extsz;
65}
66
67/*
68 * Prototypes for functions in xfs_rw.c. 40 * Prototypes for functions in xfs_rw.c.
69 */ 41 */
70extern int xfs_write_clear_setuid(struct xfs_inode *ip); 42extern int xfs_write_clear_setuid(struct xfs_inode *ip);
@@ -76,5 +48,6 @@ extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
76 struct xfs_buf **bpp); 48 struct xfs_buf **bpp);
77extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp, 49extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
78 xfs_buf_t *bp, xfs_daddr_t blkno); 50 xfs_buf_t *bp, xfs_daddr_t blkno);
51extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
79 52
80#endif /* __XFS_RW_H__ */ 53#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 66b849358e62..237badcbac3b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -236,19 +236,20 @@ xfs_trans_alloc(
236 uint type) 236 uint type)
237{ 237{
238 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 238 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
239 return _xfs_trans_alloc(mp, type); 239 return _xfs_trans_alloc(mp, type, KM_SLEEP);
240} 240}
241 241
242xfs_trans_t * 242xfs_trans_t *
243_xfs_trans_alloc( 243_xfs_trans_alloc(
244 xfs_mount_t *mp, 244 xfs_mount_t *mp,
245 uint type) 245 uint type,
246 uint memflags)
246{ 247{
247 xfs_trans_t *tp; 248 xfs_trans_t *tp;
248 249
249 atomic_inc(&mp->m_active_trans); 250 atomic_inc(&mp->m_active_trans);
250 251
251 tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); 252 tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
252 tp->t_magic = XFS_TRANS_MAGIC; 253 tp->t_magic = XFS_TRANS_MAGIC;
253 tp->t_type = type; 254 tp->t_type = type;
254 tp->t_mountp = mp; 255 tp->t_mountp = mp;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index ed47fc77759c..a0574f593f52 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -924,7 +924,7 @@ typedef struct xfs_trans {
924 * XFS transaction mechanism exported interfaces. 924 * XFS transaction mechanism exported interfaces.
925 */ 925 */
926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 926xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); 927xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
928xfs_trans_t *xfs_trans_dup(xfs_trans_t *); 928xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
929int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, 929int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
930 uint, uint); 930 uint, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 218829e6a152..03a1f701fea8 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -79,11 +79,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
79 /* 79 /*
80 * Default to a normal get_buf() call if the tp is NULL. 80 * Default to a normal get_buf() call if the tp is NULL.
81 */ 81 */
82 if (tp == NULL) { 82 if (tp == NULL)
83 bp = xfs_buf_get_flags(target_dev, blkno, len, 83 return xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
84 flags | BUF_BUSY);
85 return(bp);
86 }
87 84
88 /* 85 /*
89 * If we find the buffer in the cache with this transaction 86 * If we find the buffer in the cache with this transaction
@@ -129,7 +126,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
129 * easily deadlock with our current transaction as well as cause 126 * easily deadlock with our current transaction as well as cause
130 * us to run out of stack space. 127 * us to run out of stack space.
131 */ 128 */
132 bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY); 129 bp = xfs_buf_get(target_dev, blkno, len, flags | BUF_BUSY);
133 if (bp == NULL) { 130 if (bp == NULL) {
134 return NULL; 131 return NULL;
135 } 132 }
@@ -302,7 +299,7 @@ xfs_trans_read_buf(
302 * Default to a normal get_buf() call if the tp is NULL. 299 * Default to a normal get_buf() call if the tp is NULL.
303 */ 300 */
304 if (tp == NULL) { 301 if (tp == NULL) {
305 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 302 bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
306 if (!bp) 303 if (!bp)
307 return (flags & XFS_BUF_TRYLOCK) ? 304 return (flags & XFS_BUF_TRYLOCK) ?
308 EAGAIN : XFS_ERROR(ENOMEM); 305 EAGAIN : XFS_ERROR(ENOMEM);
@@ -398,7 +395,7 @@ xfs_trans_read_buf(
398 * easily deadlock with our current transaction as well as cause 395 * easily deadlock with our current transaction as well as cause
399 * us to run out of stack space. 396 * us to run out of stack space.
400 */ 397 */
401 bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); 398 bp = xfs_buf_read(target, blkno, len, flags | BUF_BUSY);
402 if (bp == NULL) { 399 if (bp == NULL) {
403 *bpp = NULL; 400 *bpp = NULL;
404 return 0; 401 return 0;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b572f7e840e0..578f3f59b789 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,9 +538,8 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt), 541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED | 542 XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
543 XBF_DONT_BLOCK);
544 error = XFS_BUF_GETERROR(bp); 543 error = XFS_BUF_GETERROR(bp);
545 if (error) { 544 if (error) {
546 xfs_ioerror_alert("xfs_readlink", 545 xfs_ioerror_alert("xfs_readlink",
@@ -709,6 +708,11 @@ xfs_fsync(
709} 708}
710 709
711/* 710/*
711 * Flags for xfs_free_eofblocks
712 */
713#define XFS_FREE_EOF_TRYLOCK (1<<0)
714
715/*
712 * This is called by xfs_inactive to free any blocks beyond eof 716 * This is called by xfs_inactive to free any blocks beyond eof
713 * when the link count isn't zero and by xfs_dm_punch_hole() when 717 * when the link count isn't zero and by xfs_dm_punch_hole() when
714 * punching a hole to EOF. 718 * punching a hole to EOF.
@@ -726,7 +730,6 @@ xfs_free_eofblocks(
726 xfs_filblks_t map_len; 730 xfs_filblks_t map_len;
727 int nimaps; 731 int nimaps;
728 xfs_bmbt_irec_t imap; 732 xfs_bmbt_irec_t imap;
729 int use_iolock = (flags & XFS_FREE_EOF_LOCK);
730 733
731 /* 734 /*
732 * Figure out if there are any blocks beyond the end 735 * Figure out if there are any blocks beyond the end
@@ -768,14 +771,19 @@ xfs_free_eofblocks(
768 * cache and we can't 771 * cache and we can't
769 * do that within a transaction. 772 * do that within a transaction.
770 */ 773 */
771 if (use_iolock) 774 if (flags & XFS_FREE_EOF_TRYLOCK) {
775 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
776 xfs_trans_cancel(tp, 0);
777 return 0;
778 }
779 } else {
772 xfs_ilock(ip, XFS_IOLOCK_EXCL); 780 xfs_ilock(ip, XFS_IOLOCK_EXCL);
781 }
773 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 782 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
774 ip->i_size); 783 ip->i_size);
775 if (error) { 784 if (error) {
776 xfs_trans_cancel(tp, 0); 785 xfs_trans_cancel(tp, 0);
777 if (use_iolock) 786 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
778 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
779 return error; 787 return error;
780 } 788 }
781 789
@@ -812,8 +820,7 @@ xfs_free_eofblocks(
812 error = xfs_trans_commit(tp, 820 error = xfs_trans_commit(tp,
813 XFS_TRANS_RELEASE_LOG_RES); 821 XFS_TRANS_RELEASE_LOG_RES);
814 } 822 }
815 xfs_iunlock(ip, (use_iolock ? (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL) 823 xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
816 : XFS_ILOCK_EXCL));
817 } 824 }
818 return error; 825 return error;
819} 826}
@@ -1113,7 +1120,17 @@ xfs_release(
1113 (ip->i_df.if_flags & XFS_IFEXTENTS)) && 1120 (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
1114 (!(ip->i_d.di_flags & 1121 (!(ip->i_d.di_flags &
1115 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { 1122 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
1116 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 1123
1124 /*
1125 * If we can't get the iolock just skip truncating
1126 * the blocks past EOF because we could deadlock
1127 * with the mmap_sem otherwise. We'll get another
1128 * chance to drop them once the last reference to
1129 * the inode is dropped, so we'll never leak blocks
1130 * permanently.
1131 */
1132 error = xfs_free_eofblocks(mp, ip,
1133 XFS_FREE_EOF_TRYLOCK);
1117 if (error) 1134 if (error)
1118 return error; 1135 return error;
1119 } 1136 }
@@ -1184,7 +1201,7 @@ xfs_inactive(
1184 (!(ip->i_d.di_flags & 1201 (!(ip->i_d.di_flags &
1185 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || 1202 (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
1186 (ip->i_delayed_blks != 0)))) { 1203 (ip->i_delayed_blks != 0)))) {
1187 error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_LOCK); 1204 error = xfs_free_eofblocks(mp, ip, 0);
1188 if (error) 1205 if (error)
1189 return VN_INACTIVE_CACHE; 1206 return VN_INACTIVE_CACHE;
1190 } 1207 }
@@ -2456,46 +2473,6 @@ xfs_set_dmattrs(
2456 return error; 2473 return error;
2457} 2474}
2458 2475
2459int
2460xfs_reclaim(
2461 xfs_inode_t *ip)
2462{
2463
2464 xfs_itrace_entry(ip);
2465
2466 ASSERT(!VN_MAPPED(VFS_I(ip)));
2467
2468 /* bad inode, get out here ASAP */
2469 if (is_bad_inode(VFS_I(ip))) {
2470 xfs_ireclaim(ip);
2471 return 0;
2472 }
2473
2474 xfs_ioend_wait(ip);
2475
2476 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
2477
2478 /*
2479 * If we have nothing to flush with this inode then complete the
2480 * teardown now, otherwise break the link between the xfs inode and the
2481 * linux inode and clean up the xfs inode later. This avoids flushing
2482 * the inode to disk during the delete operation itself.
2483 *
2484 * When breaking the link, we need to set the XFS_IRECLAIMABLE flag
2485 * first to ensure that xfs_iunpin() will never see an xfs inode
2486 * that has a linux inode being reclaimed. Synchronisation is provided
2487 * by the i_flags_lock.
2488 */
2489 if (!ip->i_update_core && (ip->i_itemp == NULL)) {
2490 xfs_ilock(ip, XFS_ILOCK_EXCL);
2491 xfs_iflock(ip);
2492 xfs_iflags_set(ip, XFS_IRECLAIMABLE);
2493 return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
2494 }
2495 xfs_inode_set_reclaim_tag(ip);
2496 return 0;
2497}
2498
2499/* 2476/*
2500 * xfs_alloc_file_space() 2477 * xfs_alloc_file_space()
2501 * This routine allocates disk space for the given file. 2478 * This routine allocates disk space for the given file.
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index a9e102de71a1..167a467403a5 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -38,7 +38,6 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
38 const char *target_path, mode_t mode, struct xfs_inode **ipp, 38 const char *target_path, mode_t mode, struct xfs_inode **ipp,
39 cred_t *credp); 39 cred_t *credp);
40int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 40int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
41int xfs_reclaim(struct xfs_inode *ip);
42int xfs_change_file_space(struct xfs_inode *ip, int cmd, 41int xfs_change_file_space(struct xfs_inode *ip, int cmd,
43 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 42 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
44int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, 43int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,