Diffstat (limited to 'fs')
-rw-r--r--  fs/adfs/super.c | 1
-rw-r--r--  fs/afs/dir.c | 2
-rw-r--r--  fs/afs/flock.c | 1
-rw-r--r--  fs/afs/mntpt.c | 1
-rw-r--r--  fs/afs/super.c | 1
-rw-r--r--  fs/aio.c | 24
-rw-r--r--  fs/autofs4/dev-ioctl.c | 1
-rw-r--r--  fs/bfs/dir.c | 1
-rw-r--r--  fs/bfs/file.c | 1
-rw-r--r--  fs/binfmt_elf.c | 9
-rw-r--r--  fs/bio-integrity.c | 170
-rw-r--r--  fs/bio.c | 33
-rw-r--r--  fs/btrfs/acl.c | 44
-rw-r--r--  fs/btrfs/async-thread.c | 2
-rw-r--r--  fs/btrfs/btrfs_inode.h | 4
-rw-r--r--  fs/btrfs/compression.c | 1
-rw-r--r--  fs/btrfs/ctree.h | 5
-rw-r--r--  fs/btrfs/extent-tree.c | 566
-rw-r--r--  fs/btrfs/file.c | 6
-rw-r--r--  fs/btrfs/inode.c | 44
-rw-r--r--  fs/btrfs/ioctl.c | 7
-rw-r--r--  fs/btrfs/relocation.c | 5
-rw-r--r--  fs/btrfs/super.c | 1
-rw-r--r--  fs/btrfs/transaction.c | 4
-rw-r--r--  fs/char_dev.c | 1
-rw-r--r--  fs/cifs/CHANGES | 6
-rw-r--r--  fs/cifs/asn1.c | 55
-rw-r--r--  fs/cifs/cifs_spnego.c | 9
-rw-r--r--  fs/cifs/cifsacl.c | 26
-rw-r--r--  fs/cifs/cifsfs.c | 158
-rw-r--r--  fs/cifs/cifsfs.h | 15
-rw-r--r--  fs/cifs/cifsglob.h | 30
-rw-r--r--  fs/cifs/cifspdu.h | 14
-rw-r--r--  fs/cifs/cifsproto.h | 23
-rw-r--r--  fs/cifs/cifssmb.c | 147
-rw-r--r--  fs/cifs/connect.c | 39
-rw-r--r--  fs/cifs/dir.c | 52
-rw-r--r--  fs/cifs/dns_resolve.c | 25
-rw-r--r--  fs/cifs/file.c | 40
-rw-r--r--  fs/cifs/inode.c | 777
-rw-r--r--  fs/cifs/link.c | 3
-rw-r--r--  fs/cifs/netmisc.c | 56
-rw-r--r--  fs/cifs/readdir.c | 505
-rw-r--r--  fs/cifs/sess.c | 2
-rw-r--r--  fs/cifs/xattr.c | 12
-rw-r--r--  fs/compat.c | 5
-rw-r--r--  fs/compat_ioctl.c | 49
-rw-r--r--  fs/devpts/inode.c | 10
-rw-r--r--  fs/dlm/lock.c | 2
-rw-r--r--  fs/dlm/lowcomms.c | 4
-rw-r--r--  fs/dlm/plock.c | 17
-rw-r--r--  fs/eventfd.c | 122
-rw-r--r--  fs/exec.c | 4
-rw-r--r--  fs/exofs/common.h | 4
-rw-r--r--  fs/exofs/dir.c | 4
-rw-r--r--  fs/exofs/exofs.h | 7
-rw-r--r--  fs/exofs/file.c | 21
-rw-r--r--  fs/exofs/inode.c | 7
-rw-r--r--  fs/exofs/namei.c | 4
-rw-r--r--  fs/exofs/osd.c | 4
-rw-r--r--  fs/exofs/super.c | 7
-rw-r--r--  fs/exofs/symlink.c | 4
-rw-r--r--  fs/ext2/acl.c | 81
-rw-r--r--  fs/ext2/acl.h | 4
-rw-r--r--  fs/ext2/ext2.h | 4
-rw-r--r--  fs/ext2/inode.c | 4
-rw-r--r--  fs/ext2/ioctl.c | 1
-rw-r--r--  fs/ext2/namei.c | 12
-rw-r--r--  fs/ext2/super.c | 16
-rw-r--r--  fs/ext3/acl.c | 85
-rw-r--r--  fs/ext3/acl.h | 4
-rw-r--r--  fs/ext3/inode.c | 4
-rw-r--r--  fs/ext3/super.c | 16
-rw-r--r--  fs/ext4/acl.c | 67
-rw-r--r--  fs/ext4/acl.h | 4
-rw-r--r--  fs/ext4/ext4.h | 18
-rw-r--r--  fs/ext4/ext4_jbd2.c | 4
-rw-r--r--  fs/ext4/ext4_jbd2.h | 6
-rw-r--r--  fs/ext4/extents.c | 1
-rw-r--r--  fs/ext4/ialloc.c | 2
-rw-r--r--  fs/ext4/inode.c | 388
-rw-r--r--  fs/ext4/ioctl.c | 21
-rw-r--r--  fs/ext4/mballoc.c | 50
-rw-r--r--  fs/ext4/super.c | 16
-rw-r--r--  fs/fat/dir.c | 1
-rw-r--r--  fs/fat/file.c | 2
-rw-r--r--  fs/fat/namei_msdos.c | 1
-rw-r--r--  fs/fat/namei_vfat.c | 1
-rw-r--r--  fs/fcntl.c | 1
-rw-r--r--  fs/freevxfs/vxfs_super.c | 1
-rw-r--r--  fs/fs-writeback.c | 100
-rw-r--r--  fs/fuse/dev.c | 91
-rw-r--r--  fs/fuse/dir.c | 57
-rw-r--r--  fs/fuse/file.c | 2
-rw-r--r--  fs/fuse/fuse_i.h | 27
-rw-r--r--  fs/fuse/inode.c | 68
-rw-r--r--  fs/gfs2/trace_gfs2.h | 8
-rw-r--r--  fs/hfs/super.c | 1
-rw-r--r--  fs/hfsplus/super.c | 1
-rw-r--r--  fs/hostfs/hostfs_kern.c | 1
-rw-r--r--  fs/hpfs/dir.c | 1
-rw-r--r--  fs/hpfs/file.c | 1
-rw-r--r--  fs/hpfs/hpfs_fn.h | 1
-rw-r--r--  fs/hpfs/inode.c | 1
-rw-r--r--  fs/hpfs/namei.c | 1
-rw-r--r--  fs/inode.c | 27
-rw-r--r--  fs/ioctl.c | 35
-rw-r--r--  fs/isofs/inode.c | 4
-rw-r--r--  fs/jbd2/journal.c | 31
-rw-r--r--  fs/jbd2/transaction.c | 68
-rw-r--r--  fs/jffs2/acl.c | 87
-rw-r--r--  fs/jffs2/acl.h | 4
-rw-r--r--  fs/jffs2/erase.c | 10
-rw-r--r--  fs/jffs2/jffs2_fs_i.h | 4
-rw-r--r--  fs/jffs2/os-linux.h | 4
-rw-r--r--  fs/jffs2/readinode.c | 1
-rw-r--r--  fs/jffs2/scan.c | 4
-rw-r--r--  fs/jffs2/super.c | 1
-rw-r--r--  fs/jfs/acl.c | 47
-rw-r--r--  fs/jfs/jfs_incore.h | 6
-rw-r--r--  fs/jfs/super.c | 16
-rw-r--r--  fs/jfs/xattr.c | 10
-rw-r--r--  fs/lockd/clntproc.c | 1
-rw-r--r--  fs/lockd/svc4proc.c | 1
-rw-r--r--  fs/lockd/svcproc.c | 1
-rw-r--r--  fs/namei.c | 18
-rw-r--r--  fs/namespace.c | 38
-rw-r--r--  fs/nfs/delegation.c | 1
-rw-r--r--  fs/nfs/dir.c | 1
-rw-r--r--  fs/nfs/file.c | 1
-rw-r--r--  fs/nfs/getroot.c | 1
-rw-r--r--  fs/nfs/inode.c | 1
-rw-r--r--  fs/nfs/nfs4proc.c | 1
-rw-r--r--  fs/nfs/read.c | 1
-rw-r--r--  fs/nfs/write.c | 8
-rw-r--r--  fs/nfsd/nfsctl.c | 1
-rw-r--r--  fs/nfsd/nfssvc.c | 1
-rw-r--r--  fs/nfsd/vfs.c | 3
-rw-r--r--  fs/nilfs2/bmap.c | 5
-rw-r--r--  fs/nilfs2/cpfile.c | 5
-rw-r--r--  fs/nilfs2/dat.c | 9
-rw-r--r--  fs/nilfs2/dir.c | 1
-rw-r--r--  fs/nilfs2/inode.c | 8
-rw-r--r--  fs/nilfs2/nilfs.h | 4
-rw-r--r--  fs/nilfs2/segment.c | 30
-rw-r--r--  fs/nilfs2/super.c | 10
-rw-r--r--  fs/notify/inotify/inotify_user.c | 3
-rw-r--r--  fs/ocfs2/dlmglue.c | 123
-rw-r--r--  fs/ocfs2/dlmglue.h | 24
-rw-r--r--  fs/ocfs2/file.c | 6
-rw-r--r--  fs/ocfs2/inode.c | 11
-rw-r--r--  fs/ocfs2/ioctl.c | 1
-rw-r--r--  fs/ocfs2/journal.c | 43
-rw-r--r--  fs/ocfs2/journal.h | 2
-rw-r--r--  fs/ocfs2/namei.c | 15
-rw-r--r--  fs/ocfs2/ocfs2.h | 10
-rw-r--r--  fs/ocfs2/stack_o2cb.c | 11
-rw-r--r--  fs/ocfs2/stack_user.c | 8
-rw-r--r--  fs/ocfs2/stackglue.c | 13
-rw-r--r--  fs/ocfs2/stackglue.h | 6
-rw-r--r--  fs/ocfs2/suballoc.c | 28
-rw-r--r--  fs/ocfs2/super.c | 69
-rw-r--r--  fs/ocfs2/sysfile.c | 19
-rw-r--r--  fs/open.c | 58
-rw-r--r--  fs/partitions/check.c | 2
-rw-r--r--  fs/quota/dquot.c | 4
-rw-r--r--  fs/reiserfs/inode.c | 4
-rw-r--r--  fs/reiserfs/journal.c | 2
-rw-r--r--  fs/reiserfs/resize.c | 1
-rw-r--r--  fs/reiserfs/super.c | 25
-rw-r--r--  fs/reiserfs/xattr.c | 1
-rw-r--r--  fs/reiserfs/xattr_acl.c | 58
-rw-r--r--  fs/squashfs/super.c | 1
-rw-r--r--  fs/super.c | 9
-rw-r--r--  fs/sync.c | 5
-rw-r--r--  fs/sysfs/bin.c | 1
-rw-r--r--  fs/ubifs/io.c | 57
-rw-r--r--  fs/ubifs/ioctl.c | 1
-rw-r--r--  fs/ubifs/recovery.c | 57
-rw-r--r--  fs/ubifs/replay.c | 9
-rw-r--r--  fs/ubifs/scan.c | 20
-rw-r--r--  fs/ubifs/super.c | 14
-rw-r--r--  fs/ubifs/ubifs.h | 11
-rw-r--r--  fs/ubifs/xattr.c | 2
-rw-r--r--  fs/udf/balloc.c | 9
-rw-r--r--  fs/udf/lowlevel.c | 7
-rw-r--r--  fs/xfs/linux-2.6/kmem.c | 4
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 73
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 1
-rw-r--r--  fs/xfs/xfs_acl.h | 4
-rw-r--r--  fs/xfs/xfs_iget.c | 2
-rw-r--r--  fs/xfs/xfs_inode.h | 5
193 files changed, 2950 insertions, 2781 deletions
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index aad92f0a1048..6910a98bd73c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -13,6 +13,7 @@
13#include <linux/parser.h> 13#include <linux/parser.h>
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/smp_lock.h>
16#include <linux/statfs.h> 17#include <linux/statfs.h>
17#include "adfs.h" 18#include "adfs.h"
18#include "dir_f.h" 19#include "dir_f.h"
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9bd757774c9e..88067f36e5e7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -564,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
564static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 564static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
565{ 565{
566 struct afs_vnode *vnode, *dir; 566 struct afs_vnode *vnode, *dir;
567 struct afs_fid fid; 567 struct afs_fid uninitialized_var(fid);
568 struct dentry *parent; 568 struct dentry *parent;
569 struct key *key; 569 struct key *key;
570 void *dir_version; 570 void *dir_version;
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 210acafe4a9b..3ff8bdd18fb3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -432,7 +432,6 @@ vfs_rejected_lock:
432 list_del_init(&fl->fl_u.afs.link); 432 list_del_init(&fl->fl_u.afs.link);
433 if (list_empty(&vnode->granted_locks)) 433 if (list_empty(&vnode->granted_locks))
434 afs_defer_unlock(vnode, key); 434 afs_defer_unlock(vnode, key);
435 spin_unlock(&vnode->lock);
436 goto abort_attempt; 435 goto abort_attempt;
437} 436}
438 437
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index c52be53f6946..5ffb570cd3a8 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -17,7 +17,6 @@
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/namei.h> 19#include <linux/namei.h>
20#include <linux/mnt_namespace.h>
21#include "internal.h" 20#include "internal.h"
22 21
23 22
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ad0514d0115f..e1ea1c240b6a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -18,6 +18,7 @@
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/smp_lock.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22#include <linux/pagemap.h> 23#include <linux/pagemap.h>
23#include <linux/parser.h> 24#include <linux/parser.h>
diff --git a/fs/aio.c b/fs/aio.c
index 76da12537956..d065b2c3273e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
485{ 485{
486 assert_spin_locked(&ctx->ctx_lock); 486 assert_spin_locked(&ctx->ctx_lock);
487 487
488 if (req->ki_eventfd != NULL)
489 eventfd_ctx_put(req->ki_eventfd);
488 if (req->ki_dtor) 490 if (req->ki_dtor)
489 req->ki_dtor(req); 491 req->ki_dtor(req);
490 if (req->ki_iovec != &req->ki_inline_vec) 492 if (req->ki_iovec != &req->ki_inline_vec)
@@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data)
509 /* Complete the fput(s) */ 511 /* Complete the fput(s) */
510 if (req->ki_filp != NULL) 512 if (req->ki_filp != NULL)
511 __fput(req->ki_filp); 513 __fput(req->ki_filp);
512 if (req->ki_eventfd != NULL)
513 __fput(req->ki_eventfd);
514 514
515 /* Link the iocb into the context's free list */ 515 /* Link the iocb into the context's free list */
516 spin_lock_irq(&ctx->ctx_lock); 516 spin_lock_irq(&ctx->ctx_lock);
@@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data)
528 */ 528 */
529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) 529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
530{ 530{
531 int schedule_putreq = 0;
532
533 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", 531 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
534 req, atomic_long_read(&req->ki_filp->f_count)); 532 req, atomic_long_read(&req->ki_filp->f_count));
535 533
@@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
549 * we would not be holding the last reference to the file*, so 547 * we would not be holding the last reference to the file*, so
550 * this function will be executed w/out any aio kthread wakeup. 548 * this function will be executed w/out any aio kthread wakeup.
551 */ 549 */
552 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) 550 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
553 schedule_putreq++;
554 else
555 req->ki_filp = NULL;
556 if (req->ki_eventfd != NULL) {
557 if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
558 schedule_putreq++;
559 else
560 req->ki_eventfd = NULL;
561 }
562 if (unlikely(schedule_putreq)) {
563 get_ioctx(ctx); 551 get_ioctx(ctx);
564 spin_lock(&fput_lock); 552 spin_lock(&fput_lock);
565 list_add(&req->ki_list, &fput_head); 553 list_add(&req->ki_list, &fput_head);
566 spin_unlock(&fput_lock); 554 spin_unlock(&fput_lock);
567 queue_work(aio_wq, &fput_work); 555 queue_work(aio_wq, &fput_work);
568 } else 556 } else {
557 req->ki_filp = NULL;
569 really_put_req(ctx, req); 558 really_put_req(ctx, req);
559 }
570 return 1; 560 return 1;
571} 561}
572 562
@@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1622 * an eventfd() fd, and will be signaled for each completed 1612 * an eventfd() fd, and will be signaled for each completed
1623 * event using the eventfd_signal() function. 1613 * event using the eventfd_signal() function.
1624 */ 1614 */
1625 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1615 req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
1626 if (IS_ERR(req->ki_eventfd)) { 1616 if (IS_ERR(req->ki_eventfd)) {
1627 ret = PTR_ERR(req->ki_eventfd); 1617 ret = PTR_ERR(req->ki_eventfd);
1628 req->ki_eventfd = NULL; 1618 req->ki_eventfd = NULL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index f3da2eb51f56..00bf8fcb245f 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -19,7 +19,6 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/compat.h> 20#include <linux/compat.h>
21#include <linux/syscalls.h> 21#include <linux/syscalls.h>
22#include <linux/smp_lock.h>
23#include <linux/magic.h> 22#include <linux/magic.h>
24#include <linux/dcache.h> 23#include <linux/dcache.h>
25#include <linux/uaccess.h> 24#include <linux/uaccess.h>
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 54bd07d44e68..1e41aadb1068 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -8,7 +8,6 @@
8#include <linux/time.h> 8#include <linux/time.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/smp_lock.h>
12#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
13#include <linux/sched.h> 12#include <linux/sched.h>
14#include "bfs.h" 13#include "bfs.h"
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 6a021265f018..88b9a3ff44e4 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/smp_lock.h>
15#include "bfs.h" 14#include "bfs.h"
16 15
17#undef DEBUG 16#undef DEBUG
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9fa212b014a5..b7c1603cd4bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1522 info->thread = NULL; 1522 info->thread = NULL;
1523 1523
1524 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); 1524 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1525 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1526
1527 if (psinfo == NULL) 1525 if (psinfo == NULL)
1528 return 0; 1526 return 0;
1529 1527
1528 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1529
1530 /* 1530 /*
1531 * Figure out how many notes we're going to need for each thread. 1531 * Figure out how many notes we're going to need for each thread.
1532 */ 1532 */
@@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
1929 elf = kmalloc(sizeof(*elf), GFP_KERNEL); 1929 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1930 if (!elf) 1930 if (!elf)
1931 goto out; 1931 goto out;
1932 1932 /*
1933 * The number of segs are recored into ELF header as 16bit value.
1934 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1935 */
1933 segs = current->mm->map_count; 1936 segs = current->mm->map_count;
1934#ifdef ELF_CORE_EXTRA_PHDRS 1937#ifdef ELF_CORE_EXTRA_PHDRS
1935 segs += ELF_CORE_EXTRA_PHDRS; 1938 segs += ELF_CORE_EXTRA_PHDRS;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 31c46a241bac..49a34e7f7306 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * bio-integrity.c - bio data integrity extensions 2 * bio-integrity.c - bio data integrity extensions
3 * 3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation 4 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com> 5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
@@ -25,63 +25,121 @@
25#include <linux/bio.h> 25#include <linux/bio.h>
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27 27
28static struct kmem_cache *bio_integrity_slab __read_mostly; 28struct integrity_slab {
29static mempool_t *bio_integrity_pool; 29 struct kmem_cache *slab;
30static struct bio_set *integrity_bio_set; 30 unsigned short nr_vecs;
31 char name[8];
32};
33
34#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
35struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
36 IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
37};
38#undef IS
39
31static struct workqueue_struct *kintegrityd_wq; 40static struct workqueue_struct *kintegrityd_wq;
32 41
42static inline unsigned int vecs_to_idx(unsigned int nr)
43{
44 switch (nr) {
45 case 1:
46 return 0;
47 case 2 ... 4:
48 return 1;
49 case 5 ... 16:
50 return 2;
51 case 17 ... 64:
52 return 3;
53 case 65 ... 128:
54 return 4;
55 case 129 ... BIO_MAX_PAGES:
56 return 5;
57 default:
58 BUG();
59 }
60}
61
62static inline int use_bip_pool(unsigned int idx)
63{
64 if (idx == BIOVEC_NR_POOLS)
65 return 1;
66
67 return 0;
68}
69
33/** 70/**
34 * bio_integrity_alloc - Allocate integrity payload and attach it to bio 71 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
35 * @bio: bio to attach integrity metadata to 72 * @bio: bio to attach integrity metadata to
36 * @gfp_mask: Memory allocation mask 73 * @gfp_mask: Memory allocation mask
37 * @nr_vecs: Number of integrity metadata scatter-gather elements 74 * @nr_vecs: Number of integrity metadata scatter-gather elements
75 * @bs: bio_set to allocate from
38 * 76 *
39 * Description: This function prepares a bio for attaching integrity 77 * Description: This function prepares a bio for attaching integrity
40 * metadata. nr_vecs specifies the maximum number of pages containing 78 * metadata. nr_vecs specifies the maximum number of pages containing
41 * integrity metadata that can be attached. 79 * integrity metadata that can be attached.
42 */ 80 */
43struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, 81struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
44 gfp_t gfp_mask, 82 gfp_t gfp_mask,
45 unsigned int nr_vecs) 83 unsigned int nr_vecs,
84 struct bio_set *bs)
46{ 85{
47 struct bio_integrity_payload *bip; 86 struct bio_integrity_payload *bip;
48 struct bio_vec *iv; 87 unsigned int idx = vecs_to_idx(nr_vecs);
49 unsigned long idx;
50 88
51 BUG_ON(bio == NULL); 89 BUG_ON(bio == NULL);
90 bip = NULL;
52 91
53 bip = mempool_alloc(bio_integrity_pool, gfp_mask); 92 /* Lower order allocations come straight from slab */
54 if (unlikely(bip == NULL)) { 93 if (!use_bip_pool(idx))
55 printk(KERN_ERR "%s: could not alloc bip\n", __func__); 94 bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
56 return NULL;
57 }
58 95
59 memset(bip, 0, sizeof(*bip)); 96 /* Use mempool if lower order alloc failed or max vecs were requested */
97 if (bip == NULL) {
98 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
60 99
61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); 100 if (unlikely(bip == NULL)) {
62 if (unlikely(iv == NULL)) { 101 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); 102 return NULL;
64 mempool_free(bip, bio_integrity_pool); 103 }
65 return NULL;
66 } 104 }
67 105
68 bip->bip_pool = idx; 106 memset(bip, 0, sizeof(*bip));
69 bip->bip_vec = iv; 107
108 bip->bip_slab = idx;
70 bip->bip_bio = bio; 109 bip->bip_bio = bio;
71 bio->bi_integrity = bip; 110 bio->bi_integrity = bip;
72 111
73 return bip; 112 return bip;
74} 113}
114EXPORT_SYMBOL(bio_integrity_alloc_bioset);
115
116/**
117 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
118 * @bio: bio to attach integrity metadata to
119 * @gfp_mask: Memory allocation mask
120 * @nr_vecs: Number of integrity metadata scatter-gather elements
121 *
122 * Description: This function prepares a bio for attaching integrity
123 * metadata. nr_vecs specifies the maximum number of pages containing
124 * integrity metadata that can be attached.
125 */
126struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
127 gfp_t gfp_mask,
128 unsigned int nr_vecs)
129{
130 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
131}
75EXPORT_SYMBOL(bio_integrity_alloc); 132EXPORT_SYMBOL(bio_integrity_alloc);
76 133
77/** 134/**
78 * bio_integrity_free - Free bio integrity payload 135 * bio_integrity_free - Free bio integrity payload
79 * @bio: bio containing bip to be freed 136 * @bio: bio containing bip to be freed
137 * @bs: bio_set this bio was allocated from
80 * 138 *
81 * Description: Used to free the integrity portion of a bio. Usually 139 * Description: Used to free the integrity portion of a bio. Usually
82 * called from bio_free(). 140 * called from bio_free().
83 */ 141 */
84void bio_integrity_free(struct bio *bio) 142void bio_integrity_free(struct bio *bio, struct bio_set *bs)
85{ 143{
86 struct bio_integrity_payload *bip = bio->bi_integrity; 144 struct bio_integrity_payload *bip = bio->bi_integrity;
87 145
@@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio)
92 && bip->bip_buf != NULL) 150 && bip->bip_buf != NULL)
93 kfree(bip->bip_buf); 151 kfree(bip->bip_buf);
94 152
95 bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); 153 if (use_bip_pool(bip->bip_slab))
96 mempool_free(bip, bio_integrity_pool); 154 mempool_free(bip, bs->bio_integrity_pool);
155 else
156 kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
97 157
98 bio->bi_integrity = NULL; 158 bio->bi_integrity = NULL;
99} 159}
@@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
114 struct bio_integrity_payload *bip = bio->bi_integrity; 174 struct bio_integrity_payload *bip = bio->bi_integrity;
115 struct bio_vec *iv; 175 struct bio_vec *iv;
116 176
117 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { 177 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
118 printk(KERN_ERR "%s: bip_vec full\n", __func__); 178 printk(KERN_ERR "%s: bip_vec full\n", __func__);
119 return 0; 179 return 0;
120 } 180 }
@@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
647 bp->iv1 = bip->bip_vec[0]; 707 bp->iv1 = bip->bip_vec[0];
648 bp->iv2 = bip->bip_vec[0]; 708 bp->iv2 = bip->bip_vec[0];
649 709
650 bp->bip1.bip_vec = &bp->iv1; 710 bp->bip1.bip_vec[0] = bp->iv1;
651 bp->bip2.bip_vec = &bp->iv2; 711 bp->bip2.bip_vec[0] = bp->iv2;
652 712
653 bp->iv1.bv_len = sectors * bi->tuple_size; 713 bp->iv1.bv_len = sectors * bi->tuple_size;
654 bp->iv2.bv_offset += sectors * bi->tuple_size; 714 bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split);
667 * @bio: New bio 727 * @bio: New bio
668 * @bio_src: Original bio 728 * @bio_src: Original bio
669 * @gfp_mask: Memory allocation mask 729 * @gfp_mask: Memory allocation mask
730 * @bs: bio_set to allocate bip from
670 * 731 *
671 * Description: Called to allocate a bip when cloning a bio 732 * Description: Called to allocate a bip when cloning a bio
672 */ 733 */
673int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) 734int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
735 gfp_t gfp_mask, struct bio_set *bs)
674{ 736{
675 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 737 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
676 struct bio_integrity_payload *bip; 738 struct bio_integrity_payload *bip;
677 739
678 BUG_ON(bip_src == NULL); 740 BUG_ON(bip_src == NULL);
679 741
680 bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); 742 bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
681 743
682 if (bip == NULL) 744 if (bip == NULL)
683 return -EIO; 745 return -EIO;
@@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
693} 755}
694EXPORT_SYMBOL(bio_integrity_clone); 756EXPORT_SYMBOL(bio_integrity_clone);
695 757
696static int __init bio_integrity_init(void) 758int bioset_integrity_create(struct bio_set *bs, int pool_size)
697{ 759{
698 kintegrityd_wq = create_workqueue("kintegrityd"); 760 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
761
762 bs->bio_integrity_pool =
763 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
699 764
765 if (!bs->bio_integrity_pool)
766 return -1;
767
768 return 0;
769}
770EXPORT_SYMBOL(bioset_integrity_create);
771
772void bioset_integrity_free(struct bio_set *bs)
773{
774 if (bs->bio_integrity_pool)
775 mempool_destroy(bs->bio_integrity_pool);
776}
777EXPORT_SYMBOL(bioset_integrity_free);
778
779void __init bio_integrity_init(void)
780{
781 unsigned int i;
782
783 kintegrityd_wq = create_workqueue("kintegrityd");
700 if (!kintegrityd_wq) 784 if (!kintegrityd_wq)
701 panic("Failed to create kintegrityd\n"); 785 panic("Failed to create kintegrityd\n");
702 786
703 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 787 for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
704 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 788 unsigned int size;
705 789
706 bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, 790 size = sizeof(struct bio_integrity_payload)
707 bio_integrity_slab); 791 + bip_slab[i].nr_vecs * sizeof(struct bio_vec);
708 if (!bio_integrity_pool)
709 panic("bio_integrity: can't allocate bip pool\n");
710 792
711 integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); 793 bip_slab[i].slab =
712 if (!integrity_bio_set) 794 kmem_cache_create(bip_slab[i].name, size, 0,
713 panic("bio_integrity: can't allocate bio_set\n"); 795 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
714 796 }
715 return 0;
716} 797}
717subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 24c914043532..76738005c8e8 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
238 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); 238 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
239 239
240 if (bio_integrity(bio)) 240 if (bio_integrity(bio))
241 bio_integrity_free(bio); 241 bio_integrity_free(bio, bs);
242 242
243 /* 243 /*
244 * If we have front padding, adjust the bio pointer before freeing 244 * If we have front padding, adjust the bio pointer before freeing
@@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
341static void bio_kmalloc_destructor(struct bio *bio) 341static void bio_kmalloc_destructor(struct bio *bio)
342{ 342{
343 if (bio_integrity(bio)) 343 if (bio_integrity(bio))
344 bio_integrity_free(bio); 344 bio_integrity_free(bio, fs_bio_set);
345 kfree(bio); 345 kfree(bio);
346} 346}
347 347
@@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
472 if (bio_integrity(bio)) { 472 if (bio_integrity(bio)) {
473 int ret; 473 int ret;
474 474
475 ret = bio_integrity_clone(b, bio, gfp_mask); 475 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
476 476
477 if (ret < 0) { 477 if (ret < 0) {
478 bio_put(b); 478 bio_put(b);
@@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
705} 705}
706 706
707static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, 707static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
708 struct sg_iovec *iov, int iov_count, int uncopy, 708 struct sg_iovec *iov, int iov_count,
709 int do_free_page) 709 int to_user, int from_user, int do_free_page)
710{ 710{
711 int ret = 0, i; 711 int ret = 0, i;
712 struct bio_vec *bvec; 712 struct bio_vec *bvec;
713 int iov_idx = 0; 713 int iov_idx = 0;
714 unsigned int iov_off = 0; 714 unsigned int iov_off = 0;
715 int read = bio_data_dir(bio) == READ;
716 715
717 __bio_for_each_segment(bvec, bio, i, 0) { 716 __bio_for_each_segment(bvec, bio, i, 0) {
718 char *bv_addr = page_address(bvec->bv_page); 717 char *bv_addr = page_address(bvec->bv_page);
@@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
727 iov_addr = iov[iov_idx].iov_base + iov_off; 726 iov_addr = iov[iov_idx].iov_base + iov_off;
728 727
729 if (!ret) { 728 if (!ret) {
730 if (!read && !uncopy) 729 if (to_user)
731 ret = copy_from_user(bv_addr, iov_addr,
732 bytes);
733 if (read && uncopy)
734 ret = copy_to_user(iov_addr, bv_addr, 730 ret = copy_to_user(iov_addr, bv_addr,
735 bytes); 731 bytes);
736 732
733 if (from_user)
734 ret = copy_from_user(bv_addr, iov_addr,
735 bytes);
736
737 if (ret) 737 if (ret)
738 ret = -EFAULT; 738 ret = -EFAULT;
739 } 739 }
@@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio)
770 770
771 if (!bio_flagged(bio, BIO_NULL_MAPPED)) 771 if (!bio_flagged(bio, BIO_NULL_MAPPED))
772 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, 772 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
773 bmd->nr_sgvecs, 1, bmd->is_our_pages); 773 bmd->nr_sgvecs, bio_data_dir(bio) == READ,
774 0, bmd->is_our_pages);
774 bio_free_map_data(bmd); 775 bio_free_map_data(bmd);
775 bio_put(bio); 776 bio_put(bio);
776 return ret; 777 return ret;
@@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
875 /* 876 /*
876 * success 877 * success
877 */ 878 */
878 if (!write_to_vm && (!map_data || !map_data->null_mapped)) { 879 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
879 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); 880 (map_data && map_data->from_user)) {
881 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
880 if (ret) 882 if (ret)
881 goto cleanup; 883 goto cleanup;
882 } 884 }
@@ -1539,6 +1541,7 @@ void bioset_free(struct bio_set *bs)
1539 if (bs->bio_pool) 1541 if (bs->bio_pool)
1540 mempool_destroy(bs->bio_pool); 1542 mempool_destroy(bs->bio_pool);
1541 1543
1544 bioset_integrity_free(bs);
1542 biovec_free_pools(bs); 1545 biovec_free_pools(bs);
1543 bio_put_slab(bs); 1546 bio_put_slab(bs);
1544 1547
@@ -1579,6 +1582,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1579 if (!bs->bio_pool) 1582 if (!bs->bio_pool)
1580 goto bad; 1583 goto bad;
1581 1584
1585 if (bioset_integrity_create(bs, pool_size))
1586 goto bad;
1587
1582 if (!biovec_create_pools(bs, pool_size)) 1588 if (!biovec_create_pools(bs, pool_size))
1583 return bs; 1589 return bs;
1584 1590
@@ -1616,6 +1622,7 @@ static int __init init_bio(void)
1616 if (!bio_slabs) 1622 if (!bio_slabs)
1617 panic("bio: can't allocate bios\n"); 1623 panic("bio: can't allocate bios\n");
1618 1624
1625 bio_integrity_init();
1619 biovec_init_slabs(); 1626 biovec_init_slabs();
1620 1627
1621 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 1628 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 603972576f0f..f128427b995b 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -29,51 +29,28 @@
29 29
30#ifdef CONFIG_FS_POSIX_ACL 30#ifdef CONFIG_FS_POSIX_ACL
31 31
32static void btrfs_update_cached_acl(struct inode *inode,
33 struct posix_acl **p_acl,
34 struct posix_acl *acl)
35{
36 spin_lock(&inode->i_lock);
37 if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
38 posix_acl_release(*p_acl);
39 *p_acl = posix_acl_dup(acl);
40 spin_unlock(&inode->i_lock);
41}
42
43static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) 32static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
44{ 33{
45 int size; 34 int size;
46 const char *name; 35 const char *name;
47 char *value = NULL; 36 char *value = NULL;
48 struct posix_acl *acl = NULL, **p_acl; 37 struct posix_acl *acl;
38
39 acl = get_cached_acl(inode, type);
40 if (acl != ACL_NOT_CACHED)
41 return acl;
49 42
50 switch (type) { 43 switch (type) {
51 case ACL_TYPE_ACCESS: 44 case ACL_TYPE_ACCESS:
52 name = POSIX_ACL_XATTR_ACCESS; 45 name = POSIX_ACL_XATTR_ACCESS;
53 p_acl = &BTRFS_I(inode)->i_acl;
54 break; 46 break;
55 case ACL_TYPE_DEFAULT: 47 case ACL_TYPE_DEFAULT:
56 name = POSIX_ACL_XATTR_DEFAULT; 48 name = POSIX_ACL_XATTR_DEFAULT;
57 p_acl = &BTRFS_I(inode)->i_default_acl;
58 break; 49 break;
59 default: 50 default:
60 return ERR_PTR(-EINVAL); 51 BUG();
61 } 52 }
62 53
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
68 spin_lock(&inode->i_lock);
69 acl = *p_acl;
70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
72 spin_unlock(&inode->i_lock);
73
74 if (acl != BTRFS_ACL_NOT_CACHED)
75 return acl;
76
77 size = __btrfs_getxattr(inode, name, "", 0); 54 size = __btrfs_getxattr(inode, name, "", 0);
78 if (size > 0) { 55 if (size > 0) {
79 value = kzalloc(size, GFP_NOFS); 56 value = kzalloc(size, GFP_NOFS);
@@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
82 size = __btrfs_getxattr(inode, name, value, size); 59 size = __btrfs_getxattr(inode, name, value, size);
83 if (size > 0) { 60 if (size > 0) {
84 acl = posix_acl_from_xattr(value, size); 61 acl = posix_acl_from_xattr(value, size);
85 btrfs_update_cached_acl(inode, p_acl, acl); 62 set_cached_acl(inode, type, acl);
86 } 63 }
87 kfree(value); 64 kfree(value);
88 } else if (size == -ENOENT || size == -ENODATA || size == 0) { 65 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */ 66 /* FIXME, who returns -ENOENT? I think nobody */
90 acl = NULL; 67 acl = NULL;
91 btrfs_update_cached_acl(inode, p_acl, acl); 68 set_cached_acl(inode, type, acl);
92 } else { 69 } else {
93 acl = ERR_PTR(-EIO); 70 acl = ERR_PTR(-EIO);
94 } 71 }
@@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
121{ 98{
122 int ret, size = 0; 99 int ret, size = 0;
123 const char *name; 100 const char *name;
124 struct posix_acl **p_acl;
125 char *value = NULL; 101 char *value = NULL;
126 mode_t mode; 102 mode_t mode;
127 103
@@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
141 ret = 0; 117 ret = 0;
142 inode->i_mode = mode; 118 inode->i_mode = mode;
143 name = POSIX_ACL_XATTR_ACCESS; 119 name = POSIX_ACL_XATTR_ACCESS;
144 p_acl = &BTRFS_I(inode)->i_acl;
145 break; 120 break;
146 case ACL_TYPE_DEFAULT: 121 case ACL_TYPE_DEFAULT:
147 if (!S_ISDIR(inode->i_mode)) 122 if (!S_ISDIR(inode->i_mode))
148 return acl ? -EINVAL : 0; 123 return acl ? -EINVAL : 0;
149 name = POSIX_ACL_XATTR_DEFAULT; 124 name = POSIX_ACL_XATTR_DEFAULT;
150 p_acl = &BTRFS_I(inode)->i_default_acl;
151 break; 125 break;
152 default: 126 default:
153 return -EINVAL; 127 return -EINVAL;
@@ -172,7 +146,7 @@ out:
172 kfree(value); 146 kfree(value);
173 147
174 if (!ret) 148 if (!ret)
175 btrfs_update_cached_acl(inode, p_acl, acl); 149 set_cached_acl(inode, type, acl);
176 150
177 return ret; 151 return ret;
178} 152}
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628a1a72..6e4f6c50a120 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
299 "btrfs-%s-%d", workers->name, 299 "btrfs-%s-%d", workers->name,
300 workers->num_workers + i); 300 workers->num_workers + i);
301 if (IS_ERR(worker->task)) { 301 if (IS_ERR(worker->task)) {
302 kfree(worker);
303 ret = PTR_ERR(worker->task); 302 ret = PTR_ERR(worker->task);
303 kfree(worker);
304 goto fail; 304 goto fail;
305 } 305 }
306 306
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index acb4f3517582..ea1ea0af8c0e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -53,10 +53,6 @@ struct btrfs_inode {
53 /* used to order data wrt metadata */ 53 /* used to order data wrt metadata */
54 struct btrfs_ordered_inode_tree ordered_tree; 54 struct btrfs_ordered_inode_tree ordered_tree;
55 55
56 /* standard acl pointers */
57 struct posix_acl *i_acl;
58 struct posix_acl *i_default_acl;
59
60 /* for keeping track of orphaned inodes */ 56 /* for keeping track of orphaned inodes */
61 struct list_head i_orphan; 57 struct list_head i_orphan;
62 58
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index de1e2fd32080..9d8ba4d54a37 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -26,7 +26,6 @@
26#include <linux/time.h> 26#include <linux/time.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
31#include <linux/mpage.h> 30#include <linux/mpage.h>
32#include <linux/swap.h> 31#include <linux/swap.h>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 03441a99ea38..98a873838717 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -41,8 +41,6 @@ struct btrfs_ordered_sum;
41 41
42#define BTRFS_MAGIC "_BHRfS_M" 42#define BTRFS_MAGIC "_BHRfS_M"
43 43
44#define BTRFS_ACL_NOT_CACHED ((void *)-1)
45
46#define BTRFS_MAX_LEVEL 8 44#define BTRFS_MAX_LEVEL 8
47 45
48#define BTRFS_COMPAT_EXTENT_TREE_V0 46#define BTRFS_COMPAT_EXTENT_TREE_V0
@@ -2076,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2076int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2074int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2077int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2075int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2078int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2076int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2079int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 2077int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
2080 *root);
2081int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2078int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2082 struct btrfs_root *root, 2079 struct btrfs_root *root,
2083 struct extent_buffer *node, 2080 struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..a5aca3997d42 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
990 return type; 990 return type;
991} 991}
992 992
993static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) 993static int find_next_key(struct btrfs_path *path, int level,
994 struct btrfs_key *key)
994 995
995{ 996{
996 int level; 997 for (; level < BTRFS_MAX_LEVEL; level++) {
997 BUG_ON(!path->keep_locks);
998 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
999 if (!path->nodes[level]) 998 if (!path->nodes[level])
1000 break; 999 break;
1001 btrfs_assert_tree_locked(path->nodes[level]);
1002 if (path->slots[level] + 1 >= 1000 if (path->slots[level] + 1 >=
1003 btrfs_header_nritems(path->nodes[level])) 1001 btrfs_header_nritems(path->nodes[level]))
1004 continue; 1002 continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1158 * For simplicity, we just do not add new inline back 1156 * For simplicity, we just do not add new inline back
1159 * ref if there is any kind of item for this block 1157 * ref if there is any kind of item for this block
1160 */ 1158 */
1161 if (find_next_key(path, &key) == 0 && key.objectid == bytenr && 1159 if (find_next_key(path, 0, &key) == 0 &&
1160 key.objectid == bytenr &&
1162 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1161 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1163 err = -EAGAIN; 1162 err = -EAGAIN;
1164 goto out; 1163 goto out;
@@ -2697,7 +2696,7 @@ again:
2697 2696
2698 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2697 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2699 ", %llu bytes_used, %llu bytes_reserved, " 2698 ", %llu bytes_used, %llu bytes_reserved, "
2700 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 2699 "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
2701 "%llu total\n", (unsigned long long)bytes, 2700 "%llu total\n", (unsigned long long)bytes,
2702 (unsigned long long)data_sinfo->bytes_delalloc, 2701 (unsigned long long)data_sinfo->bytes_delalloc,
2703 (unsigned long long)data_sinfo->bytes_used, 2702 (unsigned long long)data_sinfo->bytes_used,
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4128 return buf; 4127 return buf;
4129} 4128}
4130 4129
4130#if 0
4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4132 struct btrfs_root *root, struct extent_buffer *leaf) 4132 struct btrfs_root *root, struct extent_buffer *leaf)
4133{ 4133{
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4171 return 0; 4171 return 0;
4172} 4172}
4173 4173
4174#if 0
4175
4176static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4174static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4177 struct btrfs_root *root, 4175 struct btrfs_root *root,
4178 struct btrfs_leaf_ref *ref) 4176 struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
4553} 4551}
4554#endif 4552#endif
4555 4553
4554struct walk_control {
4555 u64 refs[BTRFS_MAX_LEVEL];
4556 u64 flags[BTRFS_MAX_LEVEL];
4557 struct btrfs_key update_progress;
4558 int stage;
4559 int level;
4560 int shared_level;
4561 int update_ref;
4562 int keep_locks;
4563};
4564
4565#define DROP_REFERENCE 1
4566#define UPDATE_BACKREF 2
4567
4556/* 4568/*
4557 * helper function for drop_subtree, this function is similar to 4569 * hepler to process tree block while walking down the tree.
4558 * walk_down_tree. The main difference is that it checks reference 4570 *
4559 * counts while tree blocks are locked. 4571 * when wc->stage == DROP_REFERENCE, this function checks
4572 * reference count of the block. if the block is shared and
4573 * we need update back refs for the subtree rooted at the
4574 * block, this function changes wc->stage to UPDATE_BACKREF
4575 *
4576 * when wc->stage == UPDATE_BACKREF, this function updates
4577 * back refs for pointers in the block.
4578 *
4579 * NOTE: return value 1 means we should stop walking down.
4560 */ 4580 */
4561static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4581static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4562 struct btrfs_root *root, 4582 struct btrfs_root *root,
4563 struct btrfs_path *path, int *level) 4583 struct btrfs_path *path,
4584 struct walk_control *wc)
4564{ 4585{
4565 struct extent_buffer *next; 4586 int level = wc->level;
4566 struct extent_buffer *cur; 4587 struct extent_buffer *eb = path->nodes[level];
4567 struct extent_buffer *parent; 4588 struct btrfs_key key;
4568 u64 bytenr; 4589 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4569 u64 ptr_gen;
4570 u64 refs;
4571 u64 flags;
4572 u32 blocksize;
4573 int ret; 4590 int ret;
4574 4591
4575 cur = path->nodes[*level]; 4592 if (wc->stage == UPDATE_BACKREF &&
4576 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, 4593 btrfs_header_owner(eb) != root->root_key.objectid)
4577 &refs, &flags); 4594 return 1;
4578 BUG_ON(ret);
4579 if (refs > 1)
4580 goto out;
4581 4595
4582 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4596 /*
4597 * when reference count of tree block is 1, it won't increase
4598 * again. once full backref flag is set, we never clear it.
4599 */
4600 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4601 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
4602 BUG_ON(!path->locks[level]);
4603 ret = btrfs_lookup_extent_info(trans, root,
4604 eb->start, eb->len,
4605 &wc->refs[level],
4606 &wc->flags[level]);
4607 BUG_ON(ret);
4608 BUG_ON(wc->refs[level] == 0);
4609 }
4583 4610
4584 while (*level >= 0) { 4611 if (wc->stage == DROP_REFERENCE &&
4585 cur = path->nodes[*level]; 4612 wc->update_ref && wc->refs[level] > 1) {
4586 if (*level == 0) { 4613 BUG_ON(eb == root->node);
4587 ret = btrfs_drop_leaf_ref(trans, root, cur); 4614 BUG_ON(path->slots[level] > 0);
4588 BUG_ON(ret); 4615 if (level == 0)
4589 clean_tree_block(trans, root, cur); 4616 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4590 break; 4617 else
4591 } 4618 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4592 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 4619 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4593 clean_tree_block(trans, root, cur); 4620 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4594 break; 4621 wc->stage = UPDATE_BACKREF;
4622 wc->shared_level = level;
4595 } 4623 }
4624 }
4596 4625
4597 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4626 if (wc->stage == DROP_REFERENCE) {
4598 blocksize = btrfs_level_size(root, *level - 1); 4627 if (wc->refs[level] > 1)
4599 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4628 return 1;
4600 4629
4601 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4630 if (path->locks[level] && !wc->keep_locks) {
4602 btrfs_tree_lock(next); 4631 btrfs_tree_unlock(eb);
4603 btrfs_set_lock_blocking(next); 4632 path->locks[level] = 0;
4633 }
4634 return 0;
4635 }
4604 4636
4605 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4637 /* wc->stage == UPDATE_BACKREF */
4606 &refs, &flags); 4638 if (!(wc->flags[level] & flag)) {
4639 BUG_ON(!path->locks[level]);
4640 ret = btrfs_inc_ref(trans, root, eb, 1);
4607 BUG_ON(ret); 4641 BUG_ON(ret);
4608 if (refs > 1) { 4642 ret = btrfs_dec_ref(trans, root, eb, 0);
4609 parent = path->nodes[*level]; 4643 BUG_ON(ret);
4610 ret = btrfs_free_extent(trans, root, bytenr, 4644 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
4611 blocksize, parent->start, 4645 eb->len, flag, 0);
4612 btrfs_header_owner(parent), 4646 BUG_ON(ret);
4613 *level - 1, 0); 4647 wc->flags[level] |= flag;
4648 }
4649
4650 /*
4651 * the block is shared by multiple trees, so it's not good to
4652 * keep the tree lock
4653 */
4654 if (path->locks[level] && level > 0) {
4655 btrfs_tree_unlock(eb);
4656 path->locks[level] = 0;
4657 }
4658 return 0;
4659}
4660
4661/*
4662 * hepler to process tree block while walking up the tree.
4663 *
4664 * when wc->stage == DROP_REFERENCE, this function drops
4665 * reference count on the block.
4666 *
4667 * when wc->stage == UPDATE_BACKREF, this function changes
4668 * wc->stage back to DROP_REFERENCE if we changed wc->stage
4669 * to UPDATE_BACKREF previously while processing the block.
4670 *
4671 * NOTE: return value 1 means we should stop walking up.
4672 */
4673static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4674 struct btrfs_root *root,
4675 struct btrfs_path *path,
4676 struct walk_control *wc)
4677{
4678 int ret = 0;
4679 int level = wc->level;
4680 struct extent_buffer *eb = path->nodes[level];
4681 u64 parent = 0;
4682
4683 if (wc->stage == UPDATE_BACKREF) {
4684 BUG_ON(wc->shared_level < level);
4685 if (level < wc->shared_level)
4686 goto out;
4687
4688 BUG_ON(wc->refs[level] <= 1);
4689 ret = find_next_key(path, level + 1, &wc->update_progress);
4690 if (ret > 0)
4691 wc->update_ref = 0;
4692
4693 wc->stage = DROP_REFERENCE;
4694 wc->shared_level = -1;
4695 path->slots[level] = 0;
4696
4697 /*
4698 * check reference count again if the block isn't locked.
4699 * we should start walking down the tree again if reference
4700 * count is one.
4701 */
4702 if (!path->locks[level]) {
4703 BUG_ON(level == 0);
4704 btrfs_tree_lock(eb);
4705 btrfs_set_lock_blocking(eb);
4706 path->locks[level] = 1;
4707
4708 ret = btrfs_lookup_extent_info(trans, root,
4709 eb->start, eb->len,
4710 &wc->refs[level],
4711 &wc->flags[level]);
4614 BUG_ON(ret); 4712 BUG_ON(ret);
4615 path->slots[*level]++; 4713 BUG_ON(wc->refs[level] == 0);
4616 btrfs_tree_unlock(next); 4714 if (wc->refs[level] == 1) {
4617 free_extent_buffer(next); 4715 btrfs_tree_unlock(eb);
4618 continue; 4716 path->locks[level] = 0;
4717 return 1;
4718 }
4719 } else {
4720 BUG_ON(level != 0);
4619 } 4721 }
4722 }
4620 4723
4621 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4724 /* wc->stage == DROP_REFERENCE */
4725 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
4622 4726
4623 *level = btrfs_header_level(next); 4727 if (wc->refs[level] == 1) {
4624 path->nodes[*level] = next; 4728 if (level == 0) {
4625 path->slots[*level] = 0; 4729 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4626 path->locks[*level] = 1; 4730 ret = btrfs_dec_ref(trans, root, eb, 1);
4627 cond_resched(); 4731 else
4732 ret = btrfs_dec_ref(trans, root, eb, 0);
4733 BUG_ON(ret);
4734 }
4735 /* make block locked assertion in clean_tree_block happy */
4736 if (!path->locks[level] &&
4737 btrfs_header_generation(eb) == trans->transid) {
4738 btrfs_tree_lock(eb);
4739 btrfs_set_lock_blocking(eb);
4740 path->locks[level] = 1;
4741 }
4742 clean_tree_block(trans, root, eb);
4743 }
4744
4745 if (eb == root->node) {
4746 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4747 parent = eb->start;
4748 else
4749 BUG_ON(root->root_key.objectid !=
4750 btrfs_header_owner(eb));
4751 } else {
4752 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4753 parent = path->nodes[level + 1]->start;
4754 else
4755 BUG_ON(root->root_key.objectid !=
4756 btrfs_header_owner(path->nodes[level + 1]));
4628 } 4757 }
4629out:
4630 if (path->nodes[*level] == root->node)
4631 parent = path->nodes[*level];
4632 else
4633 parent = path->nodes[*level + 1];
4634 bytenr = path->nodes[*level]->start;
4635 blocksize = path->nodes[*level]->len;
4636 4758
4637 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, 4759 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
4638 btrfs_header_owner(parent), *level, 0); 4760 root->root_key.objectid, level, 0);
4639 BUG_ON(ret); 4761 BUG_ON(ret);
4762out:
4763 wc->refs[level] = 0;
4764 wc->flags[level] = 0;
4765 return ret;
4766}
4767
4768static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4769 struct btrfs_root *root,
4770 struct btrfs_path *path,
4771 struct walk_control *wc)
4772{
4773 struct extent_buffer *next;
4774 struct extent_buffer *cur;
4775 u64 bytenr;
4776 u64 ptr_gen;
4777 u32 blocksize;
4778 int level = wc->level;
4779 int ret;
4780
4781 while (level >= 0) {
4782 cur = path->nodes[level];
4783 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
4640 4784
4641 if (path->locks[*level]) { 4785 ret = walk_down_proc(trans, root, path, wc);
4642 btrfs_tree_unlock(path->nodes[*level]); 4786 if (ret > 0)
4643 path->locks[*level] = 0; 4787 break;
4788
4789 if (level == 0)
4790 break;
4791
4792 bytenr = btrfs_node_blockptr(cur, path->slots[level]);
4793 blocksize = btrfs_level_size(root, level - 1);
4794 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
4795
4796 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4797 btrfs_tree_lock(next);
4798 btrfs_set_lock_blocking(next);
4799
4800 level--;
4801 BUG_ON(level != btrfs_header_level(next));
4802 path->nodes[level] = next;
4803 path->slots[level] = 0;
4804 path->locks[level] = 1;
4805 wc->level = level;
4644 } 4806 }
4645 free_extent_buffer(path->nodes[*level]);
4646 path->nodes[*level] = NULL;
4647 *level += 1;
4648 cond_resched();
4649 return 0; 4807 return 0;
4650} 4808}
4651 4809
4652/*
4653 * helper for dropping snapshots. This walks back up the tree in the path
4654 * to find the first node higher up where we haven't yet gone through
4655 * all the slots
4656 */
4657static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4810static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
4658 struct btrfs_root *root, 4811 struct btrfs_root *root,
4659 struct btrfs_path *path, 4812 struct btrfs_path *path,
4660 int *level, int max_level) 4813 struct walk_control *wc, int max_level)
4661{ 4814{
4662 struct btrfs_root_item *root_item = &root->root_item; 4815 int level = wc->level;
4663 int i;
4664 int slot;
4665 int ret; 4816 int ret;
4666 4817
4667 for (i = *level; i < max_level && path->nodes[i]; i++) { 4818 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
4668 slot = path->slots[i]; 4819 while (level < max_level && path->nodes[level]) {
4669 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 4820 wc->level = level;
4670 /* 4821 if (path->slots[level] + 1 <
4671 * there is more work to do in this level. 4822 btrfs_header_nritems(path->nodes[level])) {
4672 * Update the drop_progress marker to reflect 4823 path->slots[level]++;
4673 * the work we've done so far, and then bump
4674 * the slot number
4675 */
4676 path->slots[i]++;
4677 WARN_ON(*level == 0);
4678 if (max_level == BTRFS_MAX_LEVEL) {
4679 btrfs_node_key(path->nodes[i],
4680 &root_item->drop_progress,
4681 path->slots[i]);
4682 root_item->drop_level = i;
4683 }
4684 *level = i;
4685 return 0; 4824 return 0;
4686 } else { 4825 } else {
4687 struct extent_buffer *parent; 4826 ret = walk_up_proc(trans, root, path, wc);
4688 4827 if (ret > 0)
4689 /* 4828 return 0;
4690 * this whole node is done, free our reference
4691 * on it and go up one level
4692 */
4693 if (path->nodes[*level] == root->node)
4694 parent = path->nodes[*level];
4695 else
4696 parent = path->nodes[*level + 1];
4697 4829
4698 clean_tree_block(trans, root, path->nodes[i]); 4830 if (path->locks[level]) {
4699 ret = btrfs_free_extent(trans, root, 4831 btrfs_tree_unlock(path->nodes[level]);
4700 path->nodes[i]->start, 4832 path->locks[level] = 0;
4701 path->nodes[i]->len,
4702 parent->start,
4703 btrfs_header_owner(parent),
4704 *level, 0);
4705 BUG_ON(ret);
4706 if (path->locks[*level]) {
4707 btrfs_tree_unlock(path->nodes[i]);
4708 path->locks[i] = 0;
4709 } 4833 }
4710 free_extent_buffer(path->nodes[i]); 4834 free_extent_buffer(path->nodes[level]);
4711 path->nodes[i] = NULL; 4835 path->nodes[level] = NULL;
4712 *level = i + 1; 4836 level++;
4713 } 4837 }
4714 } 4838 }
4715 return 1; 4839 return 1;
4716} 4840}
4717 4841
4718/* 4842/*
4719 * drop the reference count on the tree rooted at 'snap'. This traverses 4843 * drop a subvolume tree.
4720 * the tree freeing any blocks that have a ref count of zero after being 4844 *
4721 * decremented. 4845 * this function traverses the tree freeing any blocks that only
4846 * referenced by the tree.
4847 *
4848 * when a shared tree block is found. this function decreases its
4849 * reference count by one. if update_ref is true, this function
4850 * also make sure backrefs for the shared block and all lower level
4851 * blocks are properly updated.
4722 */ 4852 */
4723int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 4853int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
4724 *root)
4725{ 4854{
4726 int ret = 0;
4727 int wret;
4728 int level;
4729 struct btrfs_path *path; 4855 struct btrfs_path *path;
4730 int update_count; 4856 struct btrfs_trans_handle *trans;
4857 struct btrfs_root *tree_root = root->fs_info->tree_root;
4731 struct btrfs_root_item *root_item = &root->root_item; 4858 struct btrfs_root_item *root_item = &root->root_item;
4859 struct walk_control *wc;
4860 struct btrfs_key key;
4861 int err = 0;
4862 int ret;
4863 int level;
4732 4864
4733 path = btrfs_alloc_path(); 4865 path = btrfs_alloc_path();
4734 BUG_ON(!path); 4866 BUG_ON(!path);
4735 4867
4736 level = btrfs_header_level(root->node); 4868 wc = kzalloc(sizeof(*wc), GFP_NOFS);
4869 BUG_ON(!wc);
4870
4871 trans = btrfs_start_transaction(tree_root, 1);
4872
4737 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4873 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4874 level = btrfs_header_level(root->node);
4738 path->nodes[level] = btrfs_lock_root_node(root); 4875 path->nodes[level] = btrfs_lock_root_node(root);
4739 btrfs_set_lock_blocking(path->nodes[level]); 4876 btrfs_set_lock_blocking(path->nodes[level]);
4740 path->slots[level] = 0; 4877 path->slots[level] = 0;
4741 path->locks[level] = 1; 4878 path->locks[level] = 1;
4879 memset(&wc->update_progress, 0,
4880 sizeof(wc->update_progress));
4742 } else { 4881 } else {
4743 struct btrfs_key key;
4744 struct btrfs_disk_key found_key;
4745 struct extent_buffer *node;
4746
4747 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4882 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4883 memcpy(&wc->update_progress, &key,
4884 sizeof(wc->update_progress));
4885
4748 level = root_item->drop_level; 4886 level = root_item->drop_level;
4887 BUG_ON(level == 0);
4749 path->lowest_level = level; 4888 path->lowest_level = level;
4750 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4889 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4751 if (wret < 0) { 4890 path->lowest_level = 0;
4752 ret = wret; 4891 if (ret < 0) {
4892 err = ret;
4753 goto out; 4893 goto out;
4754 } 4894 }
4755 node = path->nodes[level]; 4895 btrfs_node_key_to_cpu(path->nodes[level], &key,
4756 btrfs_node_key(node, &found_key, path->slots[level]); 4896 path->slots[level]);
4757 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 4897 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
4758 sizeof(found_key))); 4898
4759 /* 4899 /*
4760 * unlock our path, this is safe because only this 4900 * unlock our path, this is safe because only this
4761 * function is allowed to delete this snapshot 4901 * function is allowed to delete this snapshot
4762 */ 4902 */
4763 btrfs_unlock_up_safe(path, 0); 4903 btrfs_unlock_up_safe(path, 0);
4904
4905 level = btrfs_header_level(root->node);
4906 while (1) {
4907 btrfs_tree_lock(path->nodes[level]);
4908 btrfs_set_lock_blocking(path->nodes[level]);
4909
4910 ret = btrfs_lookup_extent_info(trans, root,
4911 path->nodes[level]->start,
4912 path->nodes[level]->len,
4913 &wc->refs[level],
4914 &wc->flags[level]);
4915 BUG_ON(ret);
4916 BUG_ON(wc->refs[level] == 0);
4917
4918 if (level == root_item->drop_level)
4919 break;
4920
4921 btrfs_tree_unlock(path->nodes[level]);
4922 WARN_ON(wc->refs[level] != 1);
4923 level--;
4924 }
4764 } 4925 }
4926
4927 wc->level = level;
4928 wc->shared_level = -1;
4929 wc->stage = DROP_REFERENCE;
4930 wc->update_ref = update_ref;
4931 wc->keep_locks = 0;
4932
4765 while (1) { 4933 while (1) {
4766 unsigned long update; 4934 ret = walk_down_tree(trans, root, path, wc);
4767 wret = walk_down_tree(trans, root, path, &level); 4935 if (ret < 0) {
4768 if (wret > 0) 4936 err = ret;
4769 break; 4937 break;
4770 if (wret < 0) 4938 }
4771 ret = wret;
4772 4939
4773 wret = walk_up_tree(trans, root, path, &level, 4940 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
4774 BTRFS_MAX_LEVEL); 4941 if (ret < 0) {
4775 if (wret > 0) 4942 err = ret;
4776 break; 4943 break;
4777 if (wret < 0) 4944 }
4778 ret = wret; 4945
4779 if (trans->transaction->in_commit || 4946 if (ret > 0) {
4780 trans->transaction->delayed_refs.flushing) { 4947 BUG_ON(wc->stage != DROP_REFERENCE);
4781 ret = -EAGAIN;
4782 break; 4948 break;
4783 } 4949 }
4784 for (update_count = 0; update_count < 16; update_count++) { 4950
4951 if (wc->stage == DROP_REFERENCE) {
4952 level = wc->level;
4953 btrfs_node_key(path->nodes[level],
4954 &root_item->drop_progress,
4955 path->slots[level]);
4956 root_item->drop_level = level;
4957 }
4958
4959 BUG_ON(wc->level == 0);
4960 if (trans->transaction->in_commit ||
4961 trans->transaction->delayed_refs.flushing) {
4962 ret = btrfs_update_root(trans, tree_root,
4963 &root->root_key,
4964 root_item);
4965 BUG_ON(ret);
4966
4967 btrfs_end_transaction(trans, tree_root);
4968 trans = btrfs_start_transaction(tree_root, 1);
4969 } else {
4970 unsigned long update;
4785 update = trans->delayed_ref_updates; 4971 update = trans->delayed_ref_updates;
4786 trans->delayed_ref_updates = 0; 4972 trans->delayed_ref_updates = 0;
4787 if (update) 4973 if (update)
4788 btrfs_run_delayed_refs(trans, root, update); 4974 btrfs_run_delayed_refs(trans, tree_root,
4789 else 4975 update);
4790 break;
4791 } 4976 }
4792 } 4977 }
4978 btrfs_release_path(root, path);
4979 BUG_ON(err);
4980
4981 ret = btrfs_del_root(trans, tree_root, &root->root_key);
4982 BUG_ON(ret);
4983
4984 free_extent_buffer(root->node);
4985 free_extent_buffer(root->commit_root);
4986 kfree(root);
4793out: 4987out:
4988 btrfs_end_transaction(trans, tree_root);
4989 kfree(wc);
4794 btrfs_free_path(path); 4990 btrfs_free_path(path);
4795 return ret; 4991 return err;
4796} 4992}
4797 4993
4994/*
4995 * drop subtree rooted at tree block 'node'.
4996 *
4997 * NOTE: this function will unlock and release tree block 'node'
4998 */
4798int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 4999int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4799 struct btrfs_root *root, 5000 struct btrfs_root *root,
4800 struct extent_buffer *node, 5001 struct extent_buffer *node,
4801 struct extent_buffer *parent) 5002 struct extent_buffer *parent)
4802{ 5003{
4803 struct btrfs_path *path; 5004 struct btrfs_path *path;
5005 struct walk_control *wc;
4804 int level; 5006 int level;
4805 int parent_level; 5007 int parent_level;
4806 int ret = 0; 5008 int ret = 0;
4807 int wret; 5009 int wret;
4808 5010
5011 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5012
4809 path = btrfs_alloc_path(); 5013 path = btrfs_alloc_path();
4810 BUG_ON(!path); 5014 BUG_ON(!path);
4811 5015
5016 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5017 BUG_ON(!wc);
5018
4812 btrfs_assert_tree_locked(parent); 5019 btrfs_assert_tree_locked(parent);
4813 parent_level = btrfs_header_level(parent); 5020 parent_level = btrfs_header_level(parent);
4814 extent_buffer_get(parent); 5021 extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4817 5024
4818 btrfs_assert_tree_locked(node); 5025 btrfs_assert_tree_locked(node);
4819 level = btrfs_header_level(node); 5026 level = btrfs_header_level(node);
4820 extent_buffer_get(node);
4821 path->nodes[level] = node; 5027 path->nodes[level] = node;
4822 path->slots[level] = 0; 5028 path->slots[level] = 0;
5029 path->locks[level] = 1;
5030
5031 wc->refs[parent_level] = 1;
5032 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5033 wc->level = level;
5034 wc->shared_level = -1;
5035 wc->stage = DROP_REFERENCE;
5036 wc->update_ref = 0;
5037 wc->keep_locks = 1;
4823 5038
4824 while (1) { 5039 while (1) {
4825 wret = walk_down_tree(trans, root, path, &level); 5040 wret = walk_down_tree(trans, root, path, wc);
4826 if (wret < 0) 5041 if (wret < 0) {
4827 ret = wret; 5042 ret = wret;
4828 if (wret != 0)
4829 break; 5043 break;
5044 }
4830 5045
4831 wret = walk_up_tree(trans, root, path, &level, parent_level); 5046 wret = walk_up_tree(trans, root, path, wc, parent_level);
4832 if (wret < 0) 5047 if (wret < 0)
4833 ret = wret; 5048 ret = wret;
4834 if (wret != 0) 5049 if (wret != 0)
4835 break; 5050 break;
4836 } 5051 }
4837 5052
5053 kfree(wc);
4838 btrfs_free_path(path); 5054 btrfs_free_path(path);
4839 return ret; 5055 return ret;
4840} 5056}
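The extent-tree.c rewrite also changes the calling convention: btrfs_drop_snapshot() no longer takes a transaction handle. It starts and ends its own transactions against the tree root, checkpoints drop_progress in the root item as it walks, and finally deletes the root item and frees the root. A minimal caller sketch under that assumption (the wrapper name is hypothetical; the call itself matches the relocation.c and transaction.c hunks below):

/* hedged sketch: drop a dead snapshot or reloc root */
static void example_drop_dead_root(struct btrfs_root *dead_root)
{
	/*
	 * update_ref == 0: just drop the references; the function runs
	 * its own transactions and frees dead_root before returning.
	 */
	btrfs_drop_snapshot(dead_root, 0);
}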
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477eaecf5..4b833972273a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -22,7 +22,6 @@
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/smp_lock.h>
26#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
27#include <linux/mpage.h> 26#include <linux/mpage.h>
28#include <linux/swap.h> 27#include <linux/swap.h>
@@ -151,7 +150,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
151 } 150 }
152 if (end_pos > isize) { 151 if (end_pos > isize) {
153 i_size_write(inode, end_pos); 152 i_size_write(inode, end_pos);
154 btrfs_update_inode(trans, root, inode); 153 /* we've only changed i_size in ram, and we haven't updated
154 * the disk i_size. There is no need to log the inode
155 * at this time.
156 */
155 } 157 }
156 err = btrfs_end_transaction(trans, root); 158 err = btrfs_end_transaction(trans, root);
157out_unlock: 159out_unlock:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8612b3a09811..791eab19e330 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -26,7 +26,6 @@
26#include <linux/time.h> 26#include <linux/time.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
31#include <linux/mpage.h> 30#include <linux/mpage.h>
32#include <linux/swap.h> 31#include <linux/swap.h>
@@ -2122,10 +2121,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
2122 * any xattrs or acls 2121 * any xattrs or acls
2123 */ 2122 */
2124 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); 2123 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2125 if (!maybe_acls) { 2124 if (!maybe_acls)
2126 BTRFS_I(inode)->i_acl = NULL; 2125 cache_no_acl(inode);
2127 BTRFS_I(inode)->i_default_acl = NULL;
2128 }
2129 2126
2130 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2127 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2131 alloc_group_block, 0); 2128 alloc_group_block, 0);
@@ -3141,9 +3138,6 @@ static noinline void init_btrfs_i(struct inode *inode)
3141{ 3138{
3142 struct btrfs_inode *bi = BTRFS_I(inode); 3139 struct btrfs_inode *bi = BTRFS_I(inode);
3143 3140
3144 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3145 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3146
3147 bi->generation = 0; 3141 bi->generation = 0;
3148 bi->sequence = 0; 3142 bi->sequence = 0;
3149 bi->last_trans = 0; 3143 bi->last_trans = 0;
@@ -3585,12 +3579,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3585 owner = 1; 3579 owner = 1;
3586 BTRFS_I(inode)->block_group = 3580 BTRFS_I(inode)->block_group =
3587 btrfs_find_block_group(root, 0, alloc_hint, owner); 3581 btrfs_find_block_group(root, 0, alloc_hint, owner);
3588 if ((mode & S_IFREG)) {
3589 if (btrfs_test_opt(root, NODATASUM))
3590 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3591 if (btrfs_test_opt(root, NODATACOW))
3592 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3593 }
3594 3582
3595 key[0].objectid = objectid; 3583 key[0].objectid = objectid;
3596 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 3584 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3645,6 +3633,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3645 3633
3646 btrfs_inherit_iflags(inode, dir); 3634 btrfs_inherit_iflags(inode, dir);
3647 3635
3636 if ((mode & S_IFREG)) {
3637 if (btrfs_test_opt(root, NODATASUM))
3638 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3639 if (btrfs_test_opt(root, NODATACOW))
3640 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3641 }
3642
3648 insert_inode_hash(inode); 3643 insert_inode_hash(inode);
3649 inode_tree_add(inode); 3644 inode_tree_add(inode);
3650 return inode; 3645 return inode;
@@ -4640,8 +4635,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
4640 ei->last_trans = 0; 4635 ei->last_trans = 0;
4641 ei->logged_trans = 0; 4636 ei->logged_trans = 0;
4642 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 4637 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
4643 ei->i_acl = BTRFS_ACL_NOT_CACHED;
4644 ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
4645 INIT_LIST_HEAD(&ei->i_orphan); 4638 INIT_LIST_HEAD(&ei->i_orphan);
4646 INIT_LIST_HEAD(&ei->ordered_operations); 4639 INIT_LIST_HEAD(&ei->ordered_operations);
4647 return &ei->vfs_inode; 4640 return &ei->vfs_inode;
@@ -4655,13 +4648,6 @@ void btrfs_destroy_inode(struct inode *inode)
4655 WARN_ON(!list_empty(&inode->i_dentry)); 4648 WARN_ON(!list_empty(&inode->i_dentry));
4656 WARN_ON(inode->i_data.nrpages); 4649 WARN_ON(inode->i_data.nrpages);
4657 4650
4658 if (BTRFS_I(inode)->i_acl &&
4659 BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED)
4660 posix_acl_release(BTRFS_I(inode)->i_acl);
4661 if (BTRFS_I(inode)->i_default_acl &&
4662 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
4663 posix_acl_release(BTRFS_I(inode)->i_default_acl);
4664
4665 /* 4651 /*
4666 * Make sure we're properly removed from the ordered operation 4652 * Make sure we're properly removed from the ordered operation
4667 * lists. 4653 * lists.
@@ -5096,6 +5082,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5096 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5082 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5097 struct extent_map *em; 5083 struct extent_map *em;
5098 struct btrfs_trans_handle *trans; 5084 struct btrfs_trans_handle *trans;
5085 struct btrfs_root *root;
5099 int ret; 5086 int ret;
5100 5087
5101 alloc_start = offset & ~mask; 5088 alloc_start = offset & ~mask;
@@ -5114,6 +5101,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5114 goto out; 5101 goto out;
5115 } 5102 }
5116 5103
5104 root = BTRFS_I(inode)->root;
5105
5106 ret = btrfs_check_data_free_space(root, inode,
5107 alloc_end - alloc_start);
5108 if (ret)
5109 goto out;
5110
5117 locked_end = alloc_end - 1; 5111 locked_end = alloc_end - 1;
5118 while (1) { 5112 while (1) {
5119 struct btrfs_ordered_extent *ordered; 5113 struct btrfs_ordered_extent *ordered;
@@ -5121,7 +5115,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5121 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 5115 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5122 if (!trans) { 5116 if (!trans) {
5123 ret = -EIO; 5117 ret = -EIO;
5124 goto out; 5118 goto out_free;
5125 } 5119 }
5126 5120
5127 /* the extent lock is ordered inside the running 5121 /* the extent lock is ordered inside the running
@@ -5182,6 +5176,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5182 GFP_NOFS); 5176 GFP_NOFS);
5183 5177
5184 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5178 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5179out_free:
5180 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5185out: 5181out:
5186 mutex_unlock(&inode->i_mutex); 5182 mutex_unlock(&inode->i_mutex);
5187 return ret; 5183 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5b5362..bd88f25889f7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -27,7 +27,6 @@
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/string.h> 29#include <linux/string.h>
30#include <linux/smp_lock.h>
31#include <linux/backing-dev.h> 30#include <linux/backing-dev.h>
32#include <linux/mount.h> 31#include <linux/mount.h>
33#include <linux/mpage.h> 32#include <linux/mpage.h>
@@ -1028,7 +1027,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1028 struct btrfs_file_extent_item); 1027 struct btrfs_file_extent_item);
1029 comp = btrfs_file_extent_compression(leaf, extent); 1028 comp = btrfs_file_extent_compression(leaf, extent);
1030 type = btrfs_file_extent_type(leaf, extent); 1029 type = btrfs_file_extent_type(leaf, extent);
1031 if (type == BTRFS_FILE_EXTENT_REG) { 1030 if (type == BTRFS_FILE_EXTENT_REG ||
1031 type == BTRFS_FILE_EXTENT_PREALLOC) {
1032 disko = btrfs_file_extent_disk_bytenr(leaf, 1032 disko = btrfs_file_extent_disk_bytenr(leaf,
1033 extent); 1033 extent);
1034 diskl = btrfs_file_extent_disk_num_bytes(leaf, 1034 diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1051,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1051 new_key.objectid = inode->i_ino; 1051 new_key.objectid = inode->i_ino;
1052 new_key.offset = key.offset + destoff - off; 1052 new_key.offset = key.offset + destoff - off;
1053 1053
1054 if (type == BTRFS_FILE_EXTENT_REG) { 1054 if (type == BTRFS_FILE_EXTENT_REG ||
1055 type == BTRFS_FILE_EXTENT_PREALLOC) {
1055 ret = btrfs_insert_empty_item(trans, root, path, 1056 ret = btrfs_insert_empty_item(trans, root, path,
1056 &new_key, size); 1057 &new_key, size);
1057 if (ret) 1058 if (ret)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc209ae10..008397934778 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
1788 btrfs_end_transaction(trans, root); 1788 btrfs_end_transaction(trans, root);
1789 } 1789 }
1790 1790
1791 btrfs_drop_dead_root(reloc_root); 1791 btrfs_drop_snapshot(reloc_root, 0);
1792 1792
1793 if (atomic_dec_and_test(async->num_pending)) 1793 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done); 1794 complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2075 2075
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret); 2077 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 } 2078 }
2082 if (!lowest) { 2079 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb); 2080 btrfs_tree_unlock(upper->eb);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9f179d4832d5..6d6d06cb6dfc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -26,7 +26,6 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
31#include <linux/mount.h> 30#include <linux/mount.h>
32#include <linux/mpage.h> 31#include <linux/mpage.h>
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457ea253..2dbf1c1f56ee 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
593 return 0; 593 return 0;
594} 594}
595 595
596#if 0
596/* 597/*
597 * when dropping snapshots, we generate a ton of delayed refs, and it makes 598 * when dropping snapshots, we generate a ton of delayed refs, and it makes
598 * sense not to join the transaction while it is trying to flush the current 599 * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
681 btrfs_btree_balance_dirty(tree_root, nr); 682 btrfs_btree_balance_dirty(tree_root, nr);
682 return ret; 683 return ret;
683} 684}
685#endif
684 686
685/* 687/*
686 * new snapshots need to be created at a very specific time in the 688 * new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1081 while (!list_empty(&list)) { 1083 while (!list_empty(&list)) {
1082 root = list_entry(list.next, struct btrfs_root, root_list); 1084 root = list_entry(list.next, struct btrfs_root, root_list);
1083 list_del_init(&root->root_list); 1085 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root); 1086 btrfs_drop_snapshot(root, 0);
1085 } 1087 }
1086 return 0; 1088 return 0;
1087} 1089}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index b7c9d5187a75..a173551e19d7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -13,7 +13,6 @@
13#include <linux/major.h> 13#include <linux/major.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/smp_lock.h>
17#include <linux/seq_file.h> 16#include <linux/seq_file.h>
18 17
19#include <linux/kobject.h> 18#include <linux/kobject.h>
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index b48689839428..92888aa90749 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,11 @@ client generated ones by default (mount option "serverino" turned
5on by default if server supports it). Add forceuid and forcegid 5on by default if server supports it). Add forceuid and forcegid
6mount options (so that when negotiating unix extensions specifying 6mount options (so that when negotiating unix extensions specifying
7which uid mounted does not immediately force the server's reported 7which uid mounted does not immediately force the server's reported
8uids to be overridden). 8uids to be overridden). Add support for scope mount parm. Improve
9hard link detection to use the same inode for both links. Do not set
10read-only dos attribute on directories (for chmod) since Windows
11explorer special cases this attribute bit for directories for
12a different purpose.
9 13
10Version 1.58 14Version 1.58
11------------ 15------------
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 1b09f1670061..20692fbfdb24 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -49,6 +49,7 @@
49#define ASN1_OJI 6 /* Object Identifier */ 49#define ASN1_OJI 6 /* Object Identifier */
50#define ASN1_OJD 7 /* Object Description */ 50#define ASN1_OJD 7 /* Object Description */
51#define ASN1_EXT 8 /* External */ 51#define ASN1_EXT 8 /* External */
52#define ASN1_ENUM 10 /* Enumerated */
52#define ASN1_SEQ 16 /* Sequence */ 53#define ASN1_SEQ 16 /* Sequence */
53#define ASN1_SET 17 /* Set */ 54#define ASN1_SET 17 /* Set */
54#define ASN1_NUMSTR 18 /* Numerical String */ 55#define ASN1_NUMSTR 18 /* Numerical String */
@@ -78,10 +79,12 @@
78#define SPNEGO_OID_LEN 7 79#define SPNEGO_OID_LEN 7
79#define NTLMSSP_OID_LEN 10 80#define NTLMSSP_OID_LEN 10
80#define KRB5_OID_LEN 7 81#define KRB5_OID_LEN 7
82#define KRB5U2U_OID_LEN 8
81#define MSKRB5_OID_LEN 7 83#define MSKRB5_OID_LEN 7
82static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; 84static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
83static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; 85static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
84static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 }; 86static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
87static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
85static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 }; 88static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
86 89
87/* 90/*
@@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
122 return 1; 125 return 1;
123} 126}
124 127
128#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */
129static unsigned char
130asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val)
131{
132 unsigned char ch;
133
134 if (ctx->pointer >= ctx->end) {
135 ctx->error = ASN1_ERR_DEC_EMPTY;
136 return 0;
137 }
138
139 ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to length octet */
140 if ((ch) == ASN1_ENUM) /* if ch value is ENUM, 0xa */
141 *val = *(++(ctx->pointer)); /* value has enum value */
142 else
143 return 0;
144
145 ctx->pointer++;
146 return 1;
147}
148#endif
149
125static unsigned char 150static unsigned char
126asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) 151asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
127{ 152{
@@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length,
476 unsigned int cls, con, tag, oidlen, rc; 501 unsigned int cls, con, tag, oidlen, rc;
477 bool use_ntlmssp = false; 502 bool use_ntlmssp = false;
478 bool use_kerberos = false; 503 bool use_kerberos = false;
504 bool use_kerberosu2u = false;
479 bool use_mskerberos = false; 505 bool use_mskerberos = false;
480 506
481 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
482
483 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ 507 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
484 508
485 asn1_open(&ctx, security_blob, length); 509 asn1_open(&ctx, security_blob, length);
@@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
515 return 0; 539 return 0;
516 } 540 }
517 541
542 /* SPNEGO */
518 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 543 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
519 cFYI(1, ("Error decoding negTokenInit")); 544 cFYI(1, ("Error decoding negTokenInit"));
520 return 0; 545 return 0;
@@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
526 return 0; 551 return 0;
527 } 552 }
528 553
554 /* negTokenInit */
529 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 555 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
530 cFYI(1, ("Error decoding negTokenInit")); 556 cFYI(1, ("Error decoding negTokenInit"));
531 return 0; 557 return 0;
@@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
537 return 0; 563 return 0;
538 } 564 }
539 565
566 /* sequence */
540 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 567 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
541 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 568 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
542 return 0; 569 return 0;
@@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
548 return 0; 575 return 0;
549 } 576 }
550 577
578 /* sequence of */
551 if (asn1_header_decode 579 if (asn1_header_decode
552 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 580 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
553 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 581 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
@@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
560 return 0; 588 return 0;
561 } 589 }
562 590
591 /* list of security mechanisms */
563 while (!asn1_eoc_decode(&ctx, sequence_end)) { 592 while (!asn1_eoc_decode(&ctx, sequence_end)) {
564 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 593 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
565 if (!rc) { 594 if (!rc) {
@@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length,
576 605
577 if (compare_oid(oid, oidlen, MSKRB5_OID, 606 if (compare_oid(oid, oidlen, MSKRB5_OID,
578 MSKRB5_OID_LEN) && 607 MSKRB5_OID_LEN) &&
579 !use_kerberos) 608 !use_mskerberos)
580 use_mskerberos = true; 609 use_mskerberos = true;
610 else if (compare_oid(oid, oidlen, KRB5U2U_OID,
611 KRB5U2U_OID_LEN) &&
612 !use_kerberosu2u)
613 use_kerberosu2u = true;
581 else if (compare_oid(oid, oidlen, KRB5_OID, 614 else if (compare_oid(oid, oidlen, KRB5_OID,
582 KRB5_OID_LEN) && 615 KRB5_OID_LEN) &&
583 !use_mskerberos) 616 !use_kerberos)
584 use_kerberos = true; 617 use_kerberos = true;
585 else if (compare_oid(oid, oidlen, NTLMSSP_OID, 618 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
586 NTLMSSP_OID_LEN)) 619 NTLMSSP_OID_LEN))
@@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
593 } 626 }
594 } 627 }
595 628
629 /* mechlistMIC */
596 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 630 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
631 /* Check if we have reached the end of the blob, but with
632 no mechListMic (e.g. NTLMSSP instead of KRB5) */
633 if (ctx.error == ASN1_ERR_DEC_EMPTY)
634 goto decode_negtoken_exit;
597 cFYI(1, ("Error decoding last part negTokenInit exit3")); 635 cFYI(1, ("Error decoding last part negTokenInit exit3"));
598 return 0; 636 return 0;
599 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 637 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
@@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
602 cls, con, tag, end, *end)); 640 cls, con, tag, end, *end));
603 return 0; 641 return 0;
604 } 642 }
643
644 /* sequence */
605 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 645 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
606 cFYI(1, ("Error decoding last part negTokenInit exit5")); 646 cFYI(1, ("Error decoding last part negTokenInit exit5"));
607 return 0; 647 return 0;
@@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
611 cls, con, tag, end, *end)); 651 cls, con, tag, end, *end));
612 } 652 }
613 653
654 /* sequence of */
614 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 655 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
615 cFYI(1, ("Error decoding last part negTokenInit exit 7")); 656 cFYI(1, ("Error decoding last part negTokenInit exit 7"));
616 return 0; 657 return 0;
@@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
619 cls, con, tag, end, *end)); 660 cls, con, tag, end, *end));
620 return 0; 661 return 0;
621 } 662 }
663
664 /* general string */
622 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 665 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
623 cFYI(1, ("Error decoding last part negTokenInit exit9")); 666 cFYI(1, ("Error decoding last part negTokenInit exit9"));
624 return 0; 667 return 0;
@@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
630 } 673 }
631 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 674 cFYI(1, ("Need to call asn1_octets_decode() function for %s",
632 ctx.pointer)); /* is this UTF-8 or ASCII? */ 675 ctx.pointer)); /* is this UTF-8 or ASCII? */
633 676decode_negtoken_exit:
634 if (use_kerberos) 677 if (use_kerberos)
635 *secType = Kerberos; 678 *secType = Kerberos;
636 else if (use_mskerberos) 679 else if (use_mskerberos)
637 *secType = MSKerberos; 680 *secType = MSKerberos;
638 else if (use_ntlmssp) 681 else if (use_ntlmssp)
639 *secType = NTLMSSP; 682 *secType = RawNTLMSSP;
640 683
641 return 1; 684 return 1;
642} 685}
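For orientation, the sequence of asn1_header_decode() calls above walks the SPNEGO negTokenInit blob layer by layer, roughly as outlined below (an illustrative summary keyed to the comments added in this hunk, not a formal ASN.1 definition):

/*
 * GSS-API framing (application, constructed)
 *   SPNEGO OID 1.3.6.1.5.5.2
 *   negTokenInit (context, constructed)
 *     SEQUENCE
 *       mechTypes: SEQUENCE OF OID
 *         e.g. MS-KRB5, KRB5, KRB5 user-to-user, NTLMSSP
 *       mechListMIC (optional): SEQUENCE -> general string
 *         may be absent, e.g. when only NTLMSSP is offered, hence the
 *         new ASN1_ERR_DEC_EMPTY early exit above
 */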
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4a4581cb2b5e..051caecf7d67 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -86,6 +86,9 @@ struct key_type cifs_spnego_key_type = {
86/* strlen of ";user=" */ 86/* strlen of ";user=" */
87#define USER_KEY_LEN 6 87#define USER_KEY_LEN 6
88 88
89/* strlen of ";pid=0x" */
90#define PID_KEY_LEN 7
91
89/* get a key struct with a SPNEGO security blob, suitable for session setup */ 92/* get a key struct with a SPNEGO security blob, suitable for session setup */
90struct key * 93struct key *
91cifs_get_spnego_key(struct cifsSesInfo *sesInfo) 94cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
@@ -103,7 +106,8 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
103 IP_KEY_LEN + INET6_ADDRSTRLEN + 106 IP_KEY_LEN + INET6_ADDRSTRLEN +
104 MAX_MECH_STR_LEN + 107 MAX_MECH_STR_LEN +
105 UID_KEY_LEN + (sizeof(uid_t) * 2) + 108 UID_KEY_LEN + (sizeof(uid_t) * 2) +
106 USER_KEY_LEN + strlen(sesInfo->userName) + 1; 109 USER_KEY_LEN + strlen(sesInfo->userName) +
110 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
107 111
108 spnego_key = ERR_PTR(-ENOMEM); 112 spnego_key = ERR_PTR(-ENOMEM);
109 description = kzalloc(desc_len, GFP_KERNEL); 113 description = kzalloc(desc_len, GFP_KERNEL);
@@ -141,6 +145,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
141 dp = description + strlen(description); 145 dp = description + strlen(description);
142 sprintf(dp, ";user=%s", sesInfo->userName); 146 sprintf(dp, ";user=%s", sesInfo->userName);
143 147
148 dp = description + strlen(description);
149 sprintf(dp, ";pid=0x%x", current->pid);
150
144 cFYI(1, ("key description = %s", description)); 151 cFYI(1, ("key description = %s", description));
145 spnego_key = request_key(&cifs_spnego_key_type, description, ""); 152 spnego_key = request_key(&cifs_spnego_key_type, description, "");
146 153
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1403b5d86a73..6941c22398a6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -327,7 +327,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl)
327 327
328static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, 328static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
329 struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, 329 struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
330 struct inode *inode) 330 struct cifs_fattr *fattr)
331{ 331{
332 int i; 332 int i;
333 int num_aces = 0; 333 int num_aces = 0;
@@ -340,7 +340,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
340 if (!pdacl) { 340 if (!pdacl) {
341 /* no DACL in the security descriptor, set 341 /* no DACL in the security descriptor, set
342 all the permissions for user/group/other */ 342 all the permissions for user/group/other */
343 inode->i_mode |= S_IRWXUGO; 343 fattr->cf_mode |= S_IRWXUGO;
344 return; 344 return;
345 } 345 }
346 346
@@ -357,7 +357,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
357 /* reset rwx permissions for user/group/other. 357 /* reset rwx permissions for user/group/other.
358 Also, if num_aces is 0 i.e. DACL has no ACEs, 358 Also, if num_aces is 0 i.e. DACL has no ACEs,
359 user/group/other have no permissions */ 359 user/group/other have no permissions */
360 inode->i_mode &= ~(S_IRWXUGO); 360 fattr->cf_mode &= ~(S_IRWXUGO);
361 361
362 acl_base = (char *)pdacl; 362 acl_base = (char *)pdacl;
363 acl_size = sizeof(struct cifs_acl); 363 acl_size = sizeof(struct cifs_acl);
@@ -379,17 +379,17 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
379 if (compare_sids(&(ppace[i]->sid), pownersid)) 379 if (compare_sids(&(ppace[i]->sid), pownersid))
380 access_flags_to_mode(ppace[i]->access_req, 380 access_flags_to_mode(ppace[i]->access_req,
381 ppace[i]->type, 381 ppace[i]->type,
382 &(inode->i_mode), 382 &fattr->cf_mode,
383 &user_mask); 383 &user_mask);
384 if (compare_sids(&(ppace[i]->sid), pgrpsid)) 384 if (compare_sids(&(ppace[i]->sid), pgrpsid))
385 access_flags_to_mode(ppace[i]->access_req, 385 access_flags_to_mode(ppace[i]->access_req,
386 ppace[i]->type, 386 ppace[i]->type,
387 &(inode->i_mode), 387 &fattr->cf_mode,
388 &group_mask); 388 &group_mask);
389 if (compare_sids(&(ppace[i]->sid), &sid_everyone)) 389 if (compare_sids(&(ppace[i]->sid), &sid_everyone))
390 access_flags_to_mode(ppace[i]->access_req, 390 access_flags_to_mode(ppace[i]->access_req,
391 ppace[i]->type, 391 ppace[i]->type,
392 &(inode->i_mode), 392 &fattr->cf_mode,
393 &other_mask); 393 &other_mask);
394 394
395/* memcpy((void *)(&(cifscred->aces[i])), 395/* memcpy((void *)(&(cifscred->aces[i])),
@@ -464,7 +464,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
464 464
465/* Convert CIFS ACL to POSIX form */ 465/* Convert CIFS ACL to POSIX form */
466static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, 466static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
467 struct inode *inode) 467 struct cifs_fattr *fattr)
468{ 468{
469 int rc; 469 int rc;
470 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 470 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
@@ -472,7 +472,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
472 char *end_of_acl = ((char *)pntsd) + acl_len; 472 char *end_of_acl = ((char *)pntsd) + acl_len;
473 __u32 dacloffset; 473 __u32 dacloffset;
474 474
475 if ((inode == NULL) || (pntsd == NULL)) 475 if (pntsd == NULL)
476 return -EIO; 476 return -EIO;
477 477
478 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + 478 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
@@ -497,7 +497,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
497 497
498 if (dacloffset) 498 if (dacloffset)
499 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 499 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
500 group_sid_ptr, inode); 500 group_sid_ptr, fattr);
501 else 501 else
502 cFYI(1, ("no ACL")); /* BB grant all or default perms? */ 502 cFYI(1, ("no ACL")); /* BB grant all or default perms? */
503 503
@@ -508,7 +508,6 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
508 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, 508 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
509 sizeof(struct cifs_sid)); */ 509 sizeof(struct cifs_sid)); */
510 510
511
512 return 0; 511 return 0;
513} 512}
514 513
@@ -671,8 +670,9 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
671} 670}
672 671
673/* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ 672
674void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, 673void
675 const char *path, const __u16 *pfid) 674cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
675 struct inode *inode, const char *path, const __u16 *pfid)
676{ 676{
677 struct cifs_ntsd *pntsd = NULL; 677 struct cifs_ntsd *pntsd = NULL;
678 u32 acllen = 0; 678 u32 acllen = 0;
@@ -687,7 +687,7 @@ void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
687 687
688 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ 688 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
689 if (pntsd) 689 if (pntsd)
690 rc = parse_sec_desc(pntsd, acllen, inode); 690 rc = parse_sec_desc(pntsd, acllen, fattr);
691 if (rc) 691 if (rc)
692 cFYI(1, ("parse sec desc failed rc = %d", rc)); 692 cFYI(1, ("parse sec desc failed rc = %d", rc));
693 693
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0d92114195ab..44f30504b82d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -308,7 +308,6 @@ cifs_alloc_inode(struct super_block *sb)
308 if (!cifs_inode) 308 if (!cifs_inode)
309 return NULL; 309 return NULL;
310 cifs_inode->cifsAttrs = 0x20; /* default */ 310 cifs_inode->cifsAttrs = 0x20; /* default */
311 atomic_set(&cifs_inode->inUse, 0);
312 cifs_inode->time = 0; 311 cifs_inode->time = 0;
313 cifs_inode->write_behind_rc = 0; 312 cifs_inode->write_behind_rc = 0;
314 /* Until the file is open and we have gotten oplock 313 /* Until the file is open and we have gotten oplock
@@ -333,6 +332,27 @@ cifs_destroy_inode(struct inode *inode)
333 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); 332 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
334} 333}
335 334
335static void
336cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
337{
338 seq_printf(s, ",addr=");
339
340 switch (server->addr.sockAddr.sin_family) {
341 case AF_INET:
342 seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
343 break;
344 case AF_INET6:
345 seq_printf(s, "%pI6",
346 &server->addr.sockAddr6.sin6_addr.s6_addr);
347 if (server->addr.sockAddr6.sin6_scope_id)
348 seq_printf(s, "%%%u",
349 server->addr.sockAddr6.sin6_scope_id);
350 break;
351 default:
352 seq_printf(s, "(unknown)");
353 }
354}
355
336/* 356/*
337 * cifs_show_options() is for displaying mount options in /proc/mounts. 357 * cifs_show_options() is for displaying mount options in /proc/mounts.
338 * Not all settable options are displayed but most of the important 358 * Not all settable options are displayed but most of the important
@@ -343,83 +363,64 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
343{ 363{
344 struct cifs_sb_info *cifs_sb; 364 struct cifs_sb_info *cifs_sb;
345 struct cifsTconInfo *tcon; 365 struct cifsTconInfo *tcon;
346 struct TCP_Server_Info *server;
347 366
348 cifs_sb = CIFS_SB(m->mnt_sb); 367 cifs_sb = CIFS_SB(m->mnt_sb);
368 tcon = cifs_sb->tcon;
349 369
350 if (cifs_sb) { 370 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
351 tcon = cifs_sb->tcon; 371 if (tcon->ses->userName)
352 if (tcon) { 372 seq_printf(s, ",username=%s", tcon->ses->userName);
353 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); 373 if (tcon->ses->domainName)
354 if (tcon->ses) { 374 seq_printf(s, ",domain=%s", tcon->ses->domainName);
355 if (tcon->ses->userName) 375
356 seq_printf(s, ",username=%s", 376 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
357 tcon->ses->userName); 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
358 if (tcon->ses->domainName) 378 seq_printf(s, ",forceuid");
359 seq_printf(s, ",domain=%s", 379
360 tcon->ses->domainName); 380 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
361 server = tcon->ses->server; 381 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
362 if (server) { 382 seq_printf(s, ",forcegid");
363 seq_printf(s, ",addr="); 383
364 switch (server->addr.sockAddr6. 384 cifs_show_address(s, tcon->ses->server);
365 sin6_family) { 385
366 case AF_INET6: 386 if (!tcon->unix_ext)
367 seq_printf(s, "%pI6", 387 seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
368 &server->addr.sockAddr6.sin6_addr);
369 break;
370 case AF_INET:
371 seq_printf(s, "%pI4",
372 &server->addr.sockAddr.sin_addr.s_addr);
373 break;
374 }
375 }
376 }
377 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
378 !(tcon->unix_ext))
379 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
380 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
381 !(tcon->unix_ext))
382 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
383 if (!tcon->unix_ext) {
384 seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
385 cifs_sb->mnt_file_mode, 388 cifs_sb->mnt_file_mode,
386 cifs_sb->mnt_dir_mode); 389 cifs_sb->mnt_dir_mode);
387 } 390 if (tcon->seal)
388 if (tcon->seal) 391 seq_printf(s, ",seal");
389 seq_printf(s, ",seal"); 392 if (tcon->nocase)
390 if (tcon->nocase) 393 seq_printf(s, ",nocase");
391 seq_printf(s, ",nocase"); 394 if (tcon->retry)
392 if (tcon->retry) 395 seq_printf(s, ",hard");
393 seq_printf(s, ",hard"); 396 if (cifs_sb->prepath)
394 } 397 seq_printf(s, ",prepath=%s", cifs_sb->prepath);
395 if (cifs_sb->prepath) 398 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
396 seq_printf(s, ",prepath=%s", cifs_sb->prepath); 399 seq_printf(s, ",posixpaths");
397 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) 400 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
398 seq_printf(s, ",posixpaths"); 401 seq_printf(s, ",setuids");
399 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) 402 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
400 seq_printf(s, ",setuids"); 403 seq_printf(s, ",serverino");
401 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) 404 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
402 seq_printf(s, ",serverino"); 405 seq_printf(s, ",directio");
403 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) 406 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
404 seq_printf(s, ",directio"); 407 seq_printf(s, ",nouser_xattr");
405 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 408 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
406 seq_printf(s, ",nouser_xattr"); 409 seq_printf(s, ",mapchars");
407 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
408 seq_printf(s, ",mapchars"); 411 seq_printf(s, ",sfu");
409 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) 412 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
410 seq_printf(s, ",sfu"); 413 seq_printf(s, ",nobrl");
411 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 414 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
412 seq_printf(s, ",nobrl"); 415 seq_printf(s, ",cifsacl");
413 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 416 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
414 seq_printf(s, ",cifsacl"); 417 seq_printf(s, ",dynperm");
415 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 418 if (m->mnt_sb->s_flags & MS_POSIXACL)
416 seq_printf(s, ",dynperm"); 419 seq_printf(s, ",acl");
417 if (m->mnt_sb->s_flags & MS_POSIXACL) 420
418 seq_printf(s, ",acl"); 421 seq_printf(s, ",rsize=%d", cifs_sb->rsize);
419 422 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
420 seq_printf(s, ",rsize=%d", cifs_sb->rsize); 423
421 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
422 }
423 return 0; 424 return 0;
424} 425}
425 426
@@ -535,9 +536,14 @@ static void cifs_umount_begin(struct super_block *sb)
535 if (tcon == NULL) 536 if (tcon == NULL)
536 return; 537 return;
537 538
538 lock_kernel();
539 read_lock(&cifs_tcp_ses_lock); 539 read_lock(&cifs_tcp_ses_lock);
540 if (tcon->tc_count == 1) 540 if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) {
541 /* we have other mounts to same share or we have
542 already tried to force umount this and woken up
543 all waiting network requests, nothing to do */
544 read_unlock(&cifs_tcp_ses_lock);
545 return;
546 } else if (tcon->tc_count == 1)
541 tcon->tidStatus = CifsExiting; 547 tcon->tidStatus = CifsExiting;
542 read_unlock(&cifs_tcp_ses_lock); 548 read_unlock(&cifs_tcp_ses_lock);
543 549
@@ -552,9 +558,7 @@ static void cifs_umount_begin(struct super_block *sb)
552 wake_up_all(&tcon->ses->server->response_q); 558 wake_up_all(&tcon->ses->server->response_q);
553 msleep(1); 559 msleep(1);
554 } 560 }
555/* BB FIXME - finish add checks for tidStatus BB */
556 561
557 unlock_kernel();
558 return; 562 return;
559} 563}
560 564
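With the cifs_show_options() rewrite, unc, username, uid, gid, the server address and rsize/wsize are now printed unconditionally (forceuid/forcegid become separate flags, and an IPv6 scope id is appended when present), in the order of the seq_printf() calls above. An illustrative /proc/mounts fragment, with every value hypothetical:

//server/share /mnt/cifs cifs rw,unc=\\server\share,username=testuser,uid=0,gid=0,addr=192.168.1.10,file_mode=0755,dir_mode=0755,serverino,rsize=16384,wsize=57344 0 0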
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9570a0e8023f..6c170948300d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -24,6 +24,19 @@
24 24
25#define ROOT_I 2 25#define ROOT_I 2
26 26
27/*
28 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
29 * so that it will fit.
30 */
31static inline ino_t
32cifs_uniqueid_to_ino_t(u64 fileid)
33{
34 ino_t ino = (ino_t) fileid;
35 if (sizeof(ino_t) < sizeof(u64))
36 ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
37 return ino;
38}
39
27extern struct file_system_type cifs_fs_type; 40extern struct file_system_type cifs_fs_type;
28extern const struct address_space_operations cifs_addr_ops; 41extern const struct address_space_operations cifs_addr_ops;
29extern const struct address_space_operations cifs_addr_ops_smallbuf; 42extern const struct address_space_operations cifs_addr_ops_smallbuf;
@@ -100,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
100extern const struct export_operations cifs_export_ops; 113extern const struct export_operations cifs_export_ops;
101#endif /* EXPERIMENTAL */ 114#endif /* EXPERIMENTAL */
102 115
103#define CIFS_VERSION "1.59" 116#define CIFS_VERSION "1.60"
104#endif /* _CIFSFS_H */ 117#endif /* _CIFSFS_H */
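cifs_uniqueid_to_ino_t() above folds a 64-bit server UniqueId into ino_t by XORing the upper half into the lower half when ino_t is only 32 bits wide. A hedged usage sketch (the helper name is hypothetical):

/* sketch: derive i_ino from a server-reported unique id */
static inline void example_set_ino(struct inode *inode, u64 uniqueid)
{
	/* on a 32-bit ino_t, 0x1234567800000042ULL -> 0x42 ^ 0x12345678 = 0x1234563a */
	inode->i_ino = cifs_uniqueid_to_ino_t(uniqueid);
}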
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a61ab772c6f6..63f6cdfa5638 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -83,7 +83,7 @@ enum securityEnum {
83 NTLM, /* Legacy NTLM012 auth with NTLM hash */ 83 NTLM, /* Legacy NTLM012 auth with NTLM hash */
84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
85 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ 85 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
86 NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */ 86/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
87 Kerberos, /* Kerberos via SPNEGO */ 87 Kerberos, /* Kerberos via SPNEGO */
88 MSKerberos, /* MS Kerberos via SPNEGO */ 88 MSKerberos, /* MS Kerberos via SPNEGO */
89}; 89};
@@ -364,13 +364,13 @@ struct cifsInodeInfo {
364 struct list_head openFileList; 364 struct list_head openFileList;
365 int write_behind_rc; 365 int write_behind_rc;
366 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 366 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
367 atomic_t inUse; /* num concurrent users (local openers cifs) of file*/
368 unsigned long time; /* jiffies of last update/check of inode */ 367 unsigned long time; /* jiffies of last update/check of inode */
369 bool clientCanCacheRead:1; /* read oplock */ 368 bool clientCanCacheRead:1; /* read oplock */
370 bool clientCanCacheAll:1; /* read and writebehind oplock */ 369 bool clientCanCacheAll:1; /* read and writebehind oplock */
371 bool oplockPending:1; 370 bool oplockPending:1;
372 bool delete_pending:1; /* DELETE_ON_CLOSE is set */ 371 bool delete_pending:1; /* DELETE_ON_CLOSE is set */
373 u64 server_eof; /* current file size on server */ 372 u64 server_eof; /* current file size on server */
373 u64 uniqueid; /* server inode number */
374 struct inode vfs_inode; 374 struct inode vfs_inode;
375}; 375};
376 376
@@ -472,6 +472,32 @@ struct dfs_info3_param {
472 char *node_name; 472 char *node_name;
473}; 473};
474 474
475/*
476 * common struct for holding inode info when searching for or updating an
477 * inode with new info
478 */
479
480#define CIFS_FATTR_DFS_REFERRAL 0x1
481#define CIFS_FATTR_DELETE_PENDING 0x2
482#define CIFS_FATTR_NEED_REVAL 0x4
483
484struct cifs_fattr {
485 u32 cf_flags;
486 u32 cf_cifsattrs;
487 u64 cf_uniqueid;
488 u64 cf_eof;
489 u64 cf_bytes;
490 uid_t cf_uid;
491 gid_t cf_gid;
492 umode_t cf_mode;
493 dev_t cf_rdev;
494 unsigned int cf_nlink;
495 unsigned int cf_dtype;
496 struct timespec cf_atime;
497 struct timespec cf_mtime;
498 struct timespec cf_ctime;
499};
500
475static inline void free_dfs_info_param(struct dfs_info3_param *param) 501static inline void free_dfs_info_param(struct dfs_info3_param *param)
476{ 502{
477 if (param) { 503 if (param) {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index a785f69dbc9f..2d07f890a842 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2328,19 +2328,7 @@ struct file_attrib_tag {
2328typedef struct { 2328typedef struct {
2329 __le32 NextEntryOffset; 2329 __le32 NextEntryOffset;
2330 __u32 ResumeKey; /* as with FileIndex - no need to convert */ 2330 __u32 ResumeKey; /* as with FileIndex - no need to convert */
2331 __le64 EndOfFile; 2331 FILE_UNIX_BASIC_INFO basic;
2332 __le64 NumOfBytes;
2333 __le64 LastStatusChange; /*SNIA specs DCE time for the 3 time fields */
2334 __le64 LastAccessTime;
2335 __le64 LastModificationTime;
2336 __le64 Uid;
2337 __le64 Gid;
2338 __le32 Type;
2339 __le64 DevMajor;
2340 __le64 DevMinor;
2341 __le64 UniqueId;
2342 __le64 Permissions;
2343 __le64 Nlinks;
2344 char FileName[1]; 2332 char FileName[1];
2345} __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */ 2333} __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */
2346 2334
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f9452329bcce..da8fbf565991 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
74extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 74extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
75extern int decode_negTokenInit(unsigned char *security_blob, int length, 75extern int decode_negTokenInit(unsigned char *security_blob, int length,
76 enum securityEnum *secType); 76 enum securityEnum *secType);
77extern int cifs_inet_pton(const int, const char *source, void *dst); 77extern int cifs_convert_address(char *src, void *dst);
78extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 78extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
79extern void header_assemble(struct smb_hdr *, char /* command */ , 79extern void header_assemble(struct smb_hdr *, char /* command */ ,
80 const struct cifsTconInfo *, int /* length of 80 const struct cifsTconInfo *, int /* length of
@@ -98,9 +98,13 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
98extern int cifs_posix_open(char *full_path, struct inode **pinode, 98extern int cifs_posix_open(char *full_path, struct inode **pinode,
99 struct super_block *sb, int mode, int oflags, 99 struct super_block *sb, int mode, int oflags,
100 int *poplock, __u16 *pnetfid, int xid); 100 int *poplock, __u16 *pnetfid, int xid);
101extern void posix_fill_in_inode(struct inode *tmp_inode, 101extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
102 FILE_UNIX_BASIC_INFO *pData, int isNewInode); 102 FILE_UNIX_BASIC_INFO *info,
103extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); 103 struct cifs_sb_info *cifs_sb);
104extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
105extern struct inode *cifs_iget(struct super_block *sb,
106 struct cifs_fattr *fattr);
107
104extern int cifs_get_inode_info(struct inode **pinode, 108extern int cifs_get_inode_info(struct inode **pinode,
105 const unsigned char *search_path, 109 const unsigned char *search_path,
106 FILE_ALL_INFO *pfile_info, 110 FILE_ALL_INFO *pfile_info,
@@ -108,8 +112,9 @@ extern int cifs_get_inode_info(struct inode **pinode,
108extern int cifs_get_inode_info_unix(struct inode **pinode, 112extern int cifs_get_inode_info_unix(struct inode **pinode,
109 const unsigned char *search_path, 113 const unsigned char *search_path,
110 struct super_block *sb, int xid); 114 struct super_block *sb, int xid);
111extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, 115extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
112 const char *path, const __u16 *pfid); 116 struct cifs_fattr *fattr, struct inode *inode,
117 const char *path, const __u16 *pfid);
113extern int mode_to_acl(struct inode *inode, const char *path, __u64); 118extern int mode_to_acl(struct inode *inode, const char *path, __u64);
114 119
115extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 120extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
@@ -215,7 +220,11 @@ struct cifs_unix_set_info_args {
215 dev_t device; 220 dev_t device;
216}; 221};
217 222
218extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon, 223extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
224 const struct cifs_unix_set_info_args *args,
225 u16 fid, u32 pid_of_opener);
226
227extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon,
219 char *fileName, 228 char *fileName,
220 const struct cifs_unix_set_info_args *args, 229 const struct cifs_unix_set_info_args *args,
221 const struct nls_table *nls_codepage, 230 const struct nls_table *nls_codepage,
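The prototypes above replace the old fill-the-inode-in-place helpers (posix_fill_in_inode, cifs_new_inode) with a two-step flow: translate the wire format into a struct cifs_fattr, then look up or update the inode from it. A rough sketch of that flow, using only the declarations above (the wrapper name is hypothetical):

/* hedged sketch of the fattr-based lookup path */
static struct inode *example_unix_iget(struct super_block *sb,
				       FILE_UNIX_BASIC_INFO *info)
{
	struct cifs_fattr fattr;

	/* convert SMB unix info into the common cifs_fattr form */
	cifs_unix_basic_to_fattr(&fattr, info, CIFS_SB(sb));

	/* find or allocate the matching inode and apply the attributes */
	return cifs_iget(sb, &fattr);
}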
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b84c61d5bca4..922f5fe2084c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
594 else if (secFlags & CIFSSEC_MAY_KRB5) 594 else if (secFlags & CIFSSEC_MAY_KRB5)
595 server->secType = Kerberos; 595 server->secType = Kerberos;
596 else if (secFlags & CIFSSEC_MAY_NTLMSSP) 596 else if (secFlags & CIFSSEC_MAY_NTLMSSP)
597 server->secType = NTLMSSP; 597 server->secType = RawNTLMSSP;
598 else if (secFlags & CIFSSEC_MAY_LANMAN) 598 else if (secFlags & CIFSSEC_MAY_LANMAN)
599 server->secType = LANMAN; 599 server->secType = LANMAN;
600/* #ifdef CONFIG_CIFS_EXPERIMENTAL 600/* #ifdef CONFIG_CIFS_EXPERIMENTAL
@@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
729 * the tcon is no longer on the list, so no need to take lock before 729 * the tcon is no longer on the list, so no need to take lock before
730 * checking this. 730 * checking this.
731 */ 731 */
732 if (tcon->need_reconnect) 732 if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
733 return 0; 733 return 0;
734 734
735 rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, 735 rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
@@ -5074,10 +5074,114 @@ SetAttrLgcyRetry:
5074} 5074}
5075#endif /* temporarily unneeded SetAttr legacy function */ 5075#endif /* temporarily unneeded SetAttr legacy function */
5076 5076
5077static void
5078cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5079 const struct cifs_unix_set_info_args *args)
5080{
5081 u64 mode = args->mode;
5082
5083 /*
5084 * Samba server ignores set of file size to zero due to bugs in some
5085 * older clients, but we should be precise - we use SetFileSize to
5086 * set file size and do not want to truncate file size to zero
5087 * accidentally as happened on one Samba server beta by putting
5088 * zero instead of -1 here
5089 */
5090 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5091 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5092 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5093 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5094 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5095 data_offset->Uid = cpu_to_le64(args->uid);
5096 data_offset->Gid = cpu_to_le64(args->gid);
5097 /* better to leave device as zero when it is */
5098 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5099 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5100 data_offset->Permissions = cpu_to_le64(mode);
5101
5102 if (S_ISREG(mode))
5103 data_offset->Type = cpu_to_le32(UNIX_FILE);
5104 else if (S_ISDIR(mode))
5105 data_offset->Type = cpu_to_le32(UNIX_DIR);
5106 else if (S_ISLNK(mode))
5107 data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
5108 else if (S_ISCHR(mode))
5109 data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
5110 else if (S_ISBLK(mode))
5111 data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
5112 else if (S_ISFIFO(mode))
5113 data_offset->Type = cpu_to_le32(UNIX_FIFO);
5114 else if (S_ISSOCK(mode))
5115 data_offset->Type = cpu_to_le32(UNIX_SOCKET);
5116}
5117
5077int 5118int
5078CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, 5119CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5079 const struct cifs_unix_set_info_args *args, 5120 const struct cifs_unix_set_info_args *args,
5080 const struct nls_table *nls_codepage, int remap) 5121 u16 fid, u32 pid_of_opener)
5122{
5123 struct smb_com_transaction2_sfi_req *pSMB = NULL;
5124 FILE_UNIX_BASIC_INFO *data_offset;
5125 int rc = 0;
5126 u16 params, param_offset, offset, byte_count, count;
5127
5128 cFYI(1, ("Set Unix Info (via SetFileInfo)"));
5129 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
5130
5131 if (rc)
5132 return rc;
5133
5134 pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
5135 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
5136
5137 params = 6;
5138 pSMB->MaxSetupCount = 0;
5139 pSMB->Reserved = 0;
5140 pSMB->Flags = 0;
5141 pSMB->Timeout = 0;
5142 pSMB->Reserved2 = 0;
5143 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
5144 offset = param_offset + params;
5145
5146 data_offset = (FILE_UNIX_BASIC_INFO *)
5147 ((char *)(&pSMB->hdr.Protocol) + offset);
5148 count = sizeof(FILE_UNIX_BASIC_INFO);
5149
5150 pSMB->MaxParameterCount = cpu_to_le16(2);
5151 /* BB find max SMB PDU from sess */
5152 pSMB->MaxDataCount = cpu_to_le16(1000);
5153 pSMB->SetupCount = 1;
5154 pSMB->Reserved3 = 0;
5155 pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
5156 byte_count = 3 /* pad */ + params + count;
5157 pSMB->DataCount = cpu_to_le16(count);
5158 pSMB->ParameterCount = cpu_to_le16(params);
5159 pSMB->TotalDataCount = pSMB->DataCount;
5160 pSMB->TotalParameterCount = pSMB->ParameterCount;
5161 pSMB->ParameterOffset = cpu_to_le16(param_offset);
5162 pSMB->DataOffset = cpu_to_le16(offset);
5163 pSMB->Fid = fid;
5164 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5165 pSMB->Reserved4 = 0;
5166 pSMB->hdr.smb_buf_length += byte_count;
5167 pSMB->ByteCount = cpu_to_le16(byte_count);
5168
5169 cifs_fill_unix_set_info(data_offset, args);
5170
5171 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
5172 if (rc)
5173 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc));
5174
5175 /* Note: On -EAGAIN error only caller can retry on handle based calls
5176 since file handle passed in no longer valid */
5177
5178 return rc;
5179}
5180
5181int
5182CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5183 const struct cifs_unix_set_info_args *args,
5184 const struct nls_table *nls_codepage, int remap)
5081{ 5185{
5082 TRANSACTION2_SPI_REQ *pSMB = NULL; 5186 TRANSACTION2_SPI_REQ *pSMB = NULL;
5083 TRANSACTION2_SPI_RSP *pSMBr = NULL; 5187 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5086,7 +5190,6 @@ CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5086 int bytes_returned = 0; 5190 int bytes_returned = 0;
5087 FILE_UNIX_BASIC_INFO *data_offset; 5191 FILE_UNIX_BASIC_INFO *data_offset;
5088 __u16 params, param_offset, offset, count, byte_count; 5192 __u16 params, param_offset, offset, count, byte_count;
5089 __u64 mode = args->mode;
5090 5193
5091 cFYI(1, ("In SetUID/GID/Mode")); 5194 cFYI(1, ("In SetUID/GID/Mode"));
5092setPermsRetry: 5195setPermsRetry:
@@ -5137,38 +5240,8 @@ setPermsRetry:
5137 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5240 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5138 pSMB->Reserved4 = 0; 5241 pSMB->Reserved4 = 0;
5139 pSMB->hdr.smb_buf_length += byte_count; 5242 pSMB->hdr.smb_buf_length += byte_count;
5140 /* Samba server ignores set of file size to zero due to bugs in some
5141 older clients, but we should be precise - we use SetFileSize to
5142 set file size and do not want to truncate file size to zero
5143 accidentally as happened on one Samba server beta by putting
5144 zero instead of -1 here */
5145 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5146 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5147 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5148 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5149 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5150 data_offset->Uid = cpu_to_le64(args->uid);
5151 data_offset->Gid = cpu_to_le64(args->gid);
5152 /* better to leave device as zero when it is */
5153 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5154 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5155 data_offset->Permissions = cpu_to_le64(mode);
5156
5157 if (S_ISREG(mode))
5158 data_offset->Type = cpu_to_le32(UNIX_FILE);
5159 else if (S_ISDIR(mode))
5160 data_offset->Type = cpu_to_le32(UNIX_DIR);
5161 else if (S_ISLNK(mode))
5162 data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
5163 else if (S_ISCHR(mode))
5164 data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
5165 else if (S_ISBLK(mode))
5166 data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
5167 else if (S_ISFIFO(mode))
5168 data_offset->Type = cpu_to_le32(UNIX_FIFO);
5169 else if (S_ISSOCK(mode))
5170 data_offset->Type = cpu_to_le32(UNIX_SOCKET);
5171 5243
5244 cifs_fill_unix_set_info(data_offset, args);
5172 5245
5173 pSMB->ByteCount = cpu_to_le16(byte_count); 5246 pSMB->ByteCount = cpu_to_le16(byte_count);
5174 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5247 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
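
The hunks above split the old CIFSSMBUnixSetInfo() into a shared marshalling helper, cifs_fill_unix_set_info(), plus two transport wrappers: CIFSSMBUnixSetFileInfo() for handle-based SET_FILE_INFORMATION and CIFSSMBUnixSetPathInfo() for path-based calls. The fragment below is a minimal illustrative sketch of how a caller fills struct cifs_unix_set_info_args and picks a wrapper; the helper name set_times_example() is hypothetical, while the two CIFSSMB* signatures and the NO_CHANGE_64 convention come from the patch itself.

/* Illustrative only: update atime/mtime on a path while leaving mode,
 * owner, ctime and device untouched (NO_CHANGE_64 / 0), mirroring the
 * call sites in dir.c and file.c of this series. */
static int set_times_example(int xid, struct cifsTconInfo *tcon,
			     char *full_path, struct cifs_sb_info *cifs_sb,
			     __u64 atime, __u64 mtime)
{
	struct cifs_unix_set_info_args args = {
		.mode	= NO_CHANGE_64,
		.uid	= NO_CHANGE_64,
		.gid	= NO_CHANGE_64,
		.ctime	= NO_CHANGE_64,
		.atime	= atime,
		.mtime	= mtime,
		.device	= 0,
	};

	/* path-based variant; a caller already holding an open fid would
	 * use CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid, pid) instead */
	return CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
				      cifs_sb->local_nls,
				      cifs_sb->mnt_cifs_flags &
					CIFS_MOUNT_MAP_SPECIAL_CHR);
}
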
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 97f4311b9a8e..e16d7592116a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -70,7 +70,6 @@ struct smb_vol {
70 mode_t file_mode; 70 mode_t file_mode;
71 mode_t dir_mode; 71 mode_t dir_mode;
72 unsigned secFlg; 72 unsigned secFlg;
73 bool rw:1;
74 bool retry:1; 73 bool retry:1;
75 bool intr:1; 74 bool intr:1;
76 bool setuids:1; 75 bool setuids:1;
@@ -832,7 +831,6 @@ cifs_parse_mount_options(char *options, const char *devname,
832 vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; 831 vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
833 832
834 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 833 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
835 vol->rw = true;
836 /* default is always to request posix paths. */ 834 /* default is always to request posix paths. */
837 vol->posix_paths = 1; 835 vol->posix_paths = 1;
838 /* default to using server inode numbers where available */ 836 /* default to using server inode numbers where available */
@@ -1199,7 +1197,9 @@ cifs_parse_mount_options(char *options, const char *devname,
1199 } else if (strnicmp(data, "guest", 5) == 0) { 1197 } else if (strnicmp(data, "guest", 5) == 0) {
1200 /* ignore */ 1198 /* ignore */
1201 } else if (strnicmp(data, "rw", 2) == 0) { 1199 } else if (strnicmp(data, "rw", 2) == 0) {
1202 vol->rw = true; 1200 /* ignore */
1201 } else if (strnicmp(data, "ro", 2) == 0) {
1202 /* ignore */
1203 } else if (strnicmp(data, "noblocksend", 11) == 0) { 1203 } else if (strnicmp(data, "noblocksend", 11) == 0) {
1204 vol->noblocksnd = 1; 1204 vol->noblocksnd = 1;
1205 } else if (strnicmp(data, "noautotune", 10) == 0) { 1205 } else if (strnicmp(data, "noautotune", 10) == 0) {
@@ -1218,8 +1218,6 @@ cifs_parse_mount_options(char *options, const char *devname,
1218 parse these options again and set anything and it 1218 parse these options again and set anything and it
1219 is ok to just ignore them */ 1219 is ok to just ignore them */
1220 continue; 1220 continue;
1221 } else if (strnicmp(data, "ro", 2) == 0) {
1222 vol->rw = false;
1223 } else if (strnicmp(data, "hard", 4) == 0) { 1221 } else if (strnicmp(data, "hard", 4) == 0) {
1224 vol->retry = 1; 1222 vol->retry = 1;
1225 } else if (strnicmp(data, "soft", 4) == 0) { 1223 } else if (strnicmp(data, "soft", 4) == 0) {
@@ -1386,8 +1384,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
1386 server->addr.sockAddr.sin_addr.s_addr)) 1384 server->addr.sockAddr.sin_addr.s_addr))
1387 continue; 1385 continue;
1388 else if (addr->ss_family == AF_INET6 && 1386 else if (addr->ss_family == AF_INET6 &&
1389 !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, 1387 (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
1390 &addr6->sin6_addr)) 1388 &addr6->sin6_addr) ||
1389 server->addr.sockAddr6.sin6_scope_id !=
1390 addr6->sin6_scope_id))
1391 continue; 1391 continue;
1392 1392
1393 ++server->srv_count; 1393 ++server->srv_count;
@@ -1433,28 +1433,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1433 1433
1434 memset(&addr, 0, sizeof(struct sockaddr_storage)); 1434 memset(&addr, 0, sizeof(struct sockaddr_storage));
1435 1435
1436 if (volume_info->UNCip && volume_info->UNC) { 1436 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip));
1437 rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
1438 &sin_server->sin_addr.s_addr);
1439
1440 if (rc <= 0) {
1441 /* not ipv4 address, try ipv6 */
1442 rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
1443 &sin_server6->sin6_addr.in6_u);
1444 if (rc > 0)
1445 addr.ss_family = AF_INET6;
1446 } else {
1447 addr.ss_family = AF_INET;
1448 }
1449 1437
1450 if (rc <= 0) { 1438 if (volume_info->UNCip && volume_info->UNC) {
1439 rc = cifs_convert_address(volume_info->UNCip, &addr);
1440 if (!rc) {
1451 /* we failed translating address */ 1441 /* we failed translating address */
1452 rc = -EINVAL; 1442 rc = -EINVAL;
1453 goto out_err; 1443 goto out_err;
1454 } 1444 }
1455
1456 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC,
1457 volume_info->UNCip));
1458 } else if (volume_info->UNCip) { 1445 } else if (volume_info->UNCip) {
1459 /* BB using ip addr as tcp_ses name to connect to the 1446 /* BB using ip addr as tcp_ses name to connect to the
1460 DFS root below */ 1447 DFS root below */
@@ -1513,14 +1500,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1513 cFYI(1, ("attempting ipv6 connect")); 1500 cFYI(1, ("attempting ipv6 connect"));
1514 /* BB should we allow ipv6 on port 139? */ 1501 /* BB should we allow ipv6 on port 139? */
1515 /* other OS never observed in Wild doing 139 with v6 */ 1502 /* other OS never observed in Wild doing 139 with v6 */
1503 sin_server6->sin6_port = htons(volume_info->port);
1516 memcpy(&tcp_ses->addr.sockAddr6, sin_server6, 1504 memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
1517 sizeof(struct sockaddr_in6)); 1505 sizeof(struct sockaddr_in6));
1518 sin_server6->sin6_port = htons(volume_info->port);
1519 rc = ipv6_connect(tcp_ses); 1506 rc = ipv6_connect(tcp_ses);
1520 } else { 1507 } else {
1508 sin_server->sin_port = htons(volume_info->port);
1521 memcpy(&tcp_ses->addr.sockAddr, sin_server, 1509 memcpy(&tcp_ses->addr.sockAddr, sin_server,
1522 sizeof(struct sockaddr_in)); 1510 sizeof(struct sockaddr_in));
1523 sin_server->sin_port = htons(volume_info->port);
1524 rc = ipv4_connect(tcp_ses); 1511 rc = ipv4_connect(tcp_ses);
1525 } 1512 }
1526 if (rc < 0) { 1513 if (rc < 0) {
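
The connect.c changes replace the duplicated cifs_inet_pton() IPv4-then-IPv6 probing with a single cifs_convert_address() call, and make cifs_find_tcp_session() also compare sin6_scope_id before reusing an existing IPv6 session. Below is a minimal sketch of the new conversion pattern, assuming only what the hunks show: cifs_convert_address() takes the string and a struct sockaddr_storage and returns zero on failure. The wrapper name parse_target_address() is hypothetical.

/* Illustrative only: translate an address string into sockaddr_storage
 * the way cifs_get_tcp_session() now does. */
static int parse_target_address(char *ip_string, struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));

	if (!cifs_convert_address(ip_string, addr))
		return -EINVAL;	/* neither a valid IPv4 nor IPv6 address */

	/* addr is now populated (AF_INET or AF_INET6); IPv6 callers also
	 * carry the scope id, which cifs_find_tcp_session() compares */
	return 0;
}
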
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3758965d73d5..4326ffd90fa9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -188,6 +188,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
188 FILE_UNIX_BASIC_INFO *presp_data; 188 FILE_UNIX_BASIC_INFO *presp_data;
189 __u32 posix_flags = 0; 189 __u32 posix_flags = 0;
190 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 190 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
191 struct cifs_fattr fattr;
191 192
192 cFYI(1, ("posix open %s", full_path)); 193 cFYI(1, ("posix open %s", full_path));
193 194
@@ -236,22 +237,21 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
236 if (presp_data->Type == cpu_to_le32(-1)) 237 if (presp_data->Type == cpu_to_le32(-1))
237 goto posix_open_ret; /* open ok, caller does qpathinfo */ 238 goto posix_open_ret; /* open ok, caller does qpathinfo */
238 239
239 /* get new inode and set it up */
240 if (!pinode) 240 if (!pinode)
241 goto posix_open_ret; /* caller does not need info */ 241 goto posix_open_ret; /* caller does not need info */
242 242
243 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
244
245 /* get new inode and set it up */
243 if (*pinode == NULL) { 246 if (*pinode == NULL) {
244 __u64 unique_id = le64_to_cpu(presp_data->UniqueId); 247 *pinode = cifs_iget(sb, &fattr);
245 *pinode = cifs_new_inode(sb, &unique_id); 248 if (!*pinode) {
249 rc = -ENOMEM;
250 goto posix_open_ret;
251 }
252 } else {
253 cifs_fattr_to_inode(*pinode, &fattr);
246 } 254 }
247 /* else an inode was passed in. Update its info, don't create one */
248
249 /* We do not need to close the file if new_inode fails since
250 the caller will retry qpathinfo as long as inode is null */
251 if (*pinode == NULL)
252 goto posix_open_ret;
253
254 posix_fill_in_inode(*pinode, presp_data, 1);
255 255
256 cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only); 256 cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
257 257
@@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
307 307
308 full_path = build_path_from_dentry(direntry); 308 full_path = build_path_from_dentry(direntry);
309 if (full_path == NULL) { 309 if (full_path == NULL) {
310 rc = -ENOMEM;
310 FreeXid(xid); 311 FreeXid(xid);
311 return -ENOMEM; 312 return rc;
312 } 313 }
313 314
314 if (oplockEnabled) 315 if (oplockEnabled)
@@ -424,9 +425,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
424 args.uid = NO_CHANGE_64; 425 args.uid = NO_CHANGE_64;
425 args.gid = NO_CHANGE_64; 426 args.gid = NO_CHANGE_64;
426 } 427 }
427 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, 428 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
428 cifs_sb->local_nls, 429 cifs_sb->local_nls,
429 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 430 cifs_sb->mnt_cifs_flags &
431 CIFS_MOUNT_MAP_SPECIAL_CHR);
430 } else { 432 } else {
431 /* BB implement mode setting via Windows security 433 /* BB implement mode setting via Windows security
432 descriptors e.g. */ 434 descriptors e.g. */
@@ -514,10 +516,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
514 args.uid = NO_CHANGE_64; 516 args.uid = NO_CHANGE_64;
515 args.gid = NO_CHANGE_64; 517 args.gid = NO_CHANGE_64;
516 } 518 }
517 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, 519 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
518 &args, cifs_sb->local_nls, 520 cifs_sb->local_nls,
519 cifs_sb->mnt_cifs_flags & 521 cifs_sb->mnt_cifs_flags &
520 CIFS_MOUNT_MAP_SPECIAL_CHR); 522 CIFS_MOUNT_MAP_SPECIAL_CHR);
521 523
522 if (!rc) { 524 if (!rc) {
523 rc = cifs_get_inode_info_unix(&newinode, full_path, 525 rc = cifs_get_inode_info_unix(&newinode, full_path,
@@ -540,8 +542,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
540 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 542 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
541 if (buf == NULL) { 543 if (buf == NULL) {
542 kfree(full_path); 544 kfree(full_path);
545 rc = -ENOMEM;
543 FreeXid(xid); 546 FreeXid(xid);
544 return -ENOMEM; 547 return rc;
545 } 548 }
546 549
547 rc = CIFSSMBOpen(xid, pTcon, full_path, 550 rc = CIFSSMBOpen(xid, pTcon, full_path,
@@ -641,6 +644,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
641 } 644 }
642 } 645 }
643 646
647 /*
648 * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
649 * the VFS handle the create.
650 */
651 if (nd->flags & LOOKUP_EXCL) {
652 d_instantiate(direntry, NULL);
653 return 0;
654 }
655
644 /* can not grab the rename sem here since it would 656 /* can not grab the rename sem here since it would
645 deadlock in the cases (beginning of sys_rename itself) 657 deadlock in the cases (beginning of sys_rename itself)
646 in which we already have the sb rename sem */ 658 in which we already have the sb rename sem */
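
The dir.c hunks route the server's FILE_UNIX_BASIC_INFO reply through the new cifs_fattr layer: cifs_unix_basic_to_fattr() translates the wire format, then either cifs_iget() hashes up a (possibly new) inode or cifs_fattr_to_inode() refreshes the one the caller already holds. A minimal sketch of that create-or-update idiom follows; the function name fattr_to_vfs_inode_example() is hypothetical, the three cifs_* calls are the ones introduced by this series.

/* Illustrative only: the two-branch pattern used by cifs_posix_open()
 * and cifs_get_inode_info_unix() above. */
static int fattr_to_vfs_inode_example(struct super_block *sb,
				      struct inode **pinode,
				      FILE_UNIX_BASIC_INFO *info)
{
	struct cifs_fattr fattr;

	cifs_unix_basic_to_fattr(&fattr, info, CIFS_SB(sb));

	if (*pinode == NULL) {
		*pinode = cifs_iget(sb, &fattr);	/* look up or create, then fill */
		if (!*pinode)
			return -ENOMEM;
	} else {
		cifs_fattr_to_inode(*pinode, &fattr);	/* refresh existing inode */
	}
	return 0;
}
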
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index df4a306f697e..87948147d7ec 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -35,26 +35,11 @@
35 * 0 - name is not IP 35 * 0 - name is not IP
36 */ 36 */
37static int 37static int
38is_ip(const char *name) 38is_ip(char *name)
39{ 39{
40 int rc; 40 struct sockaddr_storage ss;
41 struct sockaddr_in sin_server; 41
42 struct sockaddr_in6 sin_server6; 42 return cifs_convert_address(name, &ss);
43
44 rc = cifs_inet_pton(AF_INET, name,
45 &sin_server.sin_addr.s_addr);
46
47 if (rc <= 0) {
48 /* not ipv4 address, try ipv6 */
49 rc = cifs_inet_pton(AF_INET6, name,
50 &sin_server6.sin6_addr.in6_u);
51 if (rc > 0)
52 return 1;
53 } else {
54 return 1;
55 }
56 /* we failed translating address */
57 return 0;
58} 43}
59 44
60static int 45static int
@@ -72,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data,
72 ip[datalen] = '\0'; 57 ip[datalen] = '\0';
73 58
74 /* make sure this looks like an address */ 59 /* make sure this looks like an address */
75 if (!is_ip((const char *) ip)) { 60 if (!is_ip(ip)) {
76 kfree(ip); 61 kfree(ip);
77 return -EINVAL; 62 return -EINVAL;
78 } 63 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 06866841b97f..c34b7f8a217b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file)
300 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 300 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
301 pCifsFile = cifs_fill_filedata(file); 301 pCifsFile = cifs_fill_filedata(file);
302 if (pCifsFile) { 302 if (pCifsFile) {
303 rc = 0;
303 FreeXid(xid); 304 FreeXid(xid);
304 return 0; 305 return rc;
305 } 306 }
306 307
307 full_path = build_path_from_dentry(file->f_path.dentry); 308 full_path = build_path_from_dentry(file->f_path.dentry);
308 if (full_path == NULL) { 309 if (full_path == NULL) {
310 rc = -ENOMEM;
309 FreeXid(xid); 311 FreeXid(xid);
310 return -ENOMEM; 312 return rc;
311 } 313 }
312 314
313 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", 315 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
@@ -446,9 +448,9 @@ int cifs_open(struct inode *inode, struct file *file)
446 .mtime = NO_CHANGE_64, 448 .mtime = NO_CHANGE_64,
447 .device = 0, 449 .device = 0,
448 }; 450 };
449 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, 451 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
450 cifs_sb->local_nls, 452 cifs_sb->local_nls,
451 cifs_sb->mnt_cifs_flags & 453 cifs_sb->mnt_cifs_flags &
452 CIFS_MOUNT_MAP_SPECIAL_CHR); 454 CIFS_MOUNT_MAP_SPECIAL_CHR);
453 } 455 }
454 } 456 }
@@ -491,11 +493,12 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
491 return -EBADF; 493 return -EBADF;
492 494
493 xid = GetXid(); 495 xid = GetXid();
494 mutex_unlock(&pCifsFile->fh_mutex); 496 mutex_lock(&pCifsFile->fh_mutex);
495 if (!pCifsFile->invalidHandle) { 497 if (!pCifsFile->invalidHandle) {
496 mutex_lock(&pCifsFile->fh_mutex); 498 mutex_unlock(&pCifsFile->fh_mutex);
499 rc = 0;
497 FreeXid(xid); 500 FreeXid(xid);
498 return 0; 501 return rc;
499 } 502 }
500 503
501 if (file->f_path.dentry == NULL) { 504 if (file->f_path.dentry == NULL) {
@@ -524,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
524 if (full_path == NULL) { 527 if (full_path == NULL) {
525 rc = -ENOMEM; 528 rc = -ENOMEM;
526reopen_error_exit: 529reopen_error_exit:
527 mutex_lock(&pCifsFile->fh_mutex); 530 mutex_unlock(&pCifsFile->fh_mutex);
528 FreeXid(xid); 531 FreeXid(xid);
529 return rc; 532 return rc;
530 } 533 }
@@ -566,14 +569,14 @@ reopen_error_exit:
566 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 569 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
567 CIFS_MOUNT_MAP_SPECIAL_CHR); 570 CIFS_MOUNT_MAP_SPECIAL_CHR);
568 if (rc) { 571 if (rc) {
569 mutex_lock(&pCifsFile->fh_mutex); 572 mutex_unlock(&pCifsFile->fh_mutex);
570 cFYI(1, ("cifs_open returned 0x%x", rc)); 573 cFYI(1, ("cifs_open returned 0x%x", rc));
571 cFYI(1, ("oplock: %d", oplock)); 574 cFYI(1, ("oplock: %d", oplock));
572 } else { 575 } else {
573reopen_success: 576reopen_success:
574 pCifsFile->netfid = netfid; 577 pCifsFile->netfid = netfid;
575 pCifsFile->invalidHandle = false; 578 pCifsFile->invalidHandle = false;
576 mutex_lock(&pCifsFile->fh_mutex); 579 mutex_unlock(&pCifsFile->fh_mutex);
577 pCifsInode = CIFS_I(inode); 580 pCifsInode = CIFS_I(inode);
578 if (pCifsInode) { 581 if (pCifsInode) {
579 if (can_flush) { 582 if (can_flush) {
@@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
845 tcon = cifs_sb->tcon; 848 tcon = cifs_sb->tcon;
846 849
847 if (file->private_data == NULL) { 850 if (file->private_data == NULL) {
851 rc = -EBADF;
848 FreeXid(xid); 852 FreeXid(xid);
849 return -EBADF; 853 return rc;
850 } 854 }
851 netfid = ((struct cifsFileInfo *)file->private_data)->netfid; 855 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
852 856
@@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1805 pTcon = cifs_sb->tcon; 1809 pTcon = cifs_sb->tcon;
1806 1810
1807 if (file->private_data == NULL) { 1811 if (file->private_data == NULL) {
1812 rc = -EBADF;
1808 FreeXid(xid); 1813 FreeXid(xid);
1809 return -EBADF; 1814 return rc;
1810 } 1815 }
1811 open_file = (struct cifsFileInfo *)file->private_data; 1816 open_file = (struct cifsFileInfo *)file->private_data;
1812 1817
@@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1885 pTcon = cifs_sb->tcon; 1890 pTcon = cifs_sb->tcon;
1886 1891
1887 if (file->private_data == NULL) { 1892 if (file->private_data == NULL) {
1893 rc = -EBADF;
1888 FreeXid(xid); 1894 FreeXid(xid);
1889 return -EBADF; 1895 return rc;
1890 } 1896 }
1891 open_file = (struct cifsFileInfo *)file->private_data; 1897 open_file = (struct cifsFileInfo *)file->private_data;
1892 1898
@@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2019 2025
2020 xid = GetXid(); 2026 xid = GetXid();
2021 if (file->private_data == NULL) { 2027 if (file->private_data == NULL) {
2028 rc = -EBADF;
2022 FreeXid(xid); 2029 FreeXid(xid);
2023 return -EBADF; 2030 return rc;
2024 } 2031 }
2025 open_file = (struct cifsFileInfo *)file->private_data; 2032 open_file = (struct cifsFileInfo *)file->private_data;
2026 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2033 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page)
2185 xid = GetXid(); 2192 xid = GetXid();
2186 2193
2187 if (file->private_data == NULL) { 2194 if (file->private_data == NULL) {
2195 rc = -EBADF;
2188 FreeXid(xid); 2196 FreeXid(xid);
2189 return -EBADF; 2197 return rc;
2190 } 2198 }
2191 2199
2192 cFYI(1, ("readpage %p at offset %d 0x%x\n", 2200 cFYI(1, ("readpage %p at offset %d 0x%x\n",
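
The file.c hunks fix an inverted fh_mutex protocol in cifs_reopen_file() (the old code dropped the mutex on entry and re-took it on every exit) and convert early returns to the "rc = ...; FreeXid(xid); return rc;" form, apparently because the FreeXid() debug macro logs a local rc. The sketch below shows only the corrected locking shape under those assumptions; reopen_shape_example() and do_reopen() are hypothetical stand-ins.

/* Illustrative only: hold fh_mutex for the whole reopen and release it
 * on every exit path, as cifs_reopen_file() does after this fix. */
static int reopen_shape_example(struct cifsFileInfo *pCifsFile)
{
	int rc, xid = GetXid();

	mutex_lock(&pCifsFile->fh_mutex);
	if (!pCifsFile->invalidHandle) {
		/* nothing to do: the handle is already valid */
		mutex_unlock(&pCifsFile->fh_mutex);
		rc = 0;
		FreeXid(xid);
		return rc;
	}

	rc = do_reopen(pCifsFile);		/* hypothetical stand-in for the SMB open */
	mutex_unlock(&pCifsFile->fh_mutex);	/* released whether or not reopen worked */
	FreeXid(xid);
	return rc;
}
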
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fad882b075ba..18afe57b2461 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -77,239 +77,202 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
77 } 77 }
78} 78}
79 79
80static void cifs_unix_info_to_inode(struct inode *inode, 80/* populate an inode with info from a cifs_fattr struct */
81 FILE_UNIX_BASIC_INFO *info, int force_uid_gid) 81void
82cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
82{ 83{
84 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
83 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 85 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
84 struct cifsInodeInfo *cifsInfo = CIFS_I(inode); 86 unsigned long oldtime = cifs_i->time;
85 __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); 87
86 __u64 end_of_file = le64_to_cpu(info->EndOfFile); 88 inode->i_atime = fattr->cf_atime;
89 inode->i_mtime = fattr->cf_mtime;
90 inode->i_ctime = fattr->cf_ctime;
91 inode->i_rdev = fattr->cf_rdev;
92 inode->i_nlink = fattr->cf_nlink;
93 inode->i_uid = fattr->cf_uid;
94 inode->i_gid = fattr->cf_gid;
95
96 /* if dynperm is set, don't clobber existing mode */
97 if (inode->i_state & I_NEW ||
98 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM))
99 inode->i_mode = fattr->cf_mode;
100
101 cifs_i->cifsAttrs = fattr->cf_cifsattrs;
102 cifs_i->uniqueid = fattr->cf_uniqueid;
103
104 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
105 cifs_i->time = 0;
106 else
107 cifs_i->time = jiffies;
108
109 cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode,
110 oldtime, cifs_i->time));
87 111
88 inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime); 112 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
89 inode->i_mtime = 113
90 cifs_NTtimeToUnix(info->LastModificationTime); 114 /*
91 inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange); 115 * Can't safely change the file size here if the client is writing to
92 inode->i_mode = le64_to_cpu(info->Permissions); 116 * it due to potential races.
117 */
118 spin_lock(&inode->i_lock);
119 if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
120 i_size_write(inode, fattr->cf_eof);
121
122 /*
123 * i_blocks is not related to (i_size / i_blksize),
124 * but instead 512 byte (2**9) size is required for
125 * calculating num blocks.
126 */
127 inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9;
128 }
129 spin_unlock(&inode->i_lock);
130
131 cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
132}
133
134/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
135void
136cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
137 struct cifs_sb_info *cifs_sb)
138{
139 memset(fattr, 0, sizeof(*fattr));
140 fattr->cf_uniqueid = le64_to_cpu(info->UniqueId);
141 fattr->cf_bytes = le64_to_cpu(info->NumOfBytes);
142 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
143
144 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
145 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
146 fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
147 fattr->cf_mode = le64_to_cpu(info->Permissions);
93 148
94 /* 149 /*
95 * Since we set the inode type below we need to mask off 150 * Since we set the inode type below we need to mask off
96 * to avoid strange results if bits set above. 151 * to avoid strange results if bits set above.
97 */ 152 */
98 inode->i_mode &= ~S_IFMT; 153 fattr->cf_mode &= ~S_IFMT;
99 switch (le32_to_cpu(info->Type)) { 154 switch (le32_to_cpu(info->Type)) {
100 case UNIX_FILE: 155 case UNIX_FILE:
101 inode->i_mode |= S_IFREG; 156 fattr->cf_mode |= S_IFREG;
157 fattr->cf_dtype = DT_REG;
102 break; 158 break;
103 case UNIX_SYMLINK: 159 case UNIX_SYMLINK:
104 inode->i_mode |= S_IFLNK; 160 fattr->cf_mode |= S_IFLNK;
161 fattr->cf_dtype = DT_LNK;
105 break; 162 break;
106 case UNIX_DIR: 163 case UNIX_DIR:
107 inode->i_mode |= S_IFDIR; 164 fattr->cf_mode |= S_IFDIR;
165 fattr->cf_dtype = DT_DIR;
108 break; 166 break;
109 case UNIX_CHARDEV: 167 case UNIX_CHARDEV:
110 inode->i_mode |= S_IFCHR; 168 fattr->cf_mode |= S_IFCHR;
111 inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor), 169 fattr->cf_dtype = DT_CHR;
112 le64_to_cpu(info->DevMinor) & MINORMASK); 170 fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
171 le64_to_cpu(info->DevMinor) & MINORMASK);
113 break; 172 break;
114 case UNIX_BLOCKDEV: 173 case UNIX_BLOCKDEV:
115 inode->i_mode |= S_IFBLK; 174 fattr->cf_mode |= S_IFBLK;
116 inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor), 175 fattr->cf_dtype = DT_BLK;
117 le64_to_cpu(info->DevMinor) & MINORMASK); 176 fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
177 le64_to_cpu(info->DevMinor) & MINORMASK);
118 break; 178 break;
119 case UNIX_FIFO: 179 case UNIX_FIFO:
120 inode->i_mode |= S_IFIFO; 180 fattr->cf_mode |= S_IFIFO;
181 fattr->cf_dtype = DT_FIFO;
121 break; 182 break;
122 case UNIX_SOCKET: 183 case UNIX_SOCKET:
123 inode->i_mode |= S_IFSOCK; 184 fattr->cf_mode |= S_IFSOCK;
185 fattr->cf_dtype = DT_SOCK;
124 break; 186 break;
125 default: 187 default:
126 /* safest to call it a file if we do not know */ 188 /* safest to call it a file if we do not know */
127 inode->i_mode |= S_IFREG; 189 fattr->cf_mode |= S_IFREG;
190 fattr->cf_dtype = DT_REG;
128 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type))); 191 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type)));
129 break; 192 break;
130 } 193 }
131 194
132 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) && 195 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
133 !force_uid_gid) 196 fattr->cf_uid = cifs_sb->mnt_uid;
134 inode->i_uid = cifs_sb->mnt_uid;
135 else 197 else
136 inode->i_uid = le64_to_cpu(info->Uid); 198 fattr->cf_uid = le64_to_cpu(info->Uid);
137 199
138 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) && 200 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
139 !force_uid_gid) 201 fattr->cf_gid = cifs_sb->mnt_gid;
140 inode->i_gid = cifs_sb->mnt_gid;
141 else 202 else
142 inode->i_gid = le64_to_cpu(info->Gid); 203 fattr->cf_gid = le64_to_cpu(info->Gid);
143
144 inode->i_nlink = le64_to_cpu(info->Nlinks);
145
146 cifsInfo->server_eof = end_of_file;
147 spin_lock(&inode->i_lock);
148 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
149 /*
150 * We can not safely change the file size here if the client
151 * is writing to it due to potential races.
152 */
153 i_size_write(inode, end_of_file);
154 204
155 /* 205 fattr->cf_nlink = le64_to_cpu(info->Nlinks);
156 * i_blocks is not related to (i_size / i_blksize),
157 * but instead 512 byte (2**9) size is required for
158 * calculating num blocks.
159 */
160 inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
161 }
162 spin_unlock(&inode->i_lock);
163} 206}
164 207
165
166/* 208/*
167 * Needed to setup inode data for the directory which is the 209 * Fill a cifs_fattr struct with fake inode info.
168 * junction to the new submount (ie to setup the fake directory
169 * which represents a DFS referral)
170 */
171static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat,
172 struct super_block *sb)
173{
174 struct inode *pinode = NULL;
175
176 memset(pfnd_dat, 0, sizeof(FILE_UNIX_BASIC_INFO));
177
178/* __le64 pfnd_dat->EndOfFile = cpu_to_le64(0);
179 __le64 pfnd_dat->NumOfBytes = cpu_to_le64(0);
180 __u64 UniqueId = 0; */
181 pfnd_dat->LastStatusChange =
182 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
183 pfnd_dat->LastAccessTime =
184 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
185 pfnd_dat->LastModificationTime =
186 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
187 pfnd_dat->Type = cpu_to_le32(UNIX_DIR);
188 pfnd_dat->Permissions = cpu_to_le64(S_IXUGO | S_IRWXU);
189 pfnd_dat->Nlinks = cpu_to_le64(2);
190 if (sb->s_root)
191 pinode = sb->s_root->d_inode;
192 if (pinode == NULL)
193 return;
194
195 /* fill in default values for the remaining based on root
196 inode since we can not query the server for this inode info */
197 pfnd_dat->DevMajor = cpu_to_le64(MAJOR(pinode->i_rdev));
198 pfnd_dat->DevMinor = cpu_to_le64(MINOR(pinode->i_rdev));
199 pfnd_dat->Uid = cpu_to_le64(pinode->i_uid);
200 pfnd_dat->Gid = cpu_to_le64(pinode->i_gid);
201}
202
203/**
204 * cifs_new inode - create new inode, initialize, and hash it
205 * @sb - pointer to superblock
206 * @inum - if valid pointer and serverino is enabled, replace i_ino with val
207 *
208 * Create a new inode, initialize it for CIFS and hash it. Returns the new
209 * inode or NULL if one couldn't be allocated.
210 * 210 *
211 * If the share isn't mounted with "serverino" or inum is a NULL pointer then 211 * Needed to setup cifs_fattr data for the directory which is the
212 * we'll just use the inode number assigned by new_inode(). Note that this can 212 * junction to the new submount (ie to setup the fake directory
213 * mean i_ino collisions since the i_ino assigned by new_inode is not 213 * which represents a DFS referral).
214 * guaranteed to be unique.
215 */ 214 */
216struct inode * 215void
217cifs_new_inode(struct super_block *sb, __u64 *inum) 216cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
218{ 217{
219 struct inode *inode; 218 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
220
221 inode = new_inode(sb);
222 if (inode == NULL)
223 return NULL;
224
225 /*
226 * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we
227 * stop passing inum as ptr. Are there sanity checks we can use to
228 * ensure that the server is really filling in that field? Also,
229 * if serverino is disabled, perhaps we should be using iunique()?
230 */
231 if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
232 inode->i_ino = (unsigned long) *inum;
233
234 /*
235 * must set this here instead of cifs_alloc_inode since VFS will
236 * clobber i_flags
237 */
238 if (sb->s_flags & MS_NOATIME)
239 inode->i_flags |= S_NOATIME | S_NOCMTIME;
240
241 insert_inode_hash(inode);
242 219
243 return inode; 220 cFYI(1, ("creating fake fattr for DFS referral"));
221
222 memset(fattr, 0, sizeof(*fattr));
223 fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
224 fattr->cf_uid = cifs_sb->mnt_uid;
225 fattr->cf_gid = cifs_sb->mnt_gid;
226 fattr->cf_atime = CURRENT_TIME;
227 fattr->cf_ctime = CURRENT_TIME;
228 fattr->cf_mtime = CURRENT_TIME;
229 fattr->cf_nlink = 2;
230 fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
244} 231}
245 232
246int cifs_get_inode_info_unix(struct inode **pinode, 233int cifs_get_inode_info_unix(struct inode **pinode,
247 const unsigned char *full_path, struct super_block *sb, int xid) 234 const unsigned char *full_path,
235 struct super_block *sb, int xid)
248{ 236{
249 int rc = 0; 237 int rc;
250 FILE_UNIX_BASIC_INFO find_data; 238 FILE_UNIX_BASIC_INFO find_data;
251 struct cifsTconInfo *pTcon; 239 struct cifs_fattr fattr;
252 struct inode *inode; 240 struct cifsTconInfo *tcon;
253 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 241 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
254 bool is_dfs_referral = false;
255 struct cifsInodeInfo *cifsInfo;
256 __u64 num_of_bytes;
257 __u64 end_of_file;
258 242
259 pTcon = cifs_sb->tcon; 243 tcon = cifs_sb->tcon;
260 cFYI(1, ("Getting info on %s", full_path)); 244 cFYI(1, ("Getting info on %s", full_path));
261 245
262 /* could have done a find first instead but this returns more info */ 246 /* could have done a find first instead but this returns more info */
263 rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, 247 rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
264 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 248 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
265 CIFS_MOUNT_MAP_SPECIAL_CHR); 249 CIFS_MOUNT_MAP_SPECIAL_CHR);
266 if (rc == -EREMOTE && !is_dfs_referral) {
267 is_dfs_referral = true;
268 cFYI(DBG2, ("DFS ref"));
269 /* for DFS, server does not give us real inode data */
270 fill_fake_finddataunix(&find_data, sb);
271 rc = 0;
272 } else if (rc)
273 goto cgiiu_exit;
274 250
275 num_of_bytes = le64_to_cpu(find_data.NumOfBytes); 251 if (!rc) {
276 end_of_file = le64_to_cpu(find_data.EndOfFile); 252 cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
253 } else if (rc == -EREMOTE) {
254 cifs_create_dfs_fattr(&fattr, sb);
255 rc = 0;
256 } else {
257 return rc;
258 }
277 259
278 /* get new inode */
279 if (*pinode == NULL) { 260 if (*pinode == NULL) {
280 __u64 unique_id = le64_to_cpu(find_data.UniqueId); 261 /* get new inode */
281 *pinode = cifs_new_inode(sb, &unique_id); 262 *pinode = cifs_iget(sb, &fattr);
282 if (*pinode == NULL) { 263 if (!*pinode)
283 rc = -ENOMEM; 264 rc = -ENOMEM;
284 goto cgiiu_exit; 265 } else {
285 } 266 /* we already have inode, update it */
267 cifs_fattr_to_inode(*pinode, &fattr);
286 } 268 }
287 269
288 inode = *pinode;
289 cifsInfo = CIFS_I(inode);
290
291 cFYI(1, ("Old time %ld", cifsInfo->time));
292 cifsInfo->time = jiffies;
293 cFYI(1, ("New time %ld", cifsInfo->time));
294 /* this is ok to set on every inode revalidate */
295 atomic_set(&cifsInfo->inUse, 1);
296
297 cifs_unix_info_to_inode(inode, &find_data, 0);
298
299 if (num_of_bytes < end_of_file)
300 cFYI(1, ("allocation size less than end of file"));
301 cFYI(1, ("Size %ld and blocks %llu",
302 (unsigned long) inode->i_size,
303 (unsigned long long)inode->i_blocks));
304
305 cifs_set_ops(inode, is_dfs_referral);
306cgiiu_exit:
307 return rc; 270 return rc;
308} 271}
309 272
310static int decode_sfu_inode(struct inode *inode, __u64 size, 273static int
311 const unsigned char *path, 274cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
312 struct cifs_sb_info *cifs_sb, int xid) 275 struct cifs_sb_info *cifs_sb, int xid)
313{ 276{
314 int rc; 277 int rc;
315 int oplock = 0; 278 int oplock = 0;
@@ -321,10 +284,15 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
321 284
322 pbuf = buf; 285 pbuf = buf;
323 286
324 if (size == 0) { 287 fattr->cf_mode &= ~S_IFMT;
325 inode->i_mode |= S_IFIFO; 288
289 if (fattr->cf_eof == 0) {
290 fattr->cf_mode |= S_IFIFO;
291 fattr->cf_dtype = DT_FIFO;
326 return 0; 292 return 0;
327 } else if (size < 8) { 293 } else if (fattr->cf_eof < 8) {
294 fattr->cf_mode |= S_IFREG;
295 fattr->cf_dtype = DT_REG;
328 return -EINVAL; /* EOPNOTSUPP? */ 296 return -EINVAL; /* EOPNOTSUPP? */
329 } 297 }
330 298
@@ -336,42 +304,46 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
336 if (rc == 0) { 304 if (rc == 0) {
337 int buf_type = CIFS_NO_BUFFER; 305 int buf_type = CIFS_NO_BUFFER;
338 /* Read header */ 306 /* Read header */
339 rc = CIFSSMBRead(xid, pTcon, 307 rc = CIFSSMBRead(xid, pTcon, netfid,
340 netfid,
341 24 /* length */, 0 /* offset */, 308 24 /* length */, 0 /* offset */,
342 &bytes_read, &pbuf, &buf_type); 309 &bytes_read, &pbuf, &buf_type);
343 if ((rc == 0) && (bytes_read >= 8)) { 310 if ((rc == 0) && (bytes_read >= 8)) {
344 if (memcmp("IntxBLK", pbuf, 8) == 0) { 311 if (memcmp("IntxBLK", pbuf, 8) == 0) {
345 cFYI(1, ("Block device")); 312 cFYI(1, ("Block device"));
346 inode->i_mode |= S_IFBLK; 313 fattr->cf_mode |= S_IFBLK;
314 fattr->cf_dtype = DT_BLK;
347 if (bytes_read == 24) { 315 if (bytes_read == 24) {
348 /* we have enough to decode dev num */ 316 /* we have enough to decode dev num */
349 __u64 mjr; /* major */ 317 __u64 mjr; /* major */
350 __u64 mnr; /* minor */ 318 __u64 mnr; /* minor */
351 mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); 319 mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
352 mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); 320 mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
353 inode->i_rdev = MKDEV(mjr, mnr); 321 fattr->cf_rdev = MKDEV(mjr, mnr);
354 } 322 }
355 } else if (memcmp("IntxCHR", pbuf, 8) == 0) { 323 } else if (memcmp("IntxCHR", pbuf, 8) == 0) {
356 cFYI(1, ("Char device")); 324 cFYI(1, ("Char device"));
357 inode->i_mode |= S_IFCHR; 325 fattr->cf_mode |= S_IFCHR;
326 fattr->cf_dtype = DT_CHR;
358 if (bytes_read == 24) { 327 if (bytes_read == 24) {
359 /* we have enough to decode dev num */ 328 /* we have enough to decode dev num */
360 __u64 mjr; /* major */ 329 __u64 mjr; /* major */
361 __u64 mnr; /* minor */ 330 __u64 mnr; /* minor */
362 mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); 331 mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
363 mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); 332 mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
364 inode->i_rdev = MKDEV(mjr, mnr); 333 fattr->cf_rdev = MKDEV(mjr, mnr);
365 } 334 }
366 } else if (memcmp("IntxLNK", pbuf, 7) == 0) { 335 } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
367 cFYI(1, ("Symlink")); 336 cFYI(1, ("Symlink"));
368 inode->i_mode |= S_IFLNK; 337 fattr->cf_mode |= S_IFLNK;
338 fattr->cf_dtype = DT_LNK;
369 } else { 339 } else {
370 inode->i_mode |= S_IFREG; /* file? */ 340 fattr->cf_mode |= S_IFREG; /* file? */
341 fattr->cf_dtype = DT_REG;
371 rc = -EOPNOTSUPP; 342 rc = -EOPNOTSUPP;
372 } 343 }
373 } else { 344 } else {
374 inode->i_mode |= S_IFREG; /* then it is a file */ 345 fattr->cf_mode |= S_IFREG; /* then it is a file */
346 fattr->cf_dtype = DT_REG;
375 rc = -EOPNOTSUPP; /* or some unknown SFU type */ 347 rc = -EOPNOTSUPP; /* or some unknown SFU type */
376 } 348 }
377 CIFSSMBClose(xid, pTcon, netfid); 349 CIFSSMBClose(xid, pTcon, netfid);
@@ -381,9 +353,13 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
381 353
382#define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */ 354#define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */
383 355
384static int get_sfu_mode(struct inode *inode, 356/*
385 const unsigned char *path, 357 * Fetch mode bits as provided by SFU.
386 struct cifs_sb_info *cifs_sb, int xid) 358 *
359 * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ?
360 */
361static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
362 struct cifs_sb_info *cifs_sb, int xid)
387{ 363{
388#ifdef CONFIG_CIFS_XATTR 364#ifdef CONFIG_CIFS_XATTR
389 ssize_t rc; 365 ssize_t rc;
@@ -391,68 +367,80 @@ static int get_sfu_mode(struct inode *inode,
391 __u32 mode; 367 __u32 mode;
392 368
393 rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS", 369 rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS",
394 ea_value, 4 /* size of buf */, cifs_sb->local_nls, 370 ea_value, 4 /* size of buf */, cifs_sb->local_nls,
395 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 371 cifs_sb->mnt_cifs_flags &
372 CIFS_MOUNT_MAP_SPECIAL_CHR);
396 if (rc < 0) 373 if (rc < 0)
397 return (int)rc; 374 return (int)rc;
398 else if (rc > 3) { 375 else if (rc > 3) {
399 mode = le32_to_cpu(*((__le32 *)ea_value)); 376 mode = le32_to_cpu(*((__le32 *)ea_value));
400 inode->i_mode &= ~SFBITS_MASK; 377 fattr->cf_mode &= ~SFBITS_MASK;
401 cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode)); 378 cFYI(1, ("special bits 0%o org mode 0%o", mode,
402 inode->i_mode = (mode & SFBITS_MASK) | inode->i_mode; 379 fattr->cf_mode));
380 fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
403 cFYI(1, ("special mode bits 0%o", mode)); 381 cFYI(1, ("special mode bits 0%o", mode));
404 return 0;
405 } else {
406 return 0;
407 } 382 }
383
384 return 0;
408#else 385#else
409 return -EOPNOTSUPP; 386 return -EOPNOTSUPP;
410#endif 387#endif
411} 388}
412 389
413/* 390/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
414 * Needed to setup inode data for the directory which is the 391void
415 * junction to the new submount (ie to setup the fake directory 392cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
416 * which represents a DFS referral) 393 struct cifs_sb_info *cifs_sb, bool adjust_tz)
417 */
418static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat,
419 struct super_block *sb)
420{ 394{
421 memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO)); 395 memset(fattr, 0, sizeof(*fattr));
422 396 fattr->cf_cifsattrs = le32_to_cpu(info->Attributes);
423/* __le64 pfnd_dat->AllocationSize = cpu_to_le64(0); 397 if (info->DeletePending)
424 __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); 398 fattr->cf_flags |= CIFS_FATTR_DELETE_PENDING;
425 __u8 pfnd_dat->DeletePending = 0; 399
426 __u8 pfnd_data->Directory = 0; 400 if (info->LastAccessTime)
427 __le32 pfnd_dat->EASize = 0; 401 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
428 __u64 pfnd_dat->IndexNumber = 0; 402 else
429 __u64 pfnd_dat->IndexNumber1 = 0; */ 403 fattr->cf_atime = CURRENT_TIME;
430 pfnd_dat->CreationTime = 404
431 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 405 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
432 pfnd_dat->LastAccessTime = 406 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
433 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 407
434 pfnd_dat->LastWriteTime = 408 if (adjust_tz) {
435 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 409 fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
436 pfnd_dat->ChangeTime = 410 fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
437 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 411 }
438 pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY); 412
439 pfnd_dat->NumberOfLinks = cpu_to_le32(2); 413 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
414 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
415
416 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
417 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
418 fattr->cf_dtype = DT_DIR;
419 } else {
420 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
421 fattr->cf_dtype = DT_REG;
422
423 /* clear write bits if ATTR_READONLY is set */
424 if (fattr->cf_cifsattrs & ATTR_READONLY)
425 fattr->cf_mode &= ~(S_IWUGO);
426 }
427
428 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
429
430 fattr->cf_uid = cifs_sb->mnt_uid;
431 fattr->cf_gid = cifs_sb->mnt_gid;
440} 432}
441 433
442int cifs_get_inode_info(struct inode **pinode, 434int cifs_get_inode_info(struct inode **pinode,
443 const unsigned char *full_path, FILE_ALL_INFO *pfindData, 435 const unsigned char *full_path, FILE_ALL_INFO *pfindData,
444 struct super_block *sb, int xid, const __u16 *pfid) 436 struct super_block *sb, int xid, const __u16 *pfid)
445{ 437{
446 int rc = 0; 438 int rc = 0, tmprc;
447 __u32 attr;
448 struct cifsInodeInfo *cifsInfo;
449 struct cifsTconInfo *pTcon; 439 struct cifsTconInfo *pTcon;
450 struct inode *inode;
451 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 440 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
452 char *buf = NULL; 441 char *buf = NULL;
453 bool adjustTZ = false; 442 bool adjustTZ = false;
454 bool is_dfs_referral = false; 443 struct cifs_fattr fattr;
455 umode_t default_mode;
456 444
457 pTcon = cifs_sb->tcon; 445 pTcon = cifs_sb->tcon;
458 cFYI(1, ("Getting info on %s", full_path)); 446 cFYI(1, ("Getting info on %s", full_path));
@@ -487,163 +475,82 @@ int cifs_get_inode_info(struct inode **pinode,
487 adjustTZ = true; 475 adjustTZ = true;
488 } 476 }
489 } 477 }
490 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ 478
491 if (rc == -EREMOTE) { 479 if (!rc) {
492 is_dfs_referral = true; 480 cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData,
493 fill_fake_finddata(pfindData, sb); 481 cifs_sb, adjustTZ);
482 } else if (rc == -EREMOTE) {
483 cifs_create_dfs_fattr(&fattr, sb);
494 rc = 0; 484 rc = 0;
495 } else if (rc) 485 } else {
496 goto cgii_exit; 486 goto cgii_exit;
487 }
497 488
498 attr = le32_to_cpu(pfindData->Attributes); 489 /*
499 490 * If an inode wasn't passed in, then get the inode number
500 /* get new inode */ 491 *
492 * Is an i_ino of zero legal? Can we use that to check if the server
493 * supports returning inode numbers? Are there other sanity checks we
494 * can use to ensure that the server is really filling in that field?
495 *
496 * We can not use the IndexNumber field by default from Windows or
497 * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA
498 * CIFS spec claims that this value is unique within the scope of a
499 * share, and the windows docs hint that it's actually unique
500 * per-machine.
501 *
502 * There may be higher info levels that work but are there Windows
503 * server or network appliances for which IndexNumber field is not
504 * guaranteed unique?
505 */
501 if (*pinode == NULL) { 506 if (*pinode == NULL) {
502 __u64 inode_num;
503 __u64 *pinum = &inode_num;
504
505 /* Is an i_ino of zero legal? Can we use that to check
506 if the server supports returning inode numbers? Are
507 there other sanity checks we can use to ensure that
508 the server is really filling in that field? */
509
510 /* We can not use the IndexNumber field by default from
511 Windows or Samba (in ALL_INFO buf) but we can request
512 it explicitly. It may not be unique presumably if
513 the server has multiple devices mounted under one share */
514
515 /* There may be higher info levels that work but are
516 there Windows server or network appliances for which
517 IndexNumber field is not guaranteed unique? */
518
519 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 507 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
520 int rc1 = 0; 508 int rc1 = 0;
521 509
522 rc1 = CIFSGetSrvInodeNumber(xid, pTcon, 510 rc1 = CIFSGetSrvInodeNumber(xid, pTcon,
523 full_path, pinum, 511 full_path, &fattr.cf_uniqueid,
524 cifs_sb->local_nls, 512 cifs_sb->local_nls,
525 cifs_sb->mnt_cifs_flags & 513 cifs_sb->mnt_cifs_flags &
526 CIFS_MOUNT_MAP_SPECIAL_CHR); 514 CIFS_MOUNT_MAP_SPECIAL_CHR);
527 if (rc1) { 515 if (rc1) {
528 cFYI(1, ("GetSrvInodeNum rc %d", rc1));
529 pinum = NULL;
530 /* BB EOPNOSUPP disable SERVER_INUM? */ 516 /* BB EOPNOSUPP disable SERVER_INUM? */
517 cFYI(1, ("GetSrvInodeNum rc %d", rc1));
518 fattr.cf_uniqueid = iunique(sb, ROOT_I);
531 } 519 }
532 } else { 520 } else {
533 pinum = NULL; 521 fattr.cf_uniqueid = iunique(sb, ROOT_I);
534 } 522 }
535
536 *pinode = cifs_new_inode(sb, pinum);
537 if (*pinode == NULL) {
538 rc = -ENOMEM;
539 goto cgii_exit;
540 }
541 }
542 inode = *pinode;
543 cifsInfo = CIFS_I(inode);
544 cifsInfo->cifsAttrs = attr;
545 cifsInfo->delete_pending = pfindData->DeletePending ? true : false;
546 cFYI(1, ("Old time %ld", cifsInfo->time));
547 cifsInfo->time = jiffies;
548 cFYI(1, ("New time %ld", cifsInfo->time));
549
550 /* blksize needs to be multiple of two. So safer to default to
551 blksize and blkbits set in superblock so 2**blkbits and blksize
552 will match rather than setting to:
553 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
554
555 /* Linux can not store file creation time so ignore it */
556 if (pfindData->LastAccessTime)
557 inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
558 else /* do not need to use current_fs_time - time not stored */
559 inode->i_atime = CURRENT_TIME;
560 inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
561 inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
562 cFYI(DBG2, ("Attributes came in as 0x%x", attr));
563 if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
564 inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
565 inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
566 }
567
568 /* get default inode mode */
569 if (attr & ATTR_DIRECTORY)
570 default_mode = cifs_sb->mnt_dir_mode;
571 else
572 default_mode = cifs_sb->mnt_file_mode;
573
574 /* set permission bits */
575 if (atomic_read(&cifsInfo->inUse) == 0 ||
576 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
577 inode->i_mode = default_mode;
578 else {
579 /* just reenable write bits if !ATTR_READONLY */
580 if ((inode->i_mode & S_IWUGO) == 0 &&
581 (attr & ATTR_READONLY) == 0)
582 inode->i_mode |= (S_IWUGO & default_mode);
583
584 inode->i_mode &= ~S_IFMT;
585 }
586 /* clear write bits if ATTR_READONLY is set */
587 if (attr & ATTR_READONLY)
588 inode->i_mode &= ~S_IWUGO;
589
590 /* set inode type */
591 if ((attr & ATTR_SYSTEM) &&
592 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
593 /* no need to fix endianness on 0 */
594 if (pfindData->EndOfFile == 0)
595 inode->i_mode |= S_IFIFO;
596 else if (decode_sfu_inode(inode,
597 le64_to_cpu(pfindData->EndOfFile),
598 full_path, cifs_sb, xid))
599 cFYI(1, ("unknown SFU file type\n"));
600 } else { 523 } else {
601 if (attr & ATTR_DIRECTORY) 524 fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid;
602 inode->i_mode |= S_IFDIR;
603 else
604 inode->i_mode |= S_IFREG;
605 } 525 }
606 526
607 cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile); 527 /* query for SFU type info if supported and needed */
608 spin_lock(&inode->i_lock); 528 if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
609 if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) { 529 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
610 /* can not safely shrink the file size here if the 530 tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
611 client is writing to it due to potential races */ 531 if (tmprc)
612 i_size_write(inode, cifsInfo->server_eof); 532 cFYI(1, ("cifs_sfu_type failed: %d", tmprc));
613
614 /* 512 bytes (2**9) is the fake blocksize that must be
615 used for this calculation */
616 inode->i_blocks = (512 - 1 + le64_to_cpu(
617 pfindData->AllocationSize)) >> 9;
618 } 533 }
619 spin_unlock(&inode->i_lock);
620
621 inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks);
622 534
623 /* BB fill in uid and gid here? with help from winbind?
624 or retrieve from NTFS stream extended attribute */
625#ifdef CONFIG_CIFS_EXPERIMENTAL 535#ifdef CONFIG_CIFS_EXPERIMENTAL
626 /* fill in 0777 bits from ACL */ 536 /* fill in 0777 bits from ACL */
627 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 537 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
628 cFYI(1, ("Getting mode bits from ACL")); 538 cFYI(1, ("Getting mode bits from ACL"));
629 acl_to_uid_mode(cifs_sb, inode, full_path, pfid); 539 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
630 } 540 }
631#endif 541#endif
632 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
633 /* fill in remaining high mode bits e.g. SUID, VTX */
634 get_sfu_mode(inode, full_path, cifs_sb, xid);
635 } else if (atomic_read(&cifsInfo->inUse) == 0) {
636 inode->i_uid = cifs_sb->mnt_uid;
637 inode->i_gid = cifs_sb->mnt_gid;
638 /* set so we do not keep refreshing these fields with
639 bad data after user has changed them in memory */
640 atomic_set(&cifsInfo->inUse, 1);
641 }
642
643 cifs_set_ops(inode, is_dfs_referral);
644
645 542
543 /* fill in remaining high mode bits e.g. SUID, VTX */
544 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
545 cifs_sfu_mode(&fattr, full_path, cifs_sb, xid);
646 546
547 if (!*pinode) {
548 *pinode = cifs_iget(sb, &fattr);
549 if (!*pinode)
550 rc = -ENOMEM;
551 } else {
552 cifs_fattr_to_inode(*pinode, &fattr);
553 }
647 554
648cgii_exit: 555cgii_exit:
649 kfree(buf); 556 kfree(buf);
@@ -695,33 +602,78 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
695 return full_path; 602 return full_path;
696} 603}
697 604
605static int
606cifs_find_inode(struct inode *inode, void *opaque)
607{
608 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
609
610 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
611 return 0;
612
613 return 1;
614}
615
616static int
617cifs_init_inode(struct inode *inode, void *opaque)
618{
619 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
620
621 CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
622 return 0;
623}
624
625/* Given fattrs, get a corresponding inode */
626struct inode *
627cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
628{
629 unsigned long hash;
630 struct inode *inode;
631
632 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
633
634 /* hash down to 32-bits on 32-bit arch */
635 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
636
637 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
638
639 /* we have fattrs in hand, update the inode */
640 if (inode) {
641 cifs_fattr_to_inode(inode, fattr);
642 if (sb->s_flags & MS_NOATIME)
643 inode->i_flags |= S_NOATIME | S_NOCMTIME;
644 if (inode->i_state & I_NEW) {
645 inode->i_ino = hash;
646 unlock_new_inode(inode);
647 }
648 }
649
650 return inode;
651}
652
698/* gets root inode */ 653/* gets root inode */
699struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) 654struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
700{ 655{
701 int xid; 656 int xid;
702 struct cifs_sb_info *cifs_sb; 657 struct cifs_sb_info *cifs_sb;
703 struct inode *inode; 658 struct inode *inode = NULL;
704 long rc; 659 long rc;
705 char *full_path; 660 char *full_path;
706 661
707 inode = iget_locked(sb, ino); 662 cifs_sb = CIFS_SB(sb);
708 if (!inode)
709 return ERR_PTR(-ENOMEM);
710 if (!(inode->i_state & I_NEW))
711 return inode;
712
713 cifs_sb = CIFS_SB(inode->i_sb);
714 full_path = cifs_build_path_to_root(cifs_sb); 663 full_path = cifs_build_path_to_root(cifs_sb);
715 if (full_path == NULL) 664 if (full_path == NULL)
716 return ERR_PTR(-ENOMEM); 665 return ERR_PTR(-ENOMEM);
717 666
718 xid = GetXid(); 667 xid = GetXid();
719 if (cifs_sb->tcon->unix_ext) 668 if (cifs_sb->tcon->unix_ext)
720 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 669 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
721 xid);
722 else 670 else
723 rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb, 671 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
724 xid, NULL); 672 xid, NULL);
673
674 if (!inode)
675 return ERR_PTR(-ENOMEM);
676
725 if (rc && cifs_sb->tcon->ipc) { 677 if (rc && cifs_sb->tcon->ipc) {
726 cFYI(1, ("ipc connection - fake read inode")); 678 cFYI(1, ("ipc connection - fake read inode"));
727 inode->i_mode |= S_IFDIR; 679 inode->i_mode |= S_IFDIR;
@@ -737,7 +689,6 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
737 return ERR_PTR(rc); 689 return ERR_PTR(rc);
738 } 690 }
739 691
740 unlock_new_inode(inode);
741 692
742 kfree(full_path); 693 kfree(full_path);
743 /* can not call macro FreeXid here since in a void func 694 /* can not call macro FreeXid here since in a void func
@@ -988,8 +939,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
988 * sb->s_vfs_rename_mutex here */ 939 * sb->s_vfs_rename_mutex here */
989 full_path = build_path_from_dentry(dentry); 940 full_path = build_path_from_dentry(dentry);
990 if (full_path == NULL) { 941 if (full_path == NULL) {
942 rc = -ENOMEM;
991 FreeXid(xid); 943 FreeXid(xid);
992 return -ENOMEM; 944 return rc;
993 } 945 }
994 946
995 if ((tcon->ses->capabilities & CAP_UNIX) && 947 if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -1062,44 +1014,6 @@ out_reval:
1062 return rc; 1014 return rc;
1063} 1015}
1064 1016
1065void posix_fill_in_inode(struct inode *tmp_inode,
1066 FILE_UNIX_BASIC_INFO *pData, int isNewInode)
1067{
1068 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
1069 loff_t local_size;
1070 struct timespec local_mtime;
1071
1072 cifsInfo->time = jiffies;
1073 atomic_inc(&cifsInfo->inUse);
1074
1075 /* save mtime and size */
1076 local_mtime = tmp_inode->i_mtime;
1077 local_size = tmp_inode->i_size;
1078
1079 cifs_unix_info_to_inode(tmp_inode, pData, 1);
1080 cifs_set_ops(tmp_inode, false);
1081
1082 if (!S_ISREG(tmp_inode->i_mode))
1083 return;
1084
1085 /*
1086 * No sense invalidating pages for new inode
1087 * since we we have not started caching
1088 * readahead file data yet.
1089 */
1090 if (isNewInode)
1091 return;
1092
1093 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
1094 (local_size == tmp_inode->i_size)) {
1095 cFYI(1, ("inode exists but unchanged"));
1096 } else {
1097 /* file may have changed on server */
1098 cFYI(1, ("invalidate inode, readdir detected change"));
1099 invalidate_remote_inode(tmp_inode);
1100 }
1101}
1102
1103int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) 1017int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1104{ 1018{
1105 int rc = 0, tmprc; 1019 int rc = 0, tmprc;
@@ -1108,6 +1022,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1108 struct cifsTconInfo *pTcon; 1022 struct cifsTconInfo *pTcon;
1109 char *full_path = NULL; 1023 char *full_path = NULL;
1110 struct inode *newinode = NULL; 1024 struct inode *newinode = NULL;
1025 struct cifs_fattr fattr;
1111 1026
1112 cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode)); 1027 cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode));
1113 1028
@@ -1118,8 +1033,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1118 1033
1119 full_path = build_path_from_dentry(direntry); 1034 full_path = build_path_from_dentry(direntry);
1120 if (full_path == NULL) { 1035 if (full_path == NULL) {
1036 rc = -ENOMEM;
1121 FreeXid(xid); 1037 FreeXid(xid);
1122 return -ENOMEM; 1038 return rc;
1123 } 1039 }
1124 1040
1125 if ((pTcon->ses->capabilities & CAP_UNIX) && 1041 if ((pTcon->ses->capabilities & CAP_UNIX) &&
@@ -1146,7 +1062,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1146 cFYI(1, ("posix mkdir returned 0x%x", rc)); 1062 cFYI(1, ("posix mkdir returned 0x%x", rc));
1147 d_drop(direntry); 1063 d_drop(direntry);
1148 } else { 1064 } else {
1149 __u64 unique_id;
1150 if (pInfo->Type == cpu_to_le32(-1)) { 1065 if (pInfo->Type == cpu_to_le32(-1)) {
1151 /* no return info, go query for it */ 1066 /* no return info, go query for it */
1152 kfree(pInfo); 1067 kfree(pInfo);
@@ -1160,20 +1075,15 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1160 else 1075 else
1161 direntry->d_op = &cifs_dentry_ops; 1076 direntry->d_op = &cifs_dentry_ops;
1162 1077
1163 unique_id = le64_to_cpu(pInfo->UniqueId); 1078 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1164 newinode = cifs_new_inode(inode->i_sb, &unique_id); 1079 newinode = cifs_iget(inode->i_sb, &fattr);
1165 if (newinode == NULL) { 1080 if (!newinode) {
1166 kfree(pInfo); 1081 kfree(pInfo);
1167 goto mkdir_get_info; 1082 goto mkdir_get_info;
1168 } 1083 }
1169 1084
1170 newinode->i_nlink = 2;
1171 d_instantiate(direntry, newinode); 1085 d_instantiate(direntry, newinode);
1172 1086
1173 /* we already checked in POSIXCreate whether
1174 frame was long enough */
1175 posix_fill_in_inode(direntry->d_inode,
1176 pInfo, 1 /* NewInode */);
1177#ifdef CONFIG_CIFS_DEBUG2 1087#ifdef CONFIG_CIFS_DEBUG2
1178 cFYI(1, ("instantiated dentry %p %s to inode %p", 1088 cFYI(1, ("instantiated dentry %p %s to inode %p",
1179 direntry, direntry->d_name.name, newinode)); 1089 direntry, direntry->d_name.name, newinode));
@@ -1236,10 +1146,10 @@ mkdir_get_info:
1236 args.uid = NO_CHANGE_64; 1146 args.uid = NO_CHANGE_64;
1237 args.gid = NO_CHANGE_64; 1147 args.gid = NO_CHANGE_64;
1238 } 1148 }
1239 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, 1149 CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
1240 cifs_sb->local_nls, 1150 cifs_sb->local_nls,
1241 cifs_sb->mnt_cifs_flags & 1151 cifs_sb->mnt_cifs_flags &
1242 CIFS_MOUNT_MAP_SPECIAL_CHR); 1152 CIFS_MOUNT_MAP_SPECIAL_CHR);
1243 } else { 1153 } else {
1244 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && 1154 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
1245 (mode & S_IWUGO) == 0) { 1155 (mode & S_IWUGO) == 0) {
@@ -1303,8 +1213,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1303 1213
1304 full_path = build_path_from_dentry(direntry); 1214 full_path = build_path_from_dentry(direntry);
1305 if (full_path == NULL) { 1215 if (full_path == NULL) {
1216 rc = -ENOMEM;
1306 FreeXid(xid); 1217 FreeXid(xid);
1307 return -ENOMEM; 1218 return rc;
1308 } 1219 }
1309 1220
1310 rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, 1221 rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls,
@@ -1508,8 +1419,9 @@ int cifs_revalidate(struct dentry *direntry)
1508 since that would deadlock */ 1419 since that would deadlock */
1509 full_path = build_path_from_dentry(direntry); 1420 full_path = build_path_from_dentry(direntry);
1510 if (full_path == NULL) { 1421 if (full_path == NULL) {
1422 rc = -ENOMEM;
1511 FreeXid(xid); 1423 FreeXid(xid);
1512 return -ENOMEM; 1424 return rc;
1513 } 1425 }
1514 cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1426 cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
1515 "jiffies %ld", full_path, direntry->d_inode, 1427 "jiffies %ld", full_path, direntry->d_inode,
@@ -1618,6 +1530,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1618 if (!err) { 1530 if (!err) {
1619 generic_fillattr(dentry->d_inode, stat); 1531 generic_fillattr(dentry->d_inode, stat);
1620 stat->blksize = CIFS_MAX_MSGSIZE; 1532 stat->blksize = CIFS_MAX_MSGSIZE;
1533 stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
1621 } 1534 }
1622 return err; 1535 return err;
1623} 1536}
@@ -1782,6 +1695,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1782 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1695 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1783 struct cifsTconInfo *pTcon = cifs_sb->tcon; 1696 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1784 struct cifs_unix_set_info_args *args = NULL; 1697 struct cifs_unix_set_info_args *args = NULL;
1698 struct cifsFileInfo *open_file;
1785 1699
1786 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", 1700 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
1787 direntry->d_name.name, attrs->ia_valid)); 1701 direntry->d_name.name, attrs->ia_valid));
@@ -1868,10 +1782,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1868 args->ctime = NO_CHANGE_64; 1782 args->ctime = NO_CHANGE_64;
1869 1783
1870 args->device = 0; 1784 args->device = 0;
1871 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args, 1785 open_file = find_writable_file(cifsInode);
1872 cifs_sb->local_nls, 1786 if (open_file) {
1873 cifs_sb->mnt_cifs_flags & 1787 u16 nfid = open_file->netfid;
1874 CIFS_MOUNT_MAP_SPECIAL_CHR); 1788 u32 npid = open_file->pid;
1789 rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
1790 atomic_dec(&open_file->wrtPending);
1791 } else {
1792 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
1793 cifs_sb->local_nls,
1794 cifs_sb->mnt_cifs_flags &
1795 CIFS_MOUNT_MAP_SPECIAL_CHR);
1796 }
1875 1797
1876 if (!rc) 1798 if (!rc)
1877 rc = inode_setattr(inode, attrs); 1799 rc = inode_setattr(inode, attrs);
@@ -1911,8 +1833,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1911 1833
1912 full_path = build_path_from_dentry(direntry); 1834 full_path = build_path_from_dentry(direntry);
1913 if (full_path == NULL) { 1835 if (full_path == NULL) {
1836 rc = -ENOMEM;
1914 FreeXid(xid); 1837 FreeXid(xid);
1915 return -ENOMEM; 1838 return rc;
1916 } 1839 }
1917 1840
1918 /* 1841 /*
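
The cifs_setattr_unix hunk above now prefers an already-open writable filehandle for the Unix SET_FILE_INFO call and only falls back to the path-based variant when no such handle exists. A condensed sketch of that pattern (the wrapper name cifs_unix_set_info is hypothetical; the helpers it calls are the ones visible in the hunk):

static int cifs_unix_set_info(int xid, struct cifsTconInfo *tcon,
			      struct cifsInodeInfo *cinode, char *path,
			      struct cifs_unix_set_info_args *args,
			      struct cifs_sb_info *cifs_sb)
{
	struct cifsFileInfo *open_file = find_writable_file(cinode);
	int rc;

	if (open_file) {
		/* handle-based: reuses the server's open state and pid */
		rc = CIFSSMBUnixSetFileInfo(xid, tcon, args,
					    open_file->netfid, open_file->pid);
		atomic_dec(&open_file->wrtPending);	/* drop the reference */
	} else {
		/* path-based fallback when nothing is open for write */
		rc = CIFSSMBUnixSetPathInfo(xid, tcon, path, args,
					    cifs_sb->local_nls,
					    cifs_sb->mnt_cifs_flags &
						CIFS_MOUNT_MAP_SPECIAL_CHR);
	}
	return rc;
}
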
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cd83c53fcbb5..fc1e0487eaee 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
172 full_path = build_path_from_dentry(direntry); 172 full_path = build_path_from_dentry(direntry);
173 173
174 if (full_path == NULL) { 174 if (full_path == NULL) {
175 rc = -ENOMEM;
175 FreeXid(xid); 176 FreeXid(xid);
176 return -ENOMEM; 177 return rc;
177 } 178 }
178 179
179 cFYI(1, ("Full path: %s", full_path)); 180 cFYI(1, ("Full path: %s", full_path));
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 32d6baa0a54f..bd6d6895730d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
133 {0, 0} 133 {0, 0}
134}; 134};
135 135
136/* Convert string containing dotted ip address to binary form */ 136/*
137/* returns 0 if invalid address */ 137 * Convert a string containing text IPv4 or IPv6 address to binary form.
138 138 *
139int 139 * Returns 0 on failure.
140 */
141static int
140cifs_inet_pton(const int address_family, const char *cp, void *dst) 142cifs_inet_pton(const int address_family, const char *cp, void *dst)
141{ 143{
142 int ret = 0; 144 int ret = 0;
@@ -153,6 +155,52 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
153 return ret; 155 return ret;
154} 156}
155 157
158/*
159 * Try to convert a string to an IPv4 address and then attempt to convert
160 * it to an IPv6 address if that fails. Set the family field if either
161 * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to
162 * treat the part following it as a numeric sin6_scope_id.
163 *
164 * Returns 0 on failure.
165 */
166int
167cifs_convert_address(char *src, void *dst)
168{
169 int rc;
170 char *pct, *endp;
171 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
172 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
173
174 /* IPv4 address */
175 if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
176 s4->sin_family = AF_INET;
177 return 1;
178 }
179
180 /* temporarily terminate string */
181 pct = strchr(src, '%');
182 if (pct)
183 *pct = '\0';
184
185 rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
186
187 /* repair temp termination (if any) and make pct point to scopeid */
188 if (pct)
189 *pct++ = '%';
190
191 if (!rc)
192 return rc;
193
194 s6->sin6_family = AF_INET6;
195 if (pct) {
196 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
197 if (!*pct || *endp)
198 return 0;
199 }
200
201 return rc;
202}
203
156/***************************************************************************** 204/*****************************************************************************
157convert a NT status code to a dos class/code 205convert a NT status code to a dos class/code
158 *****************************************************************************/ 206 *****************************************************************************/
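
cifs_convert_address(), added above, accepts either address family and fills in the family (and, for IPv6, the scope id) itself, so callers no longer need to try AF_INET and AF_INET6 separately. A minimal caller sketch, assuming a sockaddr_storage destination; the surrounding function and variable names are illustrative only. Note that the source string must be writable, since any '%' is temporarily overwritten during parsing:

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/socket.h>
#include <linux/string.h>

static int demo_parse_server_addr(char *ipaddr, struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	if (cifs_convert_address(ipaddr, addr) == 0)
		return -EINVAL;		/* neither a valid IPv4 nor IPv6 address */

	/* addr->ss_family is now AF_INET or AF_INET6; for input such as
	 * "fe80::1%2" the text after '%' has been parsed into sin6_scope_id. */
	return 0;
}
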
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 86d0055dc529..f823a4a208a7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -63,374 +63,123 @@ static inline void dump_cifs_file_struct(struct file *file, char *label)
63} 63}
64#endif /* DEBUG2 */ 64#endif /* DEBUG2 */
65 65
66/* Returns 1 if new inode created, 2 if both dentry and inode were */ 66/*
67/* Might check in the future if inode number changed so we can rehash inode */ 67 * Find the dentry that matches "name". If there isn't one, create one. If it's
68static int 68 * a negative dentry or the uniqueid changed, then drop it and recreate it.
69construct_dentry(struct qstr *qstring, struct file *file, 69 */
70 struct inode **ptmp_inode, struct dentry **pnew_dentry, 70static struct dentry *
71 __u64 *inum) 71cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
72 struct cifs_fattr *fattr)
72{ 73{
73 struct dentry *tmp_dentry = NULL; 74 struct dentry *dentry, *alias;
74 struct super_block *sb = file->f_path.dentry->d_sb; 75 struct inode *inode;
75 int rc = 0; 76 struct super_block *sb = parent->d_inode->i_sb;
77
78 cFYI(1, ("For %s", name->name));
79
80 dentry = d_lookup(parent, name);
81 if (dentry) {
82 /* FIXME: check for inode number changes? */
83 if (dentry->d_inode != NULL)
84 return dentry;
85 d_drop(dentry);
86 dput(dentry);
87 }
76 88
77 cFYI(1, ("For %s", qstring->name)); 89 dentry = d_alloc(parent, name);
78 90 if (dentry == NULL)
79 qstring->hash = full_name_hash(qstring->name, qstring->len); 91 return NULL;
80 tmp_dentry = d_lookup(file->f_path.dentry, qstring);
81 if (tmp_dentry) {
82 /* BB: overwrite old name? i.e. tmp_dentry->d_name and
83 * tmp_dentry->d_name.len??
84 */
85 cFYI(0, ("existing dentry with inode 0x%p",
86 tmp_dentry->d_inode));
87 *ptmp_inode = tmp_dentry->d_inode;
88 if (*ptmp_inode == NULL) {
89 *ptmp_inode = cifs_new_inode(sb, inum);
90 if (*ptmp_inode == NULL)
91 return rc;
92 rc = 1;
93 }
94 } else {
95 tmp_dentry = d_alloc(file->f_path.dentry, qstring);
96 if (tmp_dentry == NULL) {
97 cERROR(1, ("Failed allocating dentry"));
98 *ptmp_inode = NULL;
99 return rc;
100 }
101 92
102 if (CIFS_SB(sb)->tcon->nocase) 93 inode = cifs_iget(sb, fattr);
103 tmp_dentry->d_op = &cifs_ci_dentry_ops; 94 if (!inode) {
104 else 95 dput(dentry);
105 tmp_dentry->d_op = &cifs_dentry_ops; 96 return NULL;
97 }
106 98
107 *ptmp_inode = cifs_new_inode(sb, inum); 99 if (CIFS_SB(sb)->tcon->nocase)
108 if (*ptmp_inode == NULL) 100 dentry->d_op = &cifs_ci_dentry_ops;
109 return rc; 101 else
110 rc = 2; 102 dentry->d_op = &cifs_dentry_ops;
103
104 alias = d_materialise_unique(dentry, inode);
105 if (alias != NULL) {
106 dput(dentry);
107 if (IS_ERR(alias))
108 return NULL;
109 dentry = alias;
111 } 110 }
112 111
113 tmp_dentry->d_time = jiffies; 112 return dentry;
114 *pnew_dentry = tmp_dentry;
115 return rc;
116} 113}
117 114
118static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, 115static void
119 char *buf, unsigned int *pobject_type, int isNewInode) 116cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
120{ 117{
121 loff_t local_size; 118 fattr->cf_uid = cifs_sb->mnt_uid;
122 struct timespec local_mtime; 119 fattr->cf_gid = cifs_sb->mnt_gid;
123
124 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
125 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
126 __u32 attr;
127 __u64 allocation_size;
128 __u64 end_of_file;
129 umode_t default_mode;
130
131 /* save mtime and size */
132 local_mtime = tmp_inode->i_mtime;
133 local_size = tmp_inode->i_size;
134
135 if (new_buf_type) {
136 FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
137
138 attr = le32_to_cpu(pfindData->ExtFileAttributes);
139 allocation_size = le64_to_cpu(pfindData->AllocationSize);
140 end_of_file = le64_to_cpu(pfindData->EndOfFile);
141 tmp_inode->i_atime =
142 cifs_NTtimeToUnix(pfindData->LastAccessTime);
143 tmp_inode->i_mtime =
144 cifs_NTtimeToUnix(pfindData->LastWriteTime);
145 tmp_inode->i_ctime =
146 cifs_NTtimeToUnix(pfindData->ChangeTime);
147 } else { /* legacy, OS2 and DOS style */
148 int offset = cifs_sb->tcon->ses->server->timeAdj;
149 FIND_FILE_STANDARD_INFO *pfindData =
150 (FIND_FILE_STANDARD_INFO *)buf;
151
152 tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
153 pfindData->LastWriteTime,
154 offset);
155 tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
156 pfindData->LastAccessTime,
157 offset);
158 tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
159 pfindData->LastWriteTime,
160 offset);
161 attr = le16_to_cpu(pfindData->Attributes);
162 allocation_size = le32_to_cpu(pfindData->AllocationSize);
163 end_of_file = le32_to_cpu(pfindData->DataSize);
164 }
165 120
166 /* Linux can not store file creation time unfortunately so ignore it */ 121 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
167 122 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
168 cifsInfo->cifsAttrs = attr; 123 fattr->cf_dtype = DT_DIR;
169#ifdef CONFIG_CIFS_EXPERIMENTAL 124 } else {
170 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 125 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
171 /* get more accurate mode via ACL - so force inode refresh */ 126 fattr->cf_dtype = DT_REG;
172 cifsInfo->time = 0;
173 } else
174#endif /* CONFIG_CIFS_EXPERIMENTAL */
175 cifsInfo->time = jiffies;
176
177 /* treat dos attribute of read-only as read-only mode bit e.g. 555? */
178 /* 2767 perms - indicate mandatory locking */
179 /* BB fill in uid and gid here? with help from winbind?
180 or retrieve from NTFS stream extended attribute */
181 if (atomic_read(&cifsInfo->inUse) == 0) {
182 tmp_inode->i_uid = cifs_sb->mnt_uid;
183 tmp_inode->i_gid = cifs_sb->mnt_gid;
184 }
185
186 if (attr & ATTR_DIRECTORY)
187 default_mode = cifs_sb->mnt_dir_mode;
188 else
189 default_mode = cifs_sb->mnt_file_mode;
190
191 /* set initial permissions */
192 if ((atomic_read(&cifsInfo->inUse) == 0) ||
193 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
194 tmp_inode->i_mode = default_mode;
195 else {
196 /* just reenable write bits if !ATTR_READONLY */
197 if ((tmp_inode->i_mode & S_IWUGO) == 0 &&
198 (attr & ATTR_READONLY) == 0)
199 tmp_inode->i_mode |= (S_IWUGO & default_mode);
200
201 tmp_inode->i_mode &= ~S_IFMT;
202 } 127 }
203 128
204 /* clear write bits if ATTR_READONLY is set */ 129 if (fattr->cf_cifsattrs & ATTR_READONLY)
205 if (attr & ATTR_READONLY) 130 fattr->cf_mode &= ~S_IWUGO;
206 tmp_inode->i_mode &= ~S_IWUGO;
207 131
208 /* set inode type */ 132 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL &&
209 if ((attr & ATTR_SYSTEM) && 133 fattr->cf_cifsattrs & ATTR_SYSTEM) {
210 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { 134 if (fattr->cf_eof == 0) {
211 if (end_of_file == 0) { 135 fattr->cf_mode &= ~S_IFMT;
212 tmp_inode->i_mode |= S_IFIFO; 136 fattr->cf_mode |= S_IFIFO;
213 *pobject_type = DT_FIFO; 137 fattr->cf_dtype = DT_FIFO;
214 } else { 138 } else {
215 /* 139 /*
216 * trying to get the type can be slow, so just call 140 * trying to get the type and mode via SFU can be slow,
217 * this a regular file for now, and mark for reval 141 * so just call those regular files for now, and mark
142 * for reval
218 */ 143 */
219 tmp_inode->i_mode |= S_IFREG; 144 fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
220 *pobject_type = DT_REG;
221 cifsInfo->time = 0;
222 }
223 } else {
224 if (attr & ATTR_DIRECTORY) {
225 tmp_inode->i_mode |= S_IFDIR;
226 *pobject_type = DT_DIR;
227 } else {
228 tmp_inode->i_mode |= S_IFREG;
229 *pobject_type = DT_REG;
230 } 145 }
231 } 146 }
147}
232 148
233 /* can not fill in nlink here as in qpathinfo version and Unx search */ 149void
234 if (atomic_read(&cifsInfo->inUse) == 0) 150cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
235 atomic_set(&cifsInfo->inUse, 1); 151 struct cifs_sb_info *cifs_sb)
236 152{
237 cifsInfo->server_eof = end_of_file; 153 memset(fattr, 0, sizeof(*fattr));
238 spin_lock(&tmp_inode->i_lock); 154 fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
239 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 155 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
240 /* can not safely change the file size here if the 156 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
241 client is writing to it due to potential races */ 157 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
242 i_size_write(tmp_inode, end_of_file); 158 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
243 159 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
244 /* 512 bytes (2**9) is the fake blocksize that must be used */ 160
245 /* for this calculation, even though the reported blocksize is larger */ 161 cifs_fill_common_info(fattr, cifs_sb);
246 tmp_inode->i_blocks = (512 - 1 + allocation_size) >> 9;
247 }
248 spin_unlock(&tmp_inode->i_lock);
249
250 if (allocation_size < end_of_file)
251 cFYI(1, ("May be sparse file, allocation less than file size"));
252 cFYI(1, ("File Size %ld and blocks %llu",
253 (unsigned long)tmp_inode->i_size,
254 (unsigned long long)tmp_inode->i_blocks));
255 if (S_ISREG(tmp_inode->i_mode)) {
256 cFYI(1, ("File inode"));
257 tmp_inode->i_op = &cifs_file_inode_ops;
258 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
259 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
260 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
261 else
262 tmp_inode->i_fop = &cifs_file_direct_ops;
263 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
264 tmp_inode->i_fop = &cifs_file_nobrl_ops;
265 else
266 tmp_inode->i_fop = &cifs_file_ops;
267
268 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
269 (cifs_sb->tcon->ses->server->maxBuf <
270 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
271 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
272 else
273 tmp_inode->i_data.a_ops = &cifs_addr_ops;
274
275 if (isNewInode)
276 return; /* No sense invalidating pages for new inode
277 since have not started caching readahead file
278 data yet */
279
280 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
281 (local_size == tmp_inode->i_size)) {
282 cFYI(1, ("inode exists but unchanged"));
283 } else {
284 /* file may have changed on server */
285 cFYI(1, ("invalidate inode, readdir detected change"));
286 invalidate_remote_inode(tmp_inode);
287 }
288 } else if (S_ISDIR(tmp_inode->i_mode)) {
289 cFYI(1, ("Directory inode"));
290 tmp_inode->i_op = &cifs_dir_inode_ops;
291 tmp_inode->i_fop = &cifs_dir_ops;
292 } else if (S_ISLNK(tmp_inode->i_mode)) {
293 cFYI(1, ("Symbolic Link inode"));
294 tmp_inode->i_op = &cifs_symlink_inode_ops;
295 } else {
296 cFYI(1, ("Init special inode"));
297 init_special_inode(tmp_inode, tmp_inode->i_mode,
298 tmp_inode->i_rdev);
299 }
300} 162}
301 163
302static void unix_fill_in_inode(struct inode *tmp_inode, 164void
303 FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode) 165cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info,
166 struct cifs_sb_info *cifs_sb)
304{ 167{
305 loff_t local_size; 168 int offset = cifs_sb->tcon->ses->server->timeAdj;
306 struct timespec local_mtime;
307
308 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
309 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
310
311 __u32 type = le32_to_cpu(pfindData->Type);
312 __u64 num_of_bytes = le64_to_cpu(pfindData->NumOfBytes);
313 __u64 end_of_file = le64_to_cpu(pfindData->EndOfFile);
314 cifsInfo->time = jiffies;
315 atomic_inc(&cifsInfo->inUse);
316
317 /* save mtime and size */
318 local_mtime = tmp_inode->i_mtime;
319 local_size = tmp_inode->i_size;
320
321 tmp_inode->i_atime =
322 cifs_NTtimeToUnix(pfindData->LastAccessTime);
323 tmp_inode->i_mtime =
324 cifs_NTtimeToUnix(pfindData->LastModificationTime);
325 tmp_inode->i_ctime =
326 cifs_NTtimeToUnix(pfindData->LastStatusChange);
327
328 tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
329 /* since we set the inode type below we need to mask off type
330 to avoid strange results if bits above were corrupt */
331 tmp_inode->i_mode &= ~S_IFMT;
332 if (type == UNIX_FILE) {
333 *pobject_type = DT_REG;
334 tmp_inode->i_mode |= S_IFREG;
335 } else if (type == UNIX_SYMLINK) {
336 *pobject_type = DT_LNK;
337 tmp_inode->i_mode |= S_IFLNK;
338 } else if (type == UNIX_DIR) {
339 *pobject_type = DT_DIR;
340 tmp_inode->i_mode |= S_IFDIR;
341 } else if (type == UNIX_CHARDEV) {
342 *pobject_type = DT_CHR;
343 tmp_inode->i_mode |= S_IFCHR;
344 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
345 le64_to_cpu(pfindData->DevMinor) & MINORMASK);
346 } else if (type == UNIX_BLOCKDEV) {
347 *pobject_type = DT_BLK;
348 tmp_inode->i_mode |= S_IFBLK;
349 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
350 le64_to_cpu(pfindData->DevMinor) & MINORMASK);
351 } else if (type == UNIX_FIFO) {
352 *pobject_type = DT_FIFO;
353 tmp_inode->i_mode |= S_IFIFO;
354 } else if (type == UNIX_SOCKET) {
355 *pobject_type = DT_SOCK;
356 tmp_inode->i_mode |= S_IFSOCK;
357 } else {
358 /* safest to just call it a file */
359 *pobject_type = DT_REG;
360 tmp_inode->i_mode |= S_IFREG;
361 cFYI(1, ("unknown inode type %d", type));
362 }
363 169
364 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 170 memset(fattr, 0, sizeof(*fattr));
365 tmp_inode->i_uid = cifs_sb->mnt_uid; 171 fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate,
366 else 172 info->LastAccessTime, offset);
367 tmp_inode->i_uid = le64_to_cpu(pfindData->Uid); 173 fattr->cf_ctime = cnvrtDosUnixTm(info->LastWriteDate,
368 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 174 info->LastWriteTime, offset);
369 tmp_inode->i_gid = cifs_sb->mnt_gid; 175 fattr->cf_mtime = cnvrtDosUnixTm(info->LastWriteDate,
370 else 176 info->LastWriteTime, offset);
371 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
372 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
373
374 cifsInfo->server_eof = end_of_file;
375 spin_lock(&tmp_inode->i_lock);
376 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
377 /* can not safely change the file size here if the
378 client is writing to it due to potential races */
379 i_size_write(tmp_inode, end_of_file);
380
381 /* 512 bytes (2**9) is the fake blocksize that must be used */
382 /* for this calculation, not the real blocksize */
383 tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
384 }
385 spin_unlock(&tmp_inode->i_lock);
386 177
387 if (S_ISREG(tmp_inode->i_mode)) { 178 fattr->cf_cifsattrs = le16_to_cpu(info->Attributes);
388 cFYI(1, ("File inode")); 179 fattr->cf_bytes = le32_to_cpu(info->AllocationSize);
389 tmp_inode->i_op = &cifs_file_inode_ops; 180 fattr->cf_eof = le32_to_cpu(info->DataSize);
390 181
391 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 182 cifs_fill_common_info(fattr, cifs_sb);
392 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
393 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
394 else
395 tmp_inode->i_fop = &cifs_file_direct_ops;
396 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
397 tmp_inode->i_fop = &cifs_file_nobrl_ops;
398 else
399 tmp_inode->i_fop = &cifs_file_ops;
400
401 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
402 (cifs_sb->tcon->ses->server->maxBuf <
403 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
404 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
405 else
406 tmp_inode->i_data.a_ops = &cifs_addr_ops;
407
408 if (isNewInode)
409 return; /* No sense invalidating pages for new inode
410 since we have not started caching readahead
411 file data for it yet */
412
413 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
414 (local_size == tmp_inode->i_size)) {
415 cFYI(1, ("inode exists but unchanged"));
416 } else {
417 /* file may have changed on server */
418 cFYI(1, ("invalidate inode, readdir detected change"));
419 invalidate_remote_inode(tmp_inode);
420 }
421 } else if (S_ISDIR(tmp_inode->i_mode)) {
422 cFYI(1, ("Directory inode"));
423 tmp_inode->i_op = &cifs_dir_inode_ops;
424 tmp_inode->i_fop = &cifs_dir_ops;
425 } else if (S_ISLNK(tmp_inode->i_mode)) {
426 cFYI(1, ("Symbolic Link inode"));
427 tmp_inode->i_op = &cifs_symlink_inode_ops;
428/* tmp_inode->i_fop = *//* do not need to set to anything */
429 } else {
430 cFYI(1, ("Special inode"));
431 init_special_inode(tmp_inode, tmp_inode->i_mode,
432 tmp_inode->i_rdev);
433 }
434} 183}
435 184
436/* BB eventually need to add the following helper function to 185/* BB eventually need to add the following helper function to
@@ -872,7 +621,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
872 len = strnlen(filename, PATH_MAX); 621 len = strnlen(filename, PATH_MAX);
873 } 622 }
874 623
875 *pinum = le64_to_cpu(pFindData->UniqueId); 624 *pinum = le64_to_cpu(pFindData->basic.UniqueId);
876 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { 625 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
877 FILE_DIRECTORY_INFO *pFindData = 626 FILE_DIRECTORY_INFO *pFindData =
878 (FILE_DIRECTORY_INFO *)current_entry; 627 (FILE_DIRECTORY_INFO *)current_entry;
@@ -932,11 +681,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
932 int rc = 0; 681 int rc = 0;
933 struct qstr qstring; 682 struct qstr qstring;
934 struct cifsFileInfo *pCifsF; 683 struct cifsFileInfo *pCifsF;
935 unsigned int obj_type; 684 u64 inum;
936 __u64 inum; 685 ino_t ino;
686 struct super_block *sb;
937 struct cifs_sb_info *cifs_sb; 687 struct cifs_sb_info *cifs_sb;
938 struct inode *tmp_inode;
939 struct dentry *tmp_dentry; 688 struct dentry *tmp_dentry;
689 struct cifs_fattr fattr;
940 690
941 /* get filename and len into qstring */ 691 /* get filename and len into qstring */
942 /* get dentry */ 692 /* get dentry */
@@ -954,60 +704,53 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
954 if (rc != 0) 704 if (rc != 0)
955 return 0; 705 return 0;
956 706
957 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 707 sb = file->f_path.dentry->d_sb;
708 cifs_sb = CIFS_SB(sb);
958 709
959 qstring.name = scratch_buf; 710 qstring.name = scratch_buf;
960 rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, 711 rc = cifs_get_name_from_search_buf(&qstring, pfindEntry,
961 pCifsF->srch_inf.info_level, 712 pCifsF->srch_inf.info_level,
962 pCifsF->srch_inf.unicode, cifs_sb, 713 pCifsF->srch_inf.unicode, cifs_sb,
963 max_len, 714 max_len, &inum /* returned */);
964 &inum /* returned */);
965 715
966 if (rc) 716 if (rc)
967 return rc; 717 return rc;
968 718
969 /* only these two infolevels return valid inode numbers */
970 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX ||
971 pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
972 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
973 &inum);
974 else
975 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
976 NULL);
977
978 if ((tmp_inode == NULL) || (tmp_dentry == NULL))
979 return -ENOMEM;
980
981 /* we pass in rc below, indicating whether it is a new inode,
982 so we can figure out whether to invalidate the inode cached
983 data if the file has changed */
984 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) 719 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
985 unix_fill_in_inode(tmp_inode, 720 cifs_unix_basic_to_fattr(&fattr,
986 (FILE_UNIX_INFO *)pfindEntry, 721 &((FILE_UNIX_INFO *) pfindEntry)->basic,
987 &obj_type, rc); 722 cifs_sb);
988 else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) 723 else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
989 fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */, 724 cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *)
990 pfindEntry, &obj_type, rc); 725 pfindEntry, cifs_sb);
991 else 726 else
992 fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); 727 cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *)
728 pfindEntry, cifs_sb);
993 729
994 if (rc) /* new inode - needs to be tied to dentry */ { 730 /* FIXME: make _to_fattr functions fill this out */
995 d_instantiate(tmp_dentry, tmp_inode); 731 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
996 if (rc == 2) 732 fattr.cf_uniqueid = inum;
997 d_rehash(tmp_dentry); 733 else
998 } 734 fattr.cf_uniqueid = iunique(sb, ROOT_I);
999 735
736 ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
737 tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr);
1000 738
1001 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, 739 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
1002 tmp_inode->i_ino, obj_type); 740 ino, fattr.cf_dtype);
741
742 /*
743 * we can not return filldir errors to the caller since they are
744 * "normal" when the stat blocksize is too small - we return remapped
745 * error instead
746 *
747 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
748 * case already. Why should we be clobbering other errors from it?
749 */
1003 if (rc) { 750 if (rc) {
1004 cFYI(1, ("filldir rc = %d", rc)); 751 cFYI(1, ("filldir rc = %d", rc));
1005 /* we can not return filldir errors to the caller
1006 since they are "normal" when the stat blocksize
1007 is too small - we return remapped error instead */
1008 rc = -EOVERFLOW; 752 rc = -EOVERFLOW;
1009 } 753 }
1010
1011 dput(tmp_dentry); 754 dput(tmp_dentry);
1012 return rc; 755 return rc;
1013} 756}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 897a052270f9..7085a6275c4c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate:
802#endif /* CONFIG_CIFS_UPCALL */ 802#endif /* CONFIG_CIFS_UPCALL */
803 } else { 803 } else {
804#ifdef CONFIG_CIFS_EXPERIMENTAL 804#ifdef CONFIG_CIFS_EXPERIMENTAL
805 if ((experimEnabled > 1) && (type == RawNTLMSSP)) { 805 if (type == RawNTLMSSP) {
806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { 806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
807 cERROR(1, ("NTLMSSP requires Unicode support")); 807 cERROR(1, ("NTLMSSP requires Unicode support"));
808 rc = -ENOSYS; 808 rc = -ENOSYS;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index e9527eedc639..a75afa3dd9e1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
64 64
65 full_path = build_path_from_dentry(direntry); 65 full_path = build_path_from_dentry(direntry);
66 if (full_path == NULL) { 66 if (full_path == NULL) {
67 rc = -ENOMEM;
67 FreeXid(xid); 68 FreeXid(xid);
68 return -ENOMEM; 69 return rc;
69 } 70 }
70 if (ea_name == NULL) { 71 if (ea_name == NULL) {
71 cFYI(1, ("Null xattr names not supported")); 72 cFYI(1, ("Null xattr names not supported"));
@@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
118 119
119 full_path = build_path_from_dentry(direntry); 120 full_path = build_path_from_dentry(direntry);
120 if (full_path == NULL) { 121 if (full_path == NULL) {
122 rc = -ENOMEM;
121 FreeXid(xid); 123 FreeXid(xid);
122 return -ENOMEM; 124 return rc;
123 } 125 }
124 /* return dos attributes as pseudo xattr */ 126 /* return dos attributes as pseudo xattr */
125 /* return alt name if available as pseudo attr */ 127 /* return alt name if available as pseudo attr */
@@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
225 227
226 full_path = build_path_from_dentry(direntry); 228 full_path = build_path_from_dentry(direntry);
227 if (full_path == NULL) { 229 if (full_path == NULL) {
230 rc = -ENOMEM;
228 FreeXid(xid); 231 FreeXid(xid);
229 return -ENOMEM; 232 return rc;
230 } 233 }
231 /* return dos attributes as pseudo xattr */ 234 /* return dos attributes as pseudo xattr */
232 /* return alt name if available as pseudo attr */ 235 /* return alt name if available as pseudo attr */
@@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
351 354
352 full_path = build_path_from_dentry(direntry); 355 full_path = build_path_from_dentry(direntry);
353 if (full_path == NULL) { 356 if (full_path == NULL) {
357 rc = -ENOMEM;
354 FreeXid(xid); 358 FreeXid(xid);
355 return -ENOMEM; 359 return rc;
356 } 360 }
357 /* return dos attributes as pseudo xattr */ 361 /* return dos attributes as pseudo xattr */
358 /* return alt name if available as pseudo attr */ 362 /* return alt name if available as pseudo attr */
diff --git a/fs/compat.c b/fs/compat.c
index cdd51a3a7c53..94502dab972a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -32,7 +32,6 @@
32#include <linux/smb_mount.h> 32#include <linux/smb_mount.h>
33#include <linux/ncp_mount.h> 33#include <linux/ncp_mount.h>
34#include <linux/nfs4_mount.h> 34#include <linux/nfs4_mount.h>
35#include <linux/smp_lock.h>
36#include <linux/syscalls.h> 35#include <linux/syscalls.h>
37#include <linux/ctype.h> 36#include <linux/ctype.h>
38#include <linux/module.h> 37#include <linux/module.h>
@@ -1486,8 +1485,8 @@ int compat_do_execve(char * filename,
1486 if (!bprm) 1485 if (!bprm)
1487 goto out_files; 1486 goto out_files;
1488 1487
1489 retval = mutex_lock_interruptible(&current->cred_guard_mutex); 1488 retval = -ERESTARTNOINTR;
1490 if (retval < 0) 1489 if (mutex_lock_interruptible(&current->cred_guard_mutex))
1491 goto out_free; 1490 goto out_free;
1492 current->in_execve = 1; 1491 current->in_execve = 1;
1493 1492
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c135202c38b3..f28f070a60fc 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,6 +19,7 @@
19#include <linux/compiler.h> 19#include <linux/compiler.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/smp.h> 21#include <linux/smp.h>
22#include <linux/smp_lock.h>
22#include <linux/ioctl.h> 23#include <linux/ioctl.h>
23#include <linux/if.h> 24#include <linux/if.h>
24#include <linux/if_bridge.h> 25#include <linux/if_bridge.h>
@@ -31,6 +32,7 @@
31#include <linux/skbuff.h> 32#include <linux/skbuff.h>
32#include <linux/netlink.h> 33#include <linux/netlink.h>
33#include <linux/vt.h> 34#include <linux/vt.h>
35#include <linux/falloc.h>
34#include <linux/fs.h> 36#include <linux/fs.h>
35#include <linux/file.h> 37#include <linux/file.h>
36#include <linux/ppp_defs.h> 38#include <linux/ppp_defs.h>
@@ -1779,6 +1781,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
1779 return sys_ioctl(fd, cmd, (unsigned long)tn); 1781 return sys_ioctl(fd, cmd, (unsigned long)tn);
1780} 1782}
1781 1783
1784/* on ia32 l_start is on a 32-bit boundary */
1785#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
1786struct space_resv_32 {
1787 __s16 l_type;
1788 __s16 l_whence;
1789 __s64 l_start __attribute__((packed));
1790 /* len == 0 means until end of file */
1791 __s64 l_len __attribute__((packed));
1792 __s32 l_sysid;
1793 __u32 l_pid;
1794 __s32 l_pad[4]; /* reserve area */
1795};
1796
1797#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32)
1798#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32)
1799
1800/* just account for different alignment */
1801static int compat_ioctl_preallocate(struct file *file, unsigned long arg)
1802{
1803 struct space_resv_32 __user *p32 = (void __user *)arg;
1804 struct space_resv __user *p = compat_alloc_user_space(sizeof(*p));
1805
1806 if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) ||
1807 copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) ||
1808 copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) ||
1809 copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) ||
1810 copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) ||
1811 copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
1812 copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
1813 return -EFAULT;
1814
1815 return ioctl_preallocate(file, p);
1816}
1817#endif
1818
1782 1819
1783typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, 1820typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int,
1784 unsigned long, struct file *); 1821 unsigned long, struct file *);
@@ -2756,6 +2793,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
2756 case FIOQSIZE: 2793 case FIOQSIZE:
2757 break; 2794 break;
2758 2795
2796#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
2797 case FS_IOC_RESVSP_32:
2798 case FS_IOC_RESVSP64_32:
2799 error = compat_ioctl_preallocate(filp, arg);
2800 goto out_fput;
2801#else
2802 case FS_IOC_RESVSP:
2803 case FS_IOC_RESVSP64:
2804 error = ioctl_preallocate(filp, (void __user *)arg);
2805 goto out_fput;
2806#endif
2807
2759 case FIBMAP: 2808 case FIBMAP:
2760 case FIGETBSZ: 2809 case FIGETBSZ:
2761 case FIONREAD: 2810 case FIONREAD:
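
The field-by-field copy_in_user() above is needed because ia32 aligns 64-bit struct members on a 4-byte boundary while x86-64/ia64 align them on 8 bytes, so the 32-bit and native layouts of the reservation structure differ. A small user-space illustration of the offset difference when compiled for x86-64 (space_resv_32 is the layout from the hunk; space_resv_64 is an assumed mirror of the native struct, not a definition taken from the kernel headers):

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* ia32 layout from the hunk: the 64-bit members are packed on 4 bytes */
struct space_resv_32 {
	int16_t  l_type;
	int16_t  l_whence;
	int64_t  l_start __attribute__((packed));
	int64_t  l_len   __attribute__((packed));
	int32_t  l_sysid;
	uint32_t l_pid;
	int32_t  l_pad[4];
};

/* assumed native layout: same fields, natural 8-byte alignment for int64_t */
struct space_resv_64 {
	int16_t  l_type;
	int16_t  l_whence;
	int64_t  l_start;
	int64_t  l_len;
	int32_t  l_sysid;
	uint32_t l_pid;
	int32_t  l_pad[4];
};

int main(void)
{
	printf("l_start offset: %zu vs %zu\n",
	       offsetof(struct space_resv_32, l_start),
	       offsetof(struct space_resv_64, l_start));	/* 4 vs 8 */
	printf("struct size:    %zu vs %zu\n",
	       sizeof(struct space_resv_32),
	       sizeof(struct space_resv_64));			/* 44 vs 48 */
	return 0;
}
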
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 9b1d285f9fe6..75efb028974b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb)
423} 423}
424 424
425static struct file_system_type devpts_fs_type = { 425static struct file_system_type devpts_fs_type = {
426 .owner = THIS_MODULE,
427 .name = "devpts", 426 .name = "devpts",
428 .get_sb = devpts_get_sb, 427 .get_sb = devpts_get_sb,
429 .kill_sb = devpts_kill_sb, 428 .kill_sb = devpts_kill_sb,
@@ -564,13 +563,4 @@ static int __init init_devpts_fs(void)
564 } 563 }
565 return err; 564 return err;
566} 565}
567
568static void __exit exit_devpts_fs(void)
569{
570 unregister_filesystem(&devpts_fs_type);
571 mntput(devpts_mnt);
572}
573
574module_init(init_devpts_fs) 566module_init(init_devpts_fs)
575module_exit(exit_devpts_fs)
576MODULE_LICENSE("GPL");
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 205ec95b347e..eb507c453c5f 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -435,7 +435,7 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
435static int find_rsb(struct dlm_ls *ls, char *name, int namelen, 435static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
436 unsigned int flags, struct dlm_rsb **r_ret) 436 unsigned int flags, struct dlm_rsb **r_ret)
437{ 437{
438 struct dlm_rsb *r, *tmp; 438 struct dlm_rsb *r = NULL, *tmp;
439 uint32_t hash, bucket; 439 uint32_t hash, bucket;
440 int error = -EINVAL; 440 int error = -EINVAL;
441 441
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cdb580a9c7a2..618a60f03886 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -902,7 +902,7 @@ static void tcp_connect_to_sock(struct connection *con)
902 int result = -EHOSTUNREACH; 902 int result = -EHOSTUNREACH;
903 struct sockaddr_storage saddr, src_addr; 903 struct sockaddr_storage saddr, src_addr;
904 int addr_len; 904 int addr_len;
905 struct socket *sock; 905 struct socket *sock = NULL;
906 906
907 if (con->nodeid == 0) { 907 if (con->nodeid == 0) {
908 log_print("attempt to connect sock 0 foiled"); 908 log_print("attempt to connect sock 0 foiled");
@@ -962,6 +962,8 @@ out_err:
962 if (con->sock) { 962 if (con->sock) {
963 sock_release(con->sock); 963 sock_release(con->sock);
964 con->sock = NULL; 964 con->sock = NULL;
965 } else if (sock) {
966 sock_release(sock);
965 } 967 }
966 /* 968 /*
967 * Some errors are fatal and this list might need adjusting. For other 969 * Some errors are fatal and this list might need adjusting. For other
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 894a32d438d5..16f682e26c07 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -353,7 +353,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
353{ 353{
354 struct dlm_plock_info info; 354 struct dlm_plock_info info;
355 struct plock_op *op; 355 struct plock_op *op;
356 int found = 0; 356 int found = 0, do_callback = 0;
357 357
358 if (count != sizeof(info)) 358 if (count != sizeof(info))
359 return -EINVAL; 359 return -EINVAL;
@@ -366,21 +366,24 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
366 366
367 spin_lock(&ops_lock); 367 spin_lock(&ops_lock);
368 list_for_each_entry(op, &recv_list, list) { 368 list_for_each_entry(op, &recv_list, list) {
369 if (op->info.fsid == info.fsid && op->info.number == info.number && 369 if (op->info.fsid == info.fsid &&
370 op->info.number == info.number &&
370 op->info.owner == info.owner) { 371 op->info.owner == info.owner) {
372 struct plock_xop *xop = (struct plock_xop *)op;
371 list_del_init(&op->list); 373 list_del_init(&op->list);
372 found = 1;
373 op->done = 1;
374 memcpy(&op->info, &info, sizeof(info)); 374 memcpy(&op->info, &info, sizeof(info));
375 if (xop->callback)
376 do_callback = 1;
377 else
378 op->done = 1;
379 found = 1;
375 break; 380 break;
376 } 381 }
377 } 382 }
378 spin_unlock(&ops_lock); 383 spin_unlock(&ops_lock);
379 384
380 if (found) { 385 if (found) {
381 struct plock_xop *xop; 386 if (do_callback)
382 xop = (struct plock_xop *)op;
383 if (xop->callback)
384 dlm_plock_callback(op); 387 dlm_plock_callback(op);
385 else 388 else
386 wake_up(&recv_wq); 389 wake_up(&recv_wq);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3f0e1974abdc..31d12de83a2a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -14,35 +14,44 @@
14#include <linux/list.h> 14#include <linux/list.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/anon_inodes.h> 16#include <linux/anon_inodes.h>
17#include <linux/eventfd.h>
18#include <linux/syscalls.h> 17#include <linux/syscalls.h>
19#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/kref.h>
20#include <linux/eventfd.h>
20 21
21struct eventfd_ctx { 22struct eventfd_ctx {
23 struct kref kref;
22 wait_queue_head_t wqh; 24 wait_queue_head_t wqh;
23 /* 25 /*
24 * Every time that a write(2) is performed on an eventfd, the 26 * Every time that a write(2) is performed on an eventfd, the
25 * value of the __u64 being written is added to "count" and a 27 * value of the __u64 being written is added to "count" and a
26 * wakeup is performed on "wqh". A read(2) will return the "count" 28 * wakeup is performed on "wqh". A read(2) will return the "count"
27 * value to userspace, and will reset "count" to zero. The kernel 29 * value to userspace, and will reset "count" to zero. The kernel
28 * size eventfd_signal() also, adds to the "count" counter and 30 * side eventfd_signal() also, adds to the "count" counter and
 29 * issues a wakeup. 31 * issues a wakeup.
30 */ 32 */
31 __u64 count; 33 __u64 count;
32 unsigned int flags; 34 unsigned int flags;
33}; 35};
34 36
35/* 37/**
36 * Adds "n" to the eventfd counter "count". Returns "n" in case of 38 * eventfd_signal - Adds @n to the eventfd counter.
37 * success, or a value lower then "n" in case of coutner overflow. 39 * @ctx: [in] Pointer to the eventfd context.
38 * This function is supposed to be called by the kernel in paths 40 * @n: [in] Value of the counter to be added to the eventfd internal counter.
39 * that do not allow sleeping. In this function we allow the counter 41 * The value cannot be negative.
40 * to reach the ULLONG_MAX value, and we signal this as overflow 42 *
41 * condition by returining a POLLERR to poll(2). 43 * This function is supposed to be called by the kernel in paths that do not
44 * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
45 * value, and we signal this as overflow condition by returining a POLLERR
46 * to poll(2).
47 *
48 * Returns @n in case of success, a non-negative number lower than @n in case
49 * of overflow, or the following error codes:
50 *
51 * -EINVAL : The value of @n is negative.
42 */ 52 */
43int eventfd_signal(struct file *file, int n) 53int eventfd_signal(struct eventfd_ctx *ctx, int n)
44{ 54{
45 struct eventfd_ctx *ctx = file->private_data;
46 unsigned long flags; 55 unsigned long flags;
47 56
48 if (n < 0) 57 if (n < 0)
@@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n)
59} 68}
60EXPORT_SYMBOL_GPL(eventfd_signal); 69EXPORT_SYMBOL_GPL(eventfd_signal);
61 70
71static void eventfd_free(struct kref *kref)
72{
73 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
74
75 kfree(ctx);
76}
77
78/**
79 * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
80 * @ctx: [in] Pointer to the eventfd context.
81 *
82 * Returns: In case of success, returns a pointer to the eventfd context.
83 */
84struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
85{
86 kref_get(&ctx->kref);
87 return ctx;
88}
89EXPORT_SYMBOL_GPL(eventfd_ctx_get);
90
91/**
92 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
93 * @ctx: [in] Pointer to eventfd context.
94 *
95 * The eventfd context reference must have been previously acquired either
96 * with eventfd_ctx_get() or eventfd_ctx_fdget()).
97 */
98void eventfd_ctx_put(struct eventfd_ctx *ctx)
99{
100 kref_put(&ctx->kref, eventfd_free);
101}
102EXPORT_SYMBOL_GPL(eventfd_ctx_put);
103
62static int eventfd_release(struct inode *inode, struct file *file) 104static int eventfd_release(struct inode *inode, struct file *file)
63{ 105{
64 kfree(file->private_data); 106 struct eventfd_ctx *ctx = file->private_data;
107
108 wake_up_poll(&ctx->wqh, POLLHUP);
109 eventfd_ctx_put(ctx);
65 return 0; 110 return 0;
66} 111}
67 112
@@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = {
185 .write = eventfd_write, 230 .write = eventfd_write,
186}; 231};
187 232
233/**
234 * eventfd_fget - Acquire a reference of an eventfd file descriptor.
235 * @fd: [in] Eventfd file descriptor.
236 *
237 * Returns a pointer to the eventfd file structure in case of success, or the
238 * following error pointer:
239 *
240 * -EBADF : Invalid @fd file descriptor.
241 * -EINVAL : The @fd file descriptor is not an eventfd file.
242 */
188struct file *eventfd_fget(int fd) 243struct file *eventfd_fget(int fd)
189{ 244{
190 struct file *file; 245 struct file *file;
@@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd)
201} 256}
202EXPORT_SYMBOL_GPL(eventfd_fget); 257EXPORT_SYMBOL_GPL(eventfd_fget);
203 258
259/**
260 * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
261 * @fd: [in] Eventfd file descriptor.
262 *
263 * Returns a pointer to the internal eventfd context, otherwise the error
264 * pointers returned by the following functions:
265 *
266 * eventfd_fget
267 */
268struct eventfd_ctx *eventfd_ctx_fdget(int fd)
269{
270 struct file *file;
271 struct eventfd_ctx *ctx;
272
273 file = eventfd_fget(fd);
274 if (IS_ERR(file))
275 return (struct eventfd_ctx *) file;
276 ctx = eventfd_ctx_get(file->private_data);
277 fput(file);
278
279 return ctx;
280}
281EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
282
283/**
284 * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
285 * @file: [in] Eventfd file pointer.
286 *
287 * Returns a pointer to the internal eventfd context, otherwise the error
288 * pointer:
289 *
290 * -EINVAL : The @fd file descriptor is not an eventfd file.
291 */
292struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
293{
294 if (file->f_op != &eventfd_fops)
295 return ERR_PTR(-EINVAL);
296
297 return eventfd_ctx_get(file->private_data);
298}
299EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
300
204SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) 301SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
205{ 302{
206 int fd; 303 int fd;
@@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
217 if (!ctx) 314 if (!ctx)
218 return -ENOMEM; 315 return -ENOMEM;
219 316
317 kref_init(&ctx->kref);
220 init_waitqueue_head(&ctx->wqh); 318 init_waitqueue_head(&ctx->wqh);
221 ctx->count = count; 319 ctx->count = count;
222 ctx->flags = flags; 320 ctx->flags = flags;
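
With the eventfd context now reference counted, in-kernel users no longer need to hold the struct file just to signal an eventfd: they resolve the descriptor to an eventfd_ctx once, keep their own reference, and drop it when done. A sketch of that lifecycle for a hypothetical consumer (struct foo_dev and the foo_* functions are illustrative; the eventfd_* calls are the ones exported above):

#include <linux/eventfd.h>
#include <linux/err.h>

struct foo_dev {
	struct eventfd_ctx *trigger;	/* hypothetical driver state */
};

static int foo_set_trigger(struct foo_dev *dev, int eventfd)
{
	struct eventfd_ctx *ctx = eventfd_ctx_fdget(eventfd);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);	/* -EBADF or -EINVAL from eventfd_fget() */
	dev->trigger = ctx;		/* reference held until foo_clear_trigger() */
	return 0;
}

static void foo_notify(struct foo_dev *dev)
{
	/* safe in non-sleeping paths; adds 1 to the counter and wakes readers */
	eventfd_signal(dev->trigger, 1);
}

static void foo_clear_trigger(struct foo_dev *dev)
{
	eventfd_ctx_put(dev->trigger);	/* drops the kref taken by eventfd_ctx_fdget() */
	dev->trigger = NULL;
}
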
diff --git a/fs/exec.c b/fs/exec.c
index e639957d7a57..4a8849e45b21 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1277,8 +1277,8 @@ int do_execve(char * filename,
1277 if (!bprm) 1277 if (!bprm)
1278 goto out_files; 1278 goto out_files;
1279 1279
1280 retval = mutex_lock_interruptible(&current->cred_guard_mutex); 1280 retval = -ERESTARTNOINTR;
1281 if (retval < 0) 1281 if (mutex_lock_interruptible(&current->cred_guard_mutex))
1282 goto out_free; 1282 goto out_free;
1283 current->in_execve = 1; 1283 current->in_execve = 1;
1284 1284
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 24667eedc023..c6718e4817fe 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -2,9 +2,7 @@
2 * common.h - Common definitions for both Kernel and user-mode utilities 2 * common.h - Common definitions for both Kernel and user-mode utilities
3 * 3 *
4 * Copyright (C) 2005, 2006 4 * Copyright (C) 2005, 2006
5 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 5 * Avishay Traeger (avishay@gmail.com)
6 * Copyright (C) 2005, 2006
7 * International Business Machines
8 * Copyright (C) 2008, 2009 6 * Copyright (C) 2008, 2009
9 * Boaz Harrosh <bharrosh@panasas.com> 7 * Boaz Harrosh <bharrosh@panasas.com>
10 * 8 *
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 65b0c8c776a1..4cfab1cc75c0 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 0fd4c7859679..5ec72e020b22 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -156,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child);
156int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, 154int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
157 struct inode *); 155 struct inode *);
158 156
157/* super.c */
158int exofs_sync_fs(struct super_block *sb, int wait);
159
159/********************* 160/*********************
160 * operation vectors * 161 * operation vectors *
161 *********************/ 162 *********************/
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 6ed7fe484752..839b9dc1e70f 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -47,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
47{ 45{
48 int ret; 46 int ret;
49 struct address_space *mapping = filp->f_mapping; 47 struct address_space *mapping = filp->f_mapping;
48 struct inode *inode = dentry->d_inode;
49 struct super_block *sb;
50 50
51 ret = filemap_write_and_wait(mapping); 51 ret = filemap_write_and_wait(mapping);
52 if (ret) 52 if (ret)
53 return ret; 53 return ret;
54 54
55 /*Note: file_fsync below also calles sync_blockdev, which is a no-op 55 /* sync the inode attributes */
56 * for exofs, but other then that it does sync_inode and 56 ret = write_inode_now(inode, 1);
57 * sync_superblock which is what we need here. 57
58 */ 58 /* This is a good place to write the sb */
59 return file_fsync(filp, dentry, datasync); 59 /* TODO: Sechedule an sb-sync on create */
60 sb = inode->i_sb;
61 if (sb->s_dirt)
62 exofs_sync_fs(sb, 1);
63
64 return ret;
60} 65}
61 66
62static int exofs_flush(struct file *file, fl_owner_t id) 67static int exofs_flush(struct file *file, fl_owner_t id)
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 77d0a295eb1c..6c10f7476699 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -295,6 +293,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
295err: 293err:
296 if (!is_sync) 294 if (!is_sync)
297 _unlock_pcol_pages(pcol, ret, READ); 295 _unlock_pcol_pages(pcol, ret, READ);
296 else /* Pages unlocked by caller in sync mode only free bio */
297 pcol_free(pcol);
298
298 kfree(pcol_copy); 299 kfree(pcol_copy);
299 if (or) 300 if (or)
300 osd_end_request(or); 301 osd_end_request(or);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 77fdd765e76d..b7dd0c236863 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b3d2ccb87aaa..4372542df284 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8216c5b77b53..5ab10c3bbebe 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -33,6 +31,7 @@
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */ 32 */
35 33
34#include <linux/smp_lock.h>
36#include <linux/string.h> 35#include <linux/string.h>
37#include <linux/parser.h> 36#include <linux/parser.h>
38#include <linux/vfs.h> 37#include <linux/vfs.h>
@@ -200,7 +199,7 @@ static const struct export_operations exofs_export_ops;
200/* 199/*
201 * Write the superblock to the OSD 200 * Write the superblock to the OSD
202 */ 201 */
203static int exofs_sync_fs(struct super_block *sb, int wait) 202int exofs_sync_fs(struct super_block *sb, int wait)
204{ 203{
205 struct exofs_sb_info *sbi; 204 struct exofs_sb_info *sbi;
206 struct exofs_fscb *fscb; 205 struct exofs_fscb *fscb;
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 36e2d7bc7f7b..4dd687c3e747 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d46e38cb85c5..d636e1297cad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -125,37 +125,12 @@ fail:
125 return ERR_PTR(-EINVAL); 125 return ERR_PTR(-EINVAL);
126} 126}
127 127
128static inline struct posix_acl *
129ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
130{
131 struct posix_acl *acl = EXT2_ACL_NOT_CACHED;
132
133 spin_lock(&inode->i_lock);
134 if (*i_acl != EXT2_ACL_NOT_CACHED)
135 acl = posix_acl_dup(*i_acl);
136 spin_unlock(&inode->i_lock);
137
138 return acl;
139}
140
141static inline void
142ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl,
143 struct posix_acl *acl)
144{
145 spin_lock(&inode->i_lock);
146 if (*i_acl != EXT2_ACL_NOT_CACHED)
147 posix_acl_release(*i_acl);
148 *i_acl = posix_acl_dup(acl);
149 spin_unlock(&inode->i_lock);
150}
151
152/* 128/*
153 * inode->i_mutex: don't care 129 * inode->i_mutex: don't care
154 */ 130 */
155static struct posix_acl * 131static struct posix_acl *
156ext2_get_acl(struct inode *inode, int type) 132ext2_get_acl(struct inode *inode, int type)
157{ 133{
158 struct ext2_inode_info *ei = EXT2_I(inode);
159 int name_index; 134 int name_index;
160 char *value = NULL; 135 char *value = NULL;
161 struct posix_acl *acl; 136 struct posix_acl *acl;
@@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type)
164 if (!test_opt(inode->i_sb, POSIX_ACL)) 139 if (!test_opt(inode->i_sb, POSIX_ACL))
165 return NULL; 140 return NULL;
166 141
167 switch(type) { 142 acl = get_cached_acl(inode, type);
168 case ACL_TYPE_ACCESS: 143 if (acl != ACL_NOT_CACHED)
169 acl = ext2_iget_acl(inode, &ei->i_acl); 144 return acl;
170 if (acl != EXT2_ACL_NOT_CACHED) 145
171 return acl; 146 switch (type) {
172 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; 147 case ACL_TYPE_ACCESS:
173 break; 148 name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
174 149 break;
175 case ACL_TYPE_DEFAULT: 150 case ACL_TYPE_DEFAULT:
176 acl = ext2_iget_acl(inode, &ei->i_default_acl); 151 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT;
177 if (acl != EXT2_ACL_NOT_CACHED) 152 break;
178 return acl; 153 default:
179 name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; 154 BUG();
180 break;
181
182 default:
183 return ERR_PTR(-EINVAL);
184 } 155 }
185 retval = ext2_xattr_get(inode, name_index, "", NULL, 0); 156 retval = ext2_xattr_get(inode, name_index, "", NULL, 0);
186 if (retval > 0) { 157 if (retval > 0) {
@@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type)
197 acl = ERR_PTR(retval); 168 acl = ERR_PTR(retval);
198 kfree(value); 169 kfree(value);
199 170
200 if (!IS_ERR(acl)) { 171 if (!IS_ERR(acl))
201 switch(type) { 172 set_cached_acl(inode, type, acl);
202 case ACL_TYPE_ACCESS:
203 ext2_iset_acl(inode, &ei->i_acl, acl);
204 break;
205 173
206 case ACL_TYPE_DEFAULT:
207 ext2_iset_acl(inode, &ei->i_default_acl, acl);
208 break;
209 }
210 }
211 return acl; 174 return acl;
212} 175}
213 176
@@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type)
217static int 180static int
218ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 181ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219{ 182{
220 struct ext2_inode_info *ei = EXT2_I(inode);
221 int name_index; 183 int name_index;
222 void *value = NULL; 184 void *value = NULL;
223 size_t size = 0; 185 size_t size = 0;
@@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
263 error = ext2_xattr_set(inode, name_index, "", value, size, 0); 225 error = ext2_xattr_set(inode, name_index, "", value, size, 0);
264 226
265 kfree(value); 227 kfree(value);
266 if (!error) { 228 if (!error)
267 switch(type) { 229 set_cached_acl(inode, type, acl);
268 case ACL_TYPE_ACCESS:
269 ext2_iset_acl(inode, &ei->i_acl, acl);
270 break;
271
272 case ACL_TYPE_DEFAULT:
273 ext2_iset_acl(inode, &ei->i_default_acl, acl);
274 break;
275 }
276 }
277 return error; 230 return error;
278} 231}
279 232
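
The fs/ext2/acl.c hunk above is the heart of this series for ext2: the private ext2_iget_acl()/ext2_iset_acl() helpers and the EXT2_ACL_NOT_CACHED sentinel are dropped in favour of the generic VFS ACL cache, and the same conversion is repeated for ext3 and ext4 further down. A minimal sketch of the resulting lookup pattern, with the on-disk xattr read stubbed out (only the get_cached_acl()/set_cached_acl() calls are the point; the helper name is illustrative):

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* Sketch: probe the generic per-inode ACL cache, fall back to a (stubbed)
 * on-disk lookup, then populate the cache on success. */
static struct posix_acl *sketch_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl;

	acl = get_cached_acl(inode, type);	/* returns a referenced ACL */
	if (acl != ACL_NOT_CACHED)
		return acl;			/* may be NULL: no ACL present */

	acl = NULL;	/* stand-in for reading the ACL from an xattr */

	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl); /* cache takes its own reference */
	return acl;
}

Note that set_cached_acl() takes its own reference, so the caller still returns the copy it already holds, exactly as the patched ext2_get_acl() does.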
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index b42cf578554b..ecefe478898f 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT2_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext2_permission (struct inode *, int); 57extern int ext2_permission (struct inode *, int);
62extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d988a718aedb..9a8a8e27a063 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -47,10 +47,6 @@ struct ext2_inode_info {
47 */ 47 */
48 struct rw_semaphore xattr_sem; 48 struct rw_semaphore xattr_sem;
49#endif 49#endif
50#ifdef CONFIG_EXT2_FS_POSIX_ACL
51 struct posix_acl *i_acl;
52 struct posix_acl *i_default_acl;
53#endif
54 rwlock_t i_meta_lock; 50 rwlock_t i_meta_lock;
55 51
56 /* 52 /*
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 29ed682061f6..e27130341d4f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
1224 return inode; 1224 return inode;
1225 1225
1226 ei = EXT2_I(inode); 1226 ei = EXT2_I(inode);
1227#ifdef CONFIG_EXT2_FS_POSIX_ACL
1228 ei->i_acl = EXT2_ACL_NOT_CACHED;
1229 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
1230#endif
1231 ei->i_block_alloc_info = NULL; 1227 ei->i_block_alloc_info = NULL;
1232 1228
1233 raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); 1229 raw_inode = ext2_get_inode(inode->i_sb, ino, &bh);
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 7cb4badef927..e7431309bdca 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -13,7 +13,6 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/mount.h> 15#include <linux/mount.h>
16#include <linux/smp_lock.h>
17#include <asm/current.h> 16#include <asm/current.h>
18#include <asm/uaccess.h> 17#include <asm/uaccess.h>
19 18
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 6524ecaebb7a..e1dedb0f7873 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
66 inode = NULL; 66 inode = NULL;
67 if (ino) { 67 if (ino) {
68 inode = ext2_iget(dir->i_sb, ino); 68 inode = ext2_iget(dir->i_sb, ino);
69 if (IS_ERR(inode)) 69 if (unlikely(IS_ERR(inode))) {
70 return ERR_CAST(inode); 70 if (PTR_ERR(inode) == -ESTALE) {
71 ext2_error(dir->i_sb, __func__,
72 "deleted inode referenced: %lu",
73 ino);
74 return ERR_PTR(-EIO);
75 } else {
76 return ERR_CAST(inode);
77 }
78 }
71 } 79 }
72 return d_splice_alias(inode, dentry); 80 return d_splice_alias(inode, dentry);
73} 81}
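
The ext2_lookup() change above turns an -ESTALE from ext2_iget() into an explicit filesystem error: a directory entry that points at a deleted inode means on-disk corruption, so it is logged via ext2_error() and reported to the caller as -EIO rather than letting -ESTALE (which NFS clients read as a stale file handle) leak out of lookup. The same idiom in isolation, as a hedged sketch (the wrapper name is made up for illustration):

#include <linux/err.h>

/* Sketch: translate the "deleted inode referenced" case into -EIO. */
static struct inode *sketch_lookup_inode(struct inode *dir, unsigned long ino)
{
	struct inode *inode = ext2_iget(dir->i_sb, ino);

	if (IS_ERR(inode) && PTR_ERR(inode) == -ESTALE) {
		ext2_error(dir->i_sb, __func__,
			   "deleted inode referenced: %lu", ino);
		return ERR_PTR(-EIO);	/* corruption, not a stale handle */
	}
	return inode;	/* a valid inode or the original ERR_PTR() */
}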
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 458999638c3d..1a9ffee47d56 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
152 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); 152 ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL);
153 if (!ei) 153 if (!ei)
154 return NULL; 154 return NULL;
155#ifdef CONFIG_EXT2_FS_POSIX_ACL
156 ei->i_acl = EXT2_ACL_NOT_CACHED;
157 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
158#endif
159 ei->i_block_alloc_info = NULL; 155 ei->i_block_alloc_info = NULL;
160 ei->vfs_inode.i_version = 1; 156 ei->vfs_inode.i_version = 1;
161 return &ei->vfs_inode; 157 return &ei->vfs_inode;
@@ -198,18 +194,6 @@ static void destroy_inodecache(void)
198static void ext2_clear_inode(struct inode *inode) 194static void ext2_clear_inode(struct inode *inode)
199{ 195{
200 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; 196 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
201#ifdef CONFIG_EXT2_FS_POSIX_ACL
202 struct ext2_inode_info *ei = EXT2_I(inode);
203
204 if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) {
205 posix_acl_release(ei->i_acl);
206 ei->i_acl = EXT2_ACL_NOT_CACHED;
207 }
208 if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) {
209 posix_acl_release(ei->i_default_acl);
210 ei->i_default_acl = EXT2_ACL_NOT_CACHED;
211 }
212#endif
213 ext2_discard_reservation(inode); 197 ext2_discard_reservation(inode);
214 EXT2_I(inode)->i_block_alloc_info = NULL; 198 EXT2_I(inode)->i_block_alloc_info = NULL;
215 if (unlikely(rsv)) 199 if (unlikely(rsv))
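
The ext2_alloc_inode()/ext2_clear_inode()/ext2_iget() hunks above all fall out of the same move: with the cached ACL pointers now living in the generic VFS inode, the filesystem no longer initialises them to a private sentinel or releases them on eviction. Roughly what the generic side does instead on inode teardown, shown here only for orientation (assumes CONFIG_FS_POSIX_ACL; the real code lives in the common inode/posix_acl code, not in this diff):

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* Sketch: release whatever ACLs ended up cached on the VFS inode. */
static void sketch_drop_cached_acls(struct inode *inode)
{
	if (inode->i_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_acl);	/* NULL is handled */
	if (inode->i_default_acl != ACL_NOT_CACHED)
		posix_acl_release(inode->i_default_acl);
}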
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e0c745451715..e167bae37ef0 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -126,33 +126,6 @@ fail:
126 return ERR_PTR(-EINVAL); 126 return ERR_PTR(-EINVAL);
127} 127}
128 128
129static inline struct posix_acl *
130ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{
132 struct posix_acl *acl = ACCESS_ONCE(*i_acl);
133
134 if (acl) {
135 spin_lock(&inode->i_lock);
136 acl = *i_acl;
137 if (acl != EXT3_ACL_NOT_CACHED)
138 acl = posix_acl_dup(acl);
139 spin_unlock(&inode->i_lock);
140 }
141
142 return acl;
143}
144
145static inline void
146ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
147 struct posix_acl *acl)
148{
149 spin_lock(&inode->i_lock);
150 if (*i_acl != EXT3_ACL_NOT_CACHED)
151 posix_acl_release(*i_acl);
152 *i_acl = posix_acl_dup(acl);
153 spin_unlock(&inode->i_lock);
154}
155
156/* 129/*
157 * Inode operation get_posix_acl(). 130 * Inode operation get_posix_acl().
158 * 131 *
@@ -161,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
161static struct posix_acl * 134static struct posix_acl *
162ext3_get_acl(struct inode *inode, int type) 135ext3_get_acl(struct inode *inode, int type)
163{ 136{
164 struct ext3_inode_info *ei = EXT3_I(inode);
165 int name_index; 137 int name_index;
166 char *value = NULL; 138 char *value = NULL;
167 struct posix_acl *acl; 139 struct posix_acl *acl;
@@ -170,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type)
170 if (!test_opt(inode->i_sb, POSIX_ACL)) 142 if (!test_opt(inode->i_sb, POSIX_ACL))
171 return NULL; 143 return NULL;
172 144
173 switch(type) { 145 acl = get_cached_acl(inode, type);
174 case ACL_TYPE_ACCESS: 146 if (acl != ACL_NOT_CACHED)
175 acl = ext3_iget_acl(inode, &ei->i_acl); 147 return acl;
176 if (acl != EXT3_ACL_NOT_CACHED) 148
177 return acl; 149 switch (type) {
178 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; 150 case ACL_TYPE_ACCESS:
179 break; 151 name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
180 152 break;
181 case ACL_TYPE_DEFAULT: 153 case ACL_TYPE_DEFAULT:
182 acl = ext3_iget_acl(inode, &ei->i_default_acl); 154 name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 if (acl != EXT3_ACL_NOT_CACHED) 155 break;
184 return acl; 156 default:
185 name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; 157 BUG();
186 break;
187
188 default:
189 return ERR_PTR(-EINVAL);
190 } 158 }
159
191 retval = ext3_xattr_get(inode, name_index, "", NULL, 0); 160 retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
192 if (retval > 0) { 161 if (retval > 0) {
193 value = kmalloc(retval, GFP_NOFS); 162 value = kmalloc(retval, GFP_NOFS);
@@ -203,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type)
203 acl = ERR_PTR(retval); 172 acl = ERR_PTR(retval);
204 kfree(value); 173 kfree(value);
205 174
206 if (!IS_ERR(acl)) { 175 if (!IS_ERR(acl))
207 switch(type) { 176 set_cached_acl(inode, type, acl);
208 case ACL_TYPE_ACCESS:
209 ext3_iset_acl(inode, &ei->i_acl, acl);
210 break;
211 177
212 case ACL_TYPE_DEFAULT:
213 ext3_iset_acl(inode, &ei->i_default_acl, acl);
214 break;
215 }
216 }
217 return acl; 178 return acl;
218} 179}
219 180
@@ -226,7 +187,6 @@ static int
226ext3_set_acl(handle_t *handle, struct inode *inode, int type, 187ext3_set_acl(handle_t *handle, struct inode *inode, int type,
227 struct posix_acl *acl) 188 struct posix_acl *acl)
228{ 189{
229 struct ext3_inode_info *ei = EXT3_I(inode);
230 int name_index; 190 int name_index;
231 void *value = NULL; 191 void *value = NULL;
232 size_t size = 0; 192 size_t size = 0;
@@ -271,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
271 value, size, 0); 231 value, size, 0);
272 232
273 kfree(value); 233 kfree(value);
274 if (!error) {
275 switch(type) {
276 case ACL_TYPE_ACCESS:
277 ext3_iset_acl(inode, &ei->i_acl, acl);
278 break;
279 234
280 case ACL_TYPE_DEFAULT: 235 if (!error)
281 ext3_iset_acl(inode, &ei->i_default_acl, acl); 236 set_cached_acl(inode, type, acl);
282 break; 237
283 }
284 }
285 return error; 238 return error;
286} 239}
287 240
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 42da16b8cac0..07d15a3a5969 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT3_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext3_permission (struct inode *, int); 57extern int ext3_permission (struct inode *, int);
62extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 05dea8132fc0..5f51fed5c750 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
2752 return inode; 2752 return inode;
2753 2753
2754 ei = EXT3_I(inode); 2754 ei = EXT3_I(inode);
2755#ifdef CONFIG_EXT3_FS_POSIX_ACL
2756 ei->i_acl = EXT3_ACL_NOT_CACHED;
2757 ei->i_default_acl = EXT3_ACL_NOT_CACHED;
2758#endif
2759 ei->i_block_alloc_info = NULL; 2755 ei->i_block_alloc_info = NULL;
2760 2756
2761 ret = __ext3_get_inode_loc(inode, &iloc, 0); 2757 ret = __ext3_get_inode_loc(inode, &iloc, 0);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 601e881e6105..524b349c6299 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
464 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); 464 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
465 if (!ei) 465 if (!ei)
466 return NULL; 466 return NULL;
467#ifdef CONFIG_EXT3_FS_POSIX_ACL
468 ei->i_acl = EXT3_ACL_NOT_CACHED;
469 ei->i_default_acl = EXT3_ACL_NOT_CACHED;
470#endif
471 ei->i_block_alloc_info = NULL; 467 ei->i_block_alloc_info = NULL;
472 ei->vfs_inode.i_version = 1; 468 ei->vfs_inode.i_version = 1;
473 return &ei->vfs_inode; 469 return &ei->vfs_inode;
@@ -518,18 +514,6 @@ static void destroy_inodecache(void)
518static void ext3_clear_inode(struct inode *inode) 514static void ext3_clear_inode(struct inode *inode)
519{ 515{
520 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; 516 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
521#ifdef CONFIG_EXT3_FS_POSIX_ACL
522 if (EXT3_I(inode)->i_acl &&
523 EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) {
524 posix_acl_release(EXT3_I(inode)->i_acl);
525 EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED;
526 }
527 if (EXT3_I(inode)->i_default_acl &&
528 EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) {
529 posix_acl_release(EXT3_I(inode)->i_default_acl);
530 EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED;
531 }
532#endif
533 ext3_discard_reservation(inode); 517 ext3_discard_reservation(inode);
534 EXT3_I(inode)->i_block_alloc_info = NULL; 518 EXT3_I(inode)->i_block_alloc_info = NULL;
535 if (unlikely(rsv)) 519 if (unlikely(rsv))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 605aeed96d68..f6d8967149ca 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -126,33 +126,6 @@ fail:
126 return ERR_PTR(-EINVAL); 126 return ERR_PTR(-EINVAL);
127} 127}
128 128
129static inline struct posix_acl *
130ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
131{
132 struct posix_acl *acl = ACCESS_ONCE(*i_acl);
133
134 if (acl) {
135 spin_lock(&inode->i_lock);
136 acl = *i_acl;
137 if (acl != EXT4_ACL_NOT_CACHED)
138 acl = posix_acl_dup(acl);
139 spin_unlock(&inode->i_lock);
140 }
141
142 return acl;
143}
144
145static inline void
146ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
147 struct posix_acl *acl)
148{
149 spin_lock(&inode->i_lock);
150 if (*i_acl != EXT4_ACL_NOT_CACHED)
151 posix_acl_release(*i_acl);
152 *i_acl = posix_acl_dup(acl);
153 spin_unlock(&inode->i_lock);
154}
155
156/* 129/*
157 * Inode operation get_posix_acl(). 130 * Inode operation get_posix_acl().
158 * 131 *
@@ -161,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
161static struct posix_acl * 134static struct posix_acl *
162ext4_get_acl(struct inode *inode, int type) 135ext4_get_acl(struct inode *inode, int type)
163{ 136{
164 struct ext4_inode_info *ei = EXT4_I(inode);
165 int name_index; 137 int name_index;
166 char *value = NULL; 138 char *value = NULL;
167 struct posix_acl *acl; 139 struct posix_acl *acl;
@@ -170,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type)
170 if (!test_opt(inode->i_sb, POSIX_ACL)) 142 if (!test_opt(inode->i_sb, POSIX_ACL))
171 return NULL; 143 return NULL;
172 144
145 acl = get_cached_acl(inode, type);
146 if (acl != ACL_NOT_CACHED)
147 return acl;
148
173 switch (type) { 149 switch (type) {
174 case ACL_TYPE_ACCESS: 150 case ACL_TYPE_ACCESS:
175 acl = ext4_iget_acl(inode, &ei->i_acl);
176 if (acl != EXT4_ACL_NOT_CACHED)
177 return acl;
178 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 151 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
179 break; 152 break;
180
181 case ACL_TYPE_DEFAULT: 153 case ACL_TYPE_DEFAULT:
182 acl = ext4_iget_acl(inode, &ei->i_default_acl);
183 if (acl != EXT4_ACL_NOT_CACHED)
184 return acl;
185 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 154 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
186 break; 155 break;
187
188 default: 156 default:
189 return ERR_PTR(-EINVAL); 157 BUG();
190 } 158 }
191 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 159 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
192 if (retval > 0) { 160 if (retval > 0) {
@@ -203,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type)
203 acl = ERR_PTR(retval); 171 acl = ERR_PTR(retval);
204 kfree(value); 172 kfree(value);
205 173
206 if (!IS_ERR(acl)) { 174 if (!IS_ERR(acl))
207 switch (type) { 175 set_cached_acl(inode, type, acl);
208 case ACL_TYPE_ACCESS:
209 ext4_iset_acl(inode, &ei->i_acl, acl);
210 break;
211 176
212 case ACL_TYPE_DEFAULT:
213 ext4_iset_acl(inode, &ei->i_default_acl, acl);
214 break;
215 }
216 }
217 return acl; 177 return acl;
218} 178}
219 179
@@ -226,7 +186,6 @@ static int
226ext4_set_acl(handle_t *handle, struct inode *inode, int type, 186ext4_set_acl(handle_t *handle, struct inode *inode, int type,
227 struct posix_acl *acl) 187 struct posix_acl *acl)
228{ 188{
229 struct ext4_inode_info *ei = EXT4_I(inode);
230 int name_index; 189 int name_index;
231 void *value = NULL; 190 void *value = NULL;
232 size_t size = 0; 191 size_t size = 0;
@@ -271,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
271 value, size, 0); 230 value, size, 0);
272 231
273 kfree(value); 232 kfree(value);
274 if (!error) { 233 if (!error)
275 switch (type) { 234 set_cached_acl(inode, type, acl);
276 case ACL_TYPE_ACCESS:
277 ext4_iset_acl(inode, &ei->i_acl, acl);
278 break;
279 235
280 case ACL_TYPE_DEFAULT:
281 ext4_iset_acl(inode, &ei->i_default_acl, acl);
282 break;
283 }
284 }
285 return error; 236 return error;
286} 237}
287 238
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index cb45257a246e..949789d2bba6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size)
53 53
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
57 if the ACL has not been cached */
58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59
60/* acl.c */ 56/* acl.c */
61extern int ext4_permission(struct inode *, int); 57extern int ext4_permission(struct inode *, int);
62extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 17b9998680e3..9714db393efe 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -93,20 +93,20 @@ typedef unsigned int ext4_group_t;
93struct ext4_allocation_request { 93struct ext4_allocation_request {
94 /* target inode for block we're allocating */ 94 /* target inode for block we're allocating */
95 struct inode *inode; 95 struct inode *inode;
96 /* how many blocks we want to allocate */
97 unsigned int len;
96 /* logical block in target inode */ 98 /* logical block in target inode */
97 ext4_lblk_t logical; 99 ext4_lblk_t logical;
98 /* phys. target (a hint) */
99 ext4_fsblk_t goal;
100 /* the closest logical allocated block to the left */ 100 /* the closest logical allocated block to the left */
101 ext4_lblk_t lleft; 101 ext4_lblk_t lleft;
102 /* phys. block for ^^^ */
103 ext4_fsblk_t pleft;
104 /* the closest logical allocated block to the right */ 102 /* the closest logical allocated block to the right */
105 ext4_lblk_t lright; 103 ext4_lblk_t lright;
106 /* phys. block for ^^^ */ 104 /* phys. target (a hint) */
105 ext4_fsblk_t goal;
106 /* phys. block for the closest logical allocated block to the left */
107 ext4_fsblk_t pleft;
108 /* phys. block for the closest logical allocated block to the right */
107 ext4_fsblk_t pright; 109 ext4_fsblk_t pright;
108 /* how many blocks we want to allocate */
109 unsigned int len;
110 /* flags. see above EXT4_MB_HINT_* */ 110 /* flags. see above EXT4_MB_HINT_* */
111 unsigned int flags; 111 unsigned int flags;
112}; 112};
@@ -595,10 +595,6 @@ struct ext4_inode_info {
595 */ 595 */
596 struct rw_semaphore xattr_sem; 596 struct rw_semaphore xattr_sem;
597#endif 597#endif
598#ifdef CONFIG_EXT4_FS_POSIX_ACL
599 struct posix_acl *i_acl;
600 struct posix_acl *i_default_acl;
601#endif
602 598
603 struct list_head i_orphan; /* unlinked but open inodes */ 599 struct list_head i_orphan; /* unlinked but open inodes */
604 600
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index ad13a84644e1..eb27fd0f2ee8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,8 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
43 ext4_journal_abort_handle(where, __func__, bh, 43 ext4_journal_abort_handle(where, __func__, bh,
44 handle, err); 44 handle, err);
45 } 45 }
46 else
47 brelse(bh);
46 return err; 48 return err;
47} 49}
48 50
@@ -57,6 +59,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
57 ext4_journal_abort_handle(where, __func__, bh, 59 ext4_journal_abort_handle(where, __func__, bh,
58 handle, err); 60 handle, err);
59 } 61 }
62 else
63 brelse(bh);
60 return err; 64 return err;
61} 65}
62 66
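
The two hunks above change the contract of __ext4_journal_forget() and __ext4_journal_revoke(): in no-journal mode (an invalid handle) they now brelse() the buffer head themselves instead of leaking the caller's reference, which is what lets ext4_forget() in the fs/ext4/inode.c hunk further down drop its early return. A minimal sketch of that contract (assumes ext4 context; the helper name is illustrative):

#include "ext4_jbd2.h"

/* Sketch: with a journal, hand the bh to jbd2; without one, the wrapper
 * still consumes the caller's reference. */
static int sketch_journal_forget(handle_t *handle, struct buffer_head *bh)
{
	int err = 0;

	if (ext4_handle_valid(handle))
		err = jbd2_journal_forget(handle, bh);	/* journaled path */
	else
		brelse(bh);				/* no-journal path */
	return err;
}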
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index be2f426f6805..139fb8cb87e4 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -131,9 +131,11 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 131int __ext4_journal_get_write_access(const char *where, handle_t *handle,
132 struct buffer_head *bh); 132 struct buffer_head *bh);
133 133
134/* When called with an invalid handle, this will still do a put on the BH */
134int __ext4_journal_forget(const char *where, handle_t *handle, 135int __ext4_journal_forget(const char *where, handle_t *handle,
135 struct buffer_head *bh); 136 struct buffer_head *bh);
136 137
138/* When called with an invalid handle, this will still do a put on the BH */
137int __ext4_journal_revoke(const char *where, handle_t *handle, 139int __ext4_journal_revoke(const char *where, handle_t *handle,
138 ext4_fsblk_t blocknr, struct buffer_head *bh); 140 ext4_fsblk_t blocknr, struct buffer_head *bh);
139 141
@@ -281,10 +283,10 @@ static inline int ext4_should_order_data(struct inode *inode)
281 283
282static inline int ext4_should_writeback_data(struct inode *inode) 284static inline int ext4_should_writeback_data(struct inode *inode)
283{ 285{
284 if (EXT4_JOURNAL(inode) == NULL)
285 return 0;
286 if (!S_ISREG(inode->i_mode)) 286 if (!S_ISREG(inode->i_mode))
287 return 0; 287 return 0;
288 if (EXT4_JOURNAL(inode) == NULL)
289 return 1;
288 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 290 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
289 return 0; 291 return 0;
290 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 292 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 50322a09bd01..73ebfb44ad75 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1977,6 +1977,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1977 */ 1977 */
1978 /* 1 bitmap, 1 block group descriptor */ 1978 /* 1 bitmap, 1 block group descriptor */
1979 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); 1979 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
1980 return ret;
1980 } 1981 }
1981 } 1982 }
1982 1983
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2f645732e3b7..29e6dc7299b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -833,7 +833,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
833 if (!goal) 833 if (!goal)
834 goal = sbi->s_inode_goal; 834 goal = sbi->s_inode_goal;
835 835
836 if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) { 836 if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
837 group = (goal - 1) / EXT4_INODES_PER_GROUP(sb); 837 group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
838 ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb); 838 ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
839 ret2 = 0; 839 ret2 = 0;
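
The ialloc.c change above is an off-by-one fix: inode numbers are 1-based, so the largest valid goal is exactly s_inodes_count. For example, with 8192 inodes per group and an s_inodes_count of 16384 (values chosen purely for illustration), a goal of 16384 maps to group 1, slot 8191, yet the old strict "goal < s_inodes_count" test silently ignored it; the new "goal <=" accepts the last inode as a starting hint.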
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7c17ae275af4..f9c642b22efa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -78,16 +78,14 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
78 * but there may still be a record of it in the journal, and that record 78 * but there may still be a record of it in the journal, and that record
79 * still needs to be revoked. 79 * still needs to be revoked.
80 * 80 *
81 * If the handle isn't valid we're not journaling so there's nothing to do. 81 * If the handle isn't valid we're not journaling, but we still need to
82 * call into ext4_journal_revoke() to put the buffer head.
82 */ 83 */
83int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 84int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
84 struct buffer_head *bh, ext4_fsblk_t blocknr) 85 struct buffer_head *bh, ext4_fsblk_t blocknr)
85{ 86{
86 int err; 87 int err;
87 88
88 if (!ext4_handle_valid(handle))
89 return 0;
90
91 might_sleep(); 89 might_sleep();
92 90
93 BUFFER_TRACE(bh, "enter"); 91 BUFFER_TRACE(bh, "enter");
@@ -1513,14 +1511,14 @@ retry:
1513 * Add inode to orphan list in case we crash before 1511 * Add inode to orphan list in case we crash before
1514 * truncate finishes 1512 * truncate finishes
1515 */ 1513 */
1516 if (pos + len > inode->i_size) 1514 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1517 ext4_orphan_add(handle, inode); 1515 ext4_orphan_add(handle, inode);
1518 1516
1519 ext4_journal_stop(handle); 1517 ext4_journal_stop(handle);
1520 if (pos + len > inode->i_size) { 1518 if (pos + len > inode->i_size) {
1521 vmtruncate(inode, inode->i_size); 1519 ext4_truncate(inode);
1522 /* 1520 /*
1523 * If vmtruncate failed early the inode might 1521 * If truncate failed early the inode might
1524 * still be on the orphan list; we need to 1522 * still be on the orphan list; we need to
1525 * make sure the inode is removed from the 1523 * make sure the inode is removed from the
1526 * orphan list in that case. 1524 * orphan list in that case.
@@ -1614,7 +1612,7 @@ static int ext4_ordered_write_end(struct file *file,
1614 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1612 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1615 page, fsdata); 1613 page, fsdata);
1616 copied = ret2; 1614 copied = ret2;
1617 if (pos + len > inode->i_size) 1615 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1618 /* if we have allocated more blocks and copied 1616 /* if we have allocated more blocks and copied
1619 * less. We will have blocks allocated outside 1617 * less. We will have blocks allocated outside
1620 * inode->i_size. So truncate them 1618 * inode->i_size. So truncate them
@@ -1628,9 +1626,9 @@ static int ext4_ordered_write_end(struct file *file,
1628 ret = ret2; 1626 ret = ret2;
1629 1627
1630 if (pos + len > inode->i_size) { 1628 if (pos + len > inode->i_size) {
1631 vmtruncate(inode, inode->i_size); 1629 ext4_truncate(inode);
1632 /* 1630 /*
1633 * If vmtruncate failed early the inode might still be 1631 * If truncate failed early the inode might still be
1634 * on the orphan list; we need to make sure the inode 1632 * on the orphan list; we need to make sure the inode
1635 * is removed from the orphan list in that case. 1633 * is removed from the orphan list in that case.
1636 */ 1634 */
@@ -1655,7 +1653,7 @@ static int ext4_writeback_write_end(struct file *file,
1655 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1653 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1656 page, fsdata); 1654 page, fsdata);
1657 copied = ret2; 1655 copied = ret2;
1658 if (pos + len > inode->i_size) 1656 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1659 /* if we have allocated more blocks and copied 1657 /* if we have allocated more blocks and copied
1660 * less. We will have blocks allocated outside 1658 * less. We will have blocks allocated outside
1661 * inode->i_size. So truncate them 1659 * inode->i_size. So truncate them
@@ -1670,9 +1668,9 @@ static int ext4_writeback_write_end(struct file *file,
1670 ret = ret2; 1668 ret = ret2;
1671 1669
1672 if (pos + len > inode->i_size) { 1670 if (pos + len > inode->i_size) {
1673 vmtruncate(inode, inode->i_size); 1671 ext4_truncate(inode);
1674 /* 1672 /*
1675 * If vmtruncate failed early the inode might still be 1673 * If truncate failed early the inode might still be
1676 * on the orphan list; we need to make sure the inode 1674 * on the orphan list; we need to make sure the inode
1677 * is removed from the orphan list in that case. 1675 * is removed from the orphan list in that case.
1678 */ 1676 */
@@ -1722,7 +1720,7 @@ static int ext4_journalled_write_end(struct file *file,
1722 1720
1723 unlock_page(page); 1721 unlock_page(page);
1724 page_cache_release(page); 1722 page_cache_release(page);
1725 if (pos + len > inode->i_size) 1723 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1726 /* if we have allocated more blocks and copied 1724 /* if we have allocated more blocks and copied
1727 * less. We will have blocks allocated outside 1725 * less. We will have blocks allocated outside
1728 * inode->i_size. So truncate them 1726 * inode->i_size. So truncate them
@@ -1733,9 +1731,9 @@ static int ext4_journalled_write_end(struct file *file,
1733 if (!ret) 1731 if (!ret)
1734 ret = ret2; 1732 ret = ret2;
1735 if (pos + len > inode->i_size) { 1733 if (pos + len > inode->i_size) {
1736 vmtruncate(inode, inode->i_size); 1734 ext4_truncate(inode);
1737 /* 1735 /*
1738 * If vmtruncate failed early the inode might still be 1736 * If truncate failed early the inode might still be
1739 * on the orphan list; we need to make sure the inode 1737 * on the orphan list; we need to make sure the inode
1740 * is removed from the orphan list in that case. 1738 * is removed from the orphan list in that case.
1741 */ 1739 */
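
The write_begin and *_write_end() hunks above (plus a matching one further down) swap vmtruncate() for ext4_truncate() in the short-write cleanup path, and additionally gate the orphan-list insertion on ext4_can_truncate(), so inodes that ext4 will refuse to truncate are never put on the orphan list in the first place. A hedged sketch of the cleanup these hunks converge on (the helper name is illustrative, and the i_nlink/orphan handling mirrors the surrounding ext4 code rather than lines shown in this diff):

#include "ext4.h"

/* Sketch: trim blocks allocated past i_size after a short write. */
static void sketch_trim_past_eof(struct inode *inode, loff_t pos, size_t len)
{
	if (pos + len <= inode->i_size)
		return;
	ext4_truncate(inode);		/* replaces the old vmtruncate() call */
	if (inode->i_nlink)
		ext4_orphan_del(NULL, inode);	/* in case truncate bailed out early */
}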
@@ -2305,15 +2303,9 @@ flush_it:
2305 return; 2303 return;
2306} 2304}
2307 2305
2308static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2306static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2309{ 2307{
2310 /* 2308 return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
2311 * unmapped buffer is possible for holes.
2312 * delay buffer is possible with delayed allocation.
2313 * We also need to consider unwritten buffer as unmapped.
2314 */
2315 return (!buffer_mapped(bh) || buffer_delay(bh) ||
2316 buffer_unwritten(bh)) && buffer_dirty(bh);
2317} 2309}
2318 2310
2319/* 2311/*
@@ -2398,9 +2390,9 @@ static int __mpage_da_writepage(struct page *page,
2398 * We need to try to allocate 2390 * We need to try to allocate
2399 * unmapped blocks in the same page. 2391 * unmapped blocks in the same page.
2400 * Otherwise we won't make progress 2392 * Otherwise we won't make progress
2401 * with the page in ext4_da_writepage 2393 * with the page in ext4_writepage
2402 */ 2394 */
2403 if (ext4_bh_unmapped_or_delay(NULL, bh)) { 2395 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2404 mpage_add_bh_to_extent(mpd, logical, 2396 mpage_add_bh_to_extent(mpd, logical,
2405 bh->b_size, 2397 bh->b_size,
2406 bh->b_state); 2398 bh->b_state);
@@ -2517,7 +2509,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2517 * so call get_block_wrap with create = 0 2509 * so call get_block_wrap with create = 0
2518 */ 2510 */
2519 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); 2511 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
2520 BUG_ON(create && ret == 0);
2521 if (ret > 0) { 2512 if (ret > 0) {
2522 bh_result->b_size = (ret << inode->i_blkbits); 2513 bh_result->b_size = (ret << inode->i_blkbits);
2523 ret = 0; 2514 ret = 0;
@@ -2525,15 +2516,102 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2525 return ret; 2516 return ret;
2526} 2517}
2527 2518
2519static int bget_one(handle_t *handle, struct buffer_head *bh)
2520{
2521 get_bh(bh);
2522 return 0;
2523}
2524
2525static int bput_one(handle_t *handle, struct buffer_head *bh)
2526{
2527 put_bh(bh);
2528 return 0;
2529}
2530
2531static int __ext4_journalled_writepage(struct page *page,
2532 struct writeback_control *wbc,
2533 unsigned int len)
2534{
2535 struct address_space *mapping = page->mapping;
2536 struct inode *inode = mapping->host;
2537 struct buffer_head *page_bufs;
2538 handle_t *handle = NULL;
2539 int ret = 0;
2540 int err;
2541
2542 page_bufs = page_buffers(page);
2543 BUG_ON(!page_bufs);
2544 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
2545 /* As soon as we unlock the page, it can go away, but we have
2546 * references to buffers so we are safe */
2547 unlock_page(page);
2548
2549 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
2550 if (IS_ERR(handle)) {
2551 ret = PTR_ERR(handle);
2552 goto out;
2553 }
2554
2555 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
2556 do_journal_get_write_access);
2557
2558 err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
2559 write_end_fn);
2560 if (ret == 0)
2561 ret = err;
2562 err = ext4_journal_stop(handle);
2563 if (!ret)
2564 ret = err;
2565
2566 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
2567 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
2568out:
2569 return ret;
2570}
2571
2528/* 2572/*
2573 * Note that we don't need to start a transaction unless we're journaling data
2574 * because we should have holes filled from ext4_page_mkwrite(). We even don't
2575 * need to file the inode to the transaction's list in ordered mode because if
2576 * we are writing back data added by write(), the inode is already there and if
2577 * we are writing back data modified via mmap(), noone guarantees in which
2578 * transaction the data will hit the disk. In case we are journaling data, we
2579 * cannot start transaction directly because transaction start ranks above page
2580 * lock so we have to do some magic.
2581 *
2529 * This function can get called via... 2582 * This function can get called via...
2530 * - ext4_da_writepages after taking page lock (have journal handle) 2583 * - ext4_da_writepages after taking page lock (have journal handle)
2531 * - journal_submit_inode_data_buffers (no journal handle) 2584 * - journal_submit_inode_data_buffers (no journal handle)
2532 * - shrink_page_list via pdflush (no journal handle) 2585 * - shrink_page_list via pdflush (no journal handle)
2533 * - grab_page_cache when doing write_begin (have journal handle) 2586 * - grab_page_cache when doing write_begin (have journal handle)
2587 *
2588 * We don't do any block allocation in this function. If we have page with
2589 * multiple blocks we need to write those buffer_heads that are mapped. This
2590 * is important for mmaped based write. So if we do with blocksize 1K
2591 * truncate(f, 1024);
2592 * a = mmap(f, 0, 4096);
2593 * a[0] = 'a';
2594 * truncate(f, 4096);
2595 * we have in the page first buffer_head mapped via page_mkwrite call back
2596 * but other bufer_heads would be unmapped but dirty(dirty done via the
2597 * do_wp_page). So writepage should write the first block. If we modify
2598 * the mmap area beyond 1024 we will again get a page_fault and the
2599 * page_mkwrite callback will do the block allocation and mark the
2600 * buffer_heads mapped.
2601 *
2602 * We redirty the page if we have any buffer_heads that is either delay or
2603 * unwritten in the page.
2604 *
2605 * We can get recursively called as show below.
2606 *
2607 * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
2608 * ext4_writepage()
2609 *
2610 * But since we don't do any block allocation we should not deadlock.
2611 * Page also have the dirty flag cleared so we don't get recurive page_lock.
2534 */ 2612 */
2535static int ext4_da_writepage(struct page *page, 2613static int ext4_writepage(struct page *page,
2536 struct writeback_control *wbc) 2614 struct writeback_control *wbc)
2537{ 2615{
2538 int ret = 0; 2616 int ret = 0;
2539 loff_t size; 2617 loff_t size;
@@ -2541,7 +2619,7 @@ static int ext4_da_writepage(struct page *page,
2541 struct buffer_head *page_bufs; 2619 struct buffer_head *page_bufs;
2542 struct inode *inode = page->mapping->host; 2620 struct inode *inode = page->mapping->host;
2543 2621
2544 trace_ext4_da_writepage(inode, page); 2622 trace_ext4_writepage(inode, page);
2545 size = i_size_read(inode); 2623 size = i_size_read(inode);
2546 if (page->index == size >> PAGE_CACHE_SHIFT) 2624 if (page->index == size >> PAGE_CACHE_SHIFT)
2547 len = size & ~PAGE_CACHE_MASK; 2625 len = size & ~PAGE_CACHE_MASK;
@@ -2551,7 +2629,7 @@ static int ext4_da_writepage(struct page *page,
2551 if (page_has_buffers(page)) { 2629 if (page_has_buffers(page)) {
2552 page_bufs = page_buffers(page); 2630 page_bufs = page_buffers(page);
2553 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2631 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2554 ext4_bh_unmapped_or_delay)) { 2632 ext4_bh_delay_or_unwritten)) {
2555 /* 2633 /*
2556 * We don't want to do block allocation 2634 * We don't want to do block allocation
2557 * So redirty the page and return 2635 * So redirty the page and return
@@ -2578,13 +2656,13 @@ static int ext4_da_writepage(struct page *page,
2578 * all are mapped and non delay. We don't want to 2656 * all are mapped and non delay. We don't want to
2579 * do block allocation here. 2657 * do block allocation here.
2580 */ 2658 */
2581 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 2659 ret = block_prepare_write(page, 0, len,
2582 noalloc_get_block_write); 2660 noalloc_get_block_write);
2583 if (!ret) { 2661 if (!ret) {
2584 page_bufs = page_buffers(page); 2662 page_bufs = page_buffers(page);
2585 /* check whether all are mapped and non delay */ 2663 /* check whether all are mapped and non delay */
2586 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2664 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2587 ext4_bh_unmapped_or_delay)) { 2665 ext4_bh_delay_or_unwritten)) {
2588 redirty_page_for_writepage(wbc, page); 2666 redirty_page_for_writepage(wbc, page);
2589 unlock_page(page); 2667 unlock_page(page);
2590 return 0; 2668 return 0;
@@ -2600,7 +2678,16 @@ static int ext4_da_writepage(struct page *page,
2600 return 0; 2678 return 0;
2601 } 2679 }
2602 /* now mark the buffer_heads as dirty and uptodate */ 2680 /* now mark the buffer_heads as dirty and uptodate */
2603 block_commit_write(page, 0, PAGE_CACHE_SIZE); 2681 block_commit_write(page, 0, len);
2682 }
2683
2684 if (PageChecked(page) && ext4_should_journal_data(inode)) {
2685 /*
2686 * It's mmapped pagecache. Add buffers and journal it. There
2687 * doesn't seem much point in redirtying the page here.
2688 */
2689 ClearPageChecked(page);
2690 return __ext4_journalled_writepage(page, wbc, len);
2604 } 2691 }
2605 2692
2606 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2693 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2907,7 +2994,7 @@ retry:
2907 * i_size_read because we hold i_mutex. 2994 * i_size_read because we hold i_mutex.
2908 */ 2995 */
2909 if (pos + len > inode->i_size) 2996 if (pos + len > inode->i_size)
2910 vmtruncate(inode, inode->i_size); 2997 ext4_truncate(inode);
2911 } 2998 }
2912 2999
2913 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3000 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3130,222 +3217,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3130 return generic_block_bmap(mapping, block, ext4_get_block); 3217 return generic_block_bmap(mapping, block, ext4_get_block);
3131} 3218}
3132 3219
3133static int bget_one(handle_t *handle, struct buffer_head *bh)
3134{
3135 get_bh(bh);
3136 return 0;
3137}
3138
3139static int bput_one(handle_t *handle, struct buffer_head *bh)
3140{
3141 put_bh(bh);
3142 return 0;
3143}
3144
3145/*
3146 * Note that we don't need to start a transaction unless we're journaling data
3147 * because we should have holes filled from ext4_page_mkwrite(). We even don't
3148 * need to file the inode to the transaction's list in ordered mode because if
3149 * we are writing back data added by write(), the inode is already there and if
3150 * we are writing back data modified via mmap(), noone guarantees in which
3151 * transaction the data will hit the disk. In case we are journaling data, we
3152 * cannot start transaction directly because transaction start ranks above page
3153 * lock so we have to do some magic.
3154 *
3155 * In all journaling modes block_write_full_page() will start the I/O.
3156 *
3157 * Problem:
3158 *
3159 * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
3160 * ext4_writepage()
3161 *
3162 * Similar for:
3163 *
3164 * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
3165 *
3166 * Same applies to ext4_get_block(). We will deadlock on various things like
3167 * lock_journal and i_data_sem
3168 *
3169 * Setting PF_MEMALLOC here doesn't work - too many internal memory
3170 * allocations fail.
3171 *
3172 * 16May01: If we're reentered then journal_current_handle() will be
3173 * non-zero. We simply *return*.
3174 *
3175 * 1 July 2001: @@@ FIXME:
3176 * In journalled data mode, a data buffer may be metadata against the
3177 * current transaction. But the same file is part of a shared mapping
3178 * and someone does a writepage() on it.
3179 *
3180 * We will move the buffer onto the async_data list, but *after* it has
3181 * been dirtied. So there's a small window where we have dirty data on
3182 * BJ_Metadata.
3183 *
3184 * Note that this only applies to the last partial page in the file. The
3185 * bit which block_write_full_page() uses prepare/commit for. (That's
3186 * broken code anyway: it's wrong for msync()).
3187 *
3188 * It's a rare case: affects the final partial page, for journalled data
3189 * where the file is subject to bith write() and writepage() in the same
3190 * transction. To fix it we'll need a custom block_write_full_page().
3191 * We'll probably need that anyway for journalling writepage() output.
3192 *
3193 * We don't honour synchronous mounts for writepage(). That would be
3194 * disastrous. Any write() or metadata operation will sync the fs for
3195 * us.
3196 *
3197 */
3198static int __ext4_normal_writepage(struct page *page,
3199 struct writeback_control *wbc)
3200{
3201 struct inode *inode = page->mapping->host;
3202
3203 if (test_opt(inode->i_sb, NOBH))
3204 return nobh_writepage(page, noalloc_get_block_write, wbc);
3205 else
3206 return block_write_full_page(page, noalloc_get_block_write,
3207 wbc);
3208}
3209
3210static int ext4_normal_writepage(struct page *page,
3211 struct writeback_control *wbc)
3212{
3213 struct inode *inode = page->mapping->host;
3214 loff_t size = i_size_read(inode);
3215 loff_t len;
3216
3217 trace_ext4_normal_writepage(inode, page);
3218 J_ASSERT(PageLocked(page));
3219 if (page->index == size >> PAGE_CACHE_SHIFT)
3220 len = size & ~PAGE_CACHE_MASK;
3221 else
3222 len = PAGE_CACHE_SIZE;
3223
3224 if (page_has_buffers(page)) {
3225 /* if page has buffers it should all be mapped
3226 * and allocated. If there are not buffers attached
3227 * to the page we know the page is dirty but it lost
3228 * buffers. That means that at some moment in time
3229 * after write_begin() / write_end() has been called
3230 * all buffers have been clean and thus they must have been
3231 * written at least once. So they are all mapped and we can
3232 * happily proceed with mapping them and writing the page.
3233 */
3234 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
3235 ext4_bh_unmapped_or_delay));
3236 }
3237
3238 if (!ext4_journal_current_handle())
3239 return __ext4_normal_writepage(page, wbc);
3240
3241 redirty_page_for_writepage(wbc, page);
3242 unlock_page(page);
3243 return 0;
3244}
3245
3246static int __ext4_journalled_writepage(struct page *page,
3247 struct writeback_control *wbc)
3248{
3249 struct address_space *mapping = page->mapping;
3250 struct inode *inode = mapping->host;
3251 struct buffer_head *page_bufs;
3252 handle_t *handle = NULL;
3253 int ret = 0;
3254 int err;
3255
3256 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
3257 noalloc_get_block_write);
3258 if (ret != 0)
3259 goto out_unlock;
3260
3261 page_bufs = page_buffers(page);
3262 walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
3263 bget_one);
3264 /* As soon as we unlock the page, it can go away, but we have
3265 * references to buffers so we are safe */
3266 unlock_page(page);
3267
3268 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
3269 if (IS_ERR(handle)) {
3270 ret = PTR_ERR(handle);
3271 goto out;
3272 }
3273
3274 ret = walk_page_buffers(handle, page_bufs, 0,
3275 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
3276
3277 err = walk_page_buffers(handle, page_bufs, 0,
3278 PAGE_CACHE_SIZE, NULL, write_end_fn);
3279 if (ret == 0)
3280 ret = err;
3281 err = ext4_journal_stop(handle);
3282 if (!ret)
3283 ret = err;
3284
3285 walk_page_buffers(handle, page_bufs, 0,
3286 PAGE_CACHE_SIZE, NULL, bput_one);
3287 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
3288 goto out;
3289
3290out_unlock:
3291 unlock_page(page);
3292out:
3293 return ret;
3294}
3295
3296static int ext4_journalled_writepage(struct page *page,
3297 struct writeback_control *wbc)
3298{
3299 struct inode *inode = page->mapping->host;
3300 loff_t size = i_size_read(inode);
3301 loff_t len;
3302
3303 trace_ext4_journalled_writepage(inode, page);
3304 J_ASSERT(PageLocked(page));
3305 if (page->index == size >> PAGE_CACHE_SHIFT)
3306 len = size & ~PAGE_CACHE_MASK;
3307 else
3308 len = PAGE_CACHE_SIZE;
3309
3310 if (page_has_buffers(page)) {
3311 /* if page has buffers it should all be mapped
3312 * and allocated. If there are not buffers attached
3313 * to the page we know the page is dirty but it lost
3314 * buffers. That means that at some moment in time
3315 * after write_begin() / write_end() has been called
3316 * all buffers have been clean and thus they must have been
3317 * written at least once. So they are all mapped and we can
3318 * happily proceed with mapping them and writing the page.
3319 */
3320 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
3321 ext4_bh_unmapped_or_delay));
3322 }
3323
3324 if (ext4_journal_current_handle())
3325 goto no_write;
3326
3327 if (PageChecked(page)) {
3328 /*
3329 * It's mmapped pagecache. Add buffers and journal it. There
3330 * doesn't seem much point in redirtying the page here.
3331 */
3332 ClearPageChecked(page);
3333 return __ext4_journalled_writepage(page, wbc);
3334 } else {
3335 /*
3336 * It may be a page full of checkpoint-mode buffers. We don't
3337 * really know unless we go poke around in the buffer_heads.
3338 * But block_write_full_page will do the right thing.
3339 */
3340 return block_write_full_page(page, noalloc_get_block_write,
3341 wbc);
3342 }
3343no_write:
3344 redirty_page_for_writepage(wbc, page);
3345 unlock_page(page);
3346 return 0;
3347}
3348
3349static int ext4_readpage(struct file *file, struct page *page) 3220static int ext4_readpage(struct file *file, struct page *page)
3350{ 3221{
3351 return mpage_readpage(page, ext4_get_block); 3222 return mpage_readpage(page, ext4_get_block);
@@ -3492,7 +3363,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
3492static const struct address_space_operations ext4_ordered_aops = { 3363static const struct address_space_operations ext4_ordered_aops = {
3493 .readpage = ext4_readpage, 3364 .readpage = ext4_readpage,
3494 .readpages = ext4_readpages, 3365 .readpages = ext4_readpages,
3495 .writepage = ext4_normal_writepage, 3366 .writepage = ext4_writepage,
3496 .sync_page = block_sync_page, 3367 .sync_page = block_sync_page,
3497 .write_begin = ext4_write_begin, 3368 .write_begin = ext4_write_begin,
3498 .write_end = ext4_ordered_write_end, 3369 .write_end = ext4_ordered_write_end,
@@ -3507,7 +3378,7 @@ static const struct address_space_operations ext4_ordered_aops = {
3507static const struct address_space_operations ext4_writeback_aops = { 3378static const struct address_space_operations ext4_writeback_aops = {
3508 .readpage = ext4_readpage, 3379 .readpage = ext4_readpage,
3509 .readpages = ext4_readpages, 3380 .readpages = ext4_readpages,
3510 .writepage = ext4_normal_writepage, 3381 .writepage = ext4_writepage,
3511 .sync_page = block_sync_page, 3382 .sync_page = block_sync_page,
3512 .write_begin = ext4_write_begin, 3383 .write_begin = ext4_write_begin,
3513 .write_end = ext4_writeback_write_end, 3384 .write_end = ext4_writeback_write_end,
@@ -3522,7 +3393,7 @@ static const struct address_space_operations ext4_writeback_aops = {
3522static const struct address_space_operations ext4_journalled_aops = { 3393static const struct address_space_operations ext4_journalled_aops = {
3523 .readpage = ext4_readpage, 3394 .readpage = ext4_readpage,
3524 .readpages = ext4_readpages, 3395 .readpages = ext4_readpages,
3525 .writepage = ext4_journalled_writepage, 3396 .writepage = ext4_writepage,
3526 .sync_page = block_sync_page, 3397 .sync_page = block_sync_page,
3527 .write_begin = ext4_write_begin, 3398 .write_begin = ext4_write_begin,
3528 .write_end = ext4_journalled_write_end, 3399 .write_end = ext4_journalled_write_end,
@@ -3536,7 +3407,7 @@ static const struct address_space_operations ext4_journalled_aops = {
3536static const struct address_space_operations ext4_da_aops = { 3407static const struct address_space_operations ext4_da_aops = {
3537 .readpage = ext4_readpage, 3408 .readpage = ext4_readpage,
3538 .readpages = ext4_readpages, 3409 .readpages = ext4_readpages,
3539 .writepage = ext4_da_writepage, 3410 .writepage = ext4_writepage,
3540 .writepages = ext4_da_writepages, 3411 .writepages = ext4_da_writepages,
3541 .sync_page = block_sync_page, 3412 .sync_page = block_sync_page,
3542 .write_begin = ext4_da_write_begin, 3413 .write_begin = ext4_da_write_begin,
@@ -3583,7 +3454,8 @@ int ext4_block_truncate_page(handle_t *handle,
3583 struct page *page; 3454 struct page *page;
3584 int err = 0; 3455 int err = 0;
3585 3456
3586 page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); 3457 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3458 mapping_gfp_mask(mapping) & ~__GFP_FS);
3587 if (!page) 3459 if (!page)
3588 return -EINVAL; 3460 return -EINVAL;
3589 3461
@@ -4453,10 +4325,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4453 return inode; 4325 return inode;
4454 4326
4455 ei = EXT4_I(inode); 4327 ei = EXT4_I(inode);
4456#ifdef CONFIG_EXT4_FS_POSIX_ACL
4457 ei->i_acl = EXT4_ACL_NOT_CACHED;
4458 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
4459#endif
4460 4328
4461 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4329 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4462 if (ret < 0) 4330 if (ret < 0)
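
The long fs/ext4/inode.c diff above collapses the three mode-specific writepages (ext4_da_writepage, ext4_normal_writepage, ext4_journalled_writepage) into a single ext4_writepage(): __ext4_journalled_writepage() moves up, takes the valid page length instead of assuming PAGE_CACHE_SIZE, and is entered only for PageChecked pages in data=journal mode, while all four address_space_operations tables now share the one .writepage. A simplified, hedged sketch of the resulting dispatch (the delayed/unwritten-buffer redirty logic shown in the hunks is omitted, and sketch_page_len() is a stand-in for the i_size-based length computation):

/* How much of this page lies inside i_size. */
static unsigned int sketch_page_len(struct page *page, loff_t size)
{
	if (page->index == size >> PAGE_CACHE_SHIFT)
		return size & ~PAGE_CACHE_MASK;
	return PAGE_CACHE_SIZE;
}

static int sketch_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	unsigned int len = sketch_page_len(page, i_size_read(inode));

	/* mmap-dirtied data=journal pages get journalled explicitly */
	if (PageChecked(page) && ext4_should_journal_data(inode)) {
		ClearPageChecked(page);
		return __ext4_journalled_writepage(page, wbc, len);
	}

	/* everything else goes straight to the block layer; no block
	 * allocation is done from writepage */
	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
		return nobh_writepage(page, noalloc_get_block_write, wbc);
	return block_write_full_page(page, noalloc_get_block_write, wbc);
}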
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bb415408fdb6..7050a9cd04a4 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -12,7 +12,6 @@
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/time.h> 13#include <linux/time.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/smp_lock.h>
16#include <linux/mount.h> 15#include <linux/mount.h>
17#include <linux/file.h> 16#include <linux/file.h>
18#include <asm/uaccess.h> 17#include <asm/uaccess.h>
@@ -192,7 +191,7 @@ setversion_out:
192 case EXT4_IOC_GROUP_EXTEND: { 191 case EXT4_IOC_GROUP_EXTEND: {
193 ext4_fsblk_t n_blocks_count; 192 ext4_fsblk_t n_blocks_count;
194 struct super_block *sb = inode->i_sb; 193 struct super_block *sb = inode->i_sb;
195 int err, err2; 194 int err, err2=0;
196 195
197 if (!capable(CAP_SYS_RESOURCE)) 196 if (!capable(CAP_SYS_RESOURCE))
198 return -EPERM; 197 return -EPERM;
@@ -205,9 +204,11 @@ setversion_out:
205 return err; 204 return err;
206 205
207 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 206 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 207 if (EXT4_SB(sb)->s_journal) {
209 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 209 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
211 }
211 if (err == 0) 212 if (err == 0)
212 err = err2; 213 err = err2;
213 mnt_drop_write(filp->f_path.mnt); 214 mnt_drop_write(filp->f_path.mnt);
@@ -252,7 +253,7 @@ setversion_out:
252 case EXT4_IOC_GROUP_ADD: { 253 case EXT4_IOC_GROUP_ADD: {
253 struct ext4_new_group_data input; 254 struct ext4_new_group_data input;
254 struct super_block *sb = inode->i_sb; 255 struct super_block *sb = inode->i_sb;
255 int err, err2; 256 int err, err2=0;
256 257
257 if (!capable(CAP_SYS_RESOURCE)) 258 if (!capable(CAP_SYS_RESOURCE))
258 return -EPERM; 259 return -EPERM;
@@ -266,9 +267,11 @@ setversion_out:
266 return err; 267 return err;
267 268
268 err = ext4_group_add(sb, &input); 269 err = ext4_group_add(sb, &input);
269 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 270 if (EXT4_SB(sb)->s_journal) {
270 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 271 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
271 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 272 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
273 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
274 }
272 if (err == 0) 275 if (err == 0)
273 err = err2; 276 err = err2;
274 mnt_drop_write(filp->f_path.mnt); 277 mnt_drop_write(filp->f_path.mnt);
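
Both EXT4_IOC_GROUP_EXTEND and EXT4_IOC_GROUP_ADD above now cope with no-journal mode: EXT4_SB(sb)->s_journal can be NULL, so err2 is pre-initialised and the jbd2 flush is attempted only when a journal actually exists. The guarded pattern in isolation, as a sketch (the wrapper name is illustrative):

/* Sketch: flush the journal only if this ext4 filesystem has one. */
static int sketch_flush_journal(struct super_block *sb)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;
	int err = 0;

	if (journal) {
		jbd2_journal_lock_updates(journal);
		err = jbd2_journal_flush(journal);
		jbd2_journal_unlock_updates(journal);
	}
	return err;
}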
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 519a0a686d94..cd258463e2a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -657,7 +657,8 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
657 } 657 }
658} 658}
659 659
660static void ext4_mb_generate_buddy(struct super_block *sb, 660static noinline_for_stack
661void ext4_mb_generate_buddy(struct super_block *sb,
661 void *buddy, void *bitmap, ext4_group_t group) 662 void *buddy, void *bitmap, ext4_group_t group)
662{ 663{
663 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 664 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -1480,7 +1481,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1480 ext4_mb_check_limits(ac, e4b, 0); 1481 ext4_mb_check_limits(ac, e4b, 0);
1481} 1482}
1482 1483
1483static int ext4_mb_try_best_found(struct ext4_allocation_context *ac, 1484static noinline_for_stack
1485int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1484 struct ext4_buddy *e4b) 1486 struct ext4_buddy *e4b)
1485{ 1487{
1486 struct ext4_free_extent ex = ac->ac_b_ex; 1488 struct ext4_free_extent ex = ac->ac_b_ex;
@@ -1507,7 +1509,8 @@ static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1507 return 0; 1509 return 0;
1508} 1510}
1509 1511
1510static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, 1512static noinline_for_stack
1513int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1511 struct ext4_buddy *e4b) 1514 struct ext4_buddy *e4b)
1512{ 1515{
1513 ext4_group_t group = ac->ac_g_ex.fe_group; 1516 ext4_group_t group = ac->ac_g_ex.fe_group;
@@ -1566,7 +1569,8 @@ static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1566 * The routine scans buddy structures (not bitmap!) from given order 1569 * The routine scans buddy structures (not bitmap!) from given order
1567 * to max order and tries to find big enough chunk to satisfy the req 1570 * to max order and tries to find big enough chunk to satisfy the req
1568 */ 1571 */
1569static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, 1572static noinline_for_stack
1573void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1570 struct ext4_buddy *e4b) 1574 struct ext4_buddy *e4b)
1571{ 1575{
1572 struct super_block *sb = ac->ac_sb; 1576 struct super_block *sb = ac->ac_sb;
@@ -1609,7 +1613,8 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1609 * In order to optimize scanning, caller must pass number of 1613 * In order to optimize scanning, caller must pass number of
1610 * free blocks in the group, so the routine can know upper limit. 1614 * free blocks in the group, so the routine can know upper limit.
1611 */ 1615 */
1612static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, 1616static noinline_for_stack
1617void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1613 struct ext4_buddy *e4b) 1618 struct ext4_buddy *e4b)
1614{ 1619{
1615 struct super_block *sb = ac->ac_sb; 1620 struct super_block *sb = ac->ac_sb;
@@ -1668,7 +1673,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1668 * we try to find stripe-aligned chunks for stripe-size requests 1673 * we try to find stripe-aligned chunks for stripe-size requests
1669 * XXX should do so at least for multiples of stripe size as well 1674 * XXX should do so at least for multiples of stripe size as well
1670 */ 1675 */
1671static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, 1676static noinline_for_stack
1677void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1672 struct ext4_buddy *e4b) 1678 struct ext4_buddy *e4b)
1673{ 1679{
1674 struct super_block *sb = ac->ac_sb; 1680 struct super_block *sb = ac->ac_sb;
@@ -1831,7 +1837,8 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1831 1837
1832} 1838}
1833 1839
1834static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) 1840static noinline_for_stack
1841int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1835{ 1842{
1836 1843
1837 int ret; 1844 int ret;
@@ -2902,7 +2909,11 @@ int __init init_ext4_mballoc(void)
2902 2909
2903void exit_ext4_mballoc(void) 2910void exit_ext4_mballoc(void)
2904{ 2911{
2905 /* XXX: synchronize_rcu(); */ 2912 /*
2913 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2914 * before destroying the slab cache.
2915 */
2916 rcu_barrier();
2906 kmem_cache_destroy(ext4_pspace_cachep); 2917 kmem_cache_destroy(ext4_pspace_cachep);
2907 kmem_cache_destroy(ext4_ac_cachep); 2918 kmem_cache_destroy(ext4_ac_cachep);
2908 kmem_cache_destroy(ext4_free_ext_cachep); 2919 kmem_cache_destroy(ext4_free_ext_cachep);
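The rcu_barrier() added above follows the general rule for any slab cache whose objects are freed through call_rcu(): every queued callback must run before kmem_cache_destroy(). A minimal sketch of that shutdown ordering, with example_cachep as a placeholder name rather than one of the ext4 caches:

#include <linux/slab.h>
#include <linux/rcupdate.h>

static struct kmem_cache *example_cachep;	/* assumed to be created in module init */

static void example_module_exit(void)
{
	/* wait for all pending call_rcu() callbacks that free into example_cachep */
	rcu_barrier();
	kmem_cache_destroy(example_cachep);
}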
@@ -3457,7 +3468,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3457 * used in in-core bitmap. buddy must be generated from this bitmap 3468 * used in in-core bitmap. buddy must be generated from this bitmap
3458 * Need to be called with ext4 group lock held 3469 * Need to be called with ext4 group lock held
3459 */ 3470 */
3460static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 3471static noinline_for_stack
3472void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3461 ext4_group_t group) 3473 ext4_group_t group)
3462{ 3474{
3463 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3475 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -4215,14 +4227,9 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4215 ext4_get_group_no_and_offset(sb, goal, &group, &block); 4227 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4216 4228
4217 /* set up allocation goals */ 4229 /* set up allocation goals */
4230 memset(ac, 0, sizeof(struct ext4_allocation_context));
4218 ac->ac_b_ex.fe_logical = ar->logical; 4231 ac->ac_b_ex.fe_logical = ar->logical;
4219 ac->ac_b_ex.fe_group = 0;
4220 ac->ac_b_ex.fe_start = 0;
4221 ac->ac_b_ex.fe_len = 0;
4222 ac->ac_status = AC_STATUS_CONTINUE; 4232 ac->ac_status = AC_STATUS_CONTINUE;
4223 ac->ac_groups_scanned = 0;
4224 ac->ac_ex_scanned = 0;
4225 ac->ac_found = 0;
4226 ac->ac_sb = sb; 4233 ac->ac_sb = sb;
4227 ac->ac_inode = ar->inode; 4234 ac->ac_inode = ar->inode;
4228 ac->ac_o_ex.fe_logical = ar->logical; 4235 ac->ac_o_ex.fe_logical = ar->logical;
@@ -4233,15 +4240,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4233 ac->ac_g_ex.fe_group = group; 4240 ac->ac_g_ex.fe_group = group;
4234 ac->ac_g_ex.fe_start = block; 4241 ac->ac_g_ex.fe_start = block;
4235 ac->ac_g_ex.fe_len = len; 4242 ac->ac_g_ex.fe_len = len;
4236 ac->ac_f_ex.fe_len = 0;
4237 ac->ac_flags = ar->flags; 4243 ac->ac_flags = ar->flags;
4238 ac->ac_2order = 0;
4239 ac->ac_criteria = 0;
4240 ac->ac_pa = NULL;
4241 ac->ac_bitmap_page = NULL;
4242 ac->ac_buddy_page = NULL;
4243 ac->alloc_semp = NULL;
4244 ac->ac_lg = NULL;
4245 4244
4246 /* we have to define context: we'll we work with a file or 4245 /* we have to define context: we'll we work with a file or
4247 * locality group. this is a policy, actually */ 4246 * locality group. this is a policy, actually */
@@ -4509,10 +4508,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4509 } 4508 }
4510 4509
4511 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4510 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4512 if (ac) { 4511 if (!ac) {
4513 ac->ac_sb = sb;
4514 ac->ac_inode = ar->inode;
4515 } else {
4516 ar->len = 0; 4512 ar->len = 0;
4517 *errp = -ENOMEM; 4513 *errp = -ENOMEM;
4518 goto out1; 4514 goto out1;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8bb9e2d3e4b8..8f4f079e6b9a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
666 if (!ei) 666 if (!ei)
667 return NULL; 667 return NULL;
668 668
669#ifdef CONFIG_EXT4_FS_POSIX_ACL
670 ei->i_acl = EXT4_ACL_NOT_CACHED;
671 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
672#endif
673 ei->vfs_inode.i_version = 1; 669 ei->vfs_inode.i_version = 1;
674 ei->vfs_inode.i_data.writeback_index = 0; 670 ei->vfs_inode.i_data.writeback_index = 0;
675 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 671 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
@@ -735,18 +731,6 @@ static void destroy_inodecache(void)
735 731
736static void ext4_clear_inode(struct inode *inode) 732static void ext4_clear_inode(struct inode *inode)
737{ 733{
738#ifdef CONFIG_EXT4_FS_POSIX_ACL
739 if (EXT4_I(inode)->i_acl &&
740 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
741 posix_acl_release(EXT4_I(inode)->i_acl);
742 EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
743 }
744 if (EXT4_I(inode)->i_default_acl &&
745 EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
746 posix_acl_release(EXT4_I(inode)->i_default_acl);
747 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
748 }
749#endif
750 ext4_discard_preallocations(inode); 734 ext4_discard_preallocations(inode);
751 if (EXT4_JOURNAL(inode)) 735 if (EXT4_JOURNAL(inode))
752 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, 736 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 38ff75a0fe22..530b4ca01510 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/smp_lock.h>
20#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
21#include <linux/compat.h> 20#include <linux/compat.h>
22#include <asm/uaccess.h> 21#include <asm/uaccess.h>
diff --git a/fs/fat/file.c b/fs/fat/file.c
index b28ea646ff60..f042b965c95c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
134 if ((filp->f_mode & FMODE_WRITE) && 134 if ((filp->f_mode & FMODE_WRITE) &&
135 MSDOS_SB(inode->i_sb)->options.flush) { 135 MSDOS_SB(inode->i_sb)->options.flush) {
136 fat_flush_inodes(inode->i_sb, inode, NULL); 136 fat_flush_inodes(inode->i_sb, inode, NULL);
137 congestion_wait(WRITE, HZ/10); 137 congestion_wait(BLK_RW_ASYNC, HZ/10);
138 } 138 }
139 return 0; 139 return 0;
140} 140}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 82f88733b681..bbc94ae4fd77 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -9,7 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/smp_lock.h>
13#include "fat.h" 12#include "fat.h"
14 13
15/* Characters that are undesirable in an MS-DOS file name */ 14/* Characters that are undesirable in an MS-DOS file name */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 73471b7ecc8c..cb6e83557112 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -19,7 +19,6 @@
19#include <linux/jiffies.h> 19#include <linux/jiffies.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/smp_lock.h>
23#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
24#include <linux/namei.h> 23#include <linux/namei.h>
25#include "fat.h" 24#include "fat.h"
diff --git a/fs/fcntl.c b/fs/fcntl.c
index a040b764f8e3..ae413086db97 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -19,7 +19,6 @@
19#include <linux/signal.h> 19#include <linux/signal.h>
20#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
21#include <linux/pid_namespace.h> 21#include <linux/pid_namespace.h>
22#include <linux/smp_lock.h>
23 22
24#include <asm/poll.h> 23#include <asm/poll.h>
25#include <asm/siginfo.h> 24#include <asm/siginfo.h>
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index cdbd1654e4cd..1e8af939b3e4 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -38,6 +38,7 @@
38#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
39#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/smp_lock.h>
41#include <linux/stat.h> 42#include <linux/stat.h>
42#include <linux/vfs.h> 43#include <linux/vfs.h>
43#include <linux/mount.h> 44#include <linux/mount.h>
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index caf049146ca2..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
278EXPORT_SYMBOL(sb_has_dirty_inodes); 278EXPORT_SYMBOL(sb_has_dirty_inodes);
279 279
280/* 280/*
281 * Write a single inode's dirty pages and inode data out to disk. 281 * Wait for writeback on an inode to complete.
282 */
283static void inode_wait_for_writeback(struct inode *inode)
284{
285 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
286 wait_queue_head_t *wqh;
287
288 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
289 do {
290 spin_unlock(&inode_lock);
291 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
292 spin_lock(&inode_lock);
293 } while (inode->i_state & I_SYNC);
294}
295
296/*
297 * Write out an inode's dirty pages. Called under inode_lock. Either the
298 * caller has ref on the inode (either via __iget or via syscall against an fd)
299 * or the inode has I_WILL_FREE set (via generic_forget_inode)
300 *
282 * If `wait' is set, wait on the writeout. 301 * If `wait' is set, wait on the writeout.
283 * 302 *
284 * The whole writeout design is quite complex and fragile. We want to avoid 303 * The whole writeout design is quite complex and fragile. We want to avoid
@@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
288 * Called under inode_lock. 307 * Called under inode_lock.
289 */ 308 */
290static int 309static int
291__sync_single_inode(struct inode *inode, struct writeback_control *wbc) 310writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
292{ 311{
293 unsigned dirty;
294 struct address_space *mapping = inode->i_mapping; 312 struct address_space *mapping = inode->i_mapping;
295 int wait = wbc->sync_mode == WB_SYNC_ALL; 313 int wait = wbc->sync_mode == WB_SYNC_ALL;
314 unsigned dirty;
296 int ret; 315 int ret;
297 316
317 if (!atomic_read(&inode->i_count))
318 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
319 else
320 WARN_ON(inode->i_state & I_WILL_FREE);
321
322 if (inode->i_state & I_SYNC) {
323 /*
324 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that
326 * writeback can proceed with the other inodes on s_io.
327 *
328 * We'll have another go at writing back this inode when we
329 * completed a full scan of s_io.
330 */
331 if (!wait) {
332 requeue_io(inode);
333 return 0;
334 }
335
336 /*
337 * It's a data-integrity sync. We must wait.
338 */
339 inode_wait_for_writeback(inode);
340 }
341
298 BUG_ON(inode->i_state & I_SYNC); 342 BUG_ON(inode->i_state & I_SYNC);
299 343
300 /* Set I_SYNC, reset I_DIRTY */ 344 /* Set I_SYNC, reset I_DIRTY */
@@ -390,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
390} 434}
391 435
392/* 436/*
393 * Write out an inode's dirty pages. Called under inode_lock. Either the
394 * caller has ref on the inode (either via __iget or via syscall against an fd)
395 * or the inode has I_WILL_FREE set (via generic_forget_inode)
396 */
397static int
398__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
399{
400 wait_queue_head_t *wqh;
401
402 if (!atomic_read(&inode->i_count))
403 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
404 else
405 WARN_ON(inode->i_state & I_WILL_FREE);
406
407 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
408 /*
409 * We're skipping this inode because it's locked, and we're not
410 * doing writeback-for-data-integrity. Move it to s_more_io so
411 * that writeback can proceed with the other inodes on s_io.
412 * We'll have another go at writing back this inode when we
413 * completed a full scan of s_io.
414 */
415 requeue_io(inode);
416 return 0;
417 }
418
419 /*
420 * It's a data-integrity sync. We must wait.
421 */
422 if (inode->i_state & I_SYNC) {
423 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
424
425 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
426 do {
427 spin_unlock(&inode_lock);
428 __wait_on_bit(wqh, &wq, inode_wait,
429 TASK_UNINTERRUPTIBLE);
430 spin_lock(&inode_lock);
431 } while (inode->i_state & I_SYNC);
432 }
433 return __sync_single_inode(inode, wbc);
434}
435
436/*
437 * Write out a superblock's list of dirty inodes. A wait will be performed 437 * Write out a superblock's list of dirty inodes. A wait will be performed
438 * upon no inodes, all inodes or the final one, depending upon sync_mode. 438 * upon no inodes, all inodes or the final one, depending upon sync_mode.
439 * 439 *
@@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
527 __iget(inode); 527 __iget(inode);
528 pages_skipped = wbc->pages_skipped; 528 pages_skipped = wbc->pages_skipped;
529 __writeback_single_inode(inode, wbc); 529 writeback_single_inode(inode, wbc);
530 if (current_is_pdflush()) 530 if (current_is_pdflush())
531 writeback_release(bdi); 531 writeback_release(bdi);
532 if (wbc->pages_skipped != pages_skipped) { 532 if (wbc->pages_skipped != pages_skipped) {
@@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
708 708
709 might_sleep(); 709 might_sleep();
710 spin_lock(&inode_lock); 710 spin_lock(&inode_lock);
711 ret = __writeback_single_inode(inode, &wbc); 711 ret = writeback_single_inode(inode, &wbc);
712 spin_unlock(&inode_lock); 712 spin_unlock(&inode_lock);
713 if (sync) 713 if (sync)
714 inode_sync_wait(inode); 714 inode_sync_wait(inode);
@@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
732 int ret; 732 int ret;
733 733
734 spin_lock(&inode_lock); 734 spin_lock(&inode_lock);
735 ret = __writeback_single_inode(inode, wbc); 735 ret = writeback_single_inode(inode, wbc);
736 spin_unlock(&inode_lock); 736 spin_unlock(&inode_lock);
737 return ret; 737 return ret;
738} 738}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8fed2ed12f38..6484eb75acd6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -286,8 +286,8 @@ __releases(&fc->lock)
286 } 286 }
287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
288 fc->connected && fc->bdi_initialized) { 288 fc->connected && fc->bdi_initialized) {
289 clear_bdi_congested(&fc->bdi, READ); 289 clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
290 clear_bdi_congested(&fc->bdi, WRITE); 290 clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
291 } 291 }
292 fc->num_background--; 292 fc->num_background--;
293 fc->active_background--; 293 fc->active_background--;
@@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
414 fc->blocked = 1; 414 fc->blocked = 1;
415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
416 fc->bdi_initialized) { 416 fc->bdi_initialized) {
417 set_bdi_congested(&fc->bdi, READ); 417 set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
418 set_bdi_congested(&fc->bdi, WRITE); 418 set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
419 } 419 }
420 list_add_tail(&req->list, &fc->bg_queue); 420 list_add_tail(&req->list, &fc->bg_queue);
421 flush_bg_queue(fc); 421 flush_bg_queue(fc);
@@ -849,6 +849,81 @@ err:
849 return err; 849 return err;
850} 850}
851 851
852static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
853 struct fuse_copy_state *cs)
854{
855 struct fuse_notify_inval_inode_out outarg;
856 int err = -EINVAL;
857
858 if (size != sizeof(outarg))
859 goto err;
860
861 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
862 if (err)
863 goto err;
864 fuse_copy_finish(cs);
865
866 down_read(&fc->killsb);
867 err = -ENOENT;
868 if (!fc->sb)
869 goto err_unlock;
870
871 err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
872 outarg.off, outarg.len);
873
874err_unlock:
875 up_read(&fc->killsb);
876 return err;
877
878err:
879 fuse_copy_finish(cs);
880 return err;
881}
882
883static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
884 struct fuse_copy_state *cs)
885{
886 struct fuse_notify_inval_entry_out outarg;
887 int err = -EINVAL;
888 char buf[FUSE_NAME_MAX+1];
889 struct qstr name;
890
891 if (size < sizeof(outarg))
892 goto err;
893
894 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
895 if (err)
896 goto err;
897
898 err = -ENAMETOOLONG;
899 if (outarg.namelen > FUSE_NAME_MAX)
900 goto err;
901
902 name.name = buf;
903 name.len = outarg.namelen;
904 err = fuse_copy_one(cs, buf, outarg.namelen + 1);
905 if (err)
906 goto err;
907 fuse_copy_finish(cs);
908 buf[outarg.namelen] = 0;
909 name.hash = full_name_hash(name.name, name.len);
910
911 down_read(&fc->killsb);
912 err = -ENOENT;
913 if (!fc->sb)
914 goto err_unlock;
915
916 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
917
918err_unlock:
919 up_read(&fc->killsb);
920 return err;
921
922err:
923 fuse_copy_finish(cs);
924 return err;
925}
926
852static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 927static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
853 unsigned int size, struct fuse_copy_state *cs) 928 unsigned int size, struct fuse_copy_state *cs)
854{ 929{
@@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
856 case FUSE_NOTIFY_POLL: 931 case FUSE_NOTIFY_POLL:
857 return fuse_notify_poll(fc, size, cs); 932 return fuse_notify_poll(fc, size, cs);
858 933
934 case FUSE_NOTIFY_INVAL_INODE:
935 return fuse_notify_inval_inode(fc, size, cs);
936
937 case FUSE_NOTIFY_INVAL_ENTRY:
938 return fuse_notify_inval_entry(fc, size, cs);
939
859 default: 940 default:
860 fuse_copy_finish(cs); 941 fuse_copy_finish(cs);
861 return -EINVAL; 942 return -EINVAL;
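fuse_notify() is reached from fuse_dev_write() for messages whose header carries a zero 'unique' value, with the notification code travelling in the header's 'error' field. As a rough illustration only (this daemon-side convention is assumed here, not shown in this patch), a userspace filesystem could queue an entry invalidation by writing the header, the fuse_notify_inval_entry_out argument and the NUL-terminated name in a single writev() to its /dev/fuse descriptor:

#include <stdint.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/fuse.h>

/* fuse_fd: the daemon's open /dev/fuse descriptor (assumed) */
static int notify_inval_entry(int fuse_fd, uint64_t parent, const char *name)
{
	struct fuse_out_header oh;
	struct fuse_notify_inval_entry_out arg;
	struct iovec iov[3];

	memset(&oh, 0, sizeof(oh));
	memset(&arg, 0, sizeof(arg));
	arg.parent = parent;
	arg.namelen = strlen(name);

	oh.unique = 0;				/* zero marks an unsolicited notification */
	oh.error = FUSE_NOTIFY_INVAL_ENTRY;	/* notify code rides in the error field */
	oh.len = sizeof(oh) + sizeof(arg) + arg.namelen + 1;

	iov[0].iov_base = &oh;		iov[0].iov_len = sizeof(oh);
	iov[1].iov_base = &arg;		iov[1].iov_len = sizeof(arg);
	iov[2].iov_base = (void *)name;	iov[2].iov_len = arg.namelen + 1;

	return writev(fuse_fd, iov, 3) < 0 ? -1 : 0;
}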
@@ -910,7 +991,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
910 unsigned long nr_segs, loff_t pos) 991 unsigned long nr_segs, loff_t pos)
911{ 992{
912 int err; 993 int err;
913 unsigned nbytes = iov_length(iov, nr_segs); 994 size_t nbytes = iov_length(iov, nr_segs);
914 struct fuse_req *req; 995 struct fuse_req *req;
915 struct fuse_out_header oh; 996 struct fuse_out_header oh;
916 struct fuse_copy_state cs; 997 struct fuse_copy_state cs;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b3089a083d30..e703654e7f40 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
375 struct fuse_conn *fc = get_fuse_conn(dir); 375 struct fuse_conn *fc = get_fuse_conn(dir);
376 struct fuse_req *req; 376 struct fuse_req *req;
377 struct fuse_req *forget_req; 377 struct fuse_req *forget_req;
378 struct fuse_open_in inarg; 378 struct fuse_create_in inarg;
379 struct fuse_open_out outopen; 379 struct fuse_open_out outopen;
380 struct fuse_entry_out outentry; 380 struct fuse_entry_out outentry;
381 struct fuse_file *ff; 381 struct fuse_file *ff;
@@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
399 if (!ff) 399 if (!ff)
400 goto out_put_request; 400 goto out_put_request;
401 401
402 if (!fc->dont_mask)
403 mode &= ~current_umask();
404
402 flags &= ~O_NOCTTY; 405 flags &= ~O_NOCTTY;
403 memset(&inarg, 0, sizeof(inarg)); 406 memset(&inarg, 0, sizeof(inarg));
404 memset(&outentry, 0, sizeof(outentry)); 407 memset(&outentry, 0, sizeof(outentry));
405 inarg.flags = flags; 408 inarg.flags = flags;
406 inarg.mode = mode; 409 inarg.mode = mode;
410 inarg.umask = current_umask();
407 req->in.h.opcode = FUSE_CREATE; 411 req->in.h.opcode = FUSE_CREATE;
408 req->in.h.nodeid = get_node_id(dir); 412 req->in.h.nodeid = get_node_id(dir);
409 req->in.numargs = 2; 413 req->in.numargs = 2;
410 req->in.args[0].size = sizeof(inarg); 414 req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
415 sizeof(inarg);
411 req->in.args[0].value = &inarg; 416 req->in.args[0].value = &inarg;
412 req->in.args[1].size = entry->d_name.len + 1; 417 req->in.args[1].size = entry->d_name.len + 1;
413 req->in.args[1].value = entry->d_name.name; 418 req->in.args[1].value = entry->d_name.name;
@@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
546 if (IS_ERR(req)) 551 if (IS_ERR(req))
547 return PTR_ERR(req); 552 return PTR_ERR(req);
548 553
554 if (!fc->dont_mask)
555 mode &= ~current_umask();
556
549 memset(&inarg, 0, sizeof(inarg)); 557 memset(&inarg, 0, sizeof(inarg));
550 inarg.mode = mode; 558 inarg.mode = mode;
551 inarg.rdev = new_encode_dev(rdev); 559 inarg.rdev = new_encode_dev(rdev);
560 inarg.umask = current_umask();
552 req->in.h.opcode = FUSE_MKNOD; 561 req->in.h.opcode = FUSE_MKNOD;
553 req->in.numargs = 2; 562 req->in.numargs = 2;
554 req->in.args[0].size = sizeof(inarg); 563 req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
564 sizeof(inarg);
555 req->in.args[0].value = &inarg; 565 req->in.args[0].value = &inarg;
556 req->in.args[1].size = entry->d_name.len + 1; 566 req->in.args[1].size = entry->d_name.len + 1;
557 req->in.args[1].value = entry->d_name.name; 567 req->in.args[1].value = entry->d_name.name;
@@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
578 if (IS_ERR(req)) 588 if (IS_ERR(req))
579 return PTR_ERR(req); 589 return PTR_ERR(req);
580 590
591 if (!fc->dont_mask)
592 mode &= ~current_umask();
593
581 memset(&inarg, 0, sizeof(inarg)); 594 memset(&inarg, 0, sizeof(inarg));
582 inarg.mode = mode; 595 inarg.mode = mode;
596 inarg.umask = current_umask();
583 req->in.h.opcode = FUSE_MKDIR; 597 req->in.h.opcode = FUSE_MKDIR;
584 req->in.numargs = 2; 598 req->in.numargs = 2;
585 req->in.args[0].size = sizeof(inarg); 599 req->in.args[0].size = sizeof(inarg);
@@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
845 return err; 859 return err;
846} 860}
847 861
862int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
863 struct qstr *name)
864{
865 int err = -ENOTDIR;
866 struct inode *parent;
867 struct dentry *dir;
868 struct dentry *entry;
869
870 parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
871 if (!parent)
872 return -ENOENT;
873
874 mutex_lock(&parent->i_mutex);
875 if (!S_ISDIR(parent->i_mode))
876 goto unlock;
877
878 err = -ENOENT;
879 dir = d_find_alias(parent);
880 if (!dir)
881 goto unlock;
882
883 entry = d_lookup(dir, name);
884 dput(dir);
885 if (!entry)
886 goto unlock;
887
888 fuse_invalidate_attr(parent);
889 fuse_invalidate_entry(entry);
890 dput(entry);
891 err = 0;
892
893 unlock:
894 mutex_unlock(&parent->i_mutex);
895 iput(parent);
896 return err;
897}
898
848/* 899/*
849 * Calling into a user-controlled filesystem gives the filesystem 900 * Calling into a user-controlled filesystem gives the filesystem
850 * daemon ptrace-like capabilities over the requester process. This 901 * daemon ptrace-like capabilities over the requester process. This
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fce6ce694fde..cbc464043b6f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
1922 1922
1923 req = fuse_get_req(fc); 1923 req = fuse_get_req(fc);
1924 if (IS_ERR(req)) 1924 if (IS_ERR(req))
1925 return PTR_ERR(req); 1925 return POLLERR;
1926 1926
1927 req->in.h.opcode = FUSE_POLL; 1927 req->in.h.opcode = FUSE_POLL;
1928 req->in.h.nodeid = ff->nodeid; 1928 req->in.h.nodeid = ff->nodeid;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aaf2f9ff970e..52b641fc0faf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -446,6 +446,9 @@ struct fuse_conn {
446 /** Do multi-page cached writes */ 446 /** Do multi-page cached writes */
447 unsigned big_writes:1; 447 unsigned big_writes:1;
448 448
449 /** Don't apply umask to creation modes */
450 unsigned dont_mask:1;
451
449 /** The number of requests waiting for completion */ 452 /** The number of requests waiting for completion */
450 atomic_t num_waiting; 453 atomic_t num_waiting;
451 454
@@ -481,6 +484,12 @@ struct fuse_conn {
481 484
482 /** Called on final put */ 485 /** Called on final put */
483 void (*release)(struct fuse_conn *); 486 void (*release)(struct fuse_conn *);
487
488 /** Super block for this connection. */
489 struct super_block *sb;
490
491 /** Read/write semaphore to hold when accessing sb. */
492 struct rw_semaphore killsb;
484}; 493};
485 494
486static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 495static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations;
509extern const struct dentry_operations fuse_dentry_operations; 518extern const struct dentry_operations fuse_dentry_operations;
510 519
511/** 520/**
521 * Inode to nodeid comparison.
522 */
523int fuse_inode_eq(struct inode *inode, void *_nodeidp);
524
525/**
512 * Get a filled in inode 526 * Get a filled in inode
513 */ 527 */
514struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 528struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
@@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode);
708 722
709u64 fuse_get_attr_version(struct fuse_conn *fc); 723u64 fuse_get_attr_version(struct fuse_conn *fc);
710 724
725/**
726 * File-system tells the kernel to invalidate cache for the given node id.
727 */
728int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
729 loff_t offset, loff_t len);
730
731/**
732 * File-system tells the kernel to invalidate parent attributes and
733 * the dentry matching parent/name.
734 */
735int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
736 struct qstr *name);
737
711int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 738int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
712 bool isdir); 739 bool isdir);
713ssize_t fuse_direct_io(struct file *file, const char __user *buf, 740ssize_t fuse_direct_io(struct file *file, const char __user *buf,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8673ccf90b7..f91ccc4a189d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
206 BUG(); 206 BUG();
207} 207}
208 208
209static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 209int fuse_inode_eq(struct inode *inode, void *_nodeidp)
210{ 210{
211 u64 nodeid = *(u64 *) _nodeidp; 211 u64 nodeid = *(u64 *) _nodeidp;
212 if (get_node_id(inode) == nodeid) 212 if (get_node_id(inode) == nodeid)
@@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
257 return inode; 257 return inode;
258} 258}
259 259
260int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
261 loff_t offset, loff_t len)
262{
263 struct inode *inode;
264 pgoff_t pg_start;
265 pgoff_t pg_end;
266
267 inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
268 if (!inode)
269 return -ENOENT;
270
271 fuse_invalidate_attr(inode);
272 if (offset >= 0) {
273 pg_start = offset >> PAGE_CACHE_SHIFT;
274 if (len <= 0)
275 pg_end = -1;
276 else
277 pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
278 invalidate_inode_pages2_range(inode->i_mapping,
279 pg_start, pg_end);
280 }
281 iput(inode);
282 return 0;
283}
284
260static void fuse_umount_begin(struct super_block *sb) 285static void fuse_umount_begin(struct super_block *sb)
261{ 286{
262 fuse_abort_conn(get_fuse_conn_super(sb)); 287 fuse_abort_conn(get_fuse_conn_super(sb));
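For reference on the page-range arithmetic in fuse_reverse_inval_inode() above: with 4 KiB pages, offset = 5000 and len = 3000 cover bytes 5000..7999, so pg_start = 5000 >> 12 = 1 and pg_end = (5000 + 3000 - 1) >> 12 = 1, and only page index 1 is invalidated. len <= 0 means "invalidate to end of file" and maps to pg_end = -1 (the largest pgoff_t), while a negative offset skips the page-cache invalidation entirely and only refreshes the cached attributes.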
@@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc)
480 memset(fc, 0, sizeof(*fc)); 505 memset(fc, 0, sizeof(*fc));
481 spin_lock_init(&fc->lock); 506 spin_lock_init(&fc->lock);
482 mutex_init(&fc->inst_mutex); 507 mutex_init(&fc->inst_mutex);
508 init_rwsem(&fc->killsb);
483 atomic_set(&fc->count, 1); 509 atomic_set(&fc->count, 1);
484 init_waitqueue_head(&fc->waitq); 510 init_waitqueue_head(&fc->waitq);
485 init_waitqueue_head(&fc->blocked_waitq); 511 init_waitqueue_head(&fc->blocked_waitq);
@@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
725 } 751 }
726 if (arg->flags & FUSE_BIG_WRITES) 752 if (arg->flags & FUSE_BIG_WRITES)
727 fc->big_writes = 1; 753 fc->big_writes = 1;
754 if (arg->flags & FUSE_DONT_MASK)
755 fc->dont_mask = 1;
728 } else { 756 } else {
729 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 757 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
730 fc->no_lock = 1; 758 fc->no_lock = 1;
@@ -748,7 +776,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
748 arg->minor = FUSE_KERNEL_MINOR_VERSION; 776 arg->minor = FUSE_KERNEL_MINOR_VERSION;
749 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 777 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
750 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 778 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
751 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; 779 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
752 req->in.h.opcode = FUSE_INIT; 780 req->in.h.opcode = FUSE_INIT;
753 req->in.numargs = 1; 781 req->in.numargs = 1;
754 req->in.args[0].size = sizeof(*arg); 782 req->in.args[0].size = sizeof(*arg);
@@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
860 fuse_conn_init(fc); 888 fuse_conn_init(fc);
861 889
862 fc->dev = sb->s_dev; 890 fc->dev = sb->s_dev;
891 fc->sb = sb;
863 err = fuse_bdi_init(fc, sb); 892 err = fuse_bdi_init(fc, sb);
864 if (err) 893 if (err)
865 goto err_put_conn; 894 goto err_put_conn;
866 895
896 /* Handle umasking inside the fuse code */
897 if (sb->s_flags & MS_POSIXACL)
898 fc->dont_mask = 1;
899 sb->s_flags |= MS_POSIXACL;
900
867 fc->release = fuse_free_conn; 901 fc->release = fuse_free_conn;
868 fc->flags = d.flags; 902 fc->flags = d.flags;
869 fc->user_id = d.user_id; 903 fc->user_id = d.user_id;
@@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type,
941 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); 975 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
942} 976}
943 977
978static void fuse_kill_sb_anon(struct super_block *sb)
979{
980 struct fuse_conn *fc = get_fuse_conn_super(sb);
981
982 if (fc) {
983 down_write(&fc->killsb);
984 fc->sb = NULL;
985 up_write(&fc->killsb);
986 }
987
988 kill_anon_super(sb);
989}
990
944static struct file_system_type fuse_fs_type = { 991static struct file_system_type fuse_fs_type = {
945 .owner = THIS_MODULE, 992 .owner = THIS_MODULE,
946 .name = "fuse", 993 .name = "fuse",
947 .fs_flags = FS_HAS_SUBTYPE, 994 .fs_flags = FS_HAS_SUBTYPE,
948 .get_sb = fuse_get_sb, 995 .get_sb = fuse_get_sb,
949 .kill_sb = kill_anon_super, 996 .kill_sb = fuse_kill_sb_anon,
950}; 997};
951 998
952#ifdef CONFIG_BLOCK 999#ifdef CONFIG_BLOCK
@@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
958 mnt); 1005 mnt);
959} 1006}
960 1007
1008static void fuse_kill_sb_blk(struct super_block *sb)
1009{
1010 struct fuse_conn *fc = get_fuse_conn_super(sb);
1011
1012 if (fc) {
1013 down_write(&fc->killsb);
1014 fc->sb = NULL;
1015 up_write(&fc->killsb);
1016 }
1017
1018 kill_block_super(sb);
1019}
1020
961static struct file_system_type fuseblk_fs_type = { 1021static struct file_system_type fuseblk_fs_type = {
962 .owner = THIS_MODULE, 1022 .owner = THIS_MODULE,
963 .name = "fuseblk", 1023 .name = "fuseblk",
964 .get_sb = fuse_get_sb_blk, 1024 .get_sb = fuse_get_sb_blk,
965 .kill_sb = kill_block_super, 1025 .kill_sb = fuse_kill_sb_blk,
966 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1026 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
967}; 1027};
968 1028
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 98d6ef1c1dc0..148d55c14171 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -1,12 +1,11 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM gfs2
3
1#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) 4#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_GFS2_H 5#define _TRACE_GFS2_H
3 6
4#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
5 8
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM gfs2
8#define TRACE_INCLUDE_FILE trace_gfs2
9
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
12#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
@@ -403,5 +402,6 @@ TRACE_EVENT(gfs2_block_alloc,
403/* This part must be outside protection */ 402/* This part must be outside protection */
404#undef TRACE_INCLUDE_PATH 403#undef TRACE_INCLUDE_PATH
405#define TRACE_INCLUDE_PATH . 404#define TRACE_INCLUDE_PATH .
405#define TRACE_INCLUDE_FILE trace_gfs2
406#include <trace/define_trace.h> 406#include <trace/define_trace.h>
407 407
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6f833dc8e910..f7fcbe49da72 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -19,6 +19,7 @@
19#include <linux/nls.h> 19#include <linux/nls.h>
20#include <linux/parser.h> 20#include <linux/parser.h>
21#include <linux/seq_file.h> 21#include <linux/seq_file.h>
22#include <linux/smp_lock.h>
22#include <linux/vfs.h> 23#include <linux/vfs.h>
23 24
24#include "hfs_fs.h" 25#include "hfs_fs.h"
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9fc3af0c0dab..c0759fe0855b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -12,6 +12,7 @@
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/smp_lock.h>
15#include <linux/vfs.h> 16#include <linux/vfs.h>
16#include <linux/nls.h> 17#include <linux/nls.h>
17 18
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fe02ad4740e7..032604e5ef2c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
972 sb->s_blocksize_bits = 10; 972 sb->s_blocksize_bits = 10;
973 sb->s_magic = HOSTFS_SUPER_MAGIC; 973 sb->s_magic = HOSTFS_SUPER_MAGIC;
974 sb->s_op = &hostfs_sbops; 974 sb->s_op = &hostfs_sbops;
975 sb->s_maxbytes = MAX_LFS_FILESIZE;
975 976
976 /* NULL is printed as <NULL> by sprintf: avoid that. */ 977 /* NULL is printed as <NULL> by sprintf: avoid that. */
977 if (req_root == NULL) 978 if (req_root == NULL)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 6916c41d7017..8865c94f55f6 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -6,6 +6,7 @@
6 * directory VFS functions 6 * directory VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11static int hpfs_dir_release(struct inode *inode, struct file *filp) 12static int hpfs_dir_release(struct inode *inode, struct file *filp)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 64ab52259204..3efabff00367 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -6,6 +6,7 @@
6 * file VFS functions 6 * file VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11#define BLOCKS(size) (((size) + 511) >> 9) 12#define BLOCKS(size) (((size) + 511) >> 9)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c2ea31bae313..701ca54c0867 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,7 +13,6 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/smp_lock.h>
17 16
18#include "hpfs.h" 17#include "hpfs.h"
19 18
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 39a1bfbea312..fe703ae46bc7 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -6,6 +6,7 @@
6 * inode VFS functions 6 * inode VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11void hpfs_init_inode(struct inode *i) 12void hpfs_init_inode(struct inode *i)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index b649232dde97..82b9c4ba9ed0 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -6,6 +6,7 @@
6 * adding & removing files & directories 6 * adding & removing files & directories
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 12static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
diff --git a/fs/inode.c b/fs/inode.c
index f643be565df8..901bad1e5f12 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
25#include <linux/fsnotify.h> 25#include <linux/fsnotify.h>
26#include <linux/mount.h> 26#include <linux/mount.h>
27#include <linux/async.h> 27#include <linux/async.h>
28#include <linux/posix_acl.h>
28 29
29/* 30/*
30 * This is needed for the following functions: 31 * This is needed for the following functions:
@@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
189 } 190 }
190 inode->i_private = NULL; 191 inode->i_private = NULL;
191 inode->i_mapping = mapping; 192 inode->i_mapping = mapping;
193#ifdef CONFIG_FS_POSIX_ACL
194 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
195#endif
192 196
193#ifdef CONFIG_FSNOTIFY 197#ifdef CONFIG_FSNOTIFY
194 inode->i_fsnotify_mask = 0; 198 inode->i_fsnotify_mask = 0;
@@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode)
227 ima_inode_free(inode); 231 ima_inode_free(inode);
228 security_inode_free(inode); 232 security_inode_free(inode);
229 fsnotify_inode_delete(inode); 233 fsnotify_inode_delete(inode);
234#ifdef CONFIG_FS_POSIX_ACL
235 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
236 posix_acl_release(inode->i_acl);
237 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
238 posix_acl_release(inode->i_default_acl);
239#endif
230 if (inode->i_sb->s_op->destroy_inode) 240 if (inode->i_sb->s_op->destroy_inode)
231 inode->i_sb->s_op->destroy_inode(inode); 241 inode->i_sb->s_op->destroy_inode(inode);
232 else 242 else
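With i_acl and i_default_acl now initialised and released in the generic inode code above, a filesystem's ACL lookup can lean on the common cache helpers; the jffs2 conversion further down in this diff does exactly that. A hypothetical ->get_acl-style sketch, where examplefs_read_acl_from_disk() is an assumed helper standing in for the filesystem's own on-disk xattr read:

static struct posix_acl *examplefs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl;

	acl = get_cached_acl(inode, type);
	if (acl != ACL_NOT_CACHED)
		return acl;				/* cached entry, already referenced */

	acl = examplefs_read_acl_from_disk(inode, type);	/* assumed helper */
	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl);
	return acl;
}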
@@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode)
665 if (inode->i_mode & S_IFDIR) { 675 if (inode->i_mode & S_IFDIR) {
666 struct file_system_type *type = inode->i_sb->s_type; 676 struct file_system_type *type = inode->i_sb->s_type;
667 677
668 /* 678 /* Set new key only if filesystem hasn't already changed it */
669 * ensure nobody is actually holding i_mutex 679 if (!lockdep_match_class(&inode->i_mutex,
670 */ 680 &type->i_mutex_key)) {
671 mutex_destroy(&inode->i_mutex); 681 /*
672 mutex_init(&inode->i_mutex); 682 * ensure nobody is actually holding i_mutex
673 lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); 683 */
684 mutex_destroy(&inode->i_mutex);
685 mutex_init(&inode->i_mutex);
686 lockdep_set_class(&inode->i_mutex,
687 &type->i_mutex_dir_key);
688 }
674 } 689 }
675#endif 690#endif
676 /* 691 /*
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 001f8d3118f2..5612880fcbe7 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/writeback.h> 16#include <linux/writeback.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/falloc.h>
18 19
19#include <asm/ioctls.h> 20#include <asm/ioctls.h>
20 21
@@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap);
403 404
404#endif /* CONFIG_BLOCK */ 405#endif /* CONFIG_BLOCK */
405 406
407/*
408 * This provides compatibility with legacy XFS pre-allocation ioctls
409 * which predate the fallocate syscall.
410 *
411 * Only the l_start, l_len and l_whence fields of the 'struct space_resv'
412 * are used here, rest are ignored.
413 */
414int ioctl_preallocate(struct file *filp, void __user *argp)
415{
416 struct inode *inode = filp->f_path.dentry->d_inode;
417 struct space_resv sr;
418
419 if (copy_from_user(&sr, argp, sizeof(sr)))
420 return -EFAULT;
421
422 switch (sr.l_whence) {
423 case SEEK_SET:
424 break;
425 case SEEK_CUR:
426 sr.l_start += filp->f_pos;
427 break;
428 case SEEK_END:
429 sr.l_start += i_size_read(inode);
430 break;
431 default:
432 return -EINVAL;
433 }
434
435 return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len);
436}
437
406static int file_ioctl(struct file *filp, unsigned int cmd, 438static int file_ioctl(struct file *filp, unsigned int cmd,
407 unsigned long arg) 439 unsigned long arg)
408{ 440{
@@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
414 return ioctl_fibmap(filp, p); 446 return ioctl_fibmap(filp, p);
415 case FIONREAD: 447 case FIONREAD:
416 return put_user(i_size_read(inode) - filp->f_pos, p); 448 return put_user(i_size_read(inode) - filp->f_pos, p);
449 case FS_IOC_RESVSP:
450 case FS_IOC_RESVSP64:
451 return ioctl_preallocate(filp, p);
417 } 452 }
418 453
419 return vfs_ioctl(filp, cmd, arg); 454 return vfs_ioctl(filp, cmd, arg);
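ioctl_preallocate() above routes the legacy FS_IOC_RESVSP/FS_IOC_RESVSP64 reservations into the fallocate path with FALLOC_FL_KEEP_SIZE, so from userspace the old ioctl and a plain fallocate() call behave the same where both are available. A small illustrative equivalent, assuming a glibc that exposes fallocate() (the helper name is made up):

#define _GNU_SOURCE
#include <fcntl.h>

/* Reserve len bytes at offset without growing i_size, matching the
 * behaviour the FS_IOC_RESVSP handler above provides. */
static int preallocate_keep_size(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_KEEP_SIZE, offset, len);
}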
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 58a7963e168a..85f96bc651c7 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -142,6 +142,7 @@ static const struct dentry_operations isofs_dentry_ops[] = {
142 142
143struct iso9660_options{ 143struct iso9660_options{
144 unsigned int rock:1; 144 unsigned int rock:1;
145 unsigned int joliet:1;
145 unsigned int cruft:1; 146 unsigned int cruft:1;
146 unsigned int hide:1; 147 unsigned int hide:1;
147 unsigned int showassoc:1; 148 unsigned int showassoc:1;
@@ -151,7 +152,6 @@ struct iso9660_options{
151 unsigned int gid_set:1; 152 unsigned int gid_set:1;
152 unsigned int utf8:1; 153 unsigned int utf8:1;
153 unsigned char map; 154 unsigned char map;
154 char joliet;
155 unsigned char check; 155 unsigned char check;
156 unsigned int blocksize; 156 unsigned int blocksize;
157 mode_t fmode; 157 mode_t fmode;
@@ -632,7 +632,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
632 else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { 632 else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) {
633 sec = (struct iso_supplementary_descriptor *)vdp; 633 sec = (struct iso_supplementary_descriptor *)vdp;
634 if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { 634 if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) {
635 if (opt.joliet == 'y') { 635 if (opt.joliet) {
636 if (sec->escape[2] == 0x40) 636 if (sec->escape[2] == 0x40)
637 joliet_level = 1; 637 joliet_level = 1;
638 else if (sec->escape[2] == 0x43) 638 else if (sec->escape[2] == 0x43)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 18bfd5dab642..e378cb383979 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
297 unsigned int new_offset; 297 unsigned int new_offset;
298 struct buffer_head *bh_in = jh2bh(jh_in); 298 struct buffer_head *bh_in = jh2bh(jh_in);
299 struct jbd2_buffer_trigger_type *triggers; 299 struct jbd2_buffer_trigger_type *triggers;
300 journal_t *journal = transaction->t_journal;
300 301
301 /* 302 /*
302 * The buffer really shouldn't be locked: only the current committing 303 * The buffer really shouldn't be locked: only the current committing
@@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
310 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 311 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
311 312
312 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 313 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
314 /* keep subsequent assertions sane */
315 new_bh->b_state = 0;
316 init_buffer(new_bh, NULL, NULL);
317 atomic_set(&new_bh->b_count, 1);
318 new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
313 319
314 /* 320 /*
315 * If a new transaction has already done a buffer copy-out, then 321 * If a new transaction has already done a buffer copy-out, then
@@ -388,14 +394,6 @@ repeat:
388 kunmap_atomic(mapped_data, KM_USER0); 394 kunmap_atomic(mapped_data, KM_USER0);
389 } 395 }
390 396
391 /* keep subsequent assertions sane */
392 new_bh->b_state = 0;
393 init_buffer(new_bh, NULL, NULL);
394 atomic_set(&new_bh->b_count, 1);
395 jbd_unlock_bh_state(bh_in);
396
397 new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
398
399 set_bh_page(new_bh, new_page, new_offset); 397 set_bh_page(new_bh, new_page, new_offset);
400 new_jh->b_transaction = NULL; 398 new_jh->b_transaction = NULL;
401 new_bh->b_size = jh2bh(jh_in)->b_size; 399 new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -412,7 +410,11 @@ repeat:
412 * copying is moved to the transaction's shadow queue. 410 * copying is moved to the transaction's shadow queue.
413 */ 411 */
414 JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 412 JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
415 jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); 413 spin_lock(&journal->j_list_lock);
414 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
415 spin_unlock(&journal->j_list_lock);
416 jbd_unlock_bh_state(bh_in);
417
416 JBUFFER_TRACE(new_jh, "file as BJ_IO"); 418 JBUFFER_TRACE(new_jh, "file as BJ_IO");
417 jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); 419 jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
418 420
@@ -2410,6 +2412,7 @@ const char *jbd2_dev_to_name(dev_t device)
2410 int i = hash_32(device, CACHE_SIZE_BITS); 2412 int i = hash_32(device, CACHE_SIZE_BITS);
2411 char *ret; 2413 char *ret;
2412 struct block_device *bd; 2414 struct block_device *bd;
2415 static struct devname_cache *new_dev;
2413 2416
2414 rcu_read_lock(); 2417 rcu_read_lock();
2415 if (devcache[i] && devcache[i]->device == device) { 2418 if (devcache[i] && devcache[i]->device == device) {
@@ -2419,20 +2422,20 @@ const char *jbd2_dev_to_name(dev_t device)
2419 } 2422 }
2420 rcu_read_unlock(); 2423 rcu_read_unlock();
2421 2424
2425 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2426 if (!new_dev)
2427 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2422 spin_lock(&devname_cache_lock); 2428 spin_lock(&devname_cache_lock);
2423 if (devcache[i]) { 2429 if (devcache[i]) {
2424 if (devcache[i]->device == device) { 2430 if (devcache[i]->device == device) {
2431 kfree(new_dev);
2425 ret = devcache[i]->devname; 2432 ret = devcache[i]->devname;
2426 spin_unlock(&devname_cache_lock); 2433 spin_unlock(&devname_cache_lock);
2427 return ret; 2434 return ret;
2428 } 2435 }
2429 call_rcu(&devcache[i]->rcu, free_devcache); 2436 call_rcu(&devcache[i]->rcu, free_devcache);
2430 } 2437 }
2431 devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2438 devcache[i] = new_dev;
2432 if (!devcache[i]) {
2433 spin_unlock(&devname_cache_lock);
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 }
2436 devcache[i]->device = device; 2439 devcache[i]->device = device;
2437 bd = bdget(device); 2440 bd = bdget(device);
2438 if (bd) { 2441 if (bd) {
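The jbd2_dev_to_name() rework above is the standard shape for a GFP_KERNEL allocation feeding a spinlocked cache: allocate before taking the lock (kmalloc may sleep), then discard the buffer if another CPU installed an entry first. A generic sketch of the idiom with placeholder names:

#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_entry { int payload; };		/* placeholder */

static DEFINE_SPINLOCK(example_lock);
static struct example_entry *example_slot;	/* protected by example_lock */

static struct example_entry *example_install(void)
{
	struct example_entry *new_entry, *ret;

	new_entry = kmalloc(sizeof(*new_entry), GFP_KERNEL);	/* may sleep, so no lock held */
	if (!new_entry)
		return NULL;

	spin_lock(&example_lock);
	if (example_slot)
		kfree(new_entry);		/* lost the race, reuse the winner */
	else
		example_slot = new_entry;
	ret = example_slot;
	spin_unlock(&example_lock);
	return ret;
}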
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 494501edba6b..6213ac728f30 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
499 wake_up(&journal->j_wait_transaction_locked); 499 wake_up(&journal->j_wait_transaction_locked);
500} 500}
501 501
502/* 502static void warn_dirty_buffer(struct buffer_head *bh)
503 * Report any unexpected dirty buffers which turn up. Normally those
504 * indicate an error, but they can occur if the user is running (say)
505 * tune2fs to modify the live filesystem, so we need the option of
506 * continuing as gracefully as possible. #
507 *
508 * The caller should already hold the journal lock and
509 * j_list_lock spinlock: most callers will need those anyway
510 * in order to probe the buffer's journaling state safely.
511 */
512static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
513{ 503{
514 int jlist; 504 char b[BDEVNAME_SIZE];
515
516 /* If this buffer is one which might reasonably be dirty
517 * --- ie. data, or not part of this journal --- then
518 * we're OK to leave it alone, but otherwise we need to
519 * move the dirty bit to the journal's own internal
520 * JBDDirty bit. */
521 jlist = jh->b_jlist;
522 505
523 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 506 printk(KERN_WARNING
524 jlist == BJ_Shadow || jlist == BJ_Forget) { 507 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
525 struct buffer_head *bh = jh2bh(jh); 508 "There's a risk of filesystem corruption in case of system "
526 509 "crash.\n",
527 if (test_clear_buffer_dirty(bh)) 510 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
528 set_buffer_jbddirty(bh);
529 }
530} 511}
531 512
532/* 513/*
@@ -593,14 +574,16 @@ repeat:
593 if (jh->b_next_transaction) 574 if (jh->b_next_transaction)
594 J_ASSERT_JH(jh, jh->b_next_transaction == 575 J_ASSERT_JH(jh, jh->b_next_transaction ==
595 transaction); 576 transaction);
577 warn_dirty_buffer(bh);
596 } 578 }
597 /* 579 /*
598 * In any case we need to clean the dirty flag and we must 580 * In any case we need to clean the dirty flag and we must
599 * do it under the buffer lock to be sure we don't race 581 * do it under the buffer lock to be sure we don't race
600 * with running write-out. 582 * with running write-out.
601 */ 583 */
602 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 584 JBUFFER_TRACE(jh, "Journalling dirty buffer");
603 jbd_unexpected_dirty_buffer(jh); 585 clear_buffer_dirty(bh);
586 set_buffer_jbddirty(bh);
604 } 587 }
605 588
606 unlock_buffer(bh); 589 unlock_buffer(bh);
@@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
843 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); 826 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
844 827
845 if (jh->b_transaction == NULL) { 828 if (jh->b_transaction == NULL) {
829 /*
830 * Previous jbd2_journal_forget() could have left the buffer
831 * with jbddirty bit set because it was being committed. When
832 * the commit finished, we've filed the buffer for
833 * checkpointing and marked it dirty. Now we are reallocating
834 * the buffer so the transaction freeing it must have
835 * committed and so it's safe to clear the dirty bit.
836 */
837 clear_buffer_dirty(jh2bh(jh));
846 jh->b_transaction = transaction; 838 jh->b_transaction = transaction;
847 839
848 /* first access by this transaction */ 840 /* first access by this transaction */
@@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1644 1636
1645 if (jh->b_cp_transaction) { 1637 if (jh->b_cp_transaction) {
1646 JBUFFER_TRACE(jh, "on running+cp transaction"); 1638 JBUFFER_TRACE(jh, "on running+cp transaction");
1639 /*
1640 * We don't want to write the buffer anymore, clear the
1641 * bit so that we don't confuse checks in
1642 * __journal_file_buffer
1643 */
1644 clear_buffer_dirty(bh);
1647 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 1645 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1648 clear_buffer_jbddirty(bh);
1649 may_free = 0; 1646 may_free = 0;
1650 } else { 1647 } else {
1651 JBUFFER_TRACE(jh, "on running transaction"); 1648 JBUFFER_TRACE(jh, "on running transaction");
@@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
1896 if (jh->b_transaction && jh->b_jlist == jlist) 1893 if (jh->b_transaction && jh->b_jlist == jlist)
1897 return; 1894 return;
1898 1895
1899 /* The following list of buffer states needs to be consistent
1900 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1901 * state. */
1902
1903 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 1896 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1904 jlist == BJ_Shadow || jlist == BJ_Forget) { 1897 jlist == BJ_Shadow || jlist == BJ_Forget) {
1898 /*
1899 * For metadata buffers, we track dirty bit in buffer_jbddirty
1900 * instead of buffer_dirty. We should not see a dirty bit set
1901 * here because we clear it in do_get_write_access but e.g.
1902 * tune2fs can modify the sb and set the dirty bit at any time
1903 * so we try to gracefully handle that.
1904 */
1905 if (buffer_dirty(bh))
1906 warn_dirty_buffer(bh);
1905 if (test_clear_buffer_dirty(bh) || 1907 if (test_clear_buffer_dirty(bh) ||
1906 test_clear_buffer_jbddirty(bh)) 1908 test_clear_buffer_jbddirty(bh))
1907 was_dirty = 1; 1909 was_dirty = 1;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 043740dde20c..8fcb6239218e 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
156 return ERR_PTR(-EINVAL); 156 return ERR_PTR(-EINVAL);
157} 157}
158 158
159static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl)
160{
161 struct posix_acl *acl = JFFS2_ACL_NOT_CACHED;
162
163 spin_lock(&inode->i_lock);
164 if (*i_acl != JFFS2_ACL_NOT_CACHED)
165 acl = posix_acl_dup(*i_acl);
166 spin_unlock(&inode->i_lock);
167 return acl;
168}
169
170static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl)
171{
172 spin_lock(&inode->i_lock);
173 if (*i_acl != JFFS2_ACL_NOT_CACHED)
174 posix_acl_release(*i_acl);
175 *i_acl = posix_acl_dup(acl);
176 spin_unlock(&inode->i_lock);
177}
178
179static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) 159static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
180{ 160{
181 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
182 struct posix_acl *acl; 161 struct posix_acl *acl;
183 char *value = NULL; 162 char *value = NULL;
184 int rc, xprefix; 163 int rc, xprefix;
185 164
165 acl = get_cached_acl(inode, type);
166 if (acl != ACL_NOT_CACHED)
167 return acl;
168
186 switch (type) { 169 switch (type) {
187 case ACL_TYPE_ACCESS: 170 case ACL_TYPE_ACCESS:
188 acl = jffs2_iget_acl(inode, &f->i_acl_access);
189 if (acl != JFFS2_ACL_NOT_CACHED)
190 return acl;
191 xprefix = JFFS2_XPREFIX_ACL_ACCESS; 171 xprefix = JFFS2_XPREFIX_ACL_ACCESS;
192 break; 172 break;
193 case ACL_TYPE_DEFAULT: 173 case ACL_TYPE_DEFAULT:
194 acl = jffs2_iget_acl(inode, &f->i_acl_default);
195 if (acl != JFFS2_ACL_NOT_CACHED)
196 return acl;
197 xprefix = JFFS2_XPREFIX_ACL_DEFAULT; 174 xprefix = JFFS2_XPREFIX_ACL_DEFAULT;
198 break; 175 break;
199 default: 176 default:
200 return ERR_PTR(-EINVAL); 177 BUG();
201 } 178 }
202 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0); 179 rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0);
203 if (rc > 0) { 180 if (rc > 0) {
@@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
215 } 192 }
216 if (value) 193 if (value)
217 kfree(value); 194 kfree(value);
218 if (!IS_ERR(acl)) { 195 if (!IS_ERR(acl))
219 switch (type) { 196 set_cached_acl(inode, type, acl);
220 case ACL_TYPE_ACCESS:
221 jffs2_iset_acl(inode, &f->i_acl_access, acl);
222 break;
223 case ACL_TYPE_DEFAULT:
224 jffs2_iset_acl(inode, &f->i_acl_default, acl);
225 break;
226 }
227 }
228 return acl; 197 return acl;
229} 198}
230 199
@@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
249 218
250static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) 219static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
251{ 220{
252 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
253 int rc, xprefix; 221 int rc, xprefix;
254 222
255 if (S_ISLNK(inode->i_mode)) 223 if (S_ISLNK(inode->i_mode))
@@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
285 return -EINVAL; 253 return -EINVAL;
286 } 254 }
287 rc = __jffs2_set_acl(inode, xprefix, acl); 255 rc = __jffs2_set_acl(inode, xprefix, acl);
288 if (!rc) { 256 if (!rc)
289 switch(type) { 257 set_cached_acl(inode, type, acl);
290 case ACL_TYPE_ACCESS:
291 jffs2_iset_acl(inode, &f->i_acl_access, acl);
292 break;
293 case ACL_TYPE_DEFAULT:
294 jffs2_iset_acl(inode, &f->i_acl_default, acl);
295 break;
296 }
297 }
298 return rc; 258 return rc;
299} 259}
300 260
@@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask)
321 281
322int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) 282int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
323{ 283{
324 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
325 struct posix_acl *acl, *clone; 284 struct posix_acl *acl, *clone;
326 int rc; 285 int rc;
327 286
328 f->i_acl_default = NULL; 287 cache_no_acl(inode);
329 f->i_acl_access = NULL;
330 288
331 if (S_ISLNK(*i_mode)) 289 if (S_ISLNK(*i_mode))
332 return 0; /* Symlink always has no-ACL */ 290 return 0; /* Symlink always has no-ACL */
@@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
339 *i_mode &= ~current_umask(); 297 *i_mode &= ~current_umask();
340 } else { 298 } else {
341 if (S_ISDIR(*i_mode)) 299 if (S_ISDIR(*i_mode))
342 jffs2_iset_acl(inode, &f->i_acl_default, acl); 300 set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
343 301
344 clone = posix_acl_clone(acl, GFP_KERNEL); 302 clone = posix_acl_clone(acl, GFP_KERNEL);
345 if (!clone) 303 if (!clone)
@@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
350 return rc; 308 return rc;
351 } 309 }
352 if (rc > 0) 310 if (rc > 0)
353 jffs2_iset_acl(inode, &f->i_acl_access, clone); 311 set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
354 312
355 posix_acl_release(clone); 313 posix_acl_release(clone);
356 } 314 }
@@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
359 317
360int jffs2_init_acl_post(struct inode *inode) 318int jffs2_init_acl_post(struct inode *inode)
361{ 319{
362 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
363 int rc; 320 int rc;
364 321
365 if (f->i_acl_default) { 322 if (inode->i_default_acl) {
366 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default); 323 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl);
367 if (rc) 324 if (rc)
368 return rc; 325 return rc;
369 } 326 }
370 327
371 if (f->i_acl_access) { 328 if (inode->i_acl) {
372 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access); 329 rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl);
373 if (rc) 330 if (rc)
374 return rc; 331 return rc;
375 } 332 }
@@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode)
377 return 0; 334 return 0;
378} 335}
379 336
380void jffs2_clear_acl(struct jffs2_inode_info *f)
381{
382 if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
383 posix_acl_release(f->i_acl_access);
384 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
385 }
386 if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) {
387 posix_acl_release(f->i_acl_default);
388 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
389 }
390}
391
392int jffs2_acl_chmod(struct inode *inode) 337int jffs2_acl_chmod(struct inode *inode)
393{ 338{
394 struct posix_acl *acl, *clone; 339 struct posix_acl *acl, *clone;
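
The jffs2 hunks above, together with the jfs and nilfs2 hunks further down, replace filesystem-private ACL caches with the generic VFS cache kept in inode->i_acl and inode->i_default_acl, driven by get_cached_acl(), set_cached_acl(), forget_cached_acl() and cache_no_acl(). A minimal sketch of the resulting ->get_acl() shape, assuming a hypothetical foofs_getxattr() backend rather than any function from this series:

	static struct posix_acl *foofs_get_acl(struct inode *inode, int type)
	{
		struct posix_acl *acl;
		char *value;
		int size;

		/* Fast path: the generic cache hands back its own reference. */
		acl = get_cached_acl(inode, type);
		if (acl != ACL_NOT_CACHED)
			return acl;

		/* Slow path: read the on-disk xattr (foofs_getxattr is made up). */
		size = foofs_getxattr(inode, type, NULL, 0);
		if (size == -ENODATA) {
			acl = NULL;
		} else if (size < 0) {
			return ERR_PTR(size);
		} else {
			value = kmalloc(size, GFP_KERNEL);
			if (!value)
				return ERR_PTR(-ENOMEM);
			size = foofs_getxattr(inode, type, value, size);
			if (size < 0)
				acl = ERR_PTR(size);
			else
				acl = posix_acl_from_xattr(value, size);
			kfree(value);
		}
		if (!IS_ERR(acl))
			set_cached_acl(inode, type, acl);	/* cache dups its own ref */
		return acl;
	}

Setting an ACL follows the mirror image (write the xattr, then set_cached_acl() on success), and invalidating from the xattr path, as in the jfs/xattr.c hunk below, becomes a single forget_cached_acl() call.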
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8ca058aed384..fc929f2a14f6 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,13 +26,10 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29#define JFFS2_ACL_NOT_CACHED ((void *)-1)
30
31extern int jffs2_permission(struct inode *, int); 29extern int jffs2_permission(struct inode *, int);
32extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
34extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
35extern void jffs2_clear_acl(struct jffs2_inode_info *);
36 33
37extern struct xattr_handler jffs2_acl_access_xattr_handler; 34extern struct xattr_handler jffs2_acl_access_xattr_handler;
38extern struct xattr_handler jffs2_acl_default_xattr_handler; 35extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
43#define jffs2_acl_chmod(inode) (0) 40#define jffs2_acl_chmod(inode) (0)
44#define jffs2_init_acl_pre(dir_i,inode,mode) (0) 41#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
45#define jffs2_init_acl_post(inode) (0) 42#define jffs2_init_acl_post(inode) (0)
46#define jffs2_clear_acl(f)
47 43
48#endif /* CONFIG_JFFS2_FS_POSIX_ACL */ 44#endif /* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a0244740b75a..b47679be118a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -270,19 +270,21 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
270 D2({ 270 D2({
271 int i=0; 271 int i=0;
272 struct jffs2_raw_node_ref *this; 272 struct jffs2_raw_node_ref *this;
273 printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n" KERN_DEBUG); 273 printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n");
274 274
275 this = ic->nodes; 275 this = ic->nodes;
276 276
277 printk(KERN_DEBUG);
277 while(this) { 278 while(this) {
278 printk( "0x%08x(%d)->", ref_offset(this), ref_flags(this)); 279 printk(KERN_CONT "0x%08x(%d)->",
280 ref_offset(this), ref_flags(this));
279 if (++i == 5) { 281 if (++i == 5) {
280 printk("\n" KERN_DEBUG); 282 printk(KERN_DEBUG);
281 i=0; 283 i=0;
282 } 284 }
283 this = this->next_in_ino; 285 this = this->next_in_ino;
284 } 286 }
285 printk("\n"); 287 printk(KERN_CONT "\n");
286 }); 288 });
287 289
288 switch (ic->class) { 290 switch (ic->class) {
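
The erase.c hunk stops splicing log-level markers into the middle of the format string and instead flags continuation output explicitly, so the whole node-ref chain still comes out as debug-level output. The general shape of that idiom, with an illustrative dump_values() that is not part of the patch:

	/* Print one KERN_DEBUG header line and append items to it. */
	static void dump_values(const unsigned int *val, int n)
	{
		int i;

		printk(KERN_DEBUG "values:");
		for (i = 0; i < n; i++)
			printk(KERN_CONT " 0x%08x", val[i]);
		printk(KERN_CONT "\n");
	}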
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 4c41db91eaa4..c6923da98263 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -50,10 +50,6 @@ struct jffs2_inode_info {
50 uint16_t flags; 50 uint16_t flags;
51 uint8_t usercompr; 51 uint8_t usercompr;
52 struct inode vfs_inode; 52 struct inode vfs_inode;
53#ifdef CONFIG_JFFS2_FS_POSIX_ACL
54 struct posix_acl *i_acl_access;
55 struct posix_acl *i_acl_default;
56#endif
57}; 53};
58 54
59#endif /* _JFFS2_FS_I */ 55#endif /* _JFFS2_FS_I */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2228380c47b9..a7f03b7ebcb3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f)
56 f->target = NULL; 56 f->target = NULL;
57 f->flags = 0; 57 f->flags = 0;
58 f->usercompr = 0; 58 f->usercompr = 0;
59#ifdef CONFIG_JFFS2_FS_POSIX_ACL
60 f->i_acl_access = JFFS2_ACL_NOT_CACHED;
61 f->i_acl_default = JFFS2_ACL_NOT_CACHED;
62#endif
63} 59}
64 60
65 61
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1fc1e92356ee..1a80301004b8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
1424 struct jffs2_full_dirent *fd, *fds; 1424 struct jffs2_full_dirent *fd, *fds;
1425 int deleted; 1425 int deleted;
1426 1426
1427 jffs2_clear_acl(f);
1428 jffs2_xattr_delete_inode(c, f->inocache); 1427 jffs2_xattr_delete_inode(c, f->inocache);
1429 mutex_lock(&f->sem); 1428 mutex_lock(&f->sem);
1430 deleted = f->inocache && !f->inocache->pino_nlink; 1429 deleted = f->inocache && !f->inocache->pino_nlink;
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 7515e73e2bfb..696686cc206e 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
130 if (jffs2_sum_active()) { 130 if (jffs2_sum_active()) {
131 s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 131 s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
132 if (!s) { 132 if (!s) {
133 kfree(flashbuf);
134 JFFS2_WARNING("Can't allocate memory for summary\n"); 133 JFFS2_WARNING("Can't allocate memory for summary\n");
135 return -ENOMEM; 134 ret = -ENOMEM;
135 goto out;
136 } 136 }
137 } 137 }
138 138
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 07a22caf2687..0035c021395a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -12,6 +12,7 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/smp_lock.h>
15#include <linux/init.h> 16#include <linux/init.h>
16#include <linux/list.h> 17#include <linux/list.h>
17#include <linux/fs.h> 18#include <linux/fs.h>
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 06ca1b8d2054..91fa3ad6e8c2 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
31{ 31{
32 struct posix_acl *acl; 32 struct posix_acl *acl;
33 char *ea_name; 33 char *ea_name;
34 struct jfs_inode_info *ji = JFS_IP(inode);
35 struct posix_acl **p_acl;
36 int size; 34 int size;
37 char *value = NULL; 35 char *value = NULL;
38 36
37 acl = get_cached_acl(inode, type);
38 if (acl != ACL_NOT_CACHED)
39 return acl;
40
39 switch(type) { 41 switch(type) {
40 case ACL_TYPE_ACCESS: 42 case ACL_TYPE_ACCESS:
41 ea_name = POSIX_ACL_XATTR_ACCESS; 43 ea_name = POSIX_ACL_XATTR_ACCESS;
42 p_acl = &ji->i_acl;
43 break; 44 break;
44 case ACL_TYPE_DEFAULT: 45 case ACL_TYPE_DEFAULT:
45 ea_name = POSIX_ACL_XATTR_DEFAULT; 46 ea_name = POSIX_ACL_XATTR_DEFAULT;
46 p_acl = &ji->i_default_acl;
47 break; 47 break;
48 default: 48 default:
49 return ERR_PTR(-EINVAL); 49 return ERR_PTR(-EINVAL);
50 } 50 }
51 51
52 if (*p_acl != JFS_ACL_NOT_CACHED)
53 return posix_acl_dup(*p_acl);
54
55 size = __jfs_getxattr(inode, ea_name, NULL, 0); 52 size = __jfs_getxattr(inode, ea_name, NULL, 0);
56 53
57 if (size > 0) { 54 if (size > 0) {
@@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
62 } 59 }
63 60
64 if (size < 0) { 61 if (size < 0) {
65 if (size == -ENODATA) { 62 if (size == -ENODATA)
66 *p_acl = NULL;
67 acl = NULL; 63 acl = NULL;
68 } else 64 else
69 acl = ERR_PTR(size); 65 acl = ERR_PTR(size);
70 } else { 66 } else {
71 acl = posix_acl_from_xattr(value, size); 67 acl = posix_acl_from_xattr(value, size);
72 if (!IS_ERR(acl))
73 *p_acl = posix_acl_dup(acl);
74 } 68 }
75 kfree(value); 69 kfree(value);
70 if (!IS_ERR(acl)) {
71 set_cached_acl(inode, type, acl);
72 posix_acl_release(acl);
73 }
76 return acl; 74 return acl;
77} 75}
78 76
@@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
80 struct posix_acl *acl) 78 struct posix_acl *acl)
81{ 79{
82 char *ea_name; 80 char *ea_name;
83 struct jfs_inode_info *ji = JFS_IP(inode);
84 struct posix_acl **p_acl;
85 int rc; 81 int rc;
86 int size = 0; 82 int size = 0;
87 char *value = NULL; 83 char *value = NULL;
@@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
92 switch(type) { 88 switch(type) {
93 case ACL_TYPE_ACCESS: 89 case ACL_TYPE_ACCESS:
94 ea_name = POSIX_ACL_XATTR_ACCESS; 90 ea_name = POSIX_ACL_XATTR_ACCESS;
95 p_acl = &ji->i_acl;
96 break; 91 break;
97 case ACL_TYPE_DEFAULT: 92 case ACL_TYPE_DEFAULT:
98 ea_name = POSIX_ACL_XATTR_DEFAULT; 93 ea_name = POSIX_ACL_XATTR_DEFAULT;
99 p_acl = &ji->i_default_acl;
100 if (!S_ISDIR(inode->i_mode)) 94 if (!S_ISDIR(inode->i_mode))
101 return acl ? -EACCES : 0; 95 return acl ? -EACCES : 0;
102 break; 96 break;
@@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
116out: 110out:
117 kfree(value); 111 kfree(value);
118 112
119 if (!rc) { 113 if (!rc)
120 if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) 114 set_cached_acl(inode, type, acl);
121 posix_acl_release(*p_acl); 115
122 *p_acl = posix_acl_dup(acl);
123 }
124 return rc; 116 return rc;
125} 117}
126 118
127static int jfs_check_acl(struct inode *inode, int mask) 119static int jfs_check_acl(struct inode *inode, int mask)
128{ 120{
129 struct jfs_inode_info *ji = JFS_IP(inode); 121 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
130 122
131 if (ji->i_acl == JFS_ACL_NOT_CACHED) { 123 if (IS_ERR(acl))
132 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 124 return PTR_ERR(acl);
133 if (IS_ERR(acl)) 125 if (acl) {
134 return PTR_ERR(acl); 126 int error = posix_acl_permission(inode, acl, mask);
135 posix_acl_release(acl); 127 posix_acl_release(acl);
128 return error;
136 } 129 }
137 130
138 if (ji->i_acl)
139 return posix_acl_permission(inode, ji->i_acl, mask);
140 return -EAGAIN; 131 return -EAGAIN;
141} 132}
142 133
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 439901d205fe..1439f119ec83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -74,10 +74,6 @@ struct jfs_inode_info {
74 /* xattr_sem allows us to access the xattrs without taking i_mutex */ 74 /* xattr_sem allows us to access the xattrs without taking i_mutex */
75 struct rw_semaphore xattr_sem; 75 struct rw_semaphore xattr_sem;
76 lid_t xtlid; /* lid of xtree lock on directory */ 76 lid_t xtlid; /* lid of xtree lock on directory */
77#ifdef CONFIG_JFS_POSIX_ACL
78 struct posix_acl *i_acl;
79 struct posix_acl *i_default_acl;
80#endif
81 union { 77 union {
82 struct { 78 struct {
83 xtpage_t _xtroot; /* 288: xtree root */ 79 xtpage_t _xtroot; /* 288: xtree root */
@@ -107,8 +103,6 @@ struct jfs_inode_info {
107#define i_inline u.link._inline 103#define i_inline u.link._inline
108#define i_inline_ea u.link._inline_ea 104#define i_inline_ea u.link._inline_ea
109 105
110#define JFS_ACL_NOT_CACHED ((void *)-1)
111
112#define IREAD_LOCK(ip, subclass) \ 106#define IREAD_LOCK(ip, subclass) \
113 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) 107 down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
114#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) 108#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 09b1b6ee2186..37e6dcda8fc8 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode)
128 ji->active_ag = -1; 128 ji->active_ag = -1;
129 } 129 }
130 spin_unlock_irq(&ji->ag_lock); 130 spin_unlock_irq(&ji->ag_lock);
131
132#ifdef CONFIG_JFS_POSIX_ACL
133 if (ji->i_acl != JFS_ACL_NOT_CACHED) {
134 posix_acl_release(ji->i_acl);
135 ji->i_acl = JFS_ACL_NOT_CACHED;
136 }
137 if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
138 posix_acl_release(ji->i_default_acl);
139 ji->i_default_acl = JFS_ACL_NOT_CACHED;
140 }
141#endif
142
143 kmem_cache_free(jfs_inode_cachep, ji); 131 kmem_cache_free(jfs_inode_cachep, ji);
144} 132}
145 133
@@ -798,10 +786,6 @@ static void init_once(void *foo)
798 init_rwsem(&jfs_ip->xattr_sem); 786 init_rwsem(&jfs_ip->xattr_sem);
799 spin_lock_init(&jfs_ip->ag_lock); 787 spin_lock_init(&jfs_ip->ag_lock);
800 jfs_ip->active_ag = -1; 788 jfs_ip->active_ag = -1;
801#ifdef CONFIG_JFS_POSIX_ACL
802 jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
803 jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
804#endif
805 inode_init_once(&jfs_ip->vfs_inode); 789 inode_init_once(&jfs_ip->vfs_inode);
806} 790}
807 791
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 61dfa8173ebc..fad364548bc9 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
727 /* 727 /*
728 * We're changing the ACL. Get rid of the cached one 728 * We're changing the ACL. Get rid of the cached one
729 */ 729 */
730 acl =JFS_IP(inode)->i_acl; 730 forget_cached_acl(inode, ACL_TYPE_ACCESS);
731 if (acl != JFS_ACL_NOT_CACHED)
732 posix_acl_release(acl);
733 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
734 731
735 return 0; 732 return 0;
736 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { 733 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
@@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
746 /* 743 /*
747 * We're changing the default ACL. Get rid of the cached one 744 * We're changing the default ACL. Get rid of the cached one
748 */ 745 */
749 acl =JFS_IP(inode)->i_default_acl; 746 forget_cached_acl(inode, ACL_TYPE_DEFAULT);
750 if (acl && (acl != JFS_ACL_NOT_CACHED))
751 posix_acl_release(acl);
752 JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED;
753 747
754 return 0; 748 return 0;
755 } 749 }
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index f2fdcbce143e..4336adba952a 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/smp_lock.h>
10#include <linux/types.h> 11#include <linux/types.h>
11#include <linux/errno.h> 12#include <linux/errno.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 1725037374c5..bd173a6ca3b1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -10,6 +10,7 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h>
13#include <linux/in.h> 14#include <linux/in.h>
14#include <linux/sunrpc/svc.h> 15#include <linux/sunrpc/svc.h>
15#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3688e55901fc..e1d28ddd2169 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -10,6 +10,7 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h>
13#include <linux/in.h> 14#include <linux/in.h>
14#include <linux/sunrpc/svc.h> 15#include <linux/sunrpc/svc.h>
15#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
diff --git a/fs/namei.c b/fs/namei.c
index 527119afb6a5..f3c5b278895a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname,
1698 if (error) 1698 if (error)
1699 return ERR_PTR(error); 1699 return ERR_PTR(error);
1700 error = path_walk(pathname, &nd); 1700 error = path_walk(pathname, &nd);
1701 if (error) 1701 if (error) {
1702 if (nd.root.mnt)
1703 path_put(&nd.root);
1702 return ERR_PTR(error); 1704 return ERR_PTR(error);
1705 }
1703 if (unlikely(!audit_dummy_context())) 1706 if (unlikely(!audit_dummy_context()))
1704 audit_inode(pathname, nd.path.dentry); 1707 audit_inode(pathname, nd.path.dentry);
1705 1708
@@ -1758,7 +1761,13 @@ do_last:
1758 goto exit; 1761 goto exit;
1759 } 1762 }
1760 filp = nameidata_to_filp(&nd, open_flag); 1763 filp = nameidata_to_filp(&nd, open_flag);
1764 if (IS_ERR(filp))
1765 ima_counts_put(&nd.path,
1766 acc_mode & (MAY_READ | MAY_WRITE |
1767 MAY_EXEC));
1761 mnt_drop_write(nd.path.mnt); 1768 mnt_drop_write(nd.path.mnt);
1769 if (nd.root.mnt)
1770 path_put(&nd.root);
1762 return filp; 1771 return filp;
1763 } 1772 }
1764 1773
@@ -1812,6 +1821,9 @@ ok:
1812 goto exit; 1821 goto exit;
1813 } 1822 }
1814 filp = nameidata_to_filp(&nd, open_flag); 1823 filp = nameidata_to_filp(&nd, open_flag);
1824 if (IS_ERR(filp))
1825 ima_counts_put(&nd.path,
1826 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
1815 /* 1827 /*
1816 * It is now safe to drop the mnt write 1828 * It is now safe to drop the mnt write
1817 * because the filp has had a write taken 1829 * because the filp has had a write taken
@@ -1819,6 +1831,8 @@ ok:
1819 */ 1831 */
1820 if (will_write) 1832 if (will_write)
1821 mnt_drop_write(nd.path.mnt); 1833 mnt_drop_write(nd.path.mnt);
1834 if (nd.root.mnt)
1835 path_put(&nd.root);
1822 return filp; 1836 return filp;
1823 1837
1824exit_mutex_unlock: 1838exit_mutex_unlock:
@@ -1859,6 +1873,8 @@ do_link:
1859 * with "intent.open". 1873 * with "intent.open".
1860 */ 1874 */
1861 release_open_intent(&nd); 1875 release_open_intent(&nd);
1876 if (nd.root.mnt)
1877 path_put(&nd.root);
1862 return ERR_PTR(error); 1878 return ERR_PTR(error);
1863 } 1879 }
1864 nd.flags &= ~LOOKUP_PARENT; 1880 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index a7bea8c8bd46..277c28a63ead 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -22,6 +22,7 @@
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/mnt_namespace.h> 23#include <linux/mnt_namespace.h>
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include <linux/nsproxy.h>
25#include <linux/security.h> 26#include <linux/security.h>
26#include <linux/mount.h> 27#include <linux/mount.h>
27#include <linux/ramfs.h> 28#include <linux/ramfs.h>
@@ -42,6 +43,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
42static int event; 43static int event;
43static DEFINE_IDA(mnt_id_ida); 44static DEFINE_IDA(mnt_id_ida);
44static DEFINE_IDA(mnt_group_ida); 45static DEFINE_IDA(mnt_group_ida);
46static int mnt_id_start = 0;
47static int mnt_group_start = 1;
45 48
46static struct list_head *mount_hashtable __read_mostly; 49static struct list_head *mount_hashtable __read_mostly;
47static struct kmem_cache *mnt_cache __read_mostly; 50static struct kmem_cache *mnt_cache __read_mostly;
@@ -69,7 +72,9 @@ static int mnt_alloc_id(struct vfsmount *mnt)
69retry: 72retry:
70 ida_pre_get(&mnt_id_ida, GFP_KERNEL); 73 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
71 spin_lock(&vfsmount_lock); 74 spin_lock(&vfsmount_lock);
72 res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); 75 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
76 if (!res)
77 mnt_id_start = mnt->mnt_id + 1;
73 spin_unlock(&vfsmount_lock); 78 spin_unlock(&vfsmount_lock);
74 if (res == -EAGAIN) 79 if (res == -EAGAIN)
75 goto retry; 80 goto retry;
@@ -79,8 +84,11 @@ retry:
79 84
80static void mnt_free_id(struct vfsmount *mnt) 85static void mnt_free_id(struct vfsmount *mnt)
81{ 86{
87 int id = mnt->mnt_id;
82 spin_lock(&vfsmount_lock); 88 spin_lock(&vfsmount_lock);
83 ida_remove(&mnt_id_ida, mnt->mnt_id); 89 ida_remove(&mnt_id_ida, id);
90 if (mnt_id_start > id)
91 mnt_id_start = id;
84 spin_unlock(&vfsmount_lock); 92 spin_unlock(&vfsmount_lock);
85} 93}
86 94
@@ -91,10 +99,18 @@ static void mnt_free_id(struct vfsmount *mnt)
91 */ 99 */
92static int mnt_alloc_group_id(struct vfsmount *mnt) 100static int mnt_alloc_group_id(struct vfsmount *mnt)
93{ 101{
102 int res;
103
94 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) 104 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
95 return -ENOMEM; 105 return -ENOMEM;
96 106
97 return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); 107 res = ida_get_new_above(&mnt_group_ida,
108 mnt_group_start,
109 &mnt->mnt_group_id);
110 if (!res)
111 mnt_group_start = mnt->mnt_group_id + 1;
112
113 return res;
98} 114}
99 115
100/* 116/*
@@ -102,7 +118,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
102 */ 118 */
103void mnt_release_group_id(struct vfsmount *mnt) 119void mnt_release_group_id(struct vfsmount *mnt)
104{ 120{
105 ida_remove(&mnt_group_ida, mnt->mnt_group_id); 121 int id = mnt->mnt_group_id;
122 ida_remove(&mnt_group_ida, id);
123 if (mnt_group_start > id)
124 mnt_group_start = id;
106 mnt->mnt_group_id = 0; 125 mnt->mnt_group_id = 0;
107} 126}
108 127
@@ -2222,16 +2241,9 @@ static void __init init_mount_tree(void)
2222 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2241 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2223 if (IS_ERR(mnt)) 2242 if (IS_ERR(mnt))
2224 panic("Can't create rootfs"); 2243 panic("Can't create rootfs");
2225 ns = kmalloc(sizeof(*ns), GFP_KERNEL); 2244 ns = create_mnt_ns(mnt);
2226 if (!ns) 2245 if (IS_ERR(ns))
2227 panic("Can't allocate initial namespace"); 2246 panic("Can't allocate initial namespace");
2228 atomic_set(&ns->count, 1);
2229 INIT_LIST_HEAD(&ns->list);
2230 init_waitqueue_head(&ns->poll);
2231 ns->event = 0;
2232 list_add(&mnt->mnt_list, &ns->list);
2233 ns->root = mnt;
2234 mnt->mnt_ns = ns;
2235 2247
2236 init_task.nsproxy->mnt_ns = ns; 2248 init_task.nsproxy->mnt_ns = ns;
2237 get_mnt_ns(ns); 2249 get_mnt_ns(ns);
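
The namespace.c changes add mnt_id_start and mnt_group_start as search hints so freed mount and peer-group IDs are reused and fresh allocations stay small, instead of every probe starting from slot 0 (or 1 for group IDs). Reduced to the bare pattern with hypothetical example_* names:

	static DEFINE_IDA(example_ida);
	static DEFINE_SPINLOCK(example_lock);
	static int example_id_start;

	static int example_alloc_id(int *out_id)
	{
		int res;

	retry:
		ida_pre_get(&example_ida, GFP_KERNEL);	/* preload outside the lock */
		spin_lock(&example_lock);
		res = ida_get_new_above(&example_ida, example_id_start, out_id);
		if (!res)
			example_id_start = *out_id + 1;	/* next search starts past us */
		spin_unlock(&example_lock);
		if (res == -EAGAIN)
			goto retry;
		return res;
	}

	static void example_free_id(int id)
	{
		spin_lock(&example_lock);
		ida_remove(&example_ida, id);
		if (example_id_start > id)
			example_id_start = id;		/* let the freed id be reused */
		spin_unlock(&example_lock);
	}

The init_mount_tree() hunk is a separate cleanup: the open-coded namespace setup is folded into create_mnt_ns(), which also changes the failure check from a NULL test to IS_ERR().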
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index af05b918cb5b..6dd48a4405b4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -10,6 +10,7 @@
10#include <linux/kthread.h> 10#include <linux/kthread.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/smp_lock.h>
13#include <linux/spinlock.h> 14#include <linux/spinlock.h>
14 15
15#include <linux/nfs4.h> 16#include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 89f98e9a024b..38d42c29fb92 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -29,7 +29,6 @@
29#include <linux/nfs_fs.h> 29#include <linux/nfs_fs.h>
30#include <linux/nfs_mount.h> 30#include <linux/nfs_mount.h>
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/smp_lock.h>
33#include <linux/pagevec.h> 32#include <linux/pagevec.h>
34#include <linux/namei.h> 33#include <linux/namei.h>
35#include <linux/mount.h> 34#include <linux/mount.h>
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0055b813ec2c..05062329b678 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/smp_lock.h>
30#include <linux/aio.h> 29#include <linux/aio.h>
31 30
32#include <asm/uaccess.h> 31#include <asm/uaccess.h>
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 46177cb87064..b35d2a616066 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -30,7 +30,6 @@
30#include <linux/nfs_idmap.h> 30#include <linux/nfs_idmap.h>
31#include <linux/vfs.h> 31#include <linux/vfs.h>
32#include <linux/namei.h> 32#include <linux/namei.h>
33#include <linux/mnt_namespace.h>
34#include <linux/security.h> 33#include <linux/security.h>
35 34
36#include <asm/system.h> 35#include <asm/system.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 64f87194d390..bd7938eda6a8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/nfs_mount.h> 30#include <linux/nfs_mount.h>
31#include <linux/nfs4_mount.h> 31#include <linux/nfs4_mount.h>
32#include <linux/lockd/bind.h> 32#include <linux/lockd/bind.h>
33#include <linux/smp_lock.h>
34#include <linux/seq_file.h> 33#include <linux/seq_file.h>
35#include <linux/mount.h> 34#include <linux/mount.h>
36#include <linux/nfs_idmap.h> 35#include <linux/nfs_idmap.h>
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 92ce43517814..ff0c080db59b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -45,7 +45,6 @@
45#include <linux/nfs4.h> 45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h> 46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h> 47#include <linux/nfs_page.h>
48#include <linux/smp_lock.h>
49#include <linux/namei.h> 48#include <linux/namei.h>
50#include <linux/mount.h> 49#include <linux/mount.h>
51#include <linux/module.h> 50#include <linux/module.h>
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 96c4ebfa46f4..73ea5e8d66ce 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,7 +18,6 @@
18#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
19#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/smp_lock.h>
22 21
23#include <asm/system.h> 22#include <asm/system.h>
24 23
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce728829f79a..0a0a2ff767c3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page)
202 struct nfs_server *nfss = NFS_SERVER(inode); 202 struct nfs_server *nfss = NFS_SERVER(inode);
203 203
204 if (atomic_long_inc_return(&nfss->writeback) > 204 if (atomic_long_inc_return(&nfss->writeback) >
205 NFS_CONGESTION_ON_THRESH) 205 NFS_CONGESTION_ON_THRESH) {
206 set_bdi_congested(&nfss->backing_dev_info, WRITE); 206 set_bdi_congested(&nfss->backing_dev_info,
207 BLK_RW_ASYNC);
208 }
207 } 209 }
208 return ret; 210 return ret;
209} 211}
@@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page)
215 217
216 end_page_writeback(page); 218 end_page_writeback(page);
217 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 219 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
218 clear_bdi_congested(&nfss->backing_dev_info, WRITE); 220 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
219} 221}
220 222
221/* 223/*
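
The write.c hunks follow the interface change where the bdi congestion helpers take BLK_RW_SYNC / BLK_RW_ASYNC rather than the old READ / WRITE constants; the throttling logic around the per-server writeback counter is untouched. Roughly, with illustrative names and thresholds (EXAMPLE_ON_THRESH, EXAMPLE_OFF_THRESH):

	#define EXAMPLE_ON_THRESH	64
	#define EXAMPLE_OFF_THRESH	32

	static void example_start_writeback(struct backing_dev_info *bdi,
					    atomic_long_t *nr_writeback)
	{
		if (atomic_long_inc_return(nr_writeback) > EXAMPLE_ON_THRESH)
			set_bdi_congested(bdi, BLK_RW_ASYNC);	/* throttle async writers */
	}

	static void example_end_writeback(struct backing_dev_info *bdi,
					  atomic_long_t *nr_writeback)
	{
		if (atomic_long_dec_return(nr_writeback) < EXAMPLE_OFF_THRESH)
			clear_bdi_congested(bdi, BLK_RW_ASYNC);
	}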
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1250fb978ac1..6d0847562d87 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -25,7 +25,6 @@
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/inet.h> 26#include <linux/inet.h>
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/smp_lock.h>
29#include <linux/ctype.h> 28#include <linux/ctype.h>
30 29
31#include <linux/nfs.h> 30#include <linux/nfs.h>
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d4c9884cd54b..492c79b7800b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -18,7 +18,6 @@
18#include <linux/unistd.h> 18#include <linux/unistd.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
21#include <linux/smp_lock.h>
22#include <linux/freezer.h> 21#include <linux/freezer.h>
23#include <linux/fs_struct.h> 22#include <linux/fs_struct.h>
24#include <linux/kthread.h> 23#include <linux/kthread.h>
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4145083dcf88..23341c1063bc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -678,7 +678,6 @@ __be32
678nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 678nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
679 int access, struct file **filp) 679 int access, struct file **filp)
680{ 680{
681 const struct cred *cred = current_cred();
682 struct dentry *dentry; 681 struct dentry *dentry;
683 struct inode *inode; 682 struct inode *inode;
684 int flags = O_RDONLY|O_LARGEFILE; 683 int flags = O_RDONLY|O_LARGEFILE;
@@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
733 vfs_dq_init(inode); 732 vfs_dq_init(inode);
734 } 733 }
735 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 734 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
736 flags, cred); 735 flags, current_cred());
737 if (IS_ERR(*filp)) 736 if (IS_ERR(*filp))
738 host_err = PTR_ERR(*filp); 737 host_err = PTR_ERR(*filp);
739 else 738 else
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 36df60b6d8a4..99d58a028b94 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -568,6 +568,7 @@ void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
568} 568}
569 569
570static struct lock_class_key nilfs_bmap_dat_lock_key; 570static struct lock_class_key nilfs_bmap_dat_lock_key;
571static struct lock_class_key nilfs_bmap_mdt_lock_key;
571 572
572/** 573/**
573 * nilfs_bmap_read - read a bmap from an inode 574 * nilfs_bmap_read - read a bmap from an inode
@@ -603,7 +604,11 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
603 bmap->b_ptr_type = NILFS_BMAP_PTR_VS; 604 bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
604 bmap->b_last_allocated_key = 0; 605 bmap->b_last_allocated_key = 0;
605 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; 606 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
607 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
606 break; 608 break;
609 case NILFS_IFILE_INO:
610 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
611 /* Fall through */
607 default: 612 default:
608 bmap->b_ptr_type = NILFS_BMAP_PTR_VM; 613 bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
609 bmap->b_last_allocated_key = 0; 614 bmap->b_last_allocated_key = 0;
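
The bmap.c hunk gives the metadata-file bmaps (including the ifile) a lock class of their own, separate from the DAT bmap, so lockdep can tell legitimate nesting between them apart from real recursion. The general recipe for splitting one lock type into several lockdep classes, with an invented example_obj:

	struct example_obj {
		struct rw_semaphore sem;
	};

	static struct lock_class_key example_meta_lock_key;
	static struct lock_class_key example_data_lock_key;

	static void example_init_object(struct example_obj *obj, bool is_meta)
	{
		init_rwsem(&obj->sem);
		/* Same lock type, two classes: nesting one under the other is legal. */
		lockdep_set_class(&obj->sem, is_meta ? &example_meta_lock_key
						     : &example_data_lock_key);
	}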
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 7d49813f66d6..aec942cf79e3 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -307,7 +307,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
307 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 307 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
308 if (ret < 0) { 308 if (ret < 0) {
309 if (ret != -ENOENT) 309 if (ret != -ENOENT)
310 goto out_header; 310 break;
311 /* skip hole */ 311 /* skip hole */
312 ret = 0; 312 ret = 0;
313 continue; 313 continue;
@@ -340,7 +340,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
340 continue; 340 continue;
341 printk(KERN_ERR "%s: cannot delete block\n", 341 printk(KERN_ERR "%s: cannot delete block\n",
342 __func__); 342 __func__);
343 goto out_header; 343 break;
344 } 344 }
345 } 345 }
346 346
@@ -358,7 +358,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
358 kunmap_atomic(kaddr, KM_USER0); 358 kunmap_atomic(kaddr, KM_USER0);
359 } 359 }
360 360
361 out_header:
362 brelse(header_bh); 361 brelse(header_bh);
363 362
364 out_sem: 363 out_sem:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 0b2710e2d565..8927ca27e6f7 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -134,15 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
134 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 134 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
135 req->pr_entry_bh, kaddr); 135 req->pr_entry_bh, kaddr);
136 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 136 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
137 if (entry->de_blocknr != cpu_to_le64(0) ||
138 entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) {
139 printk(KERN_CRIT
140 "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n",
141 __func__, (unsigned long long)req->pr_entry_nr,
142 (unsigned long long)le64_to_cpu(entry->de_start),
143 (unsigned long long)le64_to_cpu(entry->de_end),
144 (unsigned long long)le64_to_cpu(entry->de_blocknr));
145 }
146 entry->de_blocknr = cpu_to_le64(blocknr); 137 entry->de_blocknr = cpu_to_le64(blocknr);
147 kunmap_atomic(kaddr, KM_USER0); 138 kunmap_atomic(kaddr, KM_USER0);
148 139
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 54100acc1102..1a4fa04cf071 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -43,7 +43,6 @@
43 */ 43 */
44 44
45#include <linux/pagemap.h> 45#include <linux/pagemap.h>
46#include <linux/smp_lock.h>
47#include "nilfs.h" 46#include "nilfs.h"
48#include "page.h" 47#include "page.h"
49 48
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2696d6b513b7..fe9d8f2a13f8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
309 /* ii->i_file_acl = 0; */ 309 /* ii->i_file_acl = 0; */
310 /* ii->i_dir_acl = 0; */ 310 /* ii->i_dir_acl = 0; */
311 ii->i_dir_start_lookup = 0; 311 ii->i_dir_start_lookup = 0;
312#ifdef CONFIG_NILFS_FS_POSIX_ACL
313 ii->i_acl = NULL;
314 ii->i_default_acl = NULL;
315#endif
316 ii->i_cno = 0; 312 ii->i_cno = 0;
317 nilfs_set_inode_flags(inode); 313 nilfs_set_inode_flags(inode);
318 spin_lock(&sbi->s_next_gen_lock); 314 spin_lock(&sbi->s_next_gen_lock);
@@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
434 430
435 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); 431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
436 432
437#ifdef CONFIG_NILFS_FS_POSIX_ACL
438 ii->i_acl = NILFS_ACL_NOT_CACHED;
439 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
440#endif
441 if (nilfs_read_inode_common(inode, raw_inode)) 433 if (nilfs_read_inode_common(inode, raw_inode))
442 goto failed_unmap; 434 goto failed_unmap;
443 435
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index edf6a59d9f2a..724c63766e82 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -58,10 +58,6 @@ struct nilfs_inode_info {
58 */ 58 */
59 struct rw_semaphore xattr_sem; 59 struct rw_semaphore xattr_sem;
60#endif 60#endif
61#ifdef CONFIG_NILFS_POSIX_ACL
62 struct posix_acl *i_acl;
63 struct posix_acl *i_default_acl;
64#endif
65 struct buffer_head *i_bh; /* i_bh contains a new or dirty 61 struct buffer_head *i_bh; /* i_bh contains a new or dirty
66 disk inode */ 62 disk inode */
67 struct inode vfs_inode; 63 struct inode vfs_inode;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index aa977549919e..8b5e4778cf28 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1829,26 +1829,13 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1829 err = nilfs_segbuf_write(segbuf, &wi); 1829 err = nilfs_segbuf_write(segbuf, &wi);
1830 1830
1831 res = nilfs_segbuf_wait(segbuf, &wi); 1831 res = nilfs_segbuf_wait(segbuf, &wi);
1832 err = unlikely(err) ? : res; 1832 err = err ? : res;
1833 if (unlikely(err)) 1833 if (err)
1834 return err; 1834 return err;
1835 } 1835 }
1836 return 0; 1836 return 0;
1837} 1837}
1838 1838
1839static int nilfs_page_has_uncleared_buffer(struct page *page)
1840{
1841 struct buffer_head *head, *bh;
1842
1843 head = bh = page_buffers(page);
1844 do {
1845 if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers))
1846 return 1;
1847 bh = bh->b_this_page;
1848 } while (bh != head);
1849 return 0;
1850}
1851
1852static void __nilfs_end_page_io(struct page *page, int err) 1839static void __nilfs_end_page_io(struct page *page, int err)
1853{ 1840{
1854 if (!err) { 1841 if (!err) {
@@ -1872,12 +1859,11 @@ static void nilfs_end_page_io(struct page *page, int err)
1872 if (!page) 1859 if (!page)
1873 return; 1860 return;
1874 1861
1875 if (buffer_nilfs_node(page_buffers(page)) && 1862 if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page))
1876 nilfs_page_has_uncleared_buffer(page)) 1863 /*
1877 /* For b-tree node pages, this function may be called twice 1864 * For b-tree node pages, this function may be called twice
1878 or more because they might be split in a segment. 1865 * or more because they might be split in a segment.
1879 This check assures that cleanup has been done for all 1866 */
1880 buffers in a split btnode page. */
1881 return; 1867 return;
1882 1868
1883 __nilfs_end_page_io(page, err); 1869 __nilfs_end_page_io(page, err);
@@ -1940,7 +1926,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1940 } 1926 }
1941 if (bh->b_page != fs_page) { 1927 if (bh->b_page != fs_page) {
1942 nilfs_end_page_io(fs_page, err); 1928 nilfs_end_page_io(fs_page, err);
1943 if (unlikely(fs_page == failed_page)) 1929 if (fs_page && fs_page == failed_page)
1944 goto done; 1930 goto done;
1945 fs_page = bh->b_page; 1931 fs_page = bh->b_page;
1946 } 1932 }
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index ab785f85aa50..8e2ec43b18f4 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode)
189{ 189{
190 struct nilfs_inode_info *ii = NILFS_I(inode); 190 struct nilfs_inode_info *ii = NILFS_I(inode);
191 191
192#ifdef CONFIG_NILFS_POSIX_ACL
193 if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) {
194 posix_acl_release(ii->i_acl);
195 ii->i_acl = NILFS_ACL_NOT_CACHED;
196 }
197 if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) {
198 posix_acl_release(ii->i_default_acl);
199 ii->i_default_acl = NILFS_ACL_NOT_CACHED;
200 }
201#endif
202 /* 192 /*
203 * Free resources allocated in nilfs_read_inode(), here. 193 * Free resources allocated in nilfs_read_inode(), here.
204 */ 194 */
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ff231ad23895..ff27a2965844 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on)
296static int inotify_release(struct inode *ignored, struct file *file) 296static int inotify_release(struct inode *ignored, struct file *file)
297{ 297{
298 struct fsnotify_group *group = file->private_data; 298 struct fsnotify_group *group = file->private_data;
299 struct user_struct *user = group->inotify_data.user;
299 300
300 fsnotify_clear_marks_by_group(group); 301 fsnotify_clear_marks_by_group(group);
301 302
302 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 303 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
303 fsnotify_put_group(group); 304 fsnotify_put_group(group);
304 305
306 atomic_dec(&user->inotify_devs);
307
305 return 0; 308 return 0;
306} 309}
307 310
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6cdeaa76f27f..110bb57c46ab 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl {
92 enum ocfs2_unblock_action unblock_action; 92 enum ocfs2_unblock_action unblock_action;
93}; 93};
94 94
95/* Lockdep class keys */
96struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES];
97
95static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 98static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
96 int new_level); 99 int new_level);
97static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); 100static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
@@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
317 u32 dlm_flags); 320 u32 dlm_flags);
318static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, 321static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
319 int wanted); 322 int wanted);
320static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 323static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
321 struct ocfs2_lock_res *lockres, 324 struct ocfs2_lock_res *lockres,
322 int level); 325 int level, unsigned long caller_ip);
326static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb,
327 struct ocfs2_lock_res *lockres,
328 int level)
329{
330 __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_);
331}
332
323static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); 333static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
324static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); 334static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
325static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); 335static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
@@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
489 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); 499 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
490 500
491 ocfs2_init_lock_stats(res); 501 ocfs2_init_lock_stats(res);
502#ifdef CONFIG_DEBUG_LOCK_ALLOC
503 if (type != OCFS2_LOCK_TYPE_OPEN)
504 lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type],
505 &lockdep_keys[type], 0);
506 else
507 res->l_lockdep_map.key = NULL;
508#endif
492} 509}
493 510
494void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) 511void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
@@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
644static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, 661static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
645 struct ocfs2_super *osb) 662 struct ocfs2_super *osb)
646{ 663{
647 struct ocfs2_orphan_scan_lvb *lvb;
648
649 ocfs2_lock_res_init_once(res); 664 ocfs2_lock_res_init_once(res);
650 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); 665 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
651 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, 666 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
652 &ocfs2_orphan_scan_lops, osb); 667 &ocfs2_orphan_scan_lops, osb);
653 lvb = ocfs2_dlm_lvb(&res->l_lksb);
654 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
655} 668}
656 669
657void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, 670void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
@@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw,
1256 return ret; 1269 return ret;
1257} 1270}
1258 1271
1259static int ocfs2_cluster_lock(struct ocfs2_super *osb, 1272static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
1260 struct ocfs2_lock_res *lockres, 1273 struct ocfs2_lock_res *lockres,
1261 int level, 1274 int level,
1262 u32 lkm_flags, 1275 u32 lkm_flags,
1263 int arg_flags) 1276 int arg_flags,
1277 int l_subclass,
1278 unsigned long caller_ip)
1264{ 1279{
1265 struct ocfs2_mask_waiter mw; 1280 struct ocfs2_mask_waiter mw;
1266 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); 1281 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
@@ -1403,13 +1418,37 @@ out:
1403 } 1418 }
1404 ocfs2_update_lock_stats(lockres, level, &mw, ret); 1419 ocfs2_update_lock_stats(lockres, level, &mw, ret);
1405 1420
1421#ifdef CONFIG_DEBUG_LOCK_ALLOC
1422 if (!ret && lockres->l_lockdep_map.key != NULL) {
1423 if (level == DLM_LOCK_PR)
1424 rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass,
1425 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1426 caller_ip);
1427 else
1428 rwsem_acquire(&lockres->l_lockdep_map, l_subclass,
1429 !!(arg_flags & OCFS2_META_LOCK_NOQUEUE),
1430 caller_ip);
1431 }
1432#endif
1406 mlog_exit(ret); 1433 mlog_exit(ret);
1407 return ret; 1434 return ret;
1408} 1435}
1409 1436
1410static void ocfs2_cluster_unlock(struct ocfs2_super *osb, 1437static inline int ocfs2_cluster_lock(struct ocfs2_super *osb,
1411 struct ocfs2_lock_res *lockres, 1438 struct ocfs2_lock_res *lockres,
1412 int level) 1439 int level,
1440 u32 lkm_flags,
1441 int arg_flags)
1442{
1443 return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags,
1444 0, _RET_IP_);
1445}
1446
1447
1448static void __ocfs2_cluster_unlock(struct ocfs2_super *osb,
1449 struct ocfs2_lock_res *lockres,
1450 int level,
1451 unsigned long caller_ip)
1413{ 1452{
1414 unsigned long flags; 1453 unsigned long flags;
1415 1454
@@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1418 ocfs2_dec_holders(lockres, level); 1457 ocfs2_dec_holders(lockres, level);
1419 ocfs2_downconvert_on_unlock(osb, lockres); 1458 ocfs2_downconvert_on_unlock(osb, lockres);
1420 spin_unlock_irqrestore(&lockres->l_lock, flags); 1459 spin_unlock_irqrestore(&lockres->l_lock, flags);
1460#ifdef CONFIG_DEBUG_LOCK_ALLOC
1461 if (lockres->l_lockdep_map.key != NULL)
1462 rwsem_release(&lockres->l_lockdep_map, 1, caller_ip);
1463#endif
1421 mlog_exit_void(); 1464 mlog_exit_void();
1422} 1465}
1423 1466
@@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
1989{ 2032{
1990 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2033 struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
1991 2034
1992 if (lvb->lvb_version == OCFS2_LVB_VERSION 2035 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)
2036 && lvb->lvb_version == OCFS2_LVB_VERSION
1993 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) 2037 && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
1994 return 1; 2038 return 1;
1995 return 0; 2039 return 0;
@@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode,
2162 * returns < 0 error if the callback will never be called, otherwise 2206 * returns < 0 error if the callback will never be called, otherwise
2163 * the result of the lock will be communicated via the callback. 2207 * the result of the lock will be communicated via the callback.
2164 */ 2208 */
2165int ocfs2_inode_lock_full(struct inode *inode, 2209int ocfs2_inode_lock_full_nested(struct inode *inode,
2166 struct buffer_head **ret_bh, 2210 struct buffer_head **ret_bh,
2167 int ex, 2211 int ex,
2168 int arg_flags) 2212 int arg_flags,
2213 int subclass)
2169{ 2214{
2170 int status, level, acquired; 2215 int status, level, acquired;
2171 u32 dlm_flags; 2216 u32 dlm_flags;
@@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode,
2203 if (arg_flags & OCFS2_META_LOCK_NOQUEUE) 2248 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
2204 dlm_flags |= DLM_LKF_NOQUEUE; 2249 dlm_flags |= DLM_LKF_NOQUEUE;
2205 2250
2206 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); 2251 status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
2252 arg_flags, subclass, _RET_IP_);
2207 if (status < 0) { 2253 if (status < 0) {
2208 if (status != -EAGAIN && status != -EIOCBRETRY) 2254 if (status != -EAGAIN && status != -EIOCBRETRY)
2209 mlog_errno(status); 2255 mlog_errno(status);
@@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode,
2369 mlog_exit_void(); 2415 mlog_exit_void();
2370} 2416}
2371 2417
2372int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) 2418int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno)
2373{ 2419{
2374 struct ocfs2_lock_res *lockres; 2420 struct ocfs2_lock_res *lockres;
2375 struct ocfs2_orphan_scan_lvb *lvb; 2421 struct ocfs2_orphan_scan_lvb *lvb;
2376 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2377 int status = 0; 2422 int status = 0;
2378 2423
2424 if (ocfs2_is_hard_readonly(osb))
2425 return -EROFS;
2426
2427 if (ocfs2_mount_local(osb))
2428 return 0;
2429
2379 lockres = &osb->osb_orphan_scan.os_lockres; 2430 lockres = &osb->osb_orphan_scan.os_lockres;
2380 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); 2431 status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0);
2381 if (status < 0) 2432 if (status < 0)
2382 return status; 2433 return status;
2383 2434
2384 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2435 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2385 if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) 2436 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
2437 lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
2386 *seqno = be32_to_cpu(lvb->lvb_os_seqno); 2438 *seqno = be32_to_cpu(lvb->lvb_os_seqno);
2439 else
2440 *seqno = osb->osb_orphan_scan.os_seqno + 1;
2441
2387 return status; 2442 return status;
2388} 2443}
2389 2444
2390void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) 2445void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno)
2391{ 2446{
2392 struct ocfs2_lock_res *lockres; 2447 struct ocfs2_lock_res *lockres;
2393 struct ocfs2_orphan_scan_lvb *lvb; 2448 struct ocfs2_orphan_scan_lvb *lvb;
2394 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
2395 2449
2396 lockres = &osb->osb_orphan_scan.os_lockres; 2450 if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) {
2397 lvb = ocfs2_dlm_lvb(&lockres->l_lksb); 2451 lockres = &osb->osb_orphan_scan.os_lockres;
2398 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; 2452 lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
2399 lvb->lvb_os_seqno = cpu_to_be32(seqno); 2453 lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
2400 ocfs2_cluster_unlock(osb, lockres, level); 2454 lvb->lvb_os_seqno = cpu_to_be32(seqno);
2455 ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX);
2456 }
2401} 2457}
2402 2458
2403int ocfs2_super_lock(struct ocfs2_super *osb, 2459int ocfs2_super_lock(struct ocfs2_super *osb,
@@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3627 struct ocfs2_global_disk_dqinfo *gdinfo; 3683 struct ocfs2_global_disk_dqinfo *gdinfo;
3628 int status = 0; 3684 int status = 0;
3629 3685
3630 if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { 3686 if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) &&
3687 lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
3631 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); 3688 info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
3632 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); 3689 info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
3633 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); 3690 oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
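
The dlmglue.c side of the series wires ocfs2 cluster locks into lockdep: each lock resource carries a struct lockdep_map initialised with a per-lock-type class key, acquires are reported through the rwsem annotations (shared for DLM_LOCK_PR, exclusive otherwise), releases are reported on unlock, and the caller's return address is threaded through so reports name the real call site. Stripped of the ocfs2 specifics, the annotation pattern looks roughly like this, with example_lock and example_key standing in for the real structures:

	struct example_lock {
		int state;			/* real locking state omitted */
	#ifdef CONFIG_DEBUG_LOCK_ALLOC
		struct lockdep_map dep_map;
	#endif
	};

	static struct lock_class_key example_key;

	static void example_lock_init(struct example_lock *lock, const char *name)
	{
	#ifdef CONFIG_DEBUG_LOCK_ALLOC
		lockdep_init_map(&lock->dep_map, name, &example_key, 0);
	#endif
	}

	static void example_lock_acquire(struct example_lock *lock, int shared,
					 int subclass, unsigned long caller_ip)
	{
		/* ... actually take the cluster lock here ... */
	#ifdef CONFIG_DEBUG_LOCK_ALLOC
		if (shared)
			rwsem_acquire_read(&lock->dep_map, subclass, 0, caller_ip);
		else
			rwsem_acquire(&lock->dep_map, subclass, 0, caller_ip);
	#endif
	}

	static void example_lock_release(struct example_lock *lock,
					 unsigned long caller_ip)
	{
		/* ... actually drop the cluster lock here ... */
	#ifdef CONFIG_DEBUG_LOCK_ALLOC
		rwsem_release(&lock->dep_map, 1, caller_ip);
	#endif
	}

Callers pass _RET_IP_ for caller_ip, and the dlmglue.h hunk below adds the OI_LS_* subclasses so inode cluster locks taken on a parent directory and on an entry within it can be told apart.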
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 31b90d7b8f51..7553836931de 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb {
78/* don't block waiting for the downconvert thread, instead return -EAGAIN */ 78/* don't block waiting for the downconvert thread, instead return -EAGAIN */
79#define OCFS2_LOCK_NONBLOCK (0x04) 79#define OCFS2_LOCK_NONBLOCK (0x04)
80 80
81/* Locking subclasses of inode cluster lock */
82enum {
83 OI_LS_NORMAL = 0,
84 OI_LS_PARENT,
85 OI_LS_RENAME1,
86 OI_LS_RENAME2,
87};
88
81int ocfs2_dlm_init(struct ocfs2_super *osb); 89int ocfs2_dlm_init(struct ocfs2_super *osb);
82void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); 90void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending);
83void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); 91void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
@@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode);
104int ocfs2_inode_lock_atime(struct inode *inode, 112int ocfs2_inode_lock_atime(struct inode *inode,
105 struct vfsmount *vfsmnt, 113 struct vfsmount *vfsmnt,
106 int *level); 114 int *level);
107int ocfs2_inode_lock_full(struct inode *inode, 115int ocfs2_inode_lock_full_nested(struct inode *inode,
108 struct buffer_head **ret_bh, 116 struct buffer_head **ret_bh,
109 int ex, 117 int ex,
110 int arg_flags); 118 int arg_flags,
119 int subclass);
111int ocfs2_inode_lock_with_page(struct inode *inode, 120int ocfs2_inode_lock_with_page(struct inode *inode,
112 struct buffer_head **ret_bh, 121 struct buffer_head **ret_bh,
113 int ex, 122 int ex,
114 struct page *page); 123 struct page *page);
124/* Variants without special locking class or flags */
125#define ocfs2_inode_lock_full(i, r, e, f)\
126 ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
127#define ocfs2_inode_lock_nested(i, b, e, s)\
128 ocfs2_inode_lock_full_nested(i, b, e, 0, s)
115/* 99% of the time we don't want to supply any additional flags -- 129/* 99% of the time we don't want to supply any additional flags --
116 * those are for very specific cases only. */ 130 * those are for very specific cases only. */
117#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) 131#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL)
118void ocfs2_inode_unlock(struct inode *inode, 132void ocfs2_inode_unlock(struct inode *inode,
119 int ex); 133 int ex);
120int ocfs2_super_lock(struct ocfs2_super *osb, 134int ocfs2_super_lock(struct ocfs2_super *osb,
121 int ex); 135 int ex);
122void ocfs2_super_unlock(struct ocfs2_super *osb, 136void ocfs2_super_unlock(struct ocfs2_super *osb,
123 int ex); 137 int ex);
124int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); 138int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno);
125void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); 139void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno);
126 140
127int ocfs2_rename_lock(struct ocfs2_super *osb); 141int ocfs2_rename_lock(struct ocfs2_super *osb);
128void ocfs2_rename_unlock(struct ocfs2_super *osb); 142void ocfs2_rename_unlock(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 07267e0da909..62442e413a00 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2026 size_t len, 2026 size_t len,
2027 unsigned int flags) 2027 unsigned int flags)
2028{ 2028{
2029 int ret = 0; 2029 int ret = 0, lock_level = 0;
2030 struct inode *inode = in->f_path.dentry->d_inode; 2030 struct inode *inode = in->f_path.dentry->d_inode;
2031 2031
2032 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, 2032 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
@@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in,
2037 /* 2037 /*
2038 * See the comment in ocfs2_file_aio_read() 2038 * See the comment in ocfs2_file_aio_read()
2039 */ 2039 */
2040 ret = ocfs2_inode_lock(inode, NULL, 0); 2040 ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
2041 if (ret < 0) { 2041 if (ret < 0) {
2042 mlog_errno(ret); 2042 mlog_errno(ret);
2043 goto bail; 2043 goto bail;
2044 } 2044 }
2045 ocfs2_inode_unlock(inode, 0); 2045 ocfs2_inode_unlock(inode, lock_level);
2046 2046
2047 ret = generic_file_splice_read(in, ppos, pipe, len, flags); 2047 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
2048 2048
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 10e1fa87396a..4dc8890ba316 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -215,6 +215,8 @@ bail:
215static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) 215static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
216{ 216{
217 struct ocfs2_find_inode_args *args = opaque; 217 struct ocfs2_find_inode_args *args = opaque;
218 static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
219 ocfs2_file_ip_alloc_sem_key;
218 220
219 mlog_entry("inode = %p, opaque = %p\n", inode, opaque); 221 mlog_entry("inode = %p, opaque = %p\n", inode, opaque);
220 222
@@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
223 if (args->fi_sysfile_type != 0) 225 if (args->fi_sysfile_type != 0)
224 lockdep_set_class(&inode->i_mutex, 226 lockdep_set_class(&inode->i_mutex,
225 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); 227 &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
228 if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
229 args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
230 args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
231 args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
232 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
233 &ocfs2_quota_ip_alloc_sem_key);
234 else
235 lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
236 &ocfs2_file_ip_alloc_sem_key);
226 237
227 mlog_exit(0); 238 mlog_exit(0);
228 return 0; 239 return 0;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 9fcd36dcc9a0..467b413bec21 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,7 +7,6 @@
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/smp_lock.h>
11 10
12#define MLOG_MASK_PREFIX ML_INODE 11#define MLOG_MASK_PREFIX ML_INODE
13#include <cluster/masklog.h> 12#include <cluster/masklog.h>
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4a3b9e6b31ad..f033760ecbea 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1880 1880
1881 os = &osb->osb_orphan_scan; 1881 os = &osb->osb_orphan_scan;
1882 1882
1883 status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); 1883 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1884 goto out;
1885
1886 status = ocfs2_orphan_scan_lock(osb, &seqno);
1884 if (status < 0) { 1887 if (status < 0) {
1885 if (status != -EAGAIN) 1888 if (status != -EAGAIN)
1886 mlog_errno(status); 1889 mlog_errno(status);
1887 goto out; 1890 goto out;
1888 } 1891 }
1889 1892
1893 /* Do no queue the tasks if the volume is being umounted */
1894 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
1895 goto unlock;
1896
1890 if (os->os_seqno != seqno) { 1897 if (os->os_seqno != seqno) {
1891 os->os_seqno = seqno; 1898 os->os_seqno = seqno;
1892 goto unlock; 1899 goto unlock;
@@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
1903 os->os_count++; 1910 os->os_count++;
1904 os->os_scantime = CURRENT_TIME; 1911 os->os_scantime = CURRENT_TIME;
1905unlock: 1912unlock:
1906 ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); 1913 ocfs2_orphan_scan_unlock(osb, seqno);
1907out: 1914out:
1908 return; 1915 return;
1909} 1916}
@@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
1920 1927
1921 mutex_lock(&os->os_lock); 1928 mutex_lock(&os->os_lock);
1922 ocfs2_queue_orphan_scan(osb); 1929 ocfs2_queue_orphan_scan(osb);
1923 schedule_delayed_work(&os->os_orphan_scan_work, 1930 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1924 ocfs2_orphan_scan_timeout()); 1931 schedule_delayed_work(&os->os_orphan_scan_work,
1932 ocfs2_orphan_scan_timeout());
1925 mutex_unlock(&os->os_lock); 1933 mutex_unlock(&os->os_lock);
1926} 1934}
1927 1935
@@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
1930 struct ocfs2_orphan_scan *os; 1938 struct ocfs2_orphan_scan *os;
1931 1939
1932 os = &osb->osb_orphan_scan; 1940 os = &osb->osb_orphan_scan;
1933 mutex_lock(&os->os_lock); 1941 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) {
1934 cancel_delayed_work(&os->os_orphan_scan_work); 1942 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1935 mutex_unlock(&os->os_lock); 1943 mutex_lock(&os->os_lock);
1944 cancel_delayed_work(&os->os_orphan_scan_work);
1945 mutex_unlock(&os->os_lock);
1946 }
1936} 1947}
1937 1948
1938int ocfs2_orphan_scan_init(struct ocfs2_super *osb) 1949void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1939{ 1950{
1940 struct ocfs2_orphan_scan *os; 1951 struct ocfs2_orphan_scan *os;
1941 1952
1942 os = &osb->osb_orphan_scan; 1953 os = &osb->osb_orphan_scan;
1943 os->os_osb = osb; 1954 os->os_osb = osb;
1944 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0;
1945 os->os_scantime = CURRENT_TIME; 1957 os->os_scantime = CURRENT_TIME;
1946 mutex_init(&os->os_lock); 1958 mutex_init(&os->os_lock);
1947 1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1948 INIT_DELAYED_WORK(&os->os_orphan_scan_work, 1960
1949 ocfs2_orphan_scan_work); 1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1950 schedule_delayed_work(&os->os_orphan_scan_work, 1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1951 ocfs2_orphan_scan_timeout()); 1963 else {
1952 return 0; 1964 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1965 schedule_delayed_work(&os->os_orphan_scan_work,
1966 ocfs2_orphan_scan_timeout());
1967 }
1953} 1968}
1954 1969
1955struct ocfs2_orphan_filldir_priv { 1970struct ocfs2_orphan_filldir_priv {
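
The journal.c hunks above add an ACTIVE/INACTIVE state to the orphan scan: a pass bails out early if the volume is being unmounted, the delayed work is re-armed only while the state is still ACTIVE, and the stop path flips the state before cancelling the work. The sketch below shows that shape with plain pthreads and a C11 atomic flag; it is only an illustration of the pattern, not the kernel's delayed-work machinery.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int scan_active = 1;              /* ORPHAN_SCAN_ACTIVE stand-in */
static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;

static void one_scan_pass(void)
{
        /* Bail out if shutdown already flipped the state, mirroring the
         * early return added at the top of ocfs2_queue_orphan_scan(). */
        if (!atomic_load(&scan_active))
                return;
        printf("orphan scan pass\n");
}

static void *scan_worker(void *arg)
{
        int again = 1;

        (void)arg;
        while (again) {
                pthread_mutex_lock(&work_lock);
                one_scan_pass();
                /* Re-arm the next pass only while still active. */
                again = atomic_load(&scan_active);
                pthread_mutex_unlock(&work_lock);
                if (again)
                        sleep(1);       /* stands in for the delayed-work timeout */
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, scan_worker, NULL);
        sleep(3);
        atomic_store(&scan_active, 0);  /* ocfs2_orphan_scan_stop() analogue */
        pthread_join(t, NULL);
        return 0;
}
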
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 61045eeb3f6e..5432c7f79cc6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
144} 144}
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147int ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 150
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 33464c6b60a2..8601f934010b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
118 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, 118 mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
119 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); 119 dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
120 120
121 status = ocfs2_inode_lock(dir, NULL, 0); 121 status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT);
122 if (status < 0) { 122 if (status < 0) {
123 if (status != -ENOENT) 123 if (status != -ENOENT)
124 mlog_errno(status); 124 mlog_errno(status);
@@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry,
636 if (S_ISDIR(inode->i_mode)) 636 if (S_ISDIR(inode->i_mode))
637 return -EPERM; 637 return -EPERM;
638 638
639 err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); 639 err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
640 if (err < 0) { 640 if (err < 0) {
641 if (err != -ENOENT) 641 if (err != -ENOENT)
642 mlog_errno(err); 642 mlog_errno(err);
@@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir,
800 return -EPERM; 800 return -EPERM;
801 } 801 }
802 802
803 status = ocfs2_inode_lock(dir, &parent_node_bh, 1); 803 status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1,
804 OI_LS_PARENT);
804 if (status < 0) { 805 if (status < 0) {
805 if (status != -ENOENT) 806 if (status != -ENOENT)
806 mlog_errno(status); 807 mlog_errno(status);
@@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
978 inode1 = tmpinode; 979 inode1 = tmpinode;
979 } 980 }
980 /* lock id2 */ 981 /* lock id2 */
981 status = ocfs2_inode_lock(inode2, bh2, 1); 982 status = ocfs2_inode_lock_nested(inode2, bh2, 1,
983 OI_LS_RENAME1);
982 if (status < 0) { 984 if (status < 0) {
983 if (status != -ENOENT) 985 if (status != -ENOENT)
984 mlog_errno(status); 986 mlog_errno(status);
@@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
987 } 989 }
988 990
989 /* lock id1 */ 991 /* lock id1 */
990 status = ocfs2_inode_lock(inode1, bh1, 1); 992 status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
991 if (status < 0) { 993 if (status < 0) {
992 /* 994 /*
993 * An error return must mean that no cluster locks 995 * An error return must mean that no cluster locks
@@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir,
1103 * won't have to concurrently downconvert the inode and the 1105 * won't have to concurrently downconvert the inode and the
1104 * dentry locks. 1106 * dentry locks.
1105 */ 1107 */
1106 status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); 1108 status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1,
1109 OI_LS_PARENT);
1107 if (status < 0) { 1110 if (status < 0) {
1108 if (status != -ENOENT) 1111 if (status != -ENOENT)
1109 mlog_errno(status); 1112 mlog_errno(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 18c1d9ec1c93..c9345ebb8493 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,6 +34,7 @@
34#include <linux/workqueue.h> 34#include <linux/workqueue.h>
35#include <linux/kref.h> 35#include <linux/kref.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/lockdep.h>
37#ifndef CONFIG_OCFS2_COMPAT_JBD 38#ifndef CONFIG_OCFS2_COMPAT_JBD
38# include <linux/jbd2.h> 39# include <linux/jbd2.h>
39#else 40#else
@@ -152,6 +153,14 @@ struct ocfs2_lock_res {
152 unsigned int l_lock_max_exmode; /* Max wait for EX */ 153 unsigned int l_lock_max_exmode; /* Max wait for EX */
153 unsigned int l_lock_refresh; /* Disk refreshes */ 154 unsigned int l_lock_refresh; /* Disk refreshes */
154#endif 155#endif
156#ifdef CONFIG_DEBUG_LOCK_ALLOC
157 struct lockdep_map l_lockdep_map;
158#endif
159};
160
161enum ocfs2_orphan_scan_state {
162 ORPHAN_SCAN_ACTIVE,
163 ORPHAN_SCAN_INACTIVE
155}; 164};
156 165
157struct ocfs2_orphan_scan { 166struct ocfs2_orphan_scan {
@@ -162,6 +171,7 @@ struct ocfs2_orphan_scan {
162 struct timespec os_scantime; /* time this node ran the scan */ 171 struct timespec os_scantime; /* time this node ran the scan */
163 u32 os_count; /* tracks node specific scans */ 172 u32 os_count; /* tracks node specific scans */
164 u32 os_seqno; /* tracks cluster wide scans */ 173 u32 os_seqno; /* tracks cluster wide scans */
174 atomic_t os_state; /* ACTIVE or INACTIVE */
165}; 175};
166 176
167struct ocfs2_dlm_debug { 177struct ocfs2_dlm_debug {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index fcd120f1493a..3f661376a2de 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
236 return dlm_status_to_errno(lksb->lksb_o2dlm.status); 236 return dlm_status_to_errno(lksb->lksb_o2dlm.status);
237} 237}
238 238
239/*
240 * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB
241 * contents, it will zero out the LVB. Thus the caller can always trust
242 * the contents.
243 */
244static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
245{
246 return 1;
247}
248
239static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) 249static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb)
240{ 250{
241 return (void *)(lksb->lksb_o2dlm.lvb); 251 return (void *)(lksb->lksb_o2dlm.lvb);
@@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = {
354 .dlm_lock = o2cb_dlm_lock, 364 .dlm_lock = o2cb_dlm_lock,
355 .dlm_unlock = o2cb_dlm_unlock, 365 .dlm_unlock = o2cb_dlm_unlock,
356 .lock_status = o2cb_dlm_lock_status, 366 .lock_status = o2cb_dlm_lock_status,
367 .lvb_valid = o2cb_dlm_lvb_valid,
357 .lock_lvb = o2cb_dlm_lvb, 368 .lock_lvb = o2cb_dlm_lvb,
358 .dump_lksb = o2cb_dump_lksb, 369 .dump_lksb = o2cb_dump_lksb,
359}; 370};
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 9b76d41a8ac6..ff4c798a5635 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
738 return lksb->lksb_fsdlm.sb_status; 738 return lksb->lksb_fsdlm.sb_status;
739} 739}
740 740
741static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
742{
743 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID;
744
745 return !invalid;
746}
747
741static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) 748static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
742{ 749{
743 if (!lksb->lksb_fsdlm.sb_lvbptr) 750 if (!lksb->lksb_fsdlm.sb_lvbptr)
@@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
873 .dlm_lock = user_dlm_lock, 880 .dlm_lock = user_dlm_lock,
874 .dlm_unlock = user_dlm_unlock, 881 .dlm_unlock = user_dlm_unlock,
875 .lock_status = user_dlm_lock_status, 882 .lock_status = user_dlm_lock_status,
883 .lvb_valid = user_dlm_lvb_valid,
876 .lock_lvb = user_dlm_lvb, 884 .lock_lvb = user_dlm_lvb,
877 .plock = user_plock, 885 .plock = user_plock,
878 .dump_lksb = user_dlm_dump_lksb, 886 .dump_lksb = user_dlm_dump_lksb,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 68b668b0e60a..3f2f1c45b7b6 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -6,7 +6,7 @@
6 * Code which implements an OCFS2 specific interface to underlying 6 * Code which implements an OCFS2 specific interface to underlying
7 * cluster stacks. 7 * cluster stacks.
8 * 8 *
9 * Copyright (C) 2007 Oracle. All rights reserved. 9 * Copyright (C) 2007, 2009 Oracle. All rights reserved.
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public 12 * modify it under the terms of the GNU General Public
@@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
271} 271}
272EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 272EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
273 273
274/* 274int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb)
275 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we 275{
276 * don't cast at the glue level. The real answer is that the header 276 return active_stack->sp_ops->lvb_valid(lksb);
277 * ordering is nigh impossible. 277}
278 */ 278EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid);
279
279void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 280void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
280{ 281{
281 return active_stack->sp_ops->lock_lvb(lksb); 282 return active_stack->sp_ops->lock_lvb(lksb);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index c571af375ef8..03a44d60eac9 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -186,6 +186,11 @@ struct ocfs2_stack_operations {
186 int (*lock_status)(union ocfs2_dlm_lksb *lksb); 186 int (*lock_status)(union ocfs2_dlm_lksb *lksb);
187 187
188 /* 188 /*
189 * Return non-zero if the LVB is valid.
190 */
191 int (*lvb_valid)(union ocfs2_dlm_lksb *lksb);
192
193 /*
189 * Pull the lvb pointer off of the stack-specific lksb. 194 * Pull the lvb pointer off of the stack-specific lksb.
190 */ 195 */
191 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); 196 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
@@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
252 struct ocfs2_lock_res *astarg); 257 struct ocfs2_lock_res *astarg);
253 258
254int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); 259int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
260int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb);
255void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); 261void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
256void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); 262void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
257 263
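
The lvb_valid hunks above (stack_o2cb.c, stack_user.c, stackglue.c/h) add one more callback to the per-stack operations table: the o2cb plugin reports the lock value block as always usable, the fs/dlm plugin checks DLM_SBF_VALNOTVALID, and the glue layer simply forwards to whichever stack is active. A small userspace sketch of the same ops-table dispatch follows; the struct and function names in it are invented stand-ins.

#include <stdio.h>

#define SBF_VALNOTVALID 0x1     /* stand-in for DLM_SBF_VALNOTVALID */

/* Hypothetical per-stack operations table, mirroring the shape of the
 * patch: each cluster stack supplies its own notion of LVB validity. */
struct stack_ops {
        const char *name;
        int (*lvb_valid)(unsigned int status_flags);
};

static int o2cb_lvb_valid(unsigned int flags)
{
        (void)flags;
        return 1;               /* o2dlm zeroes a lost LVB, so always trust it */
}

static int user_lvb_valid(unsigned int flags)
{
        return !(flags & SBF_VALNOTVALID);
}

static const struct stack_ops o2cb  = { "o2cb",  o2cb_lvb_valid };
static const struct stack_ops fsdlm = { "fsdlm", user_lvb_valid };

/* The glue-level call just forwards to the active stack. */
static int dlm_lvb_valid(const struct stack_ops *active, unsigned int flags)
{
        return active->lvb_valid(flags);
}

int main(void)
{
        printf("%s: %d\n", o2cb.name,  dlm_lvb_valid(&o2cb,  SBF_VALNOTVALID));
        printf("%s: %d\n", fsdlm.name, dlm_lvb_valid(&fsdlm, SBF_VALNOTVALID));
        return 0;
}
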
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8439f6b324b9..73a16d4666dc 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
923 int nr) 923 int nr)
924{ 924{
925 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 925 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
926 int ret;
926 927
927 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 928 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
928 return 0; 929 return 0;
929 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 930
931 if (!buffer_jbd(bg_bh))
930 return 1; 932 return 1;
931 933
934 jbd_lock_bh_state(bg_bh);
932 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 935 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
933 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 936 if (bg)
937 ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
938 else
939 ret = 1;
940 jbd_unlock_bh_state(bg_bh);
941
942 return ret;
934} 943}
935 944
936static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 945static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
@@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1885 unsigned int tmp; 1894 unsigned int tmp;
1886 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1895 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1887 struct ocfs2_group_desc *undo_bg = NULL; 1896 struct ocfs2_group_desc *undo_bg = NULL;
1897 int cluster_bitmap = 0;
1888 1898
1889 mlog_entry_void(); 1899 mlog_entry_void();
1890 1900
@@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1905 } 1915 }
1906 1916
1907 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1917 if (ocfs2_is_cluster_bitmap(alloc_inode))
1908 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1918 cluster_bitmap = 1;
1919
1920 if (cluster_bitmap) {
1921 jbd_lock_bh_state(group_bh);
1922 undo_bg = (struct ocfs2_group_desc *)
1923 bh2jh(group_bh)->b_committed_data;
1924 BUG_ON(!undo_bg);
1925 }
1909 1926
1910 tmp = num_bits; 1927 tmp = num_bits;
1911 while(tmp--) { 1928 while(tmp--) {
1912 ocfs2_clear_bit((bit_off + tmp), 1929 ocfs2_clear_bit((bit_off + tmp),
1913 (unsigned long *) bg->bg_bitmap); 1930 (unsigned long *) bg->bg_bitmap);
1914 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1931 if (cluster_bitmap)
1915 ocfs2_set_bit(bit_off + tmp, 1932 ocfs2_set_bit(bit_off + tmp,
1916 (unsigned long *) undo_bg->bg_bitmap); 1933 (unsigned long *) undo_bg->bg_bitmap);
1917 } 1934 }
1918 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1935 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1919 1936
1937 if (cluster_bitmap)
1938 jbd_unlock_bh_state(group_bh);
1939
1920 status = ocfs2_journal_dirty(handle, group_bh); 1940 status = ocfs2_journal_dirty(handle, group_bh);
1921 if (status < 0) 1941 if (status < 0)
1922 mlog_errno(status); 1942 mlog_errno(status);
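
The reworked ocfs2_test_bg_bit_allocatable() above keeps the same rule, a bit may be handed out only if it is clear in the live bitmap and, when a journalled committed copy exists, clear there as well, but it now reads the committed copy under jbd_lock_bh_state() and tolerates it being absent. Below is a tiny sketch of the two-bitmap test on plain arrays, with the buffer-head locking left out; the helper names are invented.

#include <stdio.h>
#include <stddef.h>

static int test_bit(const unsigned char *map, int nr)
{
        return (map[nr / 8] >> (nr % 8)) & 1;
}

/* Bit 'nr' is allocatable only if it is free in the live bitmap and,
 * when a committed snapshot exists, also free in that snapshot. */
static int bit_allocatable(const unsigned char *live,
                           const unsigned char *committed, int nr)
{
        if (test_bit(live, nr))
                return 0;               /* already in use */
        if (!committed)
                return 1;               /* nothing held by a running transaction */
        return !test_bit(committed, nr);
}

int main(void)
{
        unsigned char live[1]      = { 0x01 };  /* bit 0 used */
        unsigned char committed[1] = { 0x02 };  /* bit 1 still held by a txn */

        printf("bit 0: %d\n", bit_allocatable(live, committed, 0));  /* 0 */
        printf("bit 1: %d\n", bit_allocatable(live, committed, 1));  /* 0 */
        printf("bit 2: %d\n", bit_allocatable(live, committed, 2));  /* 1 */
        printf("bit 2 (no snapshot): %d\n", bit_allocatable(live, NULL, 2));
        return 0;
}
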
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0d3ed7407a04..7efb349fb9bd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -205,11 +205,10 @@ static const match_table_t tokens = {
205#ifdef CONFIG_DEBUG_FS 205#ifdef CONFIG_DEBUG_FS
206static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) 206static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
207{ 207{
208 int out = 0;
209 int i;
210 struct ocfs2_cluster_connection *cconn = osb->cconn; 208 struct ocfs2_cluster_connection *cconn = osb->cconn;
211 struct ocfs2_recovery_map *rm = osb->recovery_map; 209 struct ocfs2_recovery_map *rm = osb->recovery_map;
212 struct ocfs2_orphan_scan *os; 210 struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
211 int i, out = 0;
213 212
214 out += snprintf(buf + out, len - out, 213 out += snprintf(buf + out, len - out,
215 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", 214 "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
@@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
234 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", 233 "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
235 osb->s_mount_opt, osb->s_atime_quantum); 234 osb->s_mount_opt, osb->s_atime_quantum);
236 235
237 out += snprintf(buf + out, len - out, 236 if (cconn) {
238 "%10s => Stack: %s Name: %*s Version: %d.%d\n", 237 out += snprintf(buf + out, len - out,
239 "Cluster", 238 "%10s => Stack: %s Name: %*s "
240 (*osb->osb_cluster_stack == '\0' ? 239 "Version: %d.%d\n", "Cluster",
241 "o2cb" : osb->osb_cluster_stack), 240 (*osb->osb_cluster_stack == '\0' ?
242 cconn->cc_namelen, cconn->cc_name, 241 "o2cb" : osb->osb_cluster_stack),
243 cconn->cc_version.pv_major, cconn->cc_version.pv_minor); 242 cconn->cc_namelen, cconn->cc_name,
243 cconn->cc_version.pv_major,
244 cconn->cc_version.pv_minor);
245 }
244 246
245 spin_lock(&osb->dc_task_lock); 247 spin_lock(&osb->dc_task_lock);
246 out += snprintf(buf + out, len - out, 248 out += snprintf(buf + out, len - out,
247 "%10s => Pid: %d Count: %lu WakeSeq: %lu " 249 "%10s => Pid: %d Count: %lu WakeSeq: %lu "
248 "WorkSeq: %lu\n", "DownCnvt", 250 "WorkSeq: %lu\n", "DownCnvt",
249 task_pid_nr(osb->dc_task), osb->blocked_lock_count, 251 (osb->dc_task ? task_pid_nr(osb->dc_task) : -1),
250 osb->dc_wake_sequence, osb->dc_work_sequence); 252 osb->blocked_lock_count, osb->dc_wake_sequence,
253 osb->dc_work_sequence);
251 spin_unlock(&osb->dc_task_lock); 254 spin_unlock(&osb->dc_task_lock);
252 255
253 spin_lock(&osb->osb_lock); 256 spin_lock(&osb->osb_lock);
@@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
267 270
268 out += snprintf(buf + out, len - out, 271 out += snprintf(buf + out, len - out,
269 "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", 272 "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit",
270 task_pid_nr(osb->commit_task), osb->osb_commit_interval, 273 (osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
274 osb->osb_commit_interval,
271 atomic_read(&osb->needs_checkpoint)); 275 atomic_read(&osb->needs_checkpoint));
272 276
273 out += snprintf(buf + out, len - out, 277 out += snprintf(buf + out, len - out,
274 "%10s => State: %d NumTxns: %d TxnId: %lu\n", 278 "%10s => State: %d TxnId: %lu NumTxns: %d\n",
275 "Journal", osb->journal->j_state, 279 "Journal", osb->journal->j_state,
276 atomic_read(&osb->journal->j_num_trans), 280 osb->journal->j_trans_id,
277 osb->journal->j_trans_id); 281 atomic_read(&osb->journal->j_num_trans));
278 282
279 out += snprintf(buf + out, len - out, 283 out += snprintf(buf + out, len - out,
280 "%10s => GlobalAllocs: %d LocalAllocs: %d " 284 "%10s => GlobalAllocs: %d LocalAllocs: %d "
@@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
300 atomic_read(&osb->s_num_inodes_stolen)); 304 atomic_read(&osb->s_num_inodes_stolen));
301 spin_unlock(&osb->osb_lock); 305 spin_unlock(&osb->osb_lock);
302 306
307 out += snprintf(buf + out, len - out, "OrphanScan => ");
308 out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
309 os->os_count, os->os_seqno);
310 out += snprintf(buf + out, len - out, " Last Scan: ");
311 if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
312 out += snprintf(buf + out, len - out, "Disabled\n");
313 else
314 out += snprintf(buf + out, len - out, "%lu seconds ago\n",
315 (get_seconds() - os->os_scantime.tv_sec));
316
303 out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", 317 out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
304 "Slots", "Num", "RecoGen"); 318 "Slots", "Num", "RecoGen");
305
306 for (i = 0; i < osb->max_slots; ++i) { 319 for (i = 0; i < osb->max_slots; ++i) {
307 out += snprintf(buf + out, len - out, 320 out += snprintf(buf + out, len - out,
308 "%10s %c %3d %10d\n", 321 "%10s %c %3d %10d\n",
@@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
311 i, osb->slot_recovery_generations[i]); 324 i, osb->slot_recovery_generations[i]);
312 } 325 }
313 326
314 os = &osb->osb_orphan_scan;
315 out += snprintf(buf + out, len - out, "Orphan Scan=> ");
316 out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
317 os->os_count, os->os_seqno);
318 out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
319 (get_seconds() - os->os_scantime.tv_sec));
320
321 return out; 327 return out;
322} 328}
323 329
@@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1175 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); 1181 atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
1176 wake_up(&osb->osb_mount_event); 1182 wake_up(&osb->osb_mount_event);
1177 1183
1184 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb);
1186
1178 mlog_exit(status); 1187 mlog_exit(status);
1179 return status; 1188 return status;
1180 1189
@@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1810 1819
1811 debugfs_remove(osb->osb_ctxt); 1820 debugfs_remove(osb->osb_ctxt);
1812 1821
1822 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb);
1824
1813 ocfs2_disable_quotas(osb); 1825 ocfs2_disable_quotas(osb);
1814 1826
1815 ocfs2_shutdown_local_alloc(osb); 1827 ocfs2_shutdown_local_alloc(osb);
1816 1828
1817 ocfs2_truncate_log_shutdown(osb); 1829 ocfs2_truncate_log_shutdown(osb);
1818 1830
1819 ocfs2_orphan_scan_stop(osb);
1820
1821 /* This will disable recovery and flush any recovery work. */ 1831 /* This will disable recovery and flush any recovery work. */
1822 ocfs2_recovery_exit(osb); 1832 ocfs2_recovery_exit(osb);
1823 1833
@@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
1978 goto bail; 1988 goto bail;
1979 } 1989 }
1980 1990
1981 status = ocfs2_orphan_scan_init(osb);
1982 if (status) {
1983 mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
1984 mlog_errno(status);
1985 goto bail;
1986 }
1987
1988 init_waitqueue_head(&osb->checkpoint_event); 1991 init_waitqueue_head(&osb->checkpoint_event);
1989 atomic_set(&osb->needs_checkpoint, 0); 1992 atomic_set(&osb->needs_checkpoint, 0);
1990 1993
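
ocfs2_osb_dump() above builds its debugfs text by repeatedly appending with out += snprintf(buf + out, len - out, ...), and the hunks make it guard the dc_task/commit_task pids (printing -1 when the thread is not running) and report the orphan scan as Disabled when inactive. The userspace sketch below shows the append idiom; the clamp in it is extra caution added for this sketch, since snprintf() returns the length it would have written rather than the length actually stored.

#include <stdarg.h>
#include <stdio.h>

/* Append formatted text at offset *out, never letting the offset pass
 * the end of the buffer. */
static void append(char *buf, int len, int *out, const char *fmt, ...)
{
        va_list ap;
        int n;

        if (*out >= len - 1)
                return;
        va_start(ap, fmt);
        n = vsnprintf(buf + *out, (size_t)(len - *out), fmt, ap);
        va_end(ap);
        if (n > 0)
                *out += n;
        if (*out > len - 1)
                *out = len - 1;         /* clamp after truncation */
}

int main(void)
{
        char buf[128];
        int out = 0;
        int commit_pid = -1;    /* thread not running, printed as -1 in the patch */

        append(buf, (int)sizeof(buf), &out, "%10s => Pid: %d\n", "Commit", commit_pid);
        append(buf, (int)sizeof(buf), &out, "OrphanScan => Last Scan: Disabled\n");
        fwrite(buf, 1, (size_t)out, stdout);
        return 0;
}
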
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index ab713ebdd546..40e53702948c 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb,
50 int type, 50 int type,
51 u32 slot); 51 u32 slot);
52 52
53#ifdef CONFIG_DEBUG_LOCK_ALLOC
54static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES];
55#endif
56
53static inline int is_global_system_inode(int type) 57static inline int is_global_system_inode(int type)
54{ 58{
55 return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE && 59 return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE &&
@@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
118 inode = NULL; 122 inode = NULL;
119 goto bail; 123 goto bail;
120 } 124 }
125#ifdef CONFIG_DEBUG_LOCK_ALLOC
126 if (type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
127 type == LOCAL_GROUP_QUOTA_SYSTEM_INODE ||
128 type == JOURNAL_SYSTEM_INODE) {
129 /* Ignore inode lock on these inodes as the lock does not
130 * really belong to any process and lockdep cannot handle
131 * that */
132 OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL;
133 } else {
134 lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres.
135 l_lockdep_map,
136 ocfs2_system_inodes[type].si_name,
137 &ocfs2_sysfile_cluster_lock_key[type], 0);
138 }
139#endif
121bail: 140bail:
122 141
123 return inode; 142 return inode;
diff --git a/fs/open.c b/fs/open.c
index 7200e23d9258..dd98e8076024 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
378#endif 378#endif
379#endif /* BITS_PER_LONG == 32 */ 379#endif /* BITS_PER_LONG == 32 */
380 380
381SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 381
382int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
382{ 383{
383 struct file *file; 384 struct inode *inode = file->f_path.dentry->d_inode;
384 struct inode *inode; 385 long ret;
385 long ret = -EINVAL;
386 386
387 if (offset < 0 || len <= 0) 387 if (offset < 0 || len <= 0)
388 goto out; 388 return -EINVAL;
389 389
390 /* Return error if mode is not supported */ 390 /* Return error if mode is not supported */
391 ret = -EOPNOTSUPP;
392 if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) 391 if (mode && !(mode & FALLOC_FL_KEEP_SIZE))
393 goto out; 392 return -EOPNOTSUPP;
394 393
395 ret = -EBADF;
396 file = fget(fd);
397 if (!file)
398 goto out;
399 if (!(file->f_mode & FMODE_WRITE)) 394 if (!(file->f_mode & FMODE_WRITE))
400 goto out_fput; 395 return -EBADF;
401 /* 396 /*
402 * Revalidate the write permissions, in case security policy has 397 * Revalidate the write permissions, in case security policy has
403 * changed since the files were opened. 398 * changed since the files were opened.
404 */ 399 */
405 ret = security_file_permission(file, MAY_WRITE); 400 ret = security_file_permission(file, MAY_WRITE);
406 if (ret) 401 if (ret)
407 goto out_fput; 402 return ret;
408 403
409 inode = file->f_path.dentry->d_inode;
410
411 ret = -ESPIPE;
412 if (S_ISFIFO(inode->i_mode)) 404 if (S_ISFIFO(inode->i_mode))
413 goto out_fput; 405 return -ESPIPE;
414 406
415 ret = -ENODEV;
416 /* 407 /*
417 * Let individual file system decide if it supports preallocation 408 * Let individual file system decide if it supports preallocation
418 * for directories or not. 409 * for directories or not.
419 */ 410 */
420 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 411 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
421 goto out_fput; 412 return -ENODEV;
422 413
423 ret = -EFBIG;
424 /* Check for wrap through zero too */ 414 /* Check for wrap through zero too */
425 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 415 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
426 goto out_fput; 416 return -EFBIG;
427 417
428 if (inode->i_op->fallocate) 418 if (!inode->i_op->fallocate)
429 ret = inode->i_op->fallocate(inode, mode, offset, len); 419 return -EOPNOTSUPP;
430 else
431 ret = -EOPNOTSUPP;
432 420
433out_fput: 421 return inode->i_op->fallocate(inode, mode, offset, len);
434 fput(file);
435out:
436 return ret;
437} 422}
423
424SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
425{
426 struct file *file;
427 int error = -EBADF;
428
429 file = fget(fd);
430 if (file) {
431 error = do_fallocate(file, mode, offset, len);
432 fput(file);
433 }
434
435 return error;
436}
437
438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 438#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) 439asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
440{ 440{
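
The fs/open.c change splits the argument checking out of the fallocate syscall into do_fallocate(), which takes the struct file directly, so the syscall wrapper keeps only the fget()/fput() pair and other kernel paths can presumably reuse the helper. From userspace those same checks surface as errno values: a non-positive length gives EINVAL, an unsupported mode EOPNOTSUPP, and so on. A short example of driving them through the glibc fallocate(2) wrapper follows; it assumes a Linux system where fcntl.h exposes fallocate() and FALLOC_FL_KEEP_SIZE under _GNU_SOURCE.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("prealloc.tmp", O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Reserve 1 MiB without changing i_size, as FALLOC_FL_KEEP_SIZE asks;
         * filesystems without ->fallocate report EOPNOTSUPP here. */
        if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20) != 0)
                fprintf(stderr, "fallocate: %s\n", strerror(errno));

        /* A non-positive length is rejected before the filesystem is asked. */
        if (fallocate(fd, 0, 0, -1) != 0 && errno == EINVAL)
                fprintf(stderr, "negative length rejected with EINVAL\n");

        close(fd);
        unlink("prealloc.tmp");
        return 0;
}
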
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 1a9c7878f864..ea4e6cb29e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -436,7 +436,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
436 rcu_assign_pointer(ptbl->part[partno], p); 436 rcu_assign_pointer(ptbl->part[partno], p);
437 437
438 /* suppress uevent if the disk supresses it */ 438 /* suppress uevent if the disk supresses it */
439 if (!dev_get_uevent_suppress(pdev)) 439 if (!dev_get_uevent_suppress(ddev))
440 kobject_uevent(&pdev->kobj, KOBJ_ADD); 440 kobject_uevent(&pdev->kobj, KOBJ_ADD);
441 441
442 return p; 442 return p;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 607c579e5eca..70f36c043d62 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2042,8 +2042,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2042 * changes */ 2042 * changes */
2043 invalidate_bdev(sb->s_bdev); 2043 invalidate_bdev(sb->s_bdev);
2044 } 2044 }
2045 mutex_lock(&inode->i_mutex);
2046 mutex_lock(&dqopt->dqonoff_mutex); 2045 mutex_lock(&dqopt->dqonoff_mutex);
2046 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2047 if (sb_has_quota_loaded(sb, type)) { 2047 if (sb_has_quota_loaded(sb, type)) {
2048 error = -EBUSY; 2048 error = -EBUSY;
2049 goto out_lock; 2049 goto out_lock;
@@ -2094,7 +2094,6 @@ out_file_init:
2094 dqopt->files[type] = NULL; 2094 dqopt->files[type] = NULL;
2095 iput(inode); 2095 iput(inode);
2096out_lock: 2096out_lock:
2097 mutex_unlock(&dqopt->dqonoff_mutex);
2098 if (oldflags != -1) { 2097 if (oldflags != -1) {
2099 down_write(&dqopt->dqptr_sem); 2098 down_write(&dqopt->dqptr_sem);
2100 /* Set the flags back (in the case of accidental quotaon() 2099 /* Set the flags back (in the case of accidental quotaon()
@@ -2104,6 +2103,7 @@ out_lock:
2104 up_write(&dqopt->dqptr_sem); 2103 up_write(&dqopt->dqptr_sem);
2105 } 2104 }
2106 mutex_unlock(&inode->i_mutex); 2105 mutex_unlock(&inode->i_mutex);
2106 mutex_unlock(&dqopt->dqonoff_mutex);
2107out_fmt: 2107out_fmt:
2108 put_quota_format(fmt); 2108 put_quota_format(fmt);
2109 2109
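
The quota hunk swaps the acquisition order so dqonoff_mutex is taken before the inode's i_mutex (now acquired with the I_MUTEX_QUOTA lockdep subclass) and released after it, keeping one global order between the two locks. The toy pthread fragment below only shows why a single consistent order matters; the lock names are labels, not the kernel objects.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dqonoff = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t i_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Every path takes dqonoff before i_mutex and releases in reverse order.
 * If one path inverted the order, two threads could each hold one lock
 * and wait forever on the other, which is the hang such reorderings avoid. */
static void quota_on(void)
{
        pthread_mutex_lock(&dqonoff);
        pthread_mutex_lock(&i_mutex);
        printf("quota file set up under both locks\n");
        pthread_mutex_unlock(&i_mutex);
        pthread_mutex_unlock(&dqonoff);
}

int main(void)
{
        quota_on();
        return 0;
}
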
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6fd0f47e45db..a14d6cd9eeda 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
1131 REISERFS_I(inode)->i_trans_id = 0; 1131 REISERFS_I(inode)->i_trans_id = 0;
1132 REISERFS_I(inode)->i_jl = NULL; 1132 REISERFS_I(inode)->i_jl = NULL;
1133 mutex_init(&(REISERFS_I(inode)->i_mmap)); 1133 mutex_init(&(REISERFS_I(inode)->i_mmap));
1134 reiserfs_init_acl_access(inode);
1135 reiserfs_init_acl_default(inode);
1136 reiserfs_init_xattr_rwsem(inode); 1134 reiserfs_init_xattr_rwsem(inode);
1137 1135
1138 if (stat_data_v1(ih)) { 1136 if (stat_data_v1(ih)) {
@@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1834 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1832 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1835 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1833 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1836 mutex_init(&(REISERFS_I(inode)->i_mmap)); 1834 mutex_init(&(REISERFS_I(inode)->i_mmap));
1837 reiserfs_init_acl_access(inode);
1838 reiserfs_init_acl_default(inode);
1839 reiserfs_init_xattr_rwsem(inode); 1835 reiserfs_init_xattr_rwsem(inode);
1840 1836
1841 /* key to search for correct place for new stat data */ 1837 /* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 77f5bb746bf0..90622200b39c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
997 DEFINE_WAIT(wait); 997 DEFINE_WAIT(wait);
998 struct reiserfs_journal *j = SB_JOURNAL(s); 998 struct reiserfs_journal *j = SB_JOURNAL(s);
999 if (atomic_read(&j->j_async_throttle)) 999 if (atomic_read(&j->j_async_throttle))
1000 congestion_wait(WRITE, HZ / 10); 1000 congestion_wait(BLK_RW_ASYNC, HZ / 10);
1001 return 0; 1001 return 0;
1002} 1002}
1003 1003
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 238e9d9b31e0..18b315d3d104 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
82 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { 82 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
83 printk 83 printk
84 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 84 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
85 unlock_super(s);
86 return -ENOMEM; 85 return -ENOMEM;
87 } 86 }
88 /* the new journal bitmaps are zero filled, now we copy in the bitmap 87 /* the new journal bitmaps are zero filled, now we copy in the bitmap
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2969773cfc22..7adea74d6a8a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -24,7 +24,6 @@
24#include <linux/exportfs.h> 24#include <linux/exportfs.h>
25#include <linux/quotaops.h> 25#include <linux/quotaops.h>
26#include <linux/vfs.h> 26#include <linux/vfs.h>
27#include <linux/mnt_namespace.h>
28#include <linux/mount.h> 27#include <linux/mount.h>
29#include <linux/namei.h> 28#include <linux/namei.h>
30#include <linux/crc32.h> 29#include <linux/crc32.h>
@@ -529,10 +528,6 @@ static void init_once(void *foo)
529 528
530 INIT_LIST_HEAD(&ei->i_prealloc_list); 529 INIT_LIST_HEAD(&ei->i_prealloc_list);
531 inode_init_once(&ei->vfs_inode); 530 inode_init_once(&ei->vfs_inode);
532#ifdef CONFIG_REISERFS_FS_POSIX_ACL
533 ei->i_acl_access = NULL;
534 ei->i_acl_default = NULL;
535#endif
536} 531}
537 532
538static int init_inodecache(void) 533static int init_inodecache(void)
@@ -580,25 +575,6 @@ static void reiserfs_dirty_inode(struct inode *inode)
580 reiserfs_write_unlock(inode->i_sb); 575 reiserfs_write_unlock(inode->i_sb);
581} 576}
582 577
583#ifdef CONFIG_REISERFS_FS_POSIX_ACL
584static void reiserfs_clear_inode(struct inode *inode)
585{
586 struct posix_acl *acl;
587
588 acl = REISERFS_I(inode)->i_acl_access;
589 if (acl && !IS_ERR(acl))
590 posix_acl_release(acl);
591 REISERFS_I(inode)->i_acl_access = NULL;
592
593 acl = REISERFS_I(inode)->i_acl_default;
594 if (acl && !IS_ERR(acl))
595 posix_acl_release(acl);
596 REISERFS_I(inode)->i_acl_default = NULL;
597}
598#else
599#define reiserfs_clear_inode NULL
600#endif
601
602#ifdef CONFIG_QUOTA 578#ifdef CONFIG_QUOTA
603static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 579static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
604 size_t, loff_t); 580 size_t, loff_t);
@@ -612,7 +588,6 @@ static const struct super_operations reiserfs_sops = {
612 .write_inode = reiserfs_write_inode, 588 .write_inode = reiserfs_write_inode,
613 .dirty_inode = reiserfs_dirty_inode, 589 .dirty_inode = reiserfs_dirty_inode,
614 .delete_inode = reiserfs_delete_inode, 590 .delete_inode = reiserfs_delete_inode,
615 .clear_inode = reiserfs_clear_inode,
616 .put_super = reiserfs_put_super, 591 .put_super = reiserfs_put_super,
617 .write_super = reiserfs_write_super, 592 .write_super = reiserfs_write_super,
618 .sync_fs = reiserfs_sync_fs, 593 .sync_fs = reiserfs_sync_fs,
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index f3d47d856848..6925b835a43b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -46,7 +46,6 @@
46#include <linux/reiserfs_acl.h> 46#include <linux/reiserfs_acl.h>
47#include <asm/uaccess.h> 47#include <asm/uaccess.h>
48#include <net/checksum.h> 48#include <net/checksum.h>
49#include <linux/smp_lock.h>
50#include <linux/stat.h> 49#include <linux/stat.h>
51#include <linux/quotaops.h> 50#include <linux/quotaops.h>
52 51
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index c303c426fe2b..35d6e672a279 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
188 return ERR_PTR(-EINVAL); 188 return ERR_PTR(-EINVAL);
189} 189}
190 190
191static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl,
192 struct posix_acl *acl)
193{
194 spin_lock(&inode->i_lock);
195 if (*i_acl != ERR_PTR(-ENODATA))
196 posix_acl_release(*i_acl);
197 *i_acl = posix_acl_dup(acl);
198 spin_unlock(&inode->i_lock);
199}
200
201static inline struct posix_acl *iget_acl(struct inode *inode,
202 struct posix_acl **i_acl)
203{
204 struct posix_acl *acl = ERR_PTR(-ENODATA);
205
206 spin_lock(&inode->i_lock);
207 if (*i_acl != ERR_PTR(-ENODATA))
208 acl = posix_acl_dup(*i_acl);
209 spin_unlock(&inode->i_lock);
210
211 return acl;
212}
213
214/* 191/*
215 * Inode operation get_posix_acl(). 192 * Inode operation get_posix_acl().
216 * 193 *
@@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode,
220struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) 197struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
221{ 198{
222 char *name, *value; 199 char *name, *value;
223 struct posix_acl *acl, **p_acl; 200 struct posix_acl *acl;
224 int size; 201 int size;
225 int retval; 202 int retval;
226 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 203
204 acl = get_cached_acl(inode, type);
205 if (acl != ACL_NOT_CACHED)
206 return acl;
227 207
228 switch (type) { 208 switch (type) {
229 case ACL_TYPE_ACCESS: 209 case ACL_TYPE_ACCESS:
230 name = POSIX_ACL_XATTR_ACCESS; 210 name = POSIX_ACL_XATTR_ACCESS;
231 p_acl = &reiserfs_i->i_acl_access;
232 break; 211 break;
233 case ACL_TYPE_DEFAULT: 212 case ACL_TYPE_DEFAULT:
234 name = POSIX_ACL_XATTR_DEFAULT; 213 name = POSIX_ACL_XATTR_DEFAULT;
235 p_acl = &reiserfs_i->i_acl_default;
236 break; 214 break;
237 default: 215 default:
238 return ERR_PTR(-EINVAL); 216 BUG();
239 } 217 }
240 218
241 acl = iget_acl(inode, p_acl);
242 if (acl && !IS_ERR(acl))
243 return acl;
244 else if (PTR_ERR(acl) == -ENODATA)
245 return NULL;
246
247 size = reiserfs_xattr_get(inode, name, NULL, 0); 219 size = reiserfs_xattr_get(inode, name, NULL, 0);
248 if (size < 0) { 220 if (size < 0) {
249 if (size == -ENODATA || size == -ENOSYS) { 221 if (size == -ENODATA || size == -ENOSYS) {
250 *p_acl = ERR_PTR(-ENODATA); 222 set_cached_acl(inode, type, NULL);
251 return NULL; 223 return NULL;
252 } 224 }
253 return ERR_PTR(size); 225 return ERR_PTR(size);
@@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
262 /* This shouldn't actually happen as it should have 234 /* This shouldn't actually happen as it should have
263 been caught above.. but just in case */ 235 been caught above.. but just in case */
264 acl = NULL; 236 acl = NULL;
265 *p_acl = ERR_PTR(-ENODATA);
266 } else if (retval < 0) { 237 } else if (retval < 0) {
267 acl = ERR_PTR(retval); 238 acl = ERR_PTR(retval);
268 } else { 239 } else {
269 acl = posix_acl_from_disk(value, retval); 240 acl = posix_acl_from_disk(value, retval);
270 if (!IS_ERR(acl))
271 iset_acl(inode, p_acl, acl);
272 } 241 }
242 if (!IS_ERR(acl))
243 set_cached_acl(inode, type, acl);
273 244
274 kfree(value); 245 kfree(value);
275 return acl; 246 return acl;
@@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
287{ 258{
288 char *name; 259 char *name;
289 void *value = NULL; 260 void *value = NULL;
290 struct posix_acl **p_acl;
291 size_t size = 0; 261 size_t size = 0;
292 int error; 262 int error;
293 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
294 263
295 if (S_ISLNK(inode->i_mode)) 264 if (S_ISLNK(inode->i_mode))
296 return -EOPNOTSUPP; 265 return -EOPNOTSUPP;
@@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
298 switch (type) { 267 switch (type) {
299 case ACL_TYPE_ACCESS: 268 case ACL_TYPE_ACCESS:
300 name = POSIX_ACL_XATTR_ACCESS; 269 name = POSIX_ACL_XATTR_ACCESS;
301 p_acl = &reiserfs_i->i_acl_access;
302 if (acl) { 270 if (acl) {
303 mode_t mode = inode->i_mode; 271 mode_t mode = inode->i_mode;
304 error = posix_acl_equiv_mode(acl, &mode); 272 error = posix_acl_equiv_mode(acl, &mode);
@@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
313 break; 281 break;
314 case ACL_TYPE_DEFAULT: 282 case ACL_TYPE_DEFAULT:
315 name = POSIX_ACL_XATTR_DEFAULT; 283 name = POSIX_ACL_XATTR_DEFAULT;
316 p_acl = &reiserfs_i->i_acl_default;
317 if (!S_ISDIR(inode->i_mode)) 284 if (!S_ISDIR(inode->i_mode))
318 return acl ? -EACCES : 0; 285 return acl ? -EACCES : 0;
319 break; 286 break;
@@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
346 kfree(value); 313 kfree(value);
347 314
348 if (!error) 315 if (!error)
349 iset_acl(inode, p_acl, acl); 316 set_cached_acl(inode, type, acl);
350 317
351 return error; 318 return error;
352} 319}
@@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
379 } 346 }
380 347
381 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); 348 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
382 if (IS_ERR(acl)) { 349 if (IS_ERR(acl))
383 if (PTR_ERR(acl) == -ENODATA)
384 goto apply_umask;
385 return PTR_ERR(acl); 350 return PTR_ERR(acl);
386 }
387 351
388 if (acl) { 352 if (acl) {
389 struct posix_acl *acl_copy; 353 struct posix_acl *acl_copy;
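
reiserfs_get_acl() above now asks the generic cache first via get_cached_acl(), reads the xattr only on a miss, and stores the result (including a NULL "no ACL" answer) with set_cached_acl(), replacing the per-inode i_acl_access/i_acl_default pointers removed elsewhere in this series. The sketch below reduces that to a one-slot check-then-fill cache in userspace; the helper names are invented.

#include <stdio.h>

/* One-slot stand-in cache; the kernel keeps the cached ACL pointer in
 * the inode and uses a sentinel (ACL_NOT_CACHED) instead of this flag. */
static int cache_filled;
static const char *cached_acl;

/* Pretend xattr read: NULL would mean "no ACL stored on disk". */
static const char *read_acl_from_disk(void)
{
        printf("  (hitting disk)\n");
        return "user::rw-,group::r--,other::r--";
}

static const char *get_acl(void)
{
        if (cache_filled)
                return cached_acl;      /* fast path, no xattr lookup */

        cached_acl = read_acl_from_disk();
        cache_filled = 1;               /* cache positive and negative results */
        return cached_acl;
}

int main(void)
{
        printf("first : %s\n", get_acl());
        printf("second: %s\n", get_acl());      /* served from the cache */
        return 0;
}
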
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3b52770f46ff..cb5fc57e370b 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -30,6 +30,7 @@
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/vfs.h> 31#include <linux/vfs.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/smp_lock.h>
33#include <linux/mutex.h> 34#include <linux/mutex.h>
34#include <linux/pagemap.h> 35#include <linux/pagemap.h>
35#include <linux/init.h> 36#include <linux/init.h>
diff --git a/fs/super.c b/fs/super.c
index d40d53a22fb5..2761d3e22ed9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -608,6 +608,7 @@ void emergency_remount(void)
608 608
609static DEFINE_IDA(unnamed_dev_ida); 609static DEFINE_IDA(unnamed_dev_ida);
610static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 610static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
611static int unnamed_dev_start = 0; /* don't bother trying below it */
611 612
612int set_anon_super(struct super_block *s, void *data) 613int set_anon_super(struct super_block *s, void *data)
613{ 614{
@@ -618,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data)
618 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) 619 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
619 return -ENOMEM; 620 return -ENOMEM;
620 spin_lock(&unnamed_dev_lock); 621 spin_lock(&unnamed_dev_lock);
621 error = ida_get_new(&unnamed_dev_ida, &dev); 622 error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev);
623 if (!error)
624 unnamed_dev_start = dev + 1;
622 spin_unlock(&unnamed_dev_lock); 625 spin_unlock(&unnamed_dev_lock);
623 if (error == -EAGAIN) 626 if (error == -EAGAIN)
624 /* We raced and lost with another CPU. */ 627 /* We raced and lost with another CPU. */
@@ -629,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data)
629 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 632 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
630 spin_lock(&unnamed_dev_lock); 633 spin_lock(&unnamed_dev_lock);
631 ida_remove(&unnamed_dev_ida, dev); 634 ida_remove(&unnamed_dev_ida, dev);
635 if (unnamed_dev_start > dev)
636 unnamed_dev_start = dev;
632 spin_unlock(&unnamed_dev_lock); 637 spin_unlock(&unnamed_dev_lock);
633 return -EMFILE; 638 return -EMFILE;
634 } 639 }
@@ -645,6 +650,8 @@ void kill_anon_super(struct super_block *sb)
645 generic_shutdown_super(sb); 650 generic_shutdown_super(sb);
646 spin_lock(&unnamed_dev_lock); 651 spin_lock(&unnamed_dev_lock);
647 ida_remove(&unnamed_dev_ida, slot); 652 ida_remove(&unnamed_dev_ida, slot);
653 if (slot < unnamed_dev_start)
654 unnamed_dev_start = slot;
648 spin_unlock(&unnamed_dev_lock); 655 spin_unlock(&unnamed_dev_lock);
649} 656}
650 657
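
The fs/super.c hunk makes set_anon_super() allocate with ida_get_new_above() starting from unnamed_dev_start, bumps the hint past each grant, and lowers it again whenever a lower slot is freed, so repeated mounts do not rescan the already-dense low range. A small stand-in allocator over a plain bitmap shows the hint bookkeeping; the kernel IDA itself is more involved.

#include <stdio.h>

#define SLOTS 16
static unsigned char used[SLOTS];
static int start_hint;                  /* "don't bother trying below it" */

static int alloc_slot(void)
{
        for (int i = start_hint; i < SLOTS; i++) {
                if (!used[i]) {
                        used[i] = 1;
                        start_hint = i + 1;     /* next search starts above us */
                        return i;
                }
        }
        return -1;
}

static void free_slot(int i)
{
        used[i] = 0;
        if (i < start_hint)
                start_hint = i;                 /* reuse freed low slots first */
}

int main(void)
{
        int a = alloc_slot(), b = alloc_slot(), c = alloc_slot();

        printf("got %d %d %d\n", a, b, c);              /* 0 1 2 */
        free_slot(b);
        printf("after freeing %d, next is %d\n", b, alloc_slot()); /* 1 */
        return 0;
}
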
diff --git a/fs/sync.c b/fs/sync.c
index dd200025af85..3422ba61d86d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -112,8 +112,13 @@ restart:
112 mutex_unlock(&mutex); 112 mutex_unlock(&mutex);
113} 113}
114 114
115/*
116 * sync everything. Start out by waking pdflush, because that writes back
117 * all queues in parallel.
118 */
115SYSCALL_DEFINE0(sync) 119SYSCALL_DEFINE0(sync)
116{ 120{
121 wakeup_pdflush(0);
117 sync_filesystems(0); 122 sync_filesystems(0);
118 sync_filesystems(1); 123 sync_filesystems(1);
119 if (unlikely(laptop_mode)) 124 if (unlikely(laptop_mode))
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 9345806c8853..2524714bece1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -171,6 +171,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
171 if (count > 0) 171 if (count > 0)
172 *off = offs + count; 172 *off = offs + count;
173 173
174 kfree(temp);
174 return count; 175 return count;
175} 176}
176 177
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index bc5857199ec2..762a7d6cec73 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -297,6 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
297{ 297{
298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); 298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
299 299
300 dbg_io("jhead %d", wbuf->jhead);
300 wbuf->need_sync = 1; 301 wbuf->need_sync = 1;
301 wbuf->c->need_wbuf_sync = 1; 302 wbuf->c->need_wbuf_sync = 1;
302 ubifs_wake_up_bgt(wbuf->c); 303 ubifs_wake_up_bgt(wbuf->c);
@@ -311,8 +312,12 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
311{ 312{
312 ubifs_assert(!hrtimer_active(&wbuf->timer)); 313 ubifs_assert(!hrtimer_active(&wbuf->timer));
313 314
314 if (!ktime_to_ns(wbuf->softlimit)) 315 if (wbuf->no_timer)
315 return; 316 return;
317 dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
318 div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
319 div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
320 USEC_PER_SEC));
316 hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, 321 hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
317 HRTIMER_MODE_REL); 322 HRTIMER_MODE_REL);
318} 323}
@@ -323,11 +328,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
323 */ 328 */
324static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) 329static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
325{ 330{
326 /* 331 if (wbuf->no_timer)
327 * If the syncer is waiting for the lock (from the background thread's 332 return;
328 * context) and another task is changing write-buffer then the syncing
329 * should be canceled.
330 */
331 wbuf->need_sync = 0; 333 wbuf->need_sync = 0;
332 hrtimer_cancel(&wbuf->timer); 334 hrtimer_cancel(&wbuf->timer);
333} 335}
@@ -349,8 +351,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
349 /* Write-buffer is empty or not seeked */ 351 /* Write-buffer is empty or not seeked */
350 return 0; 352 return 0;
351 353
352 dbg_io("LEB %d:%d, %d bytes", 354 dbg_io("LEB %d:%d, %d bytes, jhead %d",
353 wbuf->lnum, wbuf->offs, wbuf->used); 355 wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead);
354 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); 356 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
355 ubifs_assert(!(wbuf->avail & 7)); 357 ubifs_assert(!(wbuf->avail & 7));
356 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); 358 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
@@ -390,7 +392,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
390 * @offs: logical eraseblock offset to seek to 392 * @offs: logical eraseblock offset to seek to
391 * @dtype: data type 393 * @dtype: data type
392 * 394 *
393 * This function targets the write buffer to logical eraseblock @lnum:@offs. 395 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
394 * The write-buffer is synchronized if it is not empty. Returns zero in case of 396 * The write-buffer is synchronized if it is not empty. Returns zero in case of
395 * success and a negative error code in case of failure. 397 * success and a negative error code in case of failure.
396 */ 398 */
@@ -399,7 +401,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
399{ 401{
400 const struct ubifs_info *c = wbuf->c; 402 const struct ubifs_info *c = wbuf->c;
401 403
402 dbg_io("LEB %d:%d", lnum, offs); 404 dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead);
403 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); 405 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
404 ubifs_assert(offs >= 0 && offs <= c->leb_size); 406 ubifs_assert(offs >= 0 && offs <= c->leb_size);
405 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 407 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
@@ -506,9 +508,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
506 struct ubifs_info *c = wbuf->c; 508 struct ubifs_info *c = wbuf->c;
507 int err, written, n, aligned_len = ALIGN(len, 8), offs; 509 int err, written, n, aligned_len = ALIGN(len, 8), offs;
508 510
509 dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, 511 dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len,
510 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, 512 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead,
511 wbuf->offs + wbuf->used); 513 wbuf->lnum, wbuf->offs + wbuf->used);
512 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 514 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
513 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 515 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
514 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 516 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
@@ -533,8 +535,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
533 memcpy(wbuf->buf + wbuf->used, buf, len); 535 memcpy(wbuf->buf + wbuf->used, buf, len);
534 536
535 if (aligned_len == wbuf->avail) { 537 if (aligned_len == wbuf->avail) {
536 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, 538 dbg_io("flush jhead %d wbuf to LEB %d:%d",
537 wbuf->offs); 539 wbuf->jhead, wbuf->lnum, wbuf->offs);
538 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 540 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
539 wbuf->offs, c->min_io_size, 541 wbuf->offs, c->min_io_size,
540 wbuf->dtype); 542 wbuf->dtype);
@@ -562,7 +564,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
562 * minimal I/O unit. We have to fill and flush write-buffer and switch 564 * minimal I/O unit. We have to fill and flush write-buffer and switch
563 * to the next min. I/O unit. 565 * to the next min. I/O unit.
564 */ 566 */
565 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); 567 dbg_io("flush jhead %d wbuf to LEB %d:%d",
568 wbuf->jhead, wbuf->lnum, wbuf->offs);
566 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 569 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
567 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 570 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
568 c->min_io_size, wbuf->dtype); 571 c->min_io_size, wbuf->dtype);
@@ -695,7 +698,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
695 int err, rlen, overlap; 698 int err, rlen, overlap;
696 struct ubifs_ch *ch = buf; 699 struct ubifs_ch *ch = buf;
697 700
698 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); 701 dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs,
702 dbg_ntype(type), len, wbuf->jhead);
699 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 703 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
700 ubifs_assert(!(offs & 7) && offs < c->leb_size); 704 ubifs_assert(!(offs & 7) && offs < c->leb_size);
701 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); 705 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
@@ -819,13 +823,12 @@ out:
819 * @c: UBIFS file-system description object 823 * @c: UBIFS file-system description object
820 * @wbuf: write-buffer to initialize 824 * @wbuf: write-buffer to initialize
821 * 825 *
822 * This function initializes write buffer. Returns zero in case of success 826 * This function initializes write-buffer. Returns zero in case of success
823 * %-ENOMEM in case of failure. 827 * %-ENOMEM in case of failure.
824 */ 828 */
825int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) 829int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
826{ 830{
827 size_t size; 831 size_t size;
828 ktime_t hardlimit;
829 832
830 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 833 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
831 if (!wbuf->buf) 834 if (!wbuf->buf)
@@ -851,22 +854,16 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
851 854
852 hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 855 hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
853 wbuf->timer.function = wbuf_timer_callback_nolock; 856 wbuf->timer.function = wbuf_timer_callback_nolock;
854 /* 857 wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
855 * Make write-buffer soft limit to be 20% of the hard limit. The 858 wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
856 * write-buffer timer is allowed to expire any time between the soft 859 wbuf->delta *= 1000000000ULL;
857 * and hard limits. 860 ubifs_assert(wbuf->delta <= ULONG_MAX);
858 */
859 hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0);
860 wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10;
861 wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta);
862 hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit,
863 wbuf->delta);
864 return 0; 861 return 0;
865} 862}
866 863
867/** 864/**
868 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. 865 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
869 * @wbuf: the write-buffer whereto add 866 * @wbuf: the write-buffer where to add
870 * @inum: the inode number 867 * @inum: the inode number
871 * 868 *
872 * This function adds an inode number to the inode array of the write-buffer. 869 * This function adds an inode number to the inode array of the write-buffer.
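The softlimit/delta pair initialized above is consumed wherever the write-buffer timer is armed; that helper lies outside this hunk. A minimal sketch of such an arming call, assuming only the hrtimer API and the wbuf fields shown in this patch (the helper name is illustrative):

	static void arm_wbuf_timer_sketch(struct ubifs_wbuf *wbuf)
	{
		if (wbuf->no_timer || hrtimer_active(&wbuf->timer))
			return;
		/* The timer may expire anywhere in [softlimit, softlimit + delta], i.e. 3..5s */
		hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
				       HRTIMER_MODE_REL);
	}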
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 6db7a6be6c97..8aacd64957a2 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -25,7 +25,6 @@
25/* This file implements EXT2-compatible extended attribute ioctl() calls */ 25/* This file implements EXT2-compatible extended attribute ioctl() calls */
26 26
27#include <linux/compat.h> 27#include <linux/compat.h>
28#include <linux/smp_lock.h>
29#include <linux/mount.h> 28#include <linux/mount.h>
30#include "ubifs.h" 29#include "ubifs.h"
31 30
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 805605250f12..e5f6cf8a1155 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -53,6 +53,25 @@ static int is_empty(void *buf, int len)
53} 53}
54 54
55/** 55/**
56 * first_non_ff - find offset of the first non-0xff byte.
57 * @buf: buffer to search in
58 * @len: length of buffer
59 *
60 * This function returns offset of the first non-0xff byte in @buf or %-1 if
61 * the buffer contains only 0xff bytes.
62 */
63static int first_non_ff(void *buf, int len)
64{
65 uint8_t *p = buf;
66 int i;
67
68 for (i = 0; i < len; i++)
69 if (*p++ != 0xff)
70 return i;
71 return -1;
72}
73
74/**
56 * get_master_node - get the last valid master node allowing for corruption. 75 * get_master_node - get the last valid master node allowing for corruption.
57 * @c: UBIFS file-system description object 76 * @c: UBIFS file-system description object
58 * @lnum: LEB number 77 * @lnum: LEB number
@@ -357,11 +376,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
357 empty_offs = ALIGN(offs + 1, c->min_io_size); 376 empty_offs = ALIGN(offs + 1, c->min_io_size);
358 check_len = c->leb_size - empty_offs; 377 check_len = c->leb_size - empty_offs;
359 p = buf + empty_offs - offs; 378 p = buf + empty_offs - offs;
360 379 return is_empty(p, check_len);
361 for (; check_len > 0; check_len--)
362 if (*p++ != 0xff)
363 return 0;
364 return 1;
365} 380}
366 381
367/** 382/**
@@ -543,8 +558,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
543 * 558 *
544 * This function does a scan of a LEB, but caters for errors that might have 559 * This function does a scan of a LEB, but caters for errors that might have
545 * been caused by the unclean unmount from which we are attempting to recover. 560 * been caused by the unclean unmount from which we are attempting to recover.
546 * 561 * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
547 * This function returns %0 on success and a negative error code on failure. 562 * found, and a negative error code in case of failure.
548 */ 563 */
549struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 564struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
550 int offs, void *sbuf, int grouped) 565 int offs, void *sbuf, int grouped)
@@ -643,7 +658,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
643 goto corrupted; 658 goto corrupted;
644 default: 659 default:
645 dbg_err("unknown"); 660 dbg_err("unknown");
646 goto corrupted; 661 err = -EINVAL;
662 goto error;
647 } 663 }
648 } 664 }
649 665
@@ -652,8 +668,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
652 clean_buf(c, &buf, lnum, &offs, &len); 668 clean_buf(c, &buf, lnum, &offs, &len);
653 need_clean = 1; 669 need_clean = 1;
654 } else { 670 } else {
655 ubifs_err("corrupt empty space at LEB %d:%d", 671 int corruption = first_non_ff(buf, len);
656 lnum, offs); 672
673 ubifs_err("corrupt empty space LEB %d:%d, corruption "
674 "starts at %d", lnum, offs, corruption);
675 /* Make sure we dump interesting non-0xFF data */
676 offs = corruption;
677 buf += corruption;
657 goto corrupted; 678 goto corrupted;
658 } 679 }
659 } 680 }
@@ -813,7 +834,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
813static int recover_head(const struct ubifs_info *c, int lnum, int offs, 834static int recover_head(const struct ubifs_info *c, int lnum, int offs,
814 void *sbuf) 835 void *sbuf)
815{ 836{
816 int len, err, need_clean = 0; 837 int len, err;
817 838
818 if (c->min_io_size > 1) 839 if (c->min_io_size > 1)
819 len = c->min_io_size; 840 len = c->min_io_size;
@@ -827,19 +848,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
827 848
828 /* Read at the head location and check it is empty flash */ 849 /* Read at the head location and check it is empty flash */
829 err = ubi_read(c->ubi, lnum, sbuf, offs, len); 850 err = ubi_read(c->ubi, lnum, sbuf, offs, len);
830 if (err) 851 if (err || !is_empty(sbuf, len)) {
831 need_clean = 1;
832 else {
833 uint8_t *p = sbuf;
834
835 while (len--)
836 if (*p++ != 0xff) {
837 need_clean = 1;
838 break;
839 }
840 }
841
842 if (need_clean) {
843 dbg_rcvry("cleaning head at %d:%d", lnum, offs); 852 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
844 if (offs == 0) 853 if (offs == 0)
845 return ubifs_leb_unmap(c, lnum); 854 return ubifs_leb_unmap(c, lnum);
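Both simplifications above rely on the pre-existing is_empty() helper, whose body is cut off at the top of this file's diff context. Its semantics are the inverse of the new first_non_ff(); a sketch along these lines (reconstructed, not quoted from the patch):

	/* Return 1 if the @len bytes at @buf are all 0xff (erased flash), else 0 */
	static int is_empty(void *buf, int len)
	{
		uint8_t *p = buf;
		int i;

		for (i = 0; i < len; i++)
			if (*p++ != 0xff)
				return 0;
		return 1;
	}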
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 11cc80125a49..2970500f32df 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -837,9 +837,10 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
837 837
838 dbg_mnt("replay log LEB %d:%d", lnum, offs); 838 dbg_mnt("replay log LEB %d:%d", lnum, offs);
839 sleb = ubifs_scan(c, lnum, offs, sbuf); 839 sleb = ubifs_scan(c, lnum, offs, sbuf);
 840 if (IS_ERR(sleb)) { 840 if (IS_ERR(sleb)) {
841 if (c->need_recovery) 841 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
842 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 842 return PTR_ERR(sleb);
843 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
843 if (IS_ERR(sleb)) 844 if (IS_ERR(sleb))
844 return PTR_ERR(sleb); 845 return PTR_ERR(sleb);
845 } 846 }
@@ -957,7 +958,7 @@ out:
957 return err; 958 return err;
958 959
959out_dump: 960out_dump:
960 ubifs_err("log error detected while replying the log at LEB %d:%d", 961 ubifs_err("log error detected while replaying the log at LEB %d:%d",
961 lnum, offs + snod->offs); 962 lnum, offs + snod->offs);
962 dbg_dump_node(c, snod->node); 963 dbg_dump_node(c, snod->node);
963 ubifs_scan_destroy(sleb); 964 ubifs_scan_destroy(sleb);
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 0ed82479b44b..892ebfee4fe5 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -238,12 +238,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
238{ 238{
239 int len; 239 int len;
240 240
241 ubifs_err("corrupted data at LEB %d:%d", lnum, offs); 241 ubifs_err("corruption at LEB %d:%d", lnum, offs);
242 if (dbg_failure_mode) 242 if (dbg_failure_mode)
243 return; 243 return;
244 len = c->leb_size - offs; 244 len = c->leb_size - offs;
245 if (len > 4096) 245 if (len > 8192)
246 len = 4096; 246 len = 8192;
247 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); 247 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); 248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
249} 249}
@@ -256,7 +256,9 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
256 * @sbuf: scan buffer (must be c->leb_size) 256 * @sbuf: scan buffer (must be c->leb_size)
257 * 257 *
258 * This function scans LEB number @lnum and returns complete information about 258 * This function scans LEB number @lnum and returns complete information about
 259 * its contents. Returns an error code in case of failure. 259 * its contents. Returns the scanned information in case of success,
 260 * %-EUCLEAN if the LEB needs recovery, and other negative error codes in case
261 * of failure.
260 */ 262 */
261struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, 263struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
262 int offs, void *sbuf) 264 int offs, void *sbuf)
@@ -279,7 +281,6 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
279 cond_resched(); 281 cond_resched();
280 282
281 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 283 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
282
283 if (ret > 0) { 284 if (ret > 0) {
284 /* Padding bytes or a valid padding node */ 285 /* Padding bytes or a valid padding node */
285 offs += ret; 286 offs += ret;
@@ -304,7 +305,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
304 goto corrupted; 305 goto corrupted;
305 default: 306 default:
306 dbg_err("unknown"); 307 dbg_err("unknown");
307 goto corrupted; 308 err = -EINVAL;
309 goto error;
308 } 310 }
309 311
310 err = ubifs_add_snod(c, sleb, buf, offs); 312 err = ubifs_add_snod(c, sleb, buf, offs);
@@ -317,8 +319,10 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
317 len -= node_len; 319 len -= node_len;
318 } 320 }
319 321
320 if (offs % c->min_io_size) 322 if (offs % c->min_io_size) {
321 goto corrupted; 323 ubifs_err("empty space starts at non-aligned offset %d", offs);
 324 goto corrupted;
325 }
322 326
323 ubifs_end_scan(c, sleb, lnum, offs); 327 ubifs_end_scan(c, sleb, lnum, offs);
324 328
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 79fad43f3c57..26d2e0d80465 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -797,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c)
797 * does not need to be synchronized by timer. 797 * does not need to be synchronized by timer.
798 */ 798 */
799 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 799 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
800 c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); 800 c->jheads[GCHD].wbuf.no_timer = 1;
801 801
802 return 0; 802 return 0;
803} 803}
@@ -986,7 +986,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
986 switch (token) { 986 switch (token) {
987 /* 987 /*
988 * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. 988 * %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
989 * We accepte them in order to be backware-compatible. But this 989 * We accept them in order to be backward-compatible. But this
990 * should be removed at some point. 990 * should be removed at some point.
991 */ 991 */
992 case Opt_fast_unmount: 992 case Opt_fast_unmount:
@@ -1287,6 +1287,9 @@ static int mount_ubifs(struct ubifs_info *c)
1287 if (err) 1287 if (err)
1288 goto out_journal; 1288 goto out_journal;
1289 1289
1290 /* Calculate 'min_idx_lebs' after journal replay */
1291 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
1292
1290 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); 1293 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
1291 if (err) 1294 if (err)
1292 goto out_orphans; 1295 goto out_orphans;
@@ -1754,10 +1757,8 @@ static void ubifs_put_super(struct super_block *sb)
1754 1757
1755 /* Synchronize write-buffers */ 1758 /* Synchronize write-buffers */
1756 if (c->jheads) 1759 if (c->jheads)
1757 for (i = 0; i < c->jhead_cnt; i++) { 1760 for (i = 0; i < c->jhead_cnt; i++)
1758 ubifs_wbuf_sync(&c->jheads[i].wbuf); 1761 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1759 hrtimer_cancel(&c->jheads[i].wbuf.timer);
1760 }
1761 1762
1762 /* 1763 /*
1763 * On fatal errors c->ro_media is set to 1, in which case we do 1764 * On fatal errors c->ro_media is set to 1, in which case we do
@@ -1975,7 +1976,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1975 err = bdi_init(&c->bdi); 1976 err = bdi_init(&c->bdi);
1976 if (err) 1977 if (err)
1977 goto out_close; 1978 goto out_close;
1978 err = bdi_register(&c->bdi, NULL, "ubifs"); 1979 err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
1980 c->vi.ubi_num, c->vi.vol_id);
1979 if (err) 1981 if (err)
1980 goto out_bdi; 1982 goto out_bdi;
1981 1983
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1bf01d820066..a29349094422 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -95,8 +95,9 @@
95 */ 95 */
96#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" 96#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
97 97
98/* Default write-buffer synchronization timeout in seconds */ 98/* Write-buffer synchronization timeout interval in seconds */
99#define DEFAULT_WBUF_TIMEOUT_SECS 5 99#define WBUF_TIMEOUT_SOFTLIMIT 3
100#define WBUF_TIMEOUT_HARDLIMIT 5
100 101
101/* Maximum possible inode number (only 32-bit inodes are supported now) */ 102/* Maximum possible inode number (only 32-bit inodes are supported now) */
102#define MAX_INUM 0xFFFFFFFF 103#define MAX_INUM 0xFFFFFFFF
@@ -654,7 +655,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
 654 * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit 655 * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit
655 * and @softlimit + @delta) 656 * and @softlimit + @delta)
656 * @timer: write-buffer timer 657 * @timer: write-buffer timer
657 * @need_sync: it is set if its timer expired and needs sync 658 * @no_timer: non-zero if this write-buffer does not have a timer
659 * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing
658 * @next_ino: points to the next position of the following inode number 660 * @next_ino: points to the next position of the following inode number
659 * @inodes: stores the inode numbers of the nodes which are in wbuf 661 * @inodes: stores the inode numbers of the nodes which are in wbuf
660 * 662 *
@@ -683,7 +685,8 @@ struct ubifs_wbuf {
683 ktime_t softlimit; 685 ktime_t softlimit;
684 unsigned long long delta; 686 unsigned long long delta;
685 struct hrtimer timer; 687 struct hrtimer timer;
686 int need_sync; 688 unsigned int no_timer:1;
689 unsigned int need_sync:1;
687 int next_ino; 690 int next_ino;
688 ino_t *inodes; 691 ino_t *inodes;
689}; 692};
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e229c89..adafcf556531 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,9 @@
55 * ACL support is not implemented. 55 * ACL support is not implemented.
56 */ 56 */
57 57
58#include "ubifs.h"
58#include <linux/xattr.h> 59#include <linux/xattr.h>
59#include <linux/posix_acl_xattr.h> 60#include <linux/posix_acl_xattr.h>
60#include "ubifs.h"
61 61
62/* 62/*
63 * Limit the number of extended attributes per inode so that the total size 63 * Limit the number of extended attributes per inode so that the total size
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index e48e9a3af763..1e068535b58b 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
238 238
239 mutex_lock(&sbi->s_alloc_mutex); 239 mutex_lock(&sbi->s_alloc_mutex);
240 part_len = sbi->s_partmaps[partition].s_partition_len; 240 part_len = sbi->s_partmaps[partition].s_partition_len;
241 if (first_block < 0 || first_block >= part_len) 241 if (first_block >= part_len)
242 goto out; 242 goto out;
243 243
244 if (first_block + block_count > part_len) 244 if (first_block + block_count > part_len)
@@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb,
297 mutex_lock(&sbi->s_alloc_mutex); 297 mutex_lock(&sbi->s_alloc_mutex);
298 298
299repeat: 299repeat:
300 if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) 300 if (goal >= sbi->s_partmaps[partition].s_partition_len)
301 goal = 0; 301 goal = 0;
302 302
303 nr_groups = bitmap->s_nr_groups; 303 nr_groups = bitmap->s_nr_groups;
@@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
666 int8_t etype = -1; 666 int8_t etype = -1;
667 struct udf_inode_info *iinfo; 667 struct udf_inode_info *iinfo;
668 668
669 if (first_block < 0 || 669 if (first_block >= sbi->s_partmaps[partition].s_partition_len)
670 first_block >= sbi->s_partmaps[partition].s_partition_len)
671 return 0; 670 return 0;
672 671
673 iinfo = UDF_I(table); 672 iinfo = UDF_I(table);
@@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb,
743 return newblock; 742 return newblock;
744 743
745 mutex_lock(&sbi->s_alloc_mutex); 744 mutex_lock(&sbi->s_alloc_mutex);
746 if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) 745 if (goal >= sbi->s_partmaps[partition].s_partition_len)
747 goal = 0; 746 goal = 0;
748 747
749 /* We search for the closest matching block to goal. If we find 748 /* We search for the closest matching block to goal. If we find
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 703843f30ffd..1b88fd5df05d 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb)
56 struct block_device *bdev = sb->s_bdev; 56 struct block_device *bdev = sb->s_bdev;
57 unsigned long lblock = 0; 57 unsigned long lblock = 0;
58 58
59 if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock)) 59 /*
60 * ioctl failed or returned obviously bogus value?
61 * Try using the device size...
62 */
63 if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) ||
64 lblock == 0)
60 lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits; 65 lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
61 66
62 if (lblock) 67 if (lblock)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 1cd3b55ee3d2..2d3f90afe5f1 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
53 printk(KERN_ERR "XFS: possible memory allocation " 53 printk(KERN_ERR "XFS: possible memory allocation "
54 "deadlock in %s (mode:0x%x)\n", 54 "deadlock in %s (mode:0x%x)\n",
55 __func__, lflags); 55 __func__, lflags);
56 congestion_wait(WRITE, HZ/50); 56 congestion_wait(BLK_RW_ASYNC, HZ/50);
57 } while (1); 57 } while (1);
58} 58}
59 59
@@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
130 printk(KERN_ERR "XFS: possible memory allocation " 130 printk(KERN_ERR "XFS: possible memory allocation "
131 "deadlock in %s (mode:0x%x)\n", 131 "deadlock in %s (mode:0x%x)\n",
132 __func__, lflags); 132 __func__, lflags);
133 congestion_wait(WRITE, HZ/50); 133 congestion_wait(BLK_RW_ASYNC, HZ/50);
134 } while (1); 134 } while (1);
135} 135}
136 136
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 1e9d1246eebc..b23a54506446 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -25,14 +25,10 @@
25#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
26 26
27 27
28#define XFS_ACL_NOT_CACHED ((void *)-1)
29
30/* 28/*
31 * Locking scheme: 29 * Locking scheme:
32 * - all ACL updates are protected by inode->i_mutex, which is taken before 30 * - all ACL updates are protected by inode->i_mutex, which is taken before
33 * calling into this file. 31 * calling into this file.
34 * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
35 * protected by inode->i_lock.
36 */ 32 */
37 33
38STATIC struct posix_acl * 34STATIC struct posix_acl *
@@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
102 } 98 }
103} 99}
104 100
105/*
106 * Update the cached ACL pointer in the inode.
107 *
108 * Because we don't hold any locks while reading/writing the attribute
109 * from/to disk another thread could have raced and updated the cached
110 * ACL value before us. In that case we release the previous cached value
111 * and update it with our new value.
112 */
113STATIC void
114xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
115 struct posix_acl *acl)
116{
117 spin_lock(&inode->i_lock);
118 if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
119 posix_acl_release(*p_acl);
120 *p_acl = posix_acl_dup(acl);
121 spin_unlock(&inode->i_lock);
122}
123
124struct posix_acl * 101struct posix_acl *
125xfs_get_acl(struct inode *inode, int type) 102xfs_get_acl(struct inode *inode, int type)
126{ 103{
127 struct xfs_inode *ip = XFS_I(inode); 104 struct xfs_inode *ip = XFS_I(inode);
128 struct posix_acl *acl = NULL, **p_acl; 105 struct posix_acl *acl;
129 struct xfs_acl *xfs_acl; 106 struct xfs_acl *xfs_acl;
130 int len = sizeof(struct xfs_acl); 107 int len = sizeof(struct xfs_acl);
131 char *ea_name; 108 char *ea_name;
132 int error; 109 int error;
133 110
111 acl = get_cached_acl(inode, type);
112 if (acl != ACL_NOT_CACHED)
113 return acl;
114
134 switch (type) { 115 switch (type) {
135 case ACL_TYPE_ACCESS: 116 case ACL_TYPE_ACCESS:
136 ea_name = SGI_ACL_FILE; 117 ea_name = SGI_ACL_FILE;
137 p_acl = &ip->i_acl;
138 break; 118 break;
139 case ACL_TYPE_DEFAULT: 119 case ACL_TYPE_DEFAULT:
140 ea_name = SGI_ACL_DEFAULT; 120 ea_name = SGI_ACL_DEFAULT;
141 p_acl = &ip->i_default_acl;
142 break; 121 break;
143 default: 122 default:
144 return ERR_PTR(-EINVAL); 123 BUG();
145 } 124 }
146 125
147 spin_lock(&inode->i_lock);
148 if (*p_acl != XFS_ACL_NOT_CACHED)
149 acl = posix_acl_dup(*p_acl);
150 spin_unlock(&inode->i_lock);
151
152 /* 126 /*
 153 * If we have a cached ACL value just return it, no need to 127 * If we have a cached ACL value just return it, no need to
154 * go out to the disk. 128 * go out to the disk.
155 */ 129 */
156 if (acl)
157 return acl;
158 130
159 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); 131 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
160 if (!xfs_acl) 132 if (!xfs_acl)
@@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type)
165 /* 137 /*
166 * If the attribute doesn't exist make sure we have a negative 138 * If the attribute doesn't exist make sure we have a negative
167 * cache entry, for any other error assume it is transient and 139 * cache entry, for any other error assume it is transient and
168 * leave the cache entry as XFS_ACL_NOT_CACHED. 140 * leave the cache entry as ACL_NOT_CACHED.
169 */ 141 */
170 if (error == -ENOATTR) { 142 if (error == -ENOATTR) {
171 acl = NULL; 143 acl = NULL;
@@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type)
179 goto out; 151 goto out;
180 152
181 out_update_cache: 153 out_update_cache:
182 xfs_update_cached_acl(inode, p_acl, acl); 154 set_cached_acl(inode, type, acl);
183 out: 155 out:
184 kfree(xfs_acl); 156 kfree(xfs_acl);
185 return acl; 157 return acl;
@@ -189,7 +161,6 @@ STATIC int
189xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) 161xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
190{ 162{
191 struct xfs_inode *ip = XFS_I(inode); 163 struct xfs_inode *ip = XFS_I(inode);
192 struct posix_acl **p_acl;
193 char *ea_name; 164 char *ea_name;
194 int error; 165 int error;
195 166
@@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
199 switch (type) { 170 switch (type) {
200 case ACL_TYPE_ACCESS: 171 case ACL_TYPE_ACCESS:
201 ea_name = SGI_ACL_FILE; 172 ea_name = SGI_ACL_FILE;
202 p_acl = &ip->i_acl;
203 break; 173 break;
204 case ACL_TYPE_DEFAULT: 174 case ACL_TYPE_DEFAULT:
205 if (!S_ISDIR(inode->i_mode)) 175 if (!S_ISDIR(inode->i_mode))
206 return acl ? -EACCES : 0; 176 return acl ? -EACCES : 0;
207 ea_name = SGI_ACL_DEFAULT; 177 ea_name = SGI_ACL_DEFAULT;
208 p_acl = &ip->i_default_acl;
209 break; 178 break;
210 default: 179 default:
211 return -EINVAL; 180 return -EINVAL;
@@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
242 } 211 }
243 212
244 if (!error) 213 if (!error)
245 xfs_update_cached_acl(inode, p_acl, acl); 214 set_cached_acl(inode, type, acl);
246 return error; 215 return error;
247} 216}
248 217
@@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode)
384 return error; 353 return error;
385} 354}
386 355
387void
388xfs_inode_init_acls(struct xfs_inode *ip)
389{
390 /*
391 * No need for locking, inode is not live yet.
392 */
393 ip->i_acl = XFS_ACL_NOT_CACHED;
394 ip->i_default_acl = XFS_ACL_NOT_CACHED;
395}
396
397void
398xfs_inode_clear_acls(struct xfs_inode *ip)
399{
400 /*
401 * No need for locking here, the inode is not live anymore
402 * and just about to be freed.
403 */
404 if (ip->i_acl != XFS_ACL_NOT_CACHED)
405 posix_acl_release(ip->i_acl);
406 if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
407 posix_acl_release(ip->i_default_acl);
408}
409
410
411/* 356/*
412 * System xattr handlers. 357 * System xattr handlers.
413 * 358 *
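The rewrite above replaces XFS's private i_acl/i_default_acl pointers with the generic VFS ACL cache. A minimal sketch of the caller pattern those helpers enable; read_acl_from_disk() is a hypothetical stand-in for the xattr read that xfs_get_acl() performs:

	#include <linux/fs.h>
	#include <linux/posix_acl.h>

	static struct posix_acl *get_acl_sketch(struct inode *inode, int type)
	{
		struct posix_acl *acl = get_cached_acl(inode, type);

		if (acl != ACL_NOT_CACHED)
			return acl;	/* cache hit; may be NULL (negative entry) */

		acl = read_acl_from_disk(inode, type);	/* hypothetical helper */
		if (!IS_ERR(acl))
			set_cached_acl(inode, type, acl);	/* takes its own reference */
		return acl;
	}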
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 1418b916fc27..0c93c7ef3d18 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(
412 412
413 XFS_STATS_INC(xb_page_retries); 413 XFS_STATS_INC(xb_page_retries);
414 xfsbufd_wakeup(0, gfp_mask); 414 xfsbufd_wakeup(0, gfp_mask);
415 congestion_wait(WRITE, HZ/50); 415 congestion_wait(BLK_RW_ASYNC, HZ/50);
416 goto retry; 416 goto retry;
417 } 417 }
418 418
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4e255441574..0542fd507649 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -41,7 +41,6 @@
41#include "xfs_ioctl.h" 41#include "xfs_ioctl.h"
42 42
43#include <linux/dcache.h> 43#include <linux/dcache.h>
44#include <linux/smp_lock.h>
45 44
46static struct vm_operations_struct xfs_file_vm_ops; 45static struct vm_operations_struct xfs_file_vm_ops;
47 46
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 63dc1f2efad5..947b150df8ed 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
49extern void xfs_inode_init_acls(struct xfs_inode *ip);
50extern void xfs_inode_clear_acls(struct xfs_inode *ip);
51extern int posix_acl_access_exists(struct inode *inode); 49extern int posix_acl_access_exists(struct inode *inode);
52extern int posix_acl_default_exists(struct inode *inode); 50extern int posix_acl_default_exists(struct inode *inode);
53 51
@@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler;
57# define xfs_get_acl(inode, type) NULL 55# define xfs_get_acl(inode, type) NULL
58# define xfs_inherit_acl(inode, default_acl) 0 56# define xfs_inherit_acl(inode, default_acl) 0
59# define xfs_acl_chmod(inode) 0 57# define xfs_acl_chmod(inode) 0
60# define xfs_inode_init_acls(ip)
61# define xfs_inode_clear_acls(ip)
62# define posix_acl_access_exists(inode) 0 58# define posix_acl_access_exists(inode) 0
63# define posix_acl_default_exists(inode) 0 59# define posix_acl_default_exists(inode) 0
64#endif /* CONFIG_XFS_POSIX_ACL */ 60#endif /* CONFIG_XFS_POSIX_ACL */
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 76c540f719e4..5fcec6f020a7 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -83,7 +83,6 @@ xfs_inode_alloc(
83 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); 83 memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
84 ip->i_size = 0; 84 ip->i_size = 0;
85 ip->i_new_size = 0; 85 ip->i_new_size = 0;
86 xfs_inode_init_acls(ip);
87 86
88 /* 87 /*
89 * Initialize inode's trace buffers. 88 * Initialize inode's trace buffers.
@@ -560,7 +559,6 @@ xfs_ireclaim(
560 ASSERT(atomic_read(&ip->i_pincount) == 0); 559 ASSERT(atomic_read(&ip->i_pincount) == 0);
561 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 560 ASSERT(!spin_is_locked(&ip->i_flags_lock));
562 ASSERT(completion_done(&ip->i_flush)); 561 ASSERT(completion_done(&ip->i_flush));
563 xfs_inode_clear_acls(ip);
564 kmem_zone_free(xfs_inode_zone, ip); 562 kmem_zone_free(xfs_inode_zone, ip);
565} 563}
566 564
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 77016702938b..1804f866a71d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -273,11 +273,6 @@ typedef struct xfs_inode {
273 /* VFS inode */ 273 /* VFS inode */
274 struct inode i_vnode; /* embedded VFS inode */ 274 struct inode i_vnode; /* embedded VFS inode */
275 275
276#ifdef CONFIG_XFS_POSIX_ACL
277 struct posix_acl *i_acl;
278 struct posix_acl *i_default_acl;
279#endif
280
281 /* Trace buffers per inode. */ 276 /* Trace buffers per inode. */
282#ifdef XFS_INODE_TRACE 277#ifdef XFS_INODE_TRACE
283 struct ktrace *i_trace; /* general inode trace */ 278 struct ktrace *i_trace; /* general inode trace */